From 5283140a89f55564cb353070c260e6f642efa047 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Thu, 20 Apr 2017 15:17:49 +0200 Subject: [PATCH 01/11] [vier] Support authentication Complete episodes now require authentication. --- youtube_dl/extractor/vier.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 5ef7635b6..e09d23c40 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -5,12 +5,16 @@ import re import itertools from .common import InfoExtractor +from ..utils import ( + urlencode_postdata, +) class VierIE(InfoExtractor): IE_NAME = 'vier' IE_DESC = 'vier.be and vijf.be' _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' + _NETRC_MACHINE = 'vier' _TESTS = [{ 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 'info_dict': { @@ -45,6 +49,24 @@ class VierIE(InfoExtractor): 'only_matching': True, }] + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + self._request_webpage( + 'http://www.vier.be/user/login', + None, note='Logging in', errnote='Could not log in', + data=urlencode_postdata({ + 'form_id': 'user_login', + 'name': username, + 'pass': password, + }), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) embed_id = mobj.group('embed_id') From 02c411a9e6a471dbef84a5bc220e5744193cdffb Mon Sep 17 00:00:00 2001 From: mrBliss Date: Thu, 20 Apr 2017 15:50:45 +0200 Subject: [PATCH 02/11] [vier] Improve authentication logic * Authenticate only when required * Support vijf.be in addition to vier.be --- youtube_dl/extractor/vier.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 
e09d23c40..6a8d526b7 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -49,16 +49,13 @@ class VierIE(InfoExtractor): 'only_matching': True, }] - def _real_initialize(self): - self._login() - - def _login(self): + def _login(self, site): username, password = self._get_login_info() - if username is None: - return + if username is None or password is None: + self.raise_login_required() self._request_webpage( - 'http://www.vier.be/user/login', + 'http://www.%s.be/user/login' % site, None, note='Logging in', errnote='Could not log in', data=urlencode_postdata({ 'form_id': 'user_login', @@ -75,6 +72,10 @@ class VierIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + if re.search(r'id="user-login"', webpage): + self._login(site) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], webpage, 'video id') From 4bfab778784ec828404d3e776b8db26b6a4ca016 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Wed, 3 May 2017 09:44:34 +0200 Subject: [PATCH 03/11] [vier] Add test for video requiring authentication --- youtube_dl/extractor/vier.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 6a8d526b7..46ad9d9ec 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -41,6 +41,19 @@ class VierIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839', + 'info_dict': { + 'id': '2674839', + 'display_id': 'jani-gaat-naar-tokio-aflevering-4', + 'ext': 'mp4', + 'title': 'Jani gaat naar Tokio - Aflevering 4', + 'description': 'Bekijk hier de volledige vierde aflevering van het 2de seizoen van Jani gaat...', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 'only_matching': 
True, From 5bd43a87d58909962a30b2ffd62e43832bd8edb6 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Tue, 9 May 2017 15:19:45 +0200 Subject: [PATCH 04/11] [vier] Address feedback --- youtube_dl/extractor/vier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 46ad9d9ec..cde2787da 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -69,7 +69,7 @@ class VierIE(InfoExtractor): self._request_webpage( 'http://www.%s.be/user/login' % site, - None, note='Logging in', errnote='Could not log in', + None, note='Logging in', errnote='Unable to log in', data=urlencode_postdata({ 'form_id': 'user_login', 'name': username, @@ -85,7 +85,7 @@ class VierIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - if re.search(r'id="user-login"', webpage): + if r'id="user-login"' in webpage: self._login(site) webpage = self._download_webpage(url, display_id) From 624e220f11008414b30f4cfa65c01217501f160c Mon Sep 17 00:00:00 2001 From: mrBliss Date: Tue, 9 May 2017 15:32:57 +0200 Subject: [PATCH 05/11] [vier] Without login, fall back to extraction without metadata --- youtube_dl/extractor/vier.py | 39 ++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index cde2787da..8c52202aa 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -28,13 +28,14 @@ class VierIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + # 'skip': 'Requires account credentials', }, { 'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614', 'info_dict': { 'id': '2561614', 'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas', 'ext': 'mp4', - 'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s', + 
'title': 'EXTRA: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s', 'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.', }, 'params': { @@ -53,7 +54,21 @@ class VierIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'skip': 'Requires account credentials', + }, { + 'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839', + 'info_dict': { + 'id': '2674839', + 'display_id': 'jani-gaat-naar-tokio-aflevering-4', + 'ext': 'mp4', + 'title': 'jani-gaat-naar-tokio-aflevering-4', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Log in to extract metadata'], }, { 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 'only_matching': True, @@ -62,10 +77,14 @@ class VierIE(InfoExtractor): 'only_matching': True, }] + def _real_initialize(self): + self._logged_in = False + def _login(self, site): username, password = self._get_login_info() if username is None or password is None: - self.raise_login_required() + self.logged_in = False + return self._request_webpage( 'http://www.%s.be/user/login' % site, @@ -76,22 +95,26 @@ class VierIE(InfoExtractor): 'pass': password, }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + self.logged_in = True def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) embed_id = mobj.group('embed_id') display_id = mobj.group('display_id') or embed_id + video_id = mobj.group('id') or embed_id site = mobj.group('site') + if not self._logged_in: + self._login(site) + webpage = self._download_webpage(url, display_id) if r'id="user-login"' in webpage: - self._login(site) - webpage = self._download_webpage(url, display_id) + self.report_warning('Log in to extract metadata', video_id=video_id) + webpage = self._download_webpage( + 'http://www.%s.be/video/v3/embed/%s' % (site, video_id), + 
display_id) - video_id = self._search_regex( - [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], - webpage, 'video id') application = self._search_regex( [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], webpage, 'application', default=site + '_vod') From 9202579636174af5e5e4f3a67e8e5478108f74ec Mon Sep 17 00:00:00 2001 From: mrBliss Date: Tue, 9 May 2017 20:18:43 +0200 Subject: [PATCH 06/11] [vier] Fix typos and remove useless assignment --- youtube_dl/extractor/vier.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 8c52202aa..456441998 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -83,7 +83,6 @@ class VierIE(InfoExtractor): def _login(self, site): username, password = self._get_login_info() if username is None or password is None: - self.logged_in = False return self._request_webpage( @@ -95,7 +94,7 @@ class VierIE(InfoExtractor): 'pass': password, }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - self.logged_in = True + self._logged_in = True def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 43aab80c9280b8aadf6e012a8c13e884d6f9dd32 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Tue, 9 May 2017 20:19:08 +0200 Subject: [PATCH 07/11] [vier] Check whether the login succeeded --- youtube_dl/extractor/vier.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 456441998..8d71a1e47 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -6,6 +6,7 @@ import itertools from .common import InfoExtractor from ..utils import ( + ExtractorError, urlencode_postdata, ) @@ -85,7 +86,7 @@ class VierIE(InfoExtractor): if username is None or password is None: return - self._request_webpage( + login_page = self._download_webpage( 'http://www.%s.be/user/login' % site, None, note='Logging in', 
errnote='Unable to log in', data=urlencode_postdata({ @@ -94,6 +95,11 @@ class VierIE(InfoExtractor): 'pass': password, }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + if 'Gebruikersnaam of wachtwoord is onbekend' in login_page: + raise ExtractorError( + 'Unable to login: incorrect credentials', expected=True) + self._logged_in = True def _real_extract(self, url): From 16918299ddd32eaeb01102147f7a26323d1f41a3 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Wed, 10 May 2017 08:57:24 +0200 Subject: [PATCH 08/11] [vier] Just warn about failed login Also, report the precise error we get. --- youtube_dl/extractor/vier.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 8d71a1e47..071bdd394 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -96,11 +96,13 @@ class VierIE(InfoExtractor): }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - if 'Gebruikersnaam of wachtwoord is onbekend' in login_page: - raise ExtractorError( - 'Unable to login: incorrect credentials', expected=True) - - self._logged_in = True + login_error = self._html_search_regex( + r'(?s)<div class="messages error">\s*<div>\s*<h2.+?</h2>(.+?)<', + login_page, 'login error', default=None, fatal=False) + if login_error: + self.report_warning('Unable to log in: %s' % login_error) + else: + self._logged_in = True def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From dd9327c782d65eaf9f3822bc3bdfabfb28785ed0 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Thu, 11 May 2017 11:02:36 +0200 Subject: [PATCH 09/11] [vier] Restore video_id extraction from the webpage --- youtube_dl/extractor/vier.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 071bdd394..26a160d91 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -122,6 +122,9 @@ class VierIE(InfoExtractor): 'http://www.%s.be/video/v3/embed/%s' % (site, video_id), display_id) + video_id = self._search_regex( + [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], + webpage, 'video id') application = self._search_regex( [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], webpage, 'application', default=site + '_vod') From 458a3c91ae11d04cd73f585192c6db8a1d87214b Mon Sep 17 00:00:00 2001 From: mrBliss Date: Sun, 14 May 2017 15:59:48 +0200 Subject: [PATCH 10/11] [vier] Address issues --- youtube_dl/extractor/vier.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 26a160d91..af42e8e70 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -98,7 +98,7 @@ class VierIE(InfoExtractor): login_error = self._html_search_regex( r'(?s)<div class="messages error">\s*<div>\s*<h2.+?</h2>(.+?)<', - login_page, 'login error', default=None, fatal=False) + login_page, 'login error', default=None) if login_error: self.report_warning('Unable to log in: %s' % login_error) else: @@ -117,7 +117,8 @@ class VierIE(InfoExtractor): webpage = self._download_webpage(url, display_id) if r'id="user-login"' in webpage: - self.report_warning('Log in to extract metadata', video_id=video_id) + self.report_warning( + 'Log in to extract metadata', video_id=display_id) webpage = self._download_webpage( 'http://www.%s.be/video/v3/embed/%s' % (site, video_id), display_id) From f8dfd8371a8dd9bffbdf41cbb69637440cce2d75 Mon Sep 17 00:00:00 2001 From: mrBliss Date: Mon, 15 May 2017 10:05:50 +0200 Subject: [PATCH 11/11] [vier] Make video_id extraction from page non-fatal --- youtube_dl/extractor/vier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index af42e8e70..b6efaa284 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -125,7 +125,7 @@ class VierIE(InfoExtractor): video_id = self._search_regex( [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], - webpage, 'video id') + webpage, 'video id', default=video_id) application = self._search_regex( [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], webpage, 'application', default=site + '_vod')