From c26e983e2fffab36651b90d0bedc234ea8442897 Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Wed, 24 Jan 2018 18:54:39 +0100 Subject: [PATCH 1/5] Updated extractor for Vier to support Kijk Online. Updated extractor for Vier to support Kijk Online. Change includes a general extractor for Cognito IDP by AWS. boto3 and warrant packages are required. --- youtube_dl/extractor/cognito.py | 24 +++++++ youtube_dl/extractor/extractors.py | 4 +- youtube_dl/extractor/vier.py | 109 ++++++++++++++++++++++++++--- 3 files changed, 126 insertions(+), 11 deletions(-) create mode 100755 youtube_dl/extractor/cognito.py mode change 100644 => 100755 youtube_dl/extractor/vier.py diff --git a/youtube_dl/extractor/cognito.py b/youtube_dl/extractor/cognito.py new file mode 100755 index 000000000..441e77e57 --- /dev/null +++ b/youtube_dl/extractor/cognito.py @@ -0,0 +1,24 @@ +from .common import InfoExtractor + +import boto3 +from warrant import Cognito +from warrant.aws_srp import AWSSRP + +class CognitoBaseIE(InfoExtractor): + + def _cognito_login(self, auth_data): + region = auth_data['PoolId'].split('_')[0] + client = boto3.client( + 'cognito-idp', + region_name = region, + aws_access_key_id = 'SomeNonsenseValue', + aws_secret_access_key = 'YetAnotherNonsenseValue' + ) + aws = AWSSRP( + username = auth_data['Username'], + password = auth_data['Password'], + pool_id = auth_data['PoolId'], + client_id = auth_data['ClientId'], + client=client + ) + return aws.authenticate_user() diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 57e74ba62..d4e3d182f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1212,7 +1212,7 @@ from .vidme import ( VidmeUserLikesIE, ) from .vidzi import VidziIE -from .vier import VierIE, VierVideosIE +from .vier import VierIE, VierVideosIE, VierVijfKijkOnlineIE from .viewlift import ( ViewLiftIE, ViewLiftEmbedIE, @@ -1300,7 +1300,7 @@ from .webofstories import ( 
WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py old mode 100644 new mode 100755 index dbd5ba9ba..1fe5cde5b --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -11,6 +11,105 @@ from ..utils import ( unified_strdate, ) +from .cognito import CognitoBaseIE + +class VierVijfKijkOnlineIE(CognitoBaseIE): + IE_NAME = 'viervijfkijkonline' + IE_DESC = 'vier.be and vijf.be - Kijk Online' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/video/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)' + _TESTS = [{ + 'url': 'https://www.vier.be/video/hotel-romantiek/2017/hotel-romantiek-aflevering-1', + 'info_dict': { + 'id': 'ebcd3c39-10a2-4730-b137-b0e7aaed247c', + 'ext': 'mp4', + 'title': 'Hotel Römantiek - Seizoen 1 - Aflevering 1', + 'series': 'Hotel Römantiek', + 'season_number' : 1, + 'episode_number': 1, + }, + 'skip': 'This video is only available for registered users' + + }, { + 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', + 'only_matching': True, + }, { + 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6', + 'only_matching': True, + }] + _POOL_ID = 'eu-west-1_dViSsKM5Y' + _CLIENT_ID = '6s1h851s8uplco5h6mqh1jac8m' + + + def _real_initialize(self): + self._logged_in = False + self.id_token = '' + + def _login(self): + + username, password = self._get_login_info() + if username is None or password is None: + self.raise_login_required() + + auth_data = { + 'PoolId' : self._POOL_ID, + 'ClientId' : self._CLIENT_ID, + 'Username': username, + 'Password': password, + } + + tokens = self._cognito_login(auth_data) + self.id_token = tokens.get('AuthenticationResult').get('IdToken') + self._logged_in = True + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site = mobj.group('site') + + if not self._logged_in: + self._login() + + webpage = 
self._download_webpage(url, None) + + title = self._html_search_regex( + r'', + webpage, 'title') + + title_split = title.split('-') + series = title.split('-')[0].strip() + if len(title_split) == 3: + season = title.split('-')[1].split('Seizoen ')[1].strip() + episode = title.split('-')[2].split('Aflevering ')[1].strip() + else: + season = None + episode = title.split('-')[1].split('Aflevering ')[1].strip() + + video_id = self._html_search_regex( + r'
]+>', + webpage, 'video_id') + + api_url = 'https://api.viervijfzes.be/content/%s' % (video_id) + api_headers = { + 'authorization' : self.id_token, + } + api = self._download_json( + api_url, + None, note='Peforming API Call', errnote='API Call Failed', + headers = api_headers, + ) + + formats = [] + formats.extend(self._extract_m3u8_formats( + api.get('video').get('S'), video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='HLS', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'series': series, + 'season_number' : int_or_none(season), + 'episode_number': int_or_none(episode), + 'formats': formats, + } class VierIE(InfoExtractor): IE_NAME = 'vier' @@ -20,8 +119,7 @@ class VierIE(InfoExtractor): (?:www\.)?(?Pvier|vijf)\.be/ (?: (?: - [^/]+/videos| - video(?:/[^/]+)* + [^/]+/videos )/ (?P[^/]+)(?:/(?P\d+))?| (?: @@ -100,12 +198,6 @@ class VierIE(InfoExtractor): }, { 'url': 'https://www.vijf.be/embed/video/public/4093', 'only_matching': True, - }, { - 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', - 'only_matching': True, - }, { - 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6', - 'only_matching': True, }] def _real_initialize(self): @@ -203,7 +295,6 @@ class VierIE(InfoExtractor): 'formats': formats, } - class VierVideosIE(InfoExtractor): IE_NAME = 'vier:videos' _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?P[^/]+)/videos(?:\?.*\bpage=(?P\d+)|$)' From efc2dbd3a5299512b7cb5511803bd035fc6bb74d Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Wed, 24 Jan 2018 21:10:50 +0100 Subject: [PATCH 2/5] soft import warrant and boto3 Hard import broke build, now using soft import. 
--- youtube_dl/extractor/cognito.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cognito.py b/youtube_dl/extractor/cognito.py index 441e77e57..70cca2c7c 100755 --- a/youtube_dl/extractor/cognito.py +++ b/youtube_dl/extractor/cognito.py @@ -1,12 +1,18 @@ from .common import InfoExtractor - -import boto3 -from warrant import Cognito -from warrant.aws_srp import AWSSRP +from ..utils import ExtractorError class CognitoBaseIE(InfoExtractor): + def _cognito_login(self, auth_data): + + try: + import boto3 + from warrant import Cognito + from warrant.aws_srp import AWSSRP + except ImportError: + raise ExtractorError('%s depends on boto3 and warrant.' % self.IE_NAME) + region = auth_data['PoolId'].split('_')[0] client = boto3.client( 'cognito-idp', From 14309551921d475442997b15f5be65614784764a Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Wed, 24 Jan 2018 21:26:03 +0100 Subject: [PATCH 3/5] cognito and vier coding guidelines update --- youtube_dl/extractor/cognito.py | 17 ++++++++--------- youtube_dl/extractor/vier.py | 19 +++++++++---------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/cognito.py b/youtube_dl/extractor/cognito.py index 70cca2c7c..ab0075659 100755 --- a/youtube_dl/extractor/cognito.py +++ b/youtube_dl/extractor/cognito.py @@ -1,14 +1,13 @@ from .common import InfoExtractor from ..utils import ExtractorError -class CognitoBaseIE(InfoExtractor): +class CognitoBaseIE(InfoExtractor): def _cognito_login(self, auth_data): try: import boto3 - from warrant import Cognito from warrant.aws_srp import AWSSRP except ImportError: raise ExtractorError('%s depends on boto3 and warrant.' 
% self.IE_NAME) @@ -16,15 +15,15 @@ class CognitoBaseIE(InfoExtractor): region = auth_data['PoolId'].split('_')[0] client = boto3.client( 'cognito-idp', - region_name = region, - aws_access_key_id = 'SomeNonsenseValue', - aws_secret_access_key = 'YetAnotherNonsenseValue' + region_name=region, + aws_access_key_id='SomeNonsenseValue', + aws_secret_access_key='YetAnotherNonsenseValue' ) aws = AWSSRP( - username = auth_data['Username'], - password = auth_data['Password'], - pool_id = auth_data['PoolId'], - client_id = auth_data['ClientId'], + username=auth_data['Username'], + password=auth_data['Password'], + pool_id=auth_data['PoolId'], + client_id=auth_data['ClientId'], client=client ) return aws.authenticate_user() diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 1fe5cde5b..75bf2f1cb 100755 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -13,6 +13,7 @@ from ..utils import ( from .cognito import CognitoBaseIE + class VierVijfKijkOnlineIE(CognitoBaseIE): IE_NAME = 'viervijfkijkonline' IE_DESC = 'vier.be and vijf.be - Kijk Online' @@ -24,7 +25,7 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): 'ext': 'mp4', 'title': 'Hotel Römantiek - Seizoen 1 - Aflevering 1', 'series': 'Hotel Römantiek', - 'season_number' : 1, + 'season_number': 1, 'episode_number': 1, }, 'skip': 'This video is only available for registered users' @@ -39,7 +40,6 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): _POOL_ID = 'eu-west-1_dViSsKM5Y' _CLIENT_ID = '6s1h851s8uplco5h6mqh1jac8m' - def _real_initialize(self): self._logged_in = False self.id_token = '' @@ -51,8 +51,8 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): self.raise_login_required() auth_data = { - 'PoolId' : self._POOL_ID, - 'ClientId' : self._CLIENT_ID, + 'PoolId': self._POOL_ID, + 'ClientId': self._CLIENT_ID, 'Username': username, 'Password': password, } @@ -62,9 +62,6 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): self._logged_in = True def _real_extract(self, url): - mobj = 
re.match(self._VALID_URL, url) - site = mobj.group('site') - if not self._logged_in: self._login() @@ -89,12 +86,12 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): api_url = 'https://api.viervijfzes.be/content/%s' % (video_id) api_headers = { - 'authorization' : self.id_token, + 'authorization': self.id_token, } api = self._download_json( api_url, None, note='Peforming API Call', errnote='API Call Failed', - headers = api_headers, + headers=api_headers, ) formats = [] @@ -106,11 +103,12 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): 'id': video_id, 'title': title, 'series': series, - 'season_number' : int_or_none(season), + 'season_number': int_or_none(season), 'episode_number': int_or_none(episode), 'formats': formats, } + class VierIE(InfoExtractor): IE_NAME = 'vier' IE_DESC = 'vier.be and vijf.be' @@ -295,6 +293,7 @@ class VierIE(InfoExtractor): 'formats': formats, } + class VierVideosIE(InfoExtractor): IE_NAME = 'vier:videos' _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?P[^/]+)/videos(?:\?.*\bpage=(?P\d+)|$)' From 7e645e906386cf9e8e6dbadb7cd197bf985fb391 Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Sat, 27 Jan 2018 14:14:57 +0100 Subject: [PATCH 4/5] final optimization of kijk online regex --- youtube_dl/extractor/vier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 75bf2f1cb..7f90e1e45 100755 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -17,7 +17,8 @@ from .cognito import CognitoBaseIE class VierVijfKijkOnlineIE(CognitoBaseIE): IE_NAME = 'viervijfkijkonline' IE_DESC = 'vier.be and vijf.be - Kijk Online' - _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/video/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/video/(?P(?!v3)[^/]+)/(?P[^/]+)(/(?P[^/]+)|)' + _NETRC_MACHINE = 'vier' _TESTS = [{ 'url': 'https://www.vier.be/video/hotel-romantiek/2017/hotel-romantiek-aflevering-1', 'info_dict': 
{ @@ -29,7 +30,6 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): 'episode_number': 1, }, 'skip': 'This video is only available for registered users' - }, { 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', 'only_matching': True, From d9c4aaa5a083e033d4ebe830946ec0e840d6c317 Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Sat, 27 Jan 2018 14:26:35 +0100 Subject: [PATCH 5/5] added unicode_literals in generic cognito extractor --- youtube_dl/extractor/cognito.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/cognito.py b/youtube_dl/extractor/cognito.py index ab0075659..f89698b7d 100755 --- a/youtube_dl/extractor/cognito.py +++ b/youtube_dl/extractor/cognito.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + from .common import InfoExtractor from ..utils import ExtractorError