From 2b5a935ea45b387f342f92c34546b79c89cf394c Mon Sep 17 00:00:00 2001 From: mrBliss Date: Fri, 20 Oct 2017 14:41:07 +0200 Subject: [PATCH 1/4] [vrtnu] New extractor for VrtNU based on Canvas --- youtube_dl/extractor/canvas.py | 149 ++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index 6899f8431..8fc386412 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -1,16 +1,22 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( - float_or_none, + ExtractorError, strip_or_none, + float_or_none, + int_or_none, + parse_iso8601, + urlencode_postdata, ) class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet)/assets/(?Pm[dz]-ast-[^/?#&]+)' + _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrtvideo)/assets/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'md5': '90139b746a0a9bd7bb631283f6e2a64e', @@ -166,3 +172,142 @@ class CanvasEenIE(InfoExtractor): 'title': title, 'description': self._og_search_description(webpage), } + + +class VrtNUIE(InfoExtractor): + IE_DESC = 'VrtNU.be' + _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?Pvrtnu)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', + 'info_dict': { + 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', + 'ext': 'flv', + 'title': 'De zwarte weduwe', + 'description': 'md5:d90c21dced7db869a85db89a623998d4', + 'duration': 1457.04, + 'thumbnail': r're:^https?://.*\.jpg$', + 'season': '1', + 'season_number': 1, + 'episode_number': 1, + }, + # 'skip': 'This video is only available for registered users' + }] 
+    _NETRC_MACHINE = 'vrtnu'
+    # Sent as 'APIKey' to the Gigya accounts.login endpoint in _login().
+    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+    _CONTEXT_ID = 'R3595707040'
+
+    def _real_initialize(self):
+        """Run the login sequence.
+
+        NOTE(review): presumably called by InfoExtractor before extraction
+        starts -- confirm against the base class.
+        """
+        self._login()
+
+    def _login(self):
+        """Log in at Gigya, then request the vrt.be token cookies.
+
+        The credentials come from self._get_login_info(); when no username
+        is available self.raise_login_required() is called.
+
+        Raises ExtractorError when the Gigya response carries an error
+        message, or when the token endpoint keeps answering HTTP 401 for
+        every attempt. Any other failure of the token request is re-raised
+        unchanged.
+        """
+        username, password = self._get_login_info()
+        if username is None:
+            self.raise_login_required()
+
+        auth_data = {
+            'APIKey': self._APIKEY,
+            'targetEnv': 'jssdk',
+            'loginID': username,
+            'password': password,
+            'authMode': 'cookie',
+        }
+
+        auth_info = self._download_json(
+            'https://accounts.eu1.gigya.com/accounts.login', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(auth_data))
+
+        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+        if error_message:
+            raise ExtractorError(
+                'Unable to login: %s' % error_message, expected=True)
+
+        # The payload is identical for every attempt, so build it only once.
+        token_payload = json.dumps({
+            'uid': auth_info['UID'],
+            'uidsig': auth_info['UIDSignature'],
+            'ts': auth_info['signatureTimestamp'],
+            'email': auth_info['profile']['email'],
+        }).encode('utf-8')
+
+        # Sometimes authentication fails for no good reason, retry
+        max_attempts = 3
+        for attempt in range(1, max_attempts + 1):
+            try:
+                # When requesting a token, no actual token is returned, but the
+                # necessary cookies are set.
+                self._request_webpage(
+                    'https://token.vrt.be',
+                    None, note='Requesting a token', errnote='Could not get a token',
+                    headers={
+                        'Content-Type': 'application/json',
+                        'Referer': 'https://www.vrt.be/vrtnu/',
+                    },
+                    data=token_payload)
+            except ExtractorError as e:
+                # Only an HTTP 401 is treated as a transient failure.
+                if not (isinstance(e.cause, compat_HTTPError) and e.cause.code == 401):
+                    raise
+                self.report_warning('Authentication failed')
+                if attempt == max_attempts:
+                    # Out of retries: fail loudly instead of returning as if
+                    # the login had succeeded without the token cookies.
+                    raise ExtractorError(
+                        'Unable to login: token request was rejected %d times' % max_attempts,
+                        expected=True)
+                self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+            else:
+                break
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'(?ms)

(.+?)

', + webpage, 'title').strip() + + description = self._html_search_regex( + r'(?ms)
(.+?)
', + webpage, 'description', default=None) + + season = self._html_search_regex( + [r'''(?xms)\s* + seizoen\ (.+?)\s* + ''', + r'