From 57012bbee24f8954417ef2553f651289bcbe1226 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sun, 5 Nov 2017 18:07:35 +0100 Subject: [PATCH 1/7] [zattoo] Add information extractor (closes #14668) --- youtube_dl/extractor/extractors.py | 6 + youtube_dl/extractor/zattoo.py | 261 +++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 youtube_dl/extractor/zattoo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2eed706f9..6d6e1bc93 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1363,5 +1363,11 @@ from .youtube import ( ) from .zapiks import ZapiksIE from .zaq1 import Zaq1IE +from .zattoo import ( + QuicklineIE, + QuicklineLiveIE, + ZattooIE, + ZattooLiveIE, +) from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py new file mode 100644 index 000000000..d1f63a36f --- /dev/null +++ b/youtube_dl/extractor/zattoo.py @@ -0,0 +1,261 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import uuid +import re + +from .common import InfoExtractor +from ..utils import ( + compat_str, + ExtractorError, + sanitized_Request, + urlencode_postdata, + urljoin, +) + + +class ZattooBaseIE(InfoExtractor): + + _NETRC_MACHINE = 'zattoo' + _HOST_URL = 'https://zattoo.com/' + + def _login(self, uuid, session_id, video_id): + (username, password) = self._get_login_info() + if not username or not password: + raise ExtractorError( + 'A valid %s account is needed to access this media.' 
% self._NETRC_MACHINE, + expected=True) + login_form = { + 'login': username, + 'password': password, + 'remember': True, + } + request = sanitized_Request( + urljoin(self._HOST_URL, '/zapi/v2/account/login'), + urlencode_postdata(login_form)) + request.add_header( + 'Referer', urljoin(self._HOST_URL, '/login')) + request.add_header( + 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') + request.add_header( + 'Cookie', self._generate_cookie(uuid, session_id)) + response = self._request_webpage( + request, video_id, 'Logging in as %s' % login_form['login']) + cookie = response.headers.get('Set-Cookie') + pzuid = self._search_regex(r'pzuid\s*=\s*(.+?);', cookie, 'pzuid') + data = self._parse_json( + response.read(), video_id) + + return { + 'ppid': data['session']['ppid'], + 'powerhash': data['session']['power_guide_hash'], + 'pzuid': pzuid, + 'uuid': uuid, + 'session_id': session_id + } + + def _get_app_token_and_version(self, video_id): + host_webpage = self._download_webpage( + self._HOST_URL, video_id) + app_token = self._html_search_regex( + r'[^/]+)/(?P[0-9]+)' + + def _real_extract(self, url): + channel_name, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_video(channel_name, video_id) + + +class QuicklineLiveIE(QuicklineBaseIE): + _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)' + + def _real_extract(self, url): + channel_name = video_id = self._match_id(url) + return self._extract_video(channel_name, video_id, is_live=True) + + +class ZattooIE(ZattooBaseIE): + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+)/(?P[0-9]+)' + + # Since videos are only available for 7 days, we cannot have detailed tests. 
+ _TEST = { + 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', + 'only_matching': True, + } + + def _real_extract(self, url): + channel_name, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_video(channel_name, video_id) + + +class ZattooLiveIE(ZattooBaseIE): + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)' + + _TEST = { + 'url': 'https://zattoo.com/watch/srf1', + 'only_matching': True, + } + + def _real_extract(self, url): + channel_name = video_id = self._match_id(url) + return self._extract_video(channel_name, video_id, is_live=True) From 9c26cddf994211febf1bbc804a5a05a33360c992 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sun, 5 Nov 2017 19:00:03 +0100 Subject: [PATCH 2/7] [zattoo] Don't allow conflicting _VALID_URL expressions --- youtube_dl/extractor/zattoo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index d1f63a36f..8cbef4e44 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -227,7 +227,7 @@ class QuicklineIE(QuicklineBaseIE): class QuicklineLiveIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)$' def _real_extract(self, url): channel_name = video_id = self._match_id(url) @@ -249,7 +249,7 @@ class ZattooIE(ZattooBaseIE): class ZattooLiveIE(ZattooBaseIE): - _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)$' _TEST = { 'url': 'https://zattoo.com/watch/srf1', From 0c0843166d4244240d0b65b2ecc265b1872edfae Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Fri, 10 Nov 2017 20:37:09 +0100 Subject: [PATCH 3/7] [zattoo] Add requested code review changes. 
--- youtube_dl/extractor/zattoo.py | 131 +++++++++++++++------------------ 1 file changed, 61 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 8cbef4e44..a44e7ac22 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -import uuid +from uuid import uuid4 import re from .common import InfoExtractor @@ -10,16 +10,17 @@ from ..utils import ( ExtractorError, sanitized_Request, urlencode_postdata, - urljoin, ) class ZattooBaseIE(InfoExtractor): _NETRC_MACHINE = 'zattoo' - _HOST_URL = 'https://zattoo.com/' + _HOST_URL = 'https://zattoo.com' - def _login(self, uuid, session_id, video_id): + _login_info = {} + + def _login(self, uuid, session_id): (username, password) = self._get_login_info() if not username or not password: raise ExtractorError( @@ -31,20 +32,19 @@ class ZattooBaseIE(InfoExtractor): 'remember': True, } request = sanitized_Request( - urljoin(self._HOST_URL, '/zapi/v2/account/login'), + '%s/zapi/v2/account/login' % self._HOST_URL, urlencode_postdata(login_form)) request.add_header( - 'Referer', urljoin(self._HOST_URL, '/login')) + 'Referer', '%s/login' % self._HOST_URL) request.add_header( 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') request.add_header( 'Cookie', self._generate_cookie(uuid, session_id)) response = self._request_webpage( - request, video_id, 'Logging in as %s' % login_form['login']) + request, None, 'Logging in as %s' % login_form['login']) cookie = response.headers.get('Set-Cookie') pzuid = self._search_regex(r'pzuid\s*=\s*(.+?);', cookie, 'pzuid') - data = self._parse_json( - response.read(), video_id) + data = self._parse_json(response.read(), None) return { 'ppid': data['session']['ppid'], @@ -54,16 +54,16 @@ class ZattooBaseIE(InfoExtractor): 'session_id': session_id } - def _get_app_token_and_version(self, video_id): + def 
_get_app_token_and_version(self): host_webpage = self._download_webpage( - self._HOST_URL, video_id) + self._HOST_URL, None, 'Downloading %s' % self._HOST_URL) app_token = self._html_search_regex( r' Date: Sun, 12 Nov 2017 17:25:25 +0100 Subject: [PATCH 4/7] [zattoo] Remove sensitive data from logging message --- youtube_dl/extractor/zattoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index a44e7ac22..0a8b76bc8 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -41,7 +41,7 @@ class ZattooBaseIE(InfoExtractor): request.add_header( 'Cookie', self._generate_cookie(uuid, session_id)) response = self._request_webpage( - request, None, 'Logging in as %s' % login_form['login']) + request, None, 'Logging in') cookie = response.headers.get('Set-Cookie') pzuid = self._search_regex(r'pzuid\s*=\s*(.+?);', cookie, 'pzuid') data = self._parse_json(response.read(), None) From 3f66fa4f99dd61b662be75466cf88eb91ff9a146 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Sun, 12 Nov 2017 18:33:31 +0100 Subject: [PATCH 5/7] [zattoo] Improve extraction of channel id --- youtube_dl/extractor/zattoo.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 0a8b76bc8..12041fac2 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -87,22 +87,22 @@ class ZattooBaseIE(InfoExtractor): return 'uuid=%s; beaker.session.id=%s' % (uuid, session_id) return 'uuid=%s; beaker.session.id=%s; pzuid=%s' % (uuid, session_id, pzuid) - def _get_channels_display_cid(self, login_info, video_id): - data = self._download_json( + def _extract_cid(self, video_id, channel_name): + channel_groups = self._download_json( '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL, - login_info['powerhash']), + self._login_info['powerhash']), video_id, 'Downloading 
available channel list', - query={'details': False}) - display_cid = {} - for elem in data['channel_groups']: - for channel in elem['channels']: - display_cid[channel['display_alias']] = channel['cid'] - return display_cid - - def _extract_cid(self, login_info, video_id, channel_name): - display_cid = self._get_channels_display_cid(login_info, video_id) - return display_cid[channel_name] + query={'details': False})['channel_groups'] + channel_list = [] + for chgrp in channel_groups: + channel_list.extend(chgrp['channels']) + try: + return next(chan['cid'] for chan in channel_list if + chan['display_alias'] == channel_name or + chan['cid'] == channel_name) + except StopIteration: + raise ExtractorError('Could not extract channel id') def _extract_cid_and_video_info(self, video_id): data = self._download_json( @@ -190,7 +190,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_video(self, channel_name, video_id, is_live=False): if is_live: - cid = self._extract_cid(self._login_info, video_id, channel_name) + cid = self._extract_cid(video_id, channel_name) info_dict = { 'id': channel_name, 'title': self._live_title(channel_name), From ce8053b6464a3c02d1fd1c7a9d32869bb99ecb0a Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Fri, 29 Dec 2017 04:42:39 +0100 Subject: [PATCH 6/7] [zattoo] Add support for zattoo recordings. 
--- youtube_dl/extractor/zattoo.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 12041fac2..bd4f49fb7 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -137,12 +137,15 @@ class ZattooBaseIE(InfoExtractor): hls['width'] = dash.get('width') hls['height'] = dash.get('height') - def _extract_formats(self, cid, video_id, is_live=False): + def _extract_formats(self, cid, video_id, record_id=None, is_live=False): postdata = { 'stream_type': 'dash', 'https_watch_urls': True, } - url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) + if record_id: + url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id) + else: + url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) if is_live: postdata.update({'timeshift': 10800}) @@ -188,7 +191,7 @@ class ZattooBaseIE(InfoExtractor): session_id = self._say_hello(uuid, app_token, app_version) self._login_info = self._login(uuid, session_id) - def _extract_video(self, channel_name, video_id, is_live=False): + def _extract_video(self, channel_name, video_id, record_id=None, is_live=False): if is_live: cid = self._extract_cid(video_id, channel_name) info_dict = { @@ -199,7 +202,7 @@ class ZattooBaseIE(InfoExtractor): else: cid, info_dict = self._extract_cid_and_video_info(video_id) formats = self._extract_formats( - cid, video_id, is_live=is_live) + cid, video_id, record_id=record_id, is_live=is_live) info_dict['formats'] = formats return info_dict @@ -226,17 +229,21 @@ class QuicklineLiveIE(QuicklineBaseIE): class ZattooIE(ZattooBaseIE): - _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' - # Since videos are only available for 7 days, we cannot have detailed tests. 
- _TEST = { + # Since regular videos are only available for 7 days and recorded videos + # are only available for a specific user, we cannot have detailed tests. + _TESTS = [{ 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', 'only_matching': True, - } + }, { + 'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000', + 'only_matching': True, + }] def _real_extract(self, url): - channel_name, video_id = re.match(self._VALID_URL, url).groups() - return self._extract_video(channel_name, video_id) + channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups() + return self._extract_video(channel_name, video_id, record_id) class ZattooLiveIE(ZattooBaseIE): From 3383f85f432bb4cb90ae624bd6d7326c760ae814 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Sat, 28 Apr 2018 15:50:16 +0200 Subject: [PATCH 7/7] [zattoo] Implement requested code review changes. --- youtube_dl/extractor/zattoo.py | 83 ++++++++++++---------------------- 1 file changed, 29 insertions(+), 54 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index bd4f49fb7..928f22566 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -18,7 +18,7 @@ class ZattooBaseIE(InfoExtractor): _NETRC_MACHINE = 'zattoo' _HOST_URL = 'https://zattoo.com' - _login_info = {} + _power_guide_hash = None def _login(self, uuid, session_id): (username, password) = self._get_login_info() @@ -39,20 +39,11 @@ class ZattooBaseIE(InfoExtractor): request.add_header( 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') request.add_header( - 'Cookie', self._generate_cookie(uuid, session_id)) + 'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id)) response = self._request_webpage( request, None, 'Logging in') - cookie = response.headers.get('Set-Cookie') - pzuid = self._search_regex(r'pzuid\s*=\s*(.+?);', cookie, 
'pzuid') data = self._parse_json(response.read(), None) - - return { - 'ppid': data['session']['ppid'], - 'powerhash': data['session']['power_guide_hash'], - 'pzuid': pzuid, - 'uuid': uuid, - 'session_id': session_id - } + return data['session']['power_guide_hash'] def _get_app_token_and_version(self): host_webpage = self._download_webpage( @@ -60,7 +51,7 @@ class ZattooBaseIE(InfoExtractor): app_token = self._html_search_regex( r'