diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index afa3f6c47..81345a130 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -11,7 +11,6 @@ from ..utils import (
     NO_DEFAULT,
     orderedSet,
     parse_codecs,
-    qualities,
     try_get,
     unified_timestamp,
     update_url_query,
@@ -20,7 +19,137 @@ from ..utils import (
 )
 
 
-class ZDFBaseIE(InfoExtractor):
+class ZDFIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
+        'info_dict': {
+            'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
+            'ext': 'mp4',
+            'title': 'Die Magie der Farben (2/2)',
+            'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+            'duration': 2615,
+            'timestamp': 1465021200,
+            'upload_date': '20160604',
+        },
+    }, {
+        'url': 'https://www.zdf.de/dokumentation/terra-x/mit-antischwerkraft-zu-den-sternen-100.html',
+        'md5': 'dede0475add7c2d1fa067358a636e80e',
+        'info_dict': {
+            'id': 'mit-antischwerkraft-zu-den-sternen-100',
+            'ext': 'mp4',
+            'title': 'Mit Antischwerkraft zu den Sternen?',
+            'description': 'md5:44c0214d0bd2f41a5200af6b38e15186',
+            'duration': 311,
+            'timestamp': 1538294400,
+            'upload_date': '20180930',
+        }
+    }, {
+        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+        'only_matching': True,
+    }]
+
+    _MP4_URL_REGEX = r'^(?P<base_url>((https?:)?//)?(.*))_(?P<bitrate>[0-9]+)k_p(?P<p>[0-9]{1,})v(?P<v>[0-9]{1,})\.(?P<ext>.{2,3})$'
+
+    _H264_MAIN_L31 = 'avc1.4d001f'
+    _H264_HIGH_L4 = 'avc1.640028'
+
+    # https://github.com/mediathekview/MServer/blob/master/src/main/java/mServer/crawler/sender/MediathekZdf.java
+    _BITRATES = {
+        11: {
+            35: [{
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+        },
+        12: {
+            14: [{
+                'tbr': 2256,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            15: [{
+                'tbr': 3256,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+            35: [{
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            36: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+        },
+        13: {
+            14: [{
+                'tbr': 2296,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            15: [{
+                'tbr': 3296,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+            35: [{
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            36: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+        },
+        14: {
+            14: [{
+                'tbr': 2296,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            35: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }, {
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            36: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+        },
+    }
+
     def _call_api(self, url, player, referrer, video_id, item):
         return self._download_json(
             url, video_id, 'Downloading JSON %s' % item,
@@ -37,32 +166,27 @@ class ZDFBaseIE(InfoExtractor):
                 group='json'),
             video_id)
 
+    def _get_max_bitrate(self, url):
+        """Return the bitrate parsed from an MP4 URL, or None if absent."""
+        m = re.search(self._MP4_URL_REGEX, url)
+        if m:
+            return int_or_none(m.group('bitrate'))
+        return None
 
-class ZDFIE(ZDFBaseIE):
-    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
-    _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
-
-    _TESTS = [{
-        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
-        'info_dict': {
-            'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
-            'ext': 'mp4',
-            'title': 'Die Magie der Farben (2/2)',
-            'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
-            'duration': 2615,
-            'timestamp': 1465021200,
-            'upload_date': '20160604',
-        },
-    }, {
-        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
-        'only_matching': True,
-    }]
+    @staticmethod
+    def _guess_resolution(bitrate):
+        """Map a bitrate to a width/height dict using fixed thresholds."""
+        if bitrate < 400:
+            return {'width': 320, 'height': 176}
+        if 400 <= bitrate < 500:
+            return {'width': 480, 'height': 272}
+        if 500 <= bitrate < 1000:
+            return {'width': 640, 'height': 360}
+        if 1000 <= bitrate < 1500:
+            return {'width': 852, 'height': 480}
+        if 1500 <= bitrate < 2000:
+            return {'width': 1024, 'height': 576}
+        return {'width': 1280, 'height': 720}
 
     @staticmethod
     def _extract_subtitles(src):
@@ -76,6 +200,68 @@ class ZDFIE(ZDFBaseIE):
                 })
         return subtitles
 
+    @staticmethod
+    def _set_language(formats, lang):
+        """Set 'language' on every format dict; no-op if lang is falsy."""
+        if not lang:
+            return
+        for format in formats:
+            format['language'] = lang
+
+    @staticmethod
+    def _find_single_language(formats):
+        """Return the first language found, or None once a format disagrees."""
+        first_lang = None
+        for format in formats:
+            lang = format.get('language')
+            if lang and not first_lang:
+                first_lang = lang
+                continue
+            if lang != first_lang:
+                return
+        return first_lang
+
+    def _find_additional_formats(self, formats, video_id, lang=None):
+        """Probe _BITRATES variants of present MP4 URLs; append valid ones."""
+        present = {}
+        for format in formats:
+            url = format.get('url')
+            if not url:
+                continue
+            m = re.match(self._MP4_URL_REGEX, url)
+            if not m:
+                continue
+            base_url = m.group('base_url')
+            p = int_or_none(m.group('p'))
+            v = int_or_none(m.group('v'))
+            if not p or not v:
+                continue
+            if base_url not in present:
+                present[base_url] = {v: [p]}
+            elif v not in present[base_url]:
+                present[base_url][v] = [p]
+            elif p not in present[base_url][v]:
+                present[base_url][v].append(p)
+
+        for base_url, vs in present.items():
+            for v, ps in vs.items():
+                for p, variants in (x for x in self._BITRATES.get(v, {}).items() if x[0] not in ps):
+                    for f in variants:
+                        f = dict(f)
+                        url = '%s_%sk_p%sv%s.mp4' % (base_url, f['tbr'], p, v)
+                        if self._is_valid_url(url, video_id):
+                            f.update({
+                                'url': url,
+                                'format_id': 'mp4-%s' % f['tbr'],
+                                'ext': 'mp4',
+                                'language': lang,
+                                'acodec': 'mp4a.40.2',
+                            })
+                            if 'nrodlzdf' in url:
+                                f['format_id'] += '-alt'
+                                f['source_preference'] = -2
+                            formats.append(f)
+
     def _extract_format(self, video_id, formats, format_urls, meta):
         format_url = url_or_none(meta.get('url'))
         if not format_url:
@@ -86,26 +272,35 @@ class ZDFIE(ZDFBaseIE):
         mime_type = meta.get('mimeType')
         ext = determine_ext(format_url)
         if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
+            hls_formats = self._extract_m3u8_formats(
                 format_url, video_id, 'mp4', m3u8_id='hls',
-                entry_protocol='m3u8_native', fatal=False))
+                entry_protocol='m3u8_native', fatal=False)
+            self._set_language(hls_formats, meta.get('language'))
+            formats.extend(hls_formats)
         elif mime_type == 'application/f4m+xml' or ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+            hds_formats = self._extract_f4m_formats(
+                update_url_query(format_url, {'hdcore': '3.7.0'}),
+                video_id, f4m_id='hds', fatal=False)
+            self._set_language(hds_formats, meta.get('language'))
+            formats.extend(hds_formats)
         else:
             f = parse_codecs(meta.get('mimeCodec'))
-            format_id = ['http']
-            for p in (meta.get('type'), meta.get('quality')):
-                if p and isinstance(p, compat_str):
-                    format_id.append(p)
+            bitrate = self._get_max_bitrate(format_url)
+            format_note = meta.get('quality')
             f.update({
                 'url': format_url,
-                'format_id': '-'.join(format_id),
-                'format_note': meta.get('quality'),
+                # '%' binds tighter than 'or'; parenthesise so that the
+                # quality/'0' fallbacks apply when no bitrate was parsed.
+                'format_id': 'mp4-%s' % (bitrate or format_note or '0'),
+                'ext': ext,
+                'tbr': bitrate,
                 'language': meta.get('language'),
-                'quality': qualities(self._QUALITIES)(meta.get('quality')),
-                'preference': -10,
             })
+            if not f.get('width') and not f.get('height') and bitrate:
+                f.update(self._guess_resolution(bitrate))
+            if 'nrodlzdf' in format_url:
+                f['format_id'] += '-alt'
+                f['source_preference'] = -2
             formats.append(f)
 
     def _extract_entry(self, url, player, content, video_id):
@@ -143,9 +338,12 @@ class ZDFIE(ZDFBaseIE):
                             'url': track.get('uri'),
                             'type': f.get('type'),
                             'mimeType': f.get('mimeType'),
+                            'mimeCodec': quality.get('mimeCodec'),
                             'quality': quality.get('quality'),
                             'language': track.get('language'),
                         })
+        single_lang = self._find_single_language(formats)
+        self._find_additional_formats(formats, video_id, single_lang)
         self._sort_formats(formats)
 
         thumbnails = []
@@ -235,7 +433,7 @@ class ZDFIE(ZDFBaseIE):
         return self._extract_mobile(video_id)
 
 
-class ZDFChannelIE(ZDFBaseIE):
+class ZDFChannelIE(InfoExtractor):
     _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
@@ -272,48 +470,3 @@ class ZDFChannelIE(ZDFBaseIE):
 
         return self.playlist_result(
             entries, channel_id, self._og_search_title(webpage, fatal=False))
-
-        r"""
-        player = self._extract_player(webpage, channel_id)
-
-        channel_id = self._search_regex(
-            r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
-            'channel id', group='id')
-
-        channel = self._call_api(
-            'https://api.zdf.de/content/documents/%s.json' % channel_id,
-            player, url, channel_id)
-
-        items = []
-        for module in channel['module']:
-            for teaser in try_get(module, lambda x: x['teaser'], list) or []:
-                t = try_get(
-                    teaser, lambda x: x['http://zdf.de/rels/target'], dict)
-                if not t:
-                    continue
-                items.extend(try_get(
-                    t,
-                    lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                    list) or [])
-            items.extend(try_get(
-                module,
-                lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                list) or [])
-
-        entries = []
-        entry_urls = set()
-        for item in items:
-            t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
-            if not t:
-                continue
-            sharing_url = t.get('http://zdf.de/rels/sharing-url')
-            if not sharing_url or not isinstance(sharing_url, compat_str):
-                continue
-            if sharing_url in entry_urls:
-                continue
-            entry_urls.add(sharing_url)
-            entries.append(self.url_result(
-                sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
-        return self.playlist_result(entries, channel_id, channel.get('title'))
-        """