Merge branch 'zdf'

2025-02-11 16:52:51 +08:00 · 2018-12-24 14:01:15 +01:00 · 2018-12-24 14:01:15 +01:00 · 2ea012f4a2
commit 2ea012f4a2
parent ce952e858f f6e1ef674b
1 changed files with 231 additions and 85 deletions
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@ -11,7 +11,6 @@ from ..utils import (
    NO_DEFAULT,
    orderedSet,
    parse_codecs,
-    qualities,
    try_get,
    unified_timestamp,
    update_url_query,
@ -20,7 +19,137 @@ from ..utils import (
 )


-class ZDFBaseIE(InfoExtractor):
+class ZDFIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
+        'info_dict': {
+            'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
+            'ext': 'mp4',
+            'title': 'Die Magie der Farben (2/2)',
+            'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+            'duration': 2615,
+            'timestamp': 1465021200,
+            'upload_date': '20160604',
+        },
+    }, {
+        'url': 'https://www.zdf.de/dokumentation/terra-x/mit-antischwerkraft-zu-den-sternen-100.html',
+        'md5': 'dede0475add7c2d1fa067358a636e80e',
+        'info_dict': {
+            'id': 'mit-antischwerkraft-zu-den-sternen-100',
+            'ext': 'mp4',
+            'title': 'Mit Antischwerkraft zu den Sternen?',
+            'description': 'md5:44c0214d0bd2f41a5200af6b38e15186',
+            'duration': 311,
+            'timestamp': 1538294400,
+            'upload_date': '20180930',
+        }
+    }, {
+        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+        'only_matching': True,
+    }]
+
+    _MP4_URL_REGEX = r'^(?P<base_url>((https?:)?//)?(.*))_(?P<bitrate>[0-9]+)k_p(?P<p>[0-9]{1,})v(?P<v>[0-9]{1,})\.(?P<ext>.{2,3})$'
+
+    _H264_MAIN_L31 = 'avc1.4d001f'
+    _H264_HIGH_L4 = 'avc1.640028'
+
+    # https://github.com/mediathekview/MServer/blob/master/src/main/java/mServer/crawler/sender/MediathekZdf.java
+    _BITRATES = {
+        11: {
+            35: [{
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+        },
+        12: {
+            14: [{
+                'tbr': 2256,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            15: [{
+                'tbr': 3256,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+            35: [{
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            36: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+        },
+        13: {
+            14: [{
+                'tbr': 2296,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            15: [{
+                'tbr': 3296,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+            35: [{
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            36: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+        },
+        14: {
+            14: [{
+                'tbr': 2296,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            35: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }, {
+                'tbr': 2328,
+                'width': 1024,
+                'height': 576,
+                'vcodec': _H264_MAIN_L31,
+            }],
+            36: [{
+                'tbr': 3328,
+                'width': 1280,
+                'height': 720,
+                'vcodec': _H264_HIGH_L4,
+            }],
+        },
+    }
+
    def _call_api(self, url, player, referrer, video_id, item):
        return self._download_json(
            url, video_id, 'Downloading JSON %s' % item,
@ -37,32 +166,25 @@ class ZDFBaseIE(InfoExtractor):
                group='json'),
            video_id)

+    def _get_max_bitrate(self, url):
+        """Return the bitrate embedded in an MP4 URL.
+
+        The URL is searched with ``_MP4_URL_REGEX`` and its ``bitrate``
+        group is passed through ``int_or_none``. Returns None when the
+        URL does not match the pattern.
+        """
+        match = re.search(self._MP4_URL_REGEX, url)
+        return int_or_none(match.group('bitrate')) if match else None

-class ZDFIE(ZDFBaseIE):
-    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
-    _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
-
-    _TESTS = [{
-        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
-        'info_dict': {
-            'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
-            'ext': 'mp4',
-            'title': 'Die Magie der Farben (2/2)',
-            'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
-            'duration': 2615,
-            'timestamp': 1465021200,
-            'upload_date': '20160604',
-        },
-    }, {
-        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
-        'only_matching': True,
-    }]
+    @staticmethod
+    def _guess_resolution(bitrate):
+        """Guess frame dimensions from a bitrate.
+
+        Returns a dict with 'width' and 'height' taken from the first
+        bracket whose exclusive upper bound is greater than ``bitrate``;
+        anything at or above 2000 maps to 1280x720. The unit is presumably
+        kbit/s (the value is parsed from the ``_<n>k_`` part of the media
+        URL) -- TODO confirm.
+        """
+        # (exclusive upper bitrate bound, width, height), ascending
+        brackets = (
+            (400, 320, 176),
+            (500, 480, 272),
+            (1000, 640, 360),
+            (1500, 852, 480),
+            (2000, 1024, 576),
+        )
+        for upper_bound, width, height in brackets:
+            if bitrate < upper_bound:
+                return {'width': width, 'height': height}
+        return {'width': 1280, 'height': 720}

    @staticmethod
    def _extract_subtitles(src):
@ -76,6 +198,65 @@ class ZDFIE(ZDFBaseIE):
                })
        return subtitles

+    @staticmethod
+    def _set_language(formats, lang):
+        """Set 'language' to ``lang`` on every format dict in ``formats``.
+
+        Does nothing when ``lang`` is falsy, so existing values are kept.
+        The dicts are modified in place; nothing is returned.
+        """
+        if lang:
+            for fmt in formats:
+                fmt['language'] = lang
+
+    @staticmethod
+    def _find_single_language(formats):
+        """Return the one language shared by ``formats``, or None.
+
+        The first truthy 'language' value encountered becomes the
+        reference. Formats whose language is None are tolerated only
+        before that reference is found; any other format whose language
+        differs from the reference (including a missing one later on)
+        makes the result None.
+        """
+        common = None
+        for fmt in formats:
+            current = fmt.get('language')
+            if current and not common:
+                common = current
+            elif current != common:
+                return None
+        return common
+
+    def _find_additional_formats(self, formats, video_id, lang=None):
+        """Probe for MP4 variants missing from ``formats`` and append them.
+
+        URLs already in ``formats`` that match ``_MP4_URL_REGEX`` are
+        grouped by base URL and ``v`` value, remembering which ``p`` values
+        exist. For every ``p`` listed in ``_BITRATES`` for that ``v`` but
+        not yet seen, a candidate URL is built and, if ``_is_valid_url``
+        accepts it, a new format tagged with ``lang`` is appended to
+        ``formats`` in place. Nothing is returned.
+        """
+        # base_url -> {v: [p, ...]} for the variants we already have
+        seen = {}
+        for fmt in formats:
+            fmt_url = fmt.get('url')
+            if not fmt_url:
+                continue
+            match = re.match(self._MP4_URL_REGEX, fmt_url)
+            if not match:
+                continue
+            p = int_or_none(match.group('p'))
+            v = int_or_none(match.group('v'))
+            if not p or not v:
+                continue
+            known_ps = seen.setdefault(
+                match.group('base_url'), {}).setdefault(v, [])
+            if p not in known_ps:
+                known_ps.append(p)
+
+        for base_url, by_v in seen.items():
+            for v, known_ps in by_v.items():
+                for p, variants in self._BITRATES.get(v, {}).items():
+                    if p in known_ps:
+                        continue
+                    for variant in variants:
+                        # Copy so the class-level table is never mutated
+                        candidate = dict(variant)
+                        candidate_url = '%s_%sk_p%sv%s.mp4' % (
+                            base_url, candidate['tbr'], p, v)
+                        if not self._is_valid_url(candidate_url, video_id):
+                            continue
+                        candidate.update({
+                            'url': candidate_url,
+                            'format_id': 'mp4-%s' % candidate['tbr'],
+                            'ext': 'mp4',
+                            'language': lang,
+                            'acodec': 'mp4a.40.2',
+                        })
+                        if 'nrodlzdf' in candidate_url:
+                            candidate['format_id'] += '-alt'
+                            candidate['source_preference'] = -2
+                        formats.append(candidate)
+
    def _extract_format(self, video_id, formats, format_urls, meta):
        format_url = url_or_none(meta.get('url'))
        if not format_url:
@ -86,26 +267,33 @@ class ZDFIE(ZDFBaseIE):
        mime_type = meta.get('mimeType')
        ext = determine_ext(format_url)
        if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
+            hls_formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', m3u8_id='hls',
-                entry_protocol='m3u8_native', fatal=False))
+                entry_protocol='m3u8_native', fatal=False)
+            self._set_language(hls_formats, meta.get('language'))
+            formats.extend(hls_formats)
        elif mime_type == 'application/f4m+xml' or ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+            hds_formats = self._extract_f4m_formats(
+                update_url_query(format_url, {'hdcore': '3.7.0'}),
+                video_id, f4m_id='hds', fatal=False)
+            self._set_language(hds_formats, meta.get('language'))
+            formats.extend(hds_formats)
        else:
            f = parse_codecs(meta.get('mimeCodec'))
-            format_id = ['http']
-            for p in (meta.get('type'), meta.get('quality')):
-                if p and isinstance(p, compat_str):
-                    format_id.append(p)
+            bitrate = self._get_max_bitrate(format_url)
+            format_note = meta.get('quality')
            f.update({
                'url': format_url,
-                'format_id': '-'.join(format_id),
-                'format_note': meta.get('quality'),
+                # '%' binds tighter than 'or': without the parentheses the
+                # fallbacks are dead code and a URL without a bitrate gets
+                # the id 'mp4-None'.
+                'format_id': 'mp4-%s' % (bitrate or format_note or '0'),
+                'ext': ext,
+                'tbr': bitrate,
                'language': meta.get('language'),
-                'quality': qualities(self._QUALITIES)(meta.get('quality')),
-                'preference': -10,
            })
+            if not f.get('width') and not f.get('height') and bitrate:
+                f.update(self._guess_resolution(bitrate))
+            if 'nrodlzdf' in format_url:
+                f['format_id'] += '-alt'
+                f['source_preference'] = -2
            formats.append(f)

    def _extract_entry(self, url, player, content, video_id):
@ -143,9 +331,12 @@ class ZDFIE(ZDFBaseIE):
                                'url': track.get('uri'),
                                'type': f.get('type'),
                                'mimeType': f.get('mimeType'),
+                                'mimeCodec': quality.get('mimeCodec'),
                                'quality': quality.get('quality'),
                                'language': track.get('language'),
                            })
+        single_lang = self._find_single_language(formats)
+        self._find_additional_formats(formats, video_id, single_lang)
        self._sort_formats(formats)

        thumbnails = []
@ -235,7 +426,7 @@ class ZDFIE(ZDFBaseIE):
        return self._extract_mobile(video_id)


-class ZDFChannelIE(ZDFBaseIE):
+class ZDFChannelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
@ -272,48 +463,3 @@ class ZDFChannelIE(ZDFBaseIE):

        return self.playlist_result(
            entries, channel_id, self._og_search_title(webpage, fatal=False))
-
-        r"""
-        player = self._extract_player(webpage, channel_id)
-
-        channel_id = self._search_regex(
-            r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
-            'channel id', group='id')
-
-        channel = self._call_api(
-            'https://api.zdf.de/content/documents/%s.json' % channel_id,
-            player, url, channel_id)
-
-        items = []
-        for module in channel['module']:
-            for teaser in try_get(module, lambda x: x['teaser'], list) or []:
-                t = try_get(
-                    teaser, lambda x: x['http://zdf.de/rels/target'], dict)
-                if not t:
-                    continue
-                items.extend(try_get(
-                    t,
-                    lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                    list) or [])
-            items.extend(try_get(
-                module,
-                lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                list) or [])
-
-        entries = []
-        entry_urls = set()
-        for item in items:
-            t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
-            if not t:
-                continue
-            sharing_url = t.get('http://zdf.de/rels/sharing-url')
-            if not sharing_url or not isinstance(sharing_url, compat_str):
-                continue
-            if sharing_url in entry_urls:
-                continue
-            entry_urls.add(sharing_url)
-            entries.append(self.url_result(
-                sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
-        return self.playlist_result(entries, channel_id, channel.get('title'))
-        """