Merge pull request #96 from ytdl-org/master

[pull] master from ytdl-org:master
2025-03-10 23:07:16 +08:00 · 2019-09-24 19:55:42 +00:00 · 2019-09-24 19:55:42 +00:00 · 83dc323a62
commit 83dc323a62
parent d56f28db08 33c1c7d80f
5 changed files with 105 additions and 48 deletions
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -852,8 +852,9 @@ class YoutubeDL(object):
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
-                if self.params.get('forcejson', False):
-                    self.to_stdout(json.dumps(ie_result))
+                self.__forced_printings(
+                    ie_result, self.prepare_filename(ie_result),
+                    incomplete=True)
                return ie_result

        if result_type == 'video':
@ -1694,6 +1695,36 @@ class YoutubeDL(object):
            subs[lang] = f
        return subs

+    def __forced_printings(self, info_dict, filename, incomplete):
+        def print_mandatory(field):
+            if (self.params.get('force%s' % field, False)
+                    and (not incomplete or info_dict.get(field) is not None)):
+                self.to_stdout(info_dict[field])
+
+        def print_optional(field):
+            if (self.params.get('force%s' % field, False)
+                    and info_dict.get(field) is not None):
+                self.to_stdout(info_dict[field])
+
+        print_mandatory('title')
+        print_mandatory('id')
+        if self.params.get('forceurl', False) and not incomplete:
+            if info_dict.get('requested_formats') is not None:
+                for f in info_dict['requested_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
+            else:
+                # For RTMP URLs, also include the playpath
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+        print_optional('thumbnail')
+        print_optional('description')
+        if self.params.get('forcefilename', False) and filename is not None:
+            self.to_stdout(filename)
+        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+            self.to_stdout(formatSeconds(info_dict['duration']))
+        print_mandatory('format')
+        if self.params.get('forcejson', False):
+            self.to_stdout(json.dumps(info_dict))
+
    def process_info(self, info_dict):
        """Process a single resolved IE result."""

@ -1704,9 +1735,8 @@ class YoutubeDL(object):
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

+        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']
-        if len(info_dict['title']) > 200:
-            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']
@ -1721,29 +1751,7 @@ class YoutubeDL(object):
        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
-        if self.params.get('forcetitle', False):
-            self.to_stdout(info_dict['fulltitle'])
-        if self.params.get('forceid', False):
-            self.to_stdout(info_dict['id'])
-        if self.params.get('forceurl', False):
-            if info_dict.get('requested_formats') is not None:
-                for f in info_dict['requested_formats']:
-                    self.to_stdout(f['url'] + f.get('play_path', ''))
-            else:
-                # For RTMP URLs, also include the playpath
-                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
-        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
-            self.to_stdout(info_dict['thumbnail'])
-        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
-            self.to_stdout(info_dict['description'])
-        if self.params.get('forcefilename', False) and filename is not None:
-            self.to_stdout(filename)
-        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
-            self.to_stdout(formatSeconds(info_dict['duration']))
-        if self.params.get('forceformat', False):
-            self.to_stdout(info_dict['format'])
-        if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict))
+        self.__forced_printings(info_dict, filename, incomplete=False)

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@ -3,7 +3,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+    determine_ext,
+    merge_dicts,
+    parse_duration,
+    url_or_none,
+)


 class BYUtvIE(InfoExtractor):
@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
        video_id = mobj.group('id')
        display_id = mobj.group('display_id') or video_id

-        info = self._download_json(
+        video = self._download_json(
            'https://api.byutv.org/api3/catalog/getvideosforcontent',
            display_id, query={
                'contentid': video_id,
@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
                'x-byutv-platformkey': 'xsaaw9c7y5',
            })

-        ep = info.get('ooyalaVOD')
+        ep = video.get('ooyalaVOD')
        if ep:
            return {
                '_type': 'url_transparent',
@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
                'thumbnail': ep.get('imageThumbnail'),
            }

-        ep = info['dvr']
-        title = ep['title']
-        formats = self._extract_m3u8_formats(
-            ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
+        info = {}
+        formats = []
+        for format_id, ep in video.items():
+            if not isinstance(ep, dict):
+                continue
+            video_url = url_or_none(ep.get('videoUrl'))
+            if not video_url:
+                continue
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    video_url, video_id, mpd_id='dash', fatal=False))
+            else:
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                })
+            merge_dicts(info, {
+                'title': ep.get('title'),
+                'description': ep.get('description'),
+                'thumbnail': ep.get('imageThumbnail'),
+                'duration': parse_duration(ep.get('length')),
+            })
        self._sort_formats(formats)
-        return {
+
+        return merge_dicts(info, {
            'id': video_id,
            'display_id': display_id,
-            'title': title,
-            'description': ep.get('description'),
-            'thumbnail': ep.get('imageThumbnail'),
-            'duration': parse_duration(ep.get('length')),
+            'title': display_id,
            'formats': formats,
-        }
+        })
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor):
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
+        # clip
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+        'md5': '256a1be14f48d960a7e61e2532d95ec3',
+        'info_dict': {
+            'id': 'a95j5iza',
+            'ext': 'mp4',
+            'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+            'timestamp': 1565965194,
+            'upload_date': '20190816',
+        },
+    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
    }, {
@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor):
        'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }]
-    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
+    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'

    def _real_extract(self, url):
        lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor):

        is_video = m_type == 'video'
        episode = self._download_json(
-            self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
+            self._API_URL_TEMPLATE % (
+                'v' if is_video else 'r',
+                'clip' if episode_id[:4] == '9999' else 'esd',
+                episode_id, lang, '/all' if is_video else ''),
            episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
        title = episode.get('sub_title_clean') or episode['sub_title']

@ -60,8 +75,8 @@ class NhkVodIE(InfoExtractor):
        if is_video:
            info.update({
                '_type': 'url_transparent',
-                'ie_key': 'Ooyala',
-                'url': 'ooyala:' + episode['vod_id'],
+                'ie_key': 'Piksel',
+                'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
            })
        else:
            audio = episode['audio']
--- a/youtube_dl/extractor/piksel.py
+++ b/youtube_dl/extractor/piksel.py
@ -15,7 +15,7 @@ from ..utils import (


 class PikselIE(InfoExtractor):
-    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
    _TESTS = [
        {
            'url': 'http://player.piksel.com/v/ums2867l',
@ -40,6 +40,11 @@ class PikselIE(InfoExtractor):
                'timestamp': 1486171129,
                'upload_date': '20170204'
            }
+        },
+        {
+            # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+            'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+            'only_matching': True,
        }
    ]

@ -52,8 +57,11 @@ class PikselIE(InfoExtractor):
            return mobj.group('url')

    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'data-de-program-uuid=[\'"]([a-z0-9]+)',
+            webpage, 'program uuid', default=display_id)
        app_token = self._search_regex([
            r'clientAPI\s*:\s*"([^"]+)"',
            r'data-de-api-key\s*=\s*"([^"]+)"'
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -397,6 +397,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
+                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID: