Merge remote-tracking branch 'dstftw/multifeed-videos' (closes #6360)

2024-12-22 10:52:56 +08:00 · 2015-07-29 21:55:20 +02:00 · 2015-07-29 21:55:20 +02:00 · 34866b4836
commit 34866b4836
parent be530dfea2 5e1eddb939
1 changed file with 106 additions and 27 deletions
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -33,9 +33,11 @@ from ..utils import (
    int_or_none,
    orderedSet,
    parse_duration,
    smuggle_url,
    str_to_int,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    uppercase_escape,
    ISO3166Utils,
 )
@@ -562,6 +564,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'format': '135',  # bestvideo
            }
        },
        {
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
            'info_dict': {
                'id': 'jqWvoWXjCVs',
                'title': 'teamPGP: Rocket League Noob Stream',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
            },
            'playlist': [{
                'info_dict': {
                    'id': 'jqWvoWXjCVs',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                },
            }, {
                'info_dict': {
                    'id': '6h8e8xoXJzg',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                },
            }, {
                'info_dict': {
                    'id': 'PUOgX5z9xZw',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                },
            }, {
                'info_dict': {
                    'id': 'teuwxikvS5k',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (zim)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                },
            }],
            'params': {
                'skip_download': True,
            },
        }
    ]
    def __init__(self, *args, **kwargs):
@@ -893,6 +948,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        return formats
    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        proto = (
            'http' if self._downloader.params.get('prefer_insecure', False)
            else 'https')
@@ -1009,6 +1066,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    '"token" parameter not in video info for unknown reason',
                    video_id=video_id)
        # title
        if 'title' in video_info:
            video_title = video_info['title'][0]
        else:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'
        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = ''
        if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
            if not self._downloader.params.get('noplaylist'):
                entries = []
                feed_ids = []
                multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
                for feed in multifeed_metadata_list.split(','):
                    feed_data = compat_parse_qs(feed)
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                    })
                    feed_ids.append(feed_data['id'][0])
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(entries, video_id, video_title, video_description)
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
        else:
@@ -1034,13 +1140,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        else:
            self._downloader.report_warning('unable to extract uploader nickname')
        # title
        if 'title' in video_info:
            video_title = video_info['title'][0]
        else:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'
        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -1080,26 +1179,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = ''
        def _extract_count(count_name):
            return str_to_int(self._search_regex(
                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'