Add support for merged videos to formats

2025-03-14 01:17:16 +08:00 · 2014-05-23 03:45:41 +02:00 · 2014-05-23 03:45:41 +02:00 · 378969fdfd
commit 378969fdfd
parent eec4d8ef96
3 changed files with 124 additions and 67 deletions
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -56,6 +56,7 @@ from .utils import (
    write_string,
    YoutubeDLHandler,
    prepend_extension,
+    select_format,
 )
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
@ -664,46 +665,6 @@ class YoutubeDL(object):
        else:
            raise Exception('Invalid result type: %s' % result_type)

-    def select_format(self, format_spec, available_formats):
-        if format_spec == 'best' or format_spec is None:
-            return available_formats[-1]
-        elif format_spec == 'worst':
-            return available_formats[0]
-        elif format_spec == 'bestaudio':
-            audio_formats = [
-                f for f in available_formats
-                if f.get('vcodec') == 'none']
-            if audio_formats:
-                return audio_formats[-1]
-        elif format_spec == 'worstaudio':
-            audio_formats = [
-                f for f in available_formats
-                if f.get('vcodec') == 'none']
-            if audio_formats:
-                return audio_formats[0]
-        elif format_spec == 'bestvideo':
-            video_formats = [
-                f for f in available_formats
-                if f.get('acodec') == 'none']
-            if video_formats:
-                return video_formats[-1]
-        elif format_spec == 'worstvideo':
-            video_formats = [
-                f for f in available_formats
-                if f.get('acodec') == 'none']
-            if video_formats:
-                return video_formats[0]
-        else:
-            extensions = ['mp4', 'flv', 'webm', '3gp']
-            if format_spec in extensions:
-                filter_f = lambda f: f['ext'] == format_spec
-            else:
-                filter_f = lambda f: f['format_id'] == format_spec
-            matches = list(filter(filter_f, available_formats))
-            if matches:
-                return matches[-1]
-        return None
-
    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'

@ -743,8 +704,21 @@ class YoutubeDL(object):

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
-            if 'url' not in format:
-                raise ExtractorError('Missing "url" key in result (index %d)' % i)
+            if 'url' in format and 'sub_formats' in format:
+                raise ExtractorError('Both "url" and "sub_formats" key in result (index %d)' % i)
+            if not 'url' in format and not 'sub_formats' in format:
+                raise ExtractorError('Neither "url" nor "sub_formats" key in result (index %d)' % i)
+            if 'sub_formats' in format:
+                if not len(format['sub_formats'])==2:
+                    raise ExtractorError('Not two "sub_formats" in result (index %d)' % i)
+                if not format['sub_formats'][0] in formats:
+                    raise ExtractorError('Result\'s first sub format is not in formats (index %d)' % i)
+                if not format['sub_formats'][1] in formats:
+                    raise ExtractorError('Result\'s second sub format is not in formats (index %d)' % i)
+                if not 'url' in format['sub_formats'][0]:
+                    raise ExtractorError('Missing "url" key in result\'s first sub format (index %d)' % i)
+                if not 'url' in format['sub_formats'][1]:
+                    raise ExtractorError('Missing "url" key in result\'s second sub format (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
@ -756,7 +730,10 @@ class YoutubeDL(object):
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
-                format['ext'] = determine_ext(format['url']).lower()
+                if 'url' in format:
+                    format['ext'] = determine_ext(format['url']).lower()
+                else:
+                    format['ext'] = determine_ext(format['sub_formats'][0]['url']).lower()

        format_limit = self.params.get('format_limit', None)
        if format_limit:
@ -788,21 +765,7 @@ class YoutubeDL(object):
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
-                if re.match(r'.+?\+.+?', rf) is not None:
-                    # Two formats have been requested like '137+139'
-                    format_1, format_2 = rf.split('+')
-                    formats_info = (self.select_format(format_1, formats),
-                        self.select_format(format_2, formats))
-                    if all(formats_info):
-                        selected_format = {
-                            'requested_formats': formats_info,
-                            'format': rf,
-                            'ext': formats_info[0]['ext'],
-                        }
-                    else:
-                        selected_format = None
-                else:
-                    selected_format = self.select_format(rf, formats)
+                selected_format = select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
@ -857,7 +820,11 @@ class YoutubeDL(object):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
-            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+            if 'url' in info_dict:
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+            else:
+                for f in info_dict['sub_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
@ -983,7 +950,7 @@ class YoutubeDL(object):
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        return fd.download(name, info)
-                    if info_dict.get('requested_formats') is not None:
+                    if info_dict.get('sub_formats') is not None:
                        downloaded = []
                        success = True
                        merger = FFmpegMergerPP(self)
@ -994,7 +961,7 @@ class YoutubeDL(object):
                                ' The formats won\'t be merged')
                        else:
                            postprocessors = [merger]
-                        for f in info_dict['requested_formats']:
+                        for f in info_dict['sub_formats']:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -20,7 +20,9 @@ from ..utils import (
    RegexNotFoundError,
    sanitize_filename,
    unescapeHTML,
+    select_format,
 )
+from ..postprocessor import FFmpegMergerPP
 _NO_DEFAULT = object()


@ -81,6 +83,8 @@ class InfoExtractor(object):
                                 format, irrespective of the file format.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
+                    * sub_formats List of two formats for combined formats,
+                                  first file is video and second audio.
    url:            Final video URL.
    ext:            Video filename extension.
    format:         The video format, defaults to ext (used for --get-format)
@ -495,26 +499,72 @@ class InfoExtractor(object):
        return self._html_search_meta('twitter:player', html,
            'twitter card player')

+    def _merge_formats(self, format_1, format_2):
+        """Build the format dict for format_1 (video) merged with format_2 (audio)."""
+        def summed(key_1, key_2):
+            # A total is only known when both parts report a value
+            first, second = format_1.get(key_1), format_2.get(key_2)
+            return None if first is None or second is None else first + second
+
+        merged = dict((key, format_1.get(key)) for key in (
+            'ext', 'width', 'height', 'resolution', 'vbr', 'vcodec', 'container'))
+        merged.update((key, format_2.get(key)) for key in ('abr', 'acodec', 'asr'))
+        same_protocol = format_1.get('protocol') == format_2.get('protocol')
+        merged.update({
+            'format_id': format_1['format_id'] + '+' + format_2['format_id'],
+            'sub_formats': [format_1, format_2],
+            'protocol': format_1.get('protocol') if same_protocol else None,
+            'tbr': summed('vbr', 'abr'),
+            'filesize': summed('filesize', 'filesize'),
+        })
+        return merged
+
+    def _add_merged_formats(self, formats, format_ids):
+        """Append a merged format to formats for every 'ID_1+ID_2' entry of
+        format_ids whose two parts can both be selected from formats."""
+        for format_id in format_ids:
+            format_1_id, format_2_id = format_id.split('+')
+            parts = [select_format(part, formats) for part in (format_1_id, format_2_id)]
+            if all(part is not None for part in parts):
+                formats.append(self._merge_formats(*parts))
+
    def _sort_formats(self, formats):
        if not formats:
            raise ExtractorError(u'No video formats found')

        def _formats_key(f):
+            ext = f.get('ext')
            # TODO remove the following workaround
            from ..utils import determine_ext
-            if not f.get('ext') and 'url' in f:
-                f['ext'] = determine_ext(f['url'])
+            if not ext:
+                if 'url' in f:
+                    ext = determine_ext(f['url'])
+                elif 'sub_formats' in f and 'url' in f['sub_formats'][0]:
+                    ext = determine_ext(f['sub_formats'][0]['url'])

            preference = f.get('preference')
            if preference is None:
                proto = f.get('protocol')
                if proto is None:
-                    proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
+                    if 'url' in f:
+                        proto = compat_urllib_parse_urlparse(f['url']).scheme
+                    elif 'sub_formats' in f and 'url' in f['sub_formats'][0] and\
+                            'url' in f['sub_formats'][1]:
+                        proto_1 = compat_urllib_parse_urlparse(f['sub_formats'][0]['url']).scheme
+                        proto_2 = compat_urllib_parse_urlparse(f['sub_formats'][1]['url']).scheme
+                        if proto_1 == proto_2:
+                            proto = proto_1
+                        elif proto_1 in ['http', 'https'] and proto_2 in ['http', 'https']:
+                            proto = 'https'

                preference = 0 if proto in ['http', 'https'] else -0.1
-                if f.get('ext') in ['f4f', 'f4m']:  # Not yet supported
+                if ext in ['f4f', 'f4m']:  # Not yet supported
                    preference -= 0.5

+                merger = FFmpegMergerPP(self._downloader)
+                if 'sub_formats' in f and not merger._get_executable(): # can't merge files
+                    preference -= 1000
+
            if f.get('vcodec') == 'none':  # audio only
                if self._downloader.params.get('prefer_free_formats'):
                    ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
@ -522,7 +572,7 @@ class InfoExtractor(object):
                    ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
                ext_preference = 0
                try:
-                    audio_ext_preference = ORDER.index(f['ext'])
+                    audio_ext_preference = ORDER.index(ext)
                except ValueError:
                    audio_ext_preference = -1
            else:
@ -531,7 +581,7 @@ class InfoExtractor(object):
                else:
                    ORDER = [u'webm', u'flv', u'mp4']
                try:
-                    ext_preference = ORDER.index(f['ext'])
+                    ext_preference = ORDER.index(ext)
                except ValueError:
                    ext_preference = -1
                audio_ext_preference = 0
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1451,3 +1451,43 @@ except AttributeError:
        if ret:
            raise subprocess.CalledProcessError(ret, p.args, output=output)
        return output
+
+def select_format(format_spec, available_formats):
+    """Return the format dict in available_formats matching format_spec.
+
+    The list is treated as ordered worst-first: 'worst*' specs pick the first
+    candidate, every other spec picks the last one.  format_spec is None,
+    'best', 'worst', 'bestaudio', 'worstaudio', 'bestvideo', 'worstvideo',
+    an extension ('mp4', 'flv', 'webm', '3gp') or a literal format_id.
+
+    Returns None if no format matches, including for an empty list.
+    """
+    audio_only = lambda f: f.get('vcodec') == 'none'
+    video_only = lambda f: f.get('acodec') == 'none'
+    # spec -> (candidate filter, index of the pick among the candidates)
+    keywords = {
+        None: (None, -1),
+        'best': (None, -1),
+        'worst': (None, 0),
+        'bestaudio': (audio_only, -1),
+        'worstaudio': (audio_only, 0),
+        'bestvideo': (video_only, -1),
+        'worstvideo': (video_only, 0),
+    }
+    if format_spec in keywords:
+        wanted, index = keywords[format_spec]
+    elif format_spec in ('mp4', 'flv', 'webm', '3gp'):
+        # .get(): a format without 'ext' is a non-match, not a KeyError
+        wanted, index = (lambda f: f.get('ext') == format_spec), -1
+    else:
+        wanted, index = (lambda f: f.get('format_id') == format_spec), -1
+
+    if wanted is None:
+        candidates = available_formats
+    else:
+        candidates = [f for f in available_formats if wanted(f)]
+    # Nothing matched (or no formats at all): report None instead of
+    # raising IndexError
+    if not candidates:
+        return None
+    return candidates[index]