1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-14 01:17:16 +08:00

Add support for merged videos to formats

This commit is contained in:
rzhxeo 2014-05-23 03:45:41 +02:00
parent eec4d8ef96
commit 378969fdfd
3 changed files with 124 additions and 67 deletions

View File

@ -56,6 +56,7 @@ from .utils import (
write_string,
YoutubeDLHandler,
prepend_extension,
select_format,
)
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
@ -664,46 +665,6 @@ class YoutubeDL(object):
else:
raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Return the entry of available_formats that matches format_spec.

    available_formats is a list of format dicts, treated as ordered from
    worst to best: 'best' (or None) yields the last entry and 'worst' the
    first one.

    Supported values of format_spec:
      * None / 'best' / 'worst'
      * 'bestaudio' / 'worstaudio': entries whose 'vcodec' is 'none'
      * 'bestvideo' / 'worstvideo': entries whose 'acodec' is 'none'
      * 'mp4', 'flv', 'webm', '3gp': last entry with that 'ext'
      * anything else: last entry with that 'format_id'

    Returns None when nothing matches, including when available_formats
    is empty or a candidate lacks the key being compared.
    """
    # Without this guard 'best'/'worst' would index into an empty list.
    if not available_formats:
        return None

    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # spec -> (key that must be 'none', index of the wanted candidate)
    codec_selectors = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_selectors:
        codec_key, position = codec_selectors[format_spec]
        candidates = [
            f for f in available_formats
            if f.get(codec_key) == 'none']
        return candidates[position] if candidates else None

    extensions = ('mp4', 'flv', 'webm', '3gp')
    lookup_key = 'ext' if format_spec in extensions else 'format_id'
    # .get() so that an entry without the key is simply not a match.
    matches = [
        f for f in available_formats
        if f.get(lookup_key) == format_spec]
    return matches[-1] if matches else None
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@ -743,8 +704,21 @@ class YoutubeDL(object):
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
if 'url' in format and 'sub_formats' in format:
raise ExtractorError('Both "url" and "sub_formats" key in result (index %d)' % i)
if not 'url' in format and not 'sub_formats' in format:
raise ExtractorError('Neither "url" nor "sub_formats" key in result (index %d)' % i)
if 'sub_formats' in format:
if not len(format['sub_formats'])==2:
raise ExtractorError('Not two "sub_formats" in result (index %d)' % i)
if not format['sub_formats'][0] in formats:
raise ExtractorError('Result\'s first sub format is not in formats (index %d)' % i)
if not format['sub_formats'][1] in formats:
raise ExtractorError('Result\'s second sub format is not in formats (index %d)' % i)
if not 'url' in format['sub_formats'][0]:
raise ExtractorError('Missing "url" key in result\'s first sub format (index %d)' % i)
if not 'url' in format['sub_formats'][1]:
raise ExtractorError('Missing "url" key in result\'s second sub format (index %d)' % i)
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
@ -756,7 +730,10 @@ class YoutubeDL(object):
)
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url']).lower()
if 'url' in format:
format['ext'] = determine_ext(format['url']).lower()
else:
format['ext'] = determine_ext(format['sub_formats'][0]['url']).lower()
format_limit = self.params.get('format_limit', None)
if format_limit:
@ -788,21 +765,7 @@ class YoutubeDL(object):
# the first that is available, starting from left
req_formats = req_format.split('/')
for rf in req_formats:
if re.match(r'.+?\+.+?', rf) is not None:
# Two formats have been requested like '137+139'
format_1, format_2 = rf.split('+')
formats_info = (self.select_format(format_1, formats),
self.select_format(format_2, formats))
if all(formats_info):
selected_format = {
'requested_formats': formats_info,
'format': rf,
'ext': formats_info[0]['ext'],
}
else:
selected_format = None
else:
selected_format = self.select_format(rf, formats)
selected_format = select_format(rf, formats)
if selected_format is not None:
formats_to_download = [selected_format]
break
@ -857,7 +820,11 @@ class YoutubeDL(object):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if 'url' in info_dict:
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
else:
for f in info_dict['sub_formats']:
self.to_stdout(f['url'] + f.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
@ -983,7 +950,7 @@ class YoutubeDL(object):
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
return fd.download(name, info)
if info_dict.get('requested_formats') is not None:
if info_dict.get('sub_formats') is not None:
downloaded = []
success = True
merger = FFmpegMergerPP(self)
@ -994,7 +961,7 @@ class YoutubeDL(object):
' The formats won\'t be merged')
else:
postprocessors = [merger]
for f in info_dict['requested_formats']:
for f in info_dict['sub_formats']:
new_info = dict(info_dict)
new_info.update(f)
fname = self.prepare_filename(new_info)

View File

@ -20,7 +20,9 @@ from ..utils import (
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
select_format,
)
from ..postprocessor import FFmpegMergerPP
_NO_DEFAULT = object()
@ -81,6 +83,8 @@ class InfoExtractor(object):
format, irrespective of the file format.
-1 for default (order by other properties),
-2 or smaller for less than default.
* sub_formats List of exactly two formats that make up a merged format:
the first is the video format and the second is the audio format.
url: Final video URL.
ext: Video filename extension.
format: The video format, defaults to ext (used for --get-format)
@ -495,26 +499,72 @@ class InfoExtractor(object):
return self._html_search_meta('twitter:player', html,
'twitter card player')
def _merge_formats(self, format_1, format_2):
format = {
'ext': format_1.get('ext'),
'format_id': format_1['format_id']+'+'+format_2['format_id'],
'width': format_1.get('width'),
'height': format_1.get('height'),
'resolution': format_1.get('resolution'),
'abr': format_2.get('abr'),
'acodec': format_2.get('acodec'),
'asr': format_2.get('asr'),
'vbr': format_1.get('vbr'),
'vcodec': format_1.get('vcodec'),
'container': format_1.get('container'),
'sub_formats': [format_1, format_2],
'protocol': format_1.get('protocol') if format_1.get('protocol') == format_2.get('protocol') else None,
'tbr': format_1.get('vbr')+format_2.get('abr') if format_1.get('vbr') is not None and format_2.get('abr') is not None else None,
'filesize': format_1.get('filesize')+format_2.get('filesize') if format_1.get('filesize') is not None and format_2.get('filesize') is not None else None,
}
return format
def _add_merged_formats(self, formats, format_ids):
    """Append a merged entry to formats for every spec in format_ids.

    Each item of format_ids is a string of the form '<first>+<second>'.
    Both halves are resolved with select_format() against formats; if
    either half resolves to nothing the pair is skipped. Otherwise the
    result of self._merge_formats() is appended.

    formats is modified in place, so later specs are resolved against a
    list that already contains the entries merged earlier in this call.

    Raises ValueError if a spec does not contain exactly one '+'.
    """
    for merged_id in format_ids:
        # Unpacking into two names enforces exactly one '+' per spec.
        format_1_id, format_2_id = merged_id.split('+')
        format_1 = select_format(format_1_id, formats)
        format_2 = select_format(format_2_id, formats)
        # Identity test: None is a singleton (PEP 8).
        if format_1 is None or format_2 is None:
            continue
        formats.append(self._merge_formats(format_1, format_2))
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')
def _formats_key(f):
ext = f.get('ext')
# TODO remove the following workaround
from ..utils import determine_ext
if not f.get('ext') and 'url' in f:
f['ext'] = determine_ext(f['url'])
if not ext:
if 'url' in f:
ext = determine_ext(f['url'])
elif 'sub_formats' in f and 'url' in f['sub_formats'][0]:
ext = determine_ext(f['sub_formats'][0]['url'])
preference = f.get('preference')
if preference is None:
proto = f.get('protocol')
if proto is None:
proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
if 'url' in f:
proto = compat_urllib_parse_urlparse(f['url']).scheme
elif 'sub_formats' in f and 'url' in f['sub_formats'][0] and\
'url' in f['sub_formats'][1]:
proto_1 = compat_urllib_parse_urlparse(f['sub_formats'][0]['url']).scheme
proto_2 = compat_urllib_parse_urlparse(f['sub_formats'][1]['url']).scheme
if proto_1 == proto_2:
proto = proto_1
elif proto_1 in ['http', 'https'] and proto_2 in ['http', 'https']:
proto = 'https'
preference = 0 if proto in ['http', 'https'] else -0.1
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
if ext in ['f4f', 'f4m']: # Not yet supported
preference -= 0.5
merger = FFmpegMergerPP(self._downloader)
if 'sub_formats' in f and not merger._get_executable(): # can't merge files
preference -= 1000
if f.get('vcodec') == 'none': # audio only
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
@ -522,7 +572,7 @@ class InfoExtractor(object):
ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
ext_preference = 0
try:
audio_ext_preference = ORDER.index(f['ext'])
audio_ext_preference = ORDER.index(ext)
except ValueError:
audio_ext_preference = -1
else:
@ -531,7 +581,7 @@ class InfoExtractor(object):
else:
ORDER = [u'webm', u'flv', u'mp4']
try:
ext_preference = ORDER.index(f['ext'])
ext_preference = ORDER.index(ext)
except ValueError:
ext_preference = -1
audio_ext_preference = 0

View File

@ -1451,3 +1451,43 @@ except AttributeError:
if ret:
raise subprocess.CalledProcessError(ret, p.args, output=output)
return output
def select_format(format_spec, available_formats):
    """Return the entry of available_formats that matches format_spec.

    available_formats is a list of format dicts, treated as ordered from
    worst to best: 'best' (or None) yields the last entry and 'worst' the
    first one.

    Supported values of format_spec:
      * None / 'best' / 'worst'
      * 'bestaudio' / 'worstaudio': entries whose 'vcodec' is 'none'
      * 'bestvideo' / 'worstvideo': entries whose 'acodec' is 'none'
      * 'mp4', 'flv', 'webm', '3gp': last entry with that 'ext'
      * anything else: last entry with that 'format_id'

    Returns None when nothing matches, including when available_formats
    is empty or a candidate lacks the key being compared.
    """
    # Without this guard 'best'/'worst' would index into an empty list.
    if not available_formats:
        return None

    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # spec -> (key that must be 'none', index of the wanted candidate)
    codec_selectors = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_selectors:
        codec_key, position = codec_selectors[format_spec]
        candidates = [
            f for f in available_formats
            if f.get(codec_key) == 'none']
        return candidates[position] if candidates else None

    extensions = ('mp4', 'flv', 'webm', '3gp')
    lookup_key = 'ext' if format_spec in extensions else 'format_id'
    # .get() so that an entry without the key is simply not a match.
    matches = [
        f for f in available_formats
        if f.get(lookup_key) == format_spec]
    return matches[-1] if matches else None