From 378969fdfd4b7d9cbe157ff05be75624868187da Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 23 May 2014 03:45:41 +0200 Subject: [PATCH 1/3] Add support for merged videos to formats --- youtube_dl/YoutubeDL.py | 89 +++++++++++----------------------- youtube_dl/extractor/common.py | 62 ++++++++++++++++++++--- youtube_dl/utils.py | 40 +++++++++++++++ 3 files changed, 124 insertions(+), 67 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f3666573a..f4d2ad029 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -56,6 +56,7 @@ from .utils import ( write_string, YoutubeDLHandler, prepend_extension, + select_format, ) from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader @@ -664,46 +665,6 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) - def select_format(self, format_spec, available_formats): - if format_spec == 'best' or format_spec is None: - return available_formats[-1] - elif format_spec == 'worst': - return available_formats[0] - elif format_spec == 'bestaudio': - audio_formats = [ - f for f in available_formats - if f.get('vcodec') == 'none'] - if audio_formats: - return audio_formats[-1] - elif format_spec == 'worstaudio': - audio_formats = [ - f for f in available_formats - if f.get('vcodec') == 'none'] - if audio_formats: - return audio_formats[0] - elif format_spec == 'bestvideo': - video_formats = [ - f for f in available_formats - if f.get('acodec') == 'none'] - if video_formats: - return video_formats[-1] - elif format_spec == 'worstvideo': - video_formats = [ - f for f in available_formats - if f.get('acodec') == 'none'] - if video_formats: - return video_formats[0] - else: - extensions = ['mp4', 'flv', 'webm', '3gp'] - if format_spec in extensions: - filter_f = lambda f: f['ext'] == format_spec - else: - filter_f = lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f, available_formats)) - if matches: - return matches[-1] - return None - def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -743,8 +704,21 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): - if 'url' not in format: - raise ExtractorError('Missing "url" key in result (index %d)' % i) + if 'url' in format and 'sub_formats' in format: + raise ExtractorError('Both "url" and "sub_formats" key in result (index %d)' % i) + if not 'url' in format and not 'sub_formats' in format: + raise ExtractorError('Neither "url" nor "sub_formats" key in result (index %d)' % i) + if 'sub_formats' in format: + if not len(format['sub_formats'])==2: + raise ExtractorError('Not two "sub_formats" in result (index %d)' % i) + if not format['sub_formats'][0] in formats: + raise ExtractorError('Result\'s first sub format is not in formats (index %d)' % i) + if not format['sub_formats'][1] in formats: + raise ExtractorError('Result\'s second sub format is not in formats (index %d)' % i) + if not 'url' in format['sub_formats'][0]: + raise ExtractorError('Missing "url" key in result\'s first sub format (index %d)' % i) + if not 'url' in format['sub_formats'][1]: + raise ExtractorError('Missing "url" key in result\'s second sub format (index %d)' % i) if format.get('format_id') is None: format['format_id'] = compat_str(i) @@ -756,7 +730,10 @@ class YoutubeDL(object): ) # Automatically determine file extension if missing if 'ext' not in format: - format['ext'] = determine_ext(format['url']).lower() + if 'url' in format: + format['ext'] = determine_ext(format['url']).lower() + else: + format['ext'] = determine_ext(format['sub_formats'][0]['url']).lower() format_limit = self.params.get('format_limit', None) if format_limit: @@ -788,21 +765,7 @@ class YoutubeDL(object): # the first that is available, starting from left req_formats = req_format.split('/') for rf in req_formats: - if re.match(r'.+?\+.+?', rf) is not None: - # Two formats have been requested like '137+139' - format_1, format_2 = rf.split('+') - formats_info = (self.select_format(format_1, formats), - self.select_format(format_2, formats)) - if all(formats_info): - selected_format = { - 'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], - } - else: - selected_format = None - else: - selected_format = self.select_format(rf, formats) + selected_format = select_format(rf, formats) if selected_format is not None: formats_to_download = [selected_format] break @@ -857,7 +820,11 @@ class YoutubeDL(object): self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + if 'url' in info_dict: + self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + else: + for f in info_dict['sub_formats']: + self.to_stdout(f['url'] + f.get('play_path', '')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: @@ -983,7 +950,7 @@ class YoutubeDL(object): for ph in self._progress_hooks: fd.add_progress_hook(ph) return fd.download(name, info) - if info_dict.get('requested_formats') is not None: + if info_dict.get('sub_formats') is not None: downloaded = [] success = True merger = FFmpegMergerPP(self) @@ -994,7 +961,7 @@ class YoutubeDL(object): ' The formats won\'t be merged') else: postprocessors = [merger] - for f in info_dict['requested_formats']: + for f in info_dict['sub_formats']: new_info = dict(info_dict) new_info.update(f) fname = self.prepare_filename(new_info) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index db472aace..42a5fef38 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -20,7 +20,9 @@ from ..utils import ( RegexNotFoundError, sanitize_filename, unescapeHTML, + select_format, ) +from ..postprocessor import FFmpegMergerPP _NO_DEFAULT = object() @@ -81,6 +83,8 @@ class InfoExtractor(object): format, irrespective of the file format. -1 for default (order by other properties), -2 or smaller for less than default. + * sub_formats List of two formats for combined formats, + first file is video and second audio. url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -495,26 +499,72 @@ class InfoExtractor(object): return self._html_search_meta('twitter:player', html, 'twitter card player') + def _merge_formats(self, format_1, format_2): + format = { + 'ext': format_1.get('ext'), + 'format_id': format_1['format_id']+'+'+format_2['format_id'], + 'width': format_1.get('width'), + 'height': format_1.get('height'), + 'resolution': format_1.get('resolution'), + 'abr': format_2.get('abr'), + 'acodec': format_2.get('acodec'), + 'asr': format_2.get('asr'), + 'vbr': format_1.get('vbr'), + 'vcodec': format_1.get('vcodec'), + 'container': format_1.get('container'), + 'sub_formats': [format_1, format_2], + 'protocol': format_1.get('protocol') if format_1.get('protocol') == format_2.get('protocol') else None, + 'tbr': format_1.get('vbr')+format_2.get('abr') if format_1.get('vbr') is not None and format_2.get('abr') is not None else None, + 'filesize': format_1.get('filesize')+format_2.get('filesize') if format_1.get('filesize') is not None and format_2.get('filesize') is not None else None, + } + return format + + def _add_merged_formats(self, formats, format_ids): + for format_1_id, format_2_id in map(lambda x: x.split('+'), format_ids): + format_1 = select_format(format_1_id, formats) + format_2 = select_format(format_2_id, formats) + if format_1 == None or format_2 == None: + continue + formats.append(self._merge_formats(format_1, format_2)) + + def _sort_formats(self, formats): if not formats: raise ExtractorError(u'No video formats found') def _formats_key(f): + ext = f.get('ext') # TODO remove the following workaround from ..utils import determine_ext - if not f.get('ext') and 'url' in f: - f['ext'] = determine_ext(f['url']) + if not ext: + if 'url' in f: + ext = determine_ext(f['url']) + elif 'sub_formats' in f and 'url' in f['sub_formats'][0]: + ext = determine_ext(f['sub_formats'][0]['url']) preference = f.get('preference') if preference is None: proto = f.get('protocol') if proto is None: - proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme + if 'url' in f: + proto = compat_urllib_parse_urlparse(f['url']).scheme + elif 'sub_formats' in f and 'url' in f['sub_formats'][0] and\ + 'url' in f['sub_formats'][1]: + proto_1 = compat_urllib_parse_urlparse(f['sub_formats'][0]['url']).scheme + proto_2 = compat_urllib_parse_urlparse(f['sub_formats'][1]['url']).scheme + if proto_1 == proto_2: + proto = proto_1 + elif proto_1 in ['http', 'https'] and proto_2 in ['http', 'https']: + proto = 'https' preference = 0 if proto in ['http', 'https'] else -0.1 - if f.get('ext') in ['f4f', 'f4m']: # Not yet supported + if ext in ['f4f', 'f4m']: # Not yet supported preference -= 0.5 + merger = FFmpegMergerPP(self._downloader) + if 'sub_formats' in f and not merger._get_executable(): # can't merge files + preference -= 1000 + if f.get('vcodec') == 'none': # audio only if self._downloader.params.get('prefer_free_formats'): ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] @@ -522,7 +572,7 @@ class InfoExtractor(object): ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] ext_preference = 0 try: - audio_ext_preference = ORDER.index(f['ext']) + audio_ext_preference = ORDER.index(ext) except ValueError: audio_ext_preference = -1 else: @@ -531,7 +581,7 @@ class InfoExtractor(object): else: ORDER = [u'webm', u'flv', u'mp4'] try: - ext_preference = ORDER.index(f['ext']) + ext_preference = ORDER.index(ext) except ValueError: ext_preference = -1 audio_ext_preference = 0 diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b97e62ae9..2271ff151 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1451,3 +1451,43 @@ except AttributeError: if ret: raise subprocess.CalledProcessError(ret, p.args, output=output) return output + +def select_format(format_spec, available_formats): + if format_spec == 'best' or format_spec is None: + return available_formats[-1] + elif format_spec == 'worst': + return available_formats[0] + elif format_spec == 'bestaudio': + audio_formats = [ + f for f in available_formats + if f.get('vcodec') == 'none'] + if audio_formats: + return audio_formats[-1] + elif format_spec == 'worstaudio': + audio_formats = [ + f for f in available_formats + if f.get('vcodec') == 'none'] + if audio_formats: + return audio_formats[0] + elif format_spec == 'bestvideo': + video_formats = [ + f for f in available_formats + if f.get('acodec') == 'none'] + if video_formats: + return video_formats[-1] + elif format_spec == 'worstvideo': + video_formats = [ + f for f in available_formats + if f.get('acodec') == 'none'] + if video_formats: + return video_formats[0] + else: + extensions = ['mp4', 'flv', 'webm', '3gp'] + if format_spec in extensions: + filter_f = lambda f: f['ext'] == format_spec + else: + filter_f = lambda f: f['format_id'] == format_spec + matches = list(filter(filter_f, available_formats)) + if matches: + return matches[-1] + return None From 3ab9e4128ee19c6171610f339bfa83fd45f7e283 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 23 May 2014 04:17:58 +0200 Subject: [PATCH 2/3] Delete source files after merging --- youtube_dl/YoutubeDL.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f4d2ad029..b8534d4c4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1052,6 +1052,13 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning('Unable to remove downloaded video file') + if '__files_to_merge' in ie_info and not self.params.get('keepvideo', False): + for fname in ie_info['__files_to_merge']: + try: + self.to_screen('Deleting original file %s (pass -k to keep)' % fname) + os.remove(encodeFilename(fname)) + except (IOError, OSError): + self.report_warning('Unable to remove downloaded video file') def _make_archive_id(self, info_dict): # Future-proof against any change in case From 68c925b22aa32728481e6e519c5e0f83ab0549e3 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 23 May 2014 04:18:23 +0200 Subject: [PATCH 3/3] [YoutubeIE] Add merged videos --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 981ca62c0..fafd0c96e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -231,6 +231,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } + _merged_formats = [x+'+'+y for x in ['133','134','135','136','137','138','160','264'] for y in ['139','140','141']] + \ + [x+'+'+y for x in ['167','168','169','170','218','219','242','243','244','245','246','247','248'] for y in ['171','172']] IE_NAME = u'youtube' _TESTS = [ @@ -1351,6 +1353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): except (ExtractorError, KeyError) as e: self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) + self._add_merged_formats(formats,self._merged_formats) self._sort_formats(formats) return {