From 4479fccc2b83384cc71b16bac00736bd1b5bb366 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 13 Jan 2016 17:12:03 +0100 Subject: [PATCH 1/3] [YoutubeDL] use parts array instead of multi_video type --- youtube_dl/YoutubeDL.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d50b7cfed..236953bac 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -745,7 +745,7 @@ class YoutubeDL(object): return self.process_ie_result( new_result, download=download, extra_info=extra_info) - elif result_type == 'playlist' or result_type == 'multi_video': + elif result_type == 'playlist': # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen('[download] Downloading playlist: %s' % playlist) @@ -1190,9 +1190,9 @@ class YoutubeDL(object): if add_headers: res.update(add_headers) - cookies = self._calc_cookies(info_dict) - if cookies: - res['Cookie'] = cookies + #cookies = self._calc_cookies(info_dict) + #if cookies: + # res['Cookie'] = cookies return res @@ -1274,8 +1274,13 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): - if 'url' not in format: - raise ExtractorError('Missing "url" key in result (index %d)' % i) + if 'parts' in format and len(format['parts']) == 0: + raise ExtractorError('Empty "parts" key in result (index %d)' % i) + if 'parts' in format and len(format['parts']) == 1: + format.update(format['parts'][0]) + del format['parts'] + if 'url' not in format and 'parts' not in format: + raise ExtractorError('Missing "url" or "parts" key in result (index %d)' % i) if format.get('format_id') is None: format['format_id'] = compat_str(i) @@ -1299,7 +1304,10 @@ class YoutubeDL(object): ) # Automatically determine file extension if missing if 'ext' not in format: - format['ext'] = determine_ext(format['url']).lower() + if 'parts' in format: + format['ext'] = determine_ext(format['parts'][0]['url']).lower() + else: + format['ext'] = determine_ext(format['url']).lower() # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -1430,12 +1438,19 @@ class YoutubeDL(object): if self.params.get('forceid', False): self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): + def print_format_url(format_info): + if 'parts' in format_info: + for f in format_info['parts']: + self.to_stdout(f['url'] + f.get('play_path', '')) + else: + self.to_stdout(format_info['url'] + format_info.get('play_path', '')) + if info_dict.get('requested_formats') is not None: for f in info_dict['requested_formats']: - self.to_stdout(f['url'] + f.get('play_path', '')) + print_format_url(f) else: # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + print_format_url(info_dict) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: @@ -1839,6 +1854,10 @@ class YoutubeDL(object): if res: res += ', ' res += '~' + format_bytes(fdict['filesize_approx']) + if fdict.get('parts'): + if res: + res += ', ' + res += 'multipart' return res def list_formats(self, info_dict): From 21a867912e3d8b8ffeed50b3667b058b62f455ad Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 13 Jan 2016 17:13:46 +0100 Subject: [PATCH 2/3] [downloader/multipart] add a simple implementation of a multipart downloader --- youtube_dl/downloader/__init__.py | 6 ++++ youtube_dl/downloader/multipart.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/downloader/multipart.py diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index dccc59212..808b9b6d8 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -9,6 +9,7 @@ from .http import HttpFD from .rtsp import RtspFD from .rtmp import RtmpFD from .dash import DashSegmentsFD +from .multipart import MultiPartFD from ..utils import ( determine_protocol, @@ -22,11 +23,16 @@ PROTOCOL_MAP = { 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, + 'multipart': MultiPartFD, } def get_suitable_downloader(info_dict, params={}): """Get the downloader class that can handle the info dict.""" + + if 'parts' in info_dict: + return MultiPartFD + protocol = determine_protocol(info_dict) info_dict['protocol'] = protocol diff --git a/youtube_dl/downloader/multipart.py b/youtube_dl/downloader/multipart.py new file mode 100644 index 000000000..20ba32819 --- /dev/null +++ b/youtube_dl/downloader/multipart.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import os + +from .fragment import FragmentFD + +from ..utils import ( + encodeFilename, + sanitize_open, +) + + +class MultiPartFD(FragmentFD): + """ A more limited implementation that does not require ffmpeg """ + + FD_NAME = 'multipart' + + def real_download(self, filename, info_dict): + parts = info_dict['parts'] + ctx = { + 'filename': filename, + 'total_frags': len(parts), + } + + self._prepare_and_start_frag_download(ctx) + + frags_filenames = [] + for i in range(len(parts)): + frag_filename = '%s%d' % (ctx['tmpfilename'], i) + success = ctx['dl'].download(frag_filename, {'url': parts[i]['url']}) + if not success: + return False + down, frag_sanitized = sanitize_open(frag_filename, 'rb') + ctx['dest_stream'].write(down.read()) + down.close() + frags_filenames.append(frag_sanitized) + # We only download the first fragment during the test + if self.params.get('test', False): + break + + self._finish_frag_download(ctx) + + for frag_file in frags_filenames: + os.remove(encodeFilename(frag_file)) + + return True From f5100e1750e8b512a346e471b0ae5d6f4b306450 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 13 Jan 2016 17:28:35 +0100 Subject: [PATCH 3/3] [youku] handle the case when different formats has different number of parts(fixes #6193) --- youtube_dl/extractor/youku.py | 58 +++++++++++++++-------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index f767fa15f..2bf01c0c2 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -14,6 +14,8 @@ from ..compat import ( from ..utils import ( ExtractorError, sanitized_Request, + int_or_none, + float_or_none, ) @@ -66,7 +68,7 @@ class YoukuIE(InfoExtractor): }, }] - def construct_video_urls(self, data): + def construct_formats(self, data): # get sid, token def yk_t(s1, s2): ls = list(range(256)) @@ -115,10 +117,10 @@ class YoukuIE(InfoExtractor): return ep # generate video_urls - video_urls_dict = {} + formats = [] for stream in data['stream']: format = stream.get('stream_type') - video_urls = [] + parts = [] for dt in stream['segs']: n = str(stream['segs'].index(dt)) param = { @@ -139,10 +141,19 @@ class YoukuIE(InfoExtractor): '/st/' + self.parse_ext_l(format) + \ '/fileid/' + get_fileid(format, n) + '?' + \ compat_urllib_parse.urlencode(param) - video_urls.append(video_url) - video_urls_dict[format] = video_urls - - return video_urls_dict + parts.append({ + 'url': video_url, + 'filesize': int_or_none(dt.get('size')), + }) + formats.append({ + 'format_id': self.get_format_name(format), + 'parts': parts, + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'filesize': int_or_none(stream.get('size')), + 'ext': self.parse_ext_l(format), + }) + return formats @staticmethod def get_ysuid(): @@ -235,34 +246,13 @@ class YoukuIE(InfoExtractor): msg += ': ' + error_note raise ExtractorError(msg) - # get video title - title = data['video']['title'] - - # generate video_urls_dict - video_urls_dict = self.construct_video_urls(data) - - # construct info - entries = [{ - 'id': '%s_part%d' % (video_id, i + 1), - 'title': title, - 'formats': [], - # some formats are not available for all parts, we have to detect - # which one has all - } for i in range(max(len(v.get('segs')) for v in data['stream']))] - for stream in data['stream']: - fm = stream.get('stream_type') - video_urls = video_urls_dict[fm] - for video_url, seg, entry in zip(video_urls, stream['segs'], entries): - entry['formats'].append({ - 'url': video_url, - 'format_id': self.get_format_name(fm), - 'ext': self.parse_ext_l(fm), - 'filesize': int(seg['size']), - }) + video_data = data['video'] return { - '_type': 'multi_video', 'id': video_id, - 'title': title, - 'entries': entries, + 'title': video_data['title'], + 'duration': float_or_none(video_data.get('seconds')), + 'uploader': video_data.get('username'), + 'uploader_id': video_data.get('userid'), + 'formats': self.construct_formats(data), }