From a2990d4cc1641bbee8319303a46b12ea7008f4ee Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 8 Sep 2015 07:50:59 +0100 Subject: [PATCH 1/6] [dailymotion] extract m3u8 formats and handle live feeds(fixes #6794) --- youtube_dl/extractor/dailymotion.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 2d90b2224..51406ad4b 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -380,6 +380,11 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): if mobj: return mobj.group(1) + def redirect(self, url, video_id): + if re.match(r'https?://.*cdn.*\.dmcloud\.net/route/', url): + return self._download_webpage(url + '&redirect=0', video_id).strip() + return url + def _real_extract(self, url): video_id = self._match_id(url) @@ -390,12 +395,19 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): video_info = self._parse_json(self._search_regex( r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) - # TODO: parse ios_url, which is in fact a manifest - video_url = video_info['mp4_url'] + is_live = video_info['mode'] == 'live' + + formats = self._extract_m3u8_formats(self.redirect(video_info['ios_url'], video_id), video_id) + + if is_live: + title = self._live_title(title) + else: + formats.append({'url': self.redirect(video_info['mp4_url'], video_id), 'format_id': 'mp4'}) return { 'id': video_id, - 'url': video_url, 'title': title, 'thumbnail': video_info.get('thumbnail_url'), + 'formats': formats, + 'is_live': is_live, } From bc4a20a236a757765d417b17fcd5bf3ab5c07d18 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 8 Sep 2015 19:35:41 +0100 Subject: [PATCH 2/6] [extractor/common] get the redirected m3u8_url in _extract_m3u8_formats --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5eeeda08d..abc92e821 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -924,13 +924,14 @@ class InfoExtractor(object): if re.match(r'^https?://', u) else compat_urlparse.urljoin(m3u8_url, u)) - m3u8_doc = self._download_webpage( + m3u8_doc, urlh = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', fatal=fatal) if m3u8_doc is False: return m3u8_doc + m3u8_url = urlh.geturl() last_info = None last_media = None kv_rex = re.compile( From 20423b8cf66dcf6d895dedb5b4637f3b445010fd Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 8 Sep 2015 19:37:13 +0100 Subject: [PATCH 3/6] [dailymotion] use the normal HTTP 302 redirect --- youtube_dl/extractor/dailymotion.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 51406ad4b..56940ff31 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -380,11 +380,6 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): if mobj: return mobj.group(1) - def redirect(self, url, video_id): - if re.match(r'https?://.*cdn.*\.dmcloud\.net/route/', url): - return self._download_webpage(url + '&redirect=0', video_id).strip() - return url - def _real_extract(self, url): video_id = self._match_id(url) @@ -397,12 +392,14 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): is_live = video_info['mode'] == 'live' - formats = self._extract_m3u8_formats(self.redirect(video_info['ios_url'], video_id), video_id) + ios_url = video_info['ios_url'] + if '.m3u8' in ios_url: + formats = self._extract_m3u8_formats(ios_url, video_id) if is_live: title = self._live_title(title) else: - formats.append({'url': self.redirect(video_info['mp4_url'], video_id), 'format_id': 'mp4'}) + formats.append({'url': video_info['mp4_url'], 'format_id': 'mp4'}) return { 'id': video_id, From d6d3dc1aa109d8da08b0bee03785ebafa2558015 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 8 Sep 2015 19:43:34 +0100 Subject: [PATCH 4/6] [dailymotion] initialize formats --- youtube_dl/extractor/dailymotion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 56940ff31..894afe424 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -391,7 +391,8 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) is_live = video_info['mode'] == 'live' - + + formats = [] ios_url = video_info['ios_url'] if '.m3u8' in ios_url: formats = self._extract_m3u8_formats(ios_url, video_id) From 6a904ff9c27a15c8978f3419d4522b5192a0130b Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 10 Sep 2015 20:49:43 +0100 Subject: [PATCH 5/6] [downloader/f4m] get the redirected f4m_url and handle url query string properly --- youtube_dl/downloader/f4m.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 174180db5..b8db6bf9b 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -11,6 +11,7 @@ from .fragment import FragmentFD from ..compat import ( compat_urlparse, compat_urllib_error, + compat_urllib_parse_urlparse, ) from ..utils import ( encodeFilename, @@ -285,7 +286,9 @@ class F4mFD(FragmentFD): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(man_url).read() + urlh = self.ydl.urlopen(man_url) + man_url = urlh.geturl() + manifest = urlh.read() doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) @@ -329,20 +332,22 @@ class F4mFD(FragmentFD): if not live: write_metadata_tag(dest_stream, metadata) + base_url_parsed = compat_urllib_parse_urlparse(base_url) + self._start_frag_download(ctx) frags_filenames = [] while fragments_list: seg_i, frag_i = fragments_list.pop(0) name = 'Seg%d-Frag%d' % (seg_i, frag_i) - url = base_url + name + url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name) if akamai_pv: - url += '?' + akamai_pv.strip(';') + url_parsed = url_parsed._replace(query=url_parsed.query + akamai_pv.strip(';')) if info_dict.get('extra_param_to_segment_url'): - url += info_dict.get('extra_param_to_segment_url') + url_parsed = url_parsed._replace(query=url_parsed.query + info_dict.get('extra_param_to_segment_url')) frag_filename = '%s-%s' % (ctx['tmpfilename'], name) try: - success = ctx['dl'].download(frag_filename, {'url': url}) + success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()}) if not success: return False (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') From fb62ae46b9aace2daeb4d0e6d39ef1810736ab66 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 10 Sep 2015 20:50:37 +0100 Subject: [PATCH 6/6] [dailymotion] extract f4m_url from the swf file --- youtube_dl/extractor/dailymotion.py | 60 ++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 894afe424..3d83fc2d4 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -19,6 +19,7 @@ from ..utils import ( str_to_int, unescapeHTML, ) +from ..swfinterp import SWFInterpreter class DailymotionBaseInfoExtractor(InfoExtractor): @@ -380,6 +381,52 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): if mobj: return mobj.group(1) + def process_layer(self, layer): + param = layer.get('param') + if param: + customURL = param.get('customURL') + if customURL: + return customURL + autoURL = param.get('autoURL') + if autoURL: + return autoURL + sequenceList = layer.get('sequenceList') + if sequenceList: + for sequence in sequenceList: + result = self.process_sequence(sequence) + if result: + return result + return False + + def process_sequence(self, sequence): + layerList = sequence.get('layerList') + if layerList: + for layer in layerList: + result = self.process_layer(layer) + if result: + return result + return False + + def _extract_f4m_url(self, swf_url, video_id): + swf = self._request_webpage(swf_url, video_id, fatal=False) + if swf: + constant_strings = SWFInterpreter(swf.read()).constant_strings + for i in range(len(constant_strings)): + if constant_strings[i] == 'payload': + payload = constant_strings[i+1] + payload = self._search_regex(r'({.*})', payload, 'payload') + if payload: + payload = self._parse_json(payload, video_id) + sequences = self._parse_json(payload['parameters']['sequence'], video_id) + for sequence in sequences['sequence']: + for layer in sequence['layerList']: + result = self.process_sequence(sequence) + if result: + return result + else: + break + return False + def _real_extract(self, url): video_id = self._match_id(url) @@ -390,18 +437,27 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor): video_info = self._parse_json(self._search_regex( r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) - is_live = video_info['mode'] == 'live' - formats = [] + ios_url = video_info['ios_url'] if '.m3u8' in ios_url: formats = self._extract_m3u8_formats(ios_url, video_id) + f4m_url = self._extract_f4m_url(video_info['swf_url'], video_id) + if f4m_url: + if '.f4m' in f4m_url: + formats.extend(self._extract_f4m_formats(f4m_url, video_id)) + else: + formats.append({'url': f4m_url}) + + is_live = video_info['mode'] == 'live' if is_live: title = self._live_title(title) else: formats.append({'url': video_info['mp4_url'], 'format_id': 'mp4'}) + self._sort_formats(formats) + return { 'id': video_id, 'title': title,