From 4ca8e3b5d98463dd289c3dc214a920ce55c54b99 Mon Sep 17 00:00:00 2001 From: Celthi Date: Wed, 16 Dec 2015 11:36:21 +0800 Subject: [PATCH 1/7] youku --download the longest video in minimum segs --- youtube_dl/extractor/youku.py | 58 +++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 5110efcb4..94b7e2c8f 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -145,50 +145,52 @@ class YoukuIE(InfoExtractor): compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls + print(video_urls) return video_urls_dict def get_hd(self, fm): hd_id_dict = { - 'flv': '0', - 'mp4': '1', - 'hd2': '2', - 'hd3': '3', '3gp': '0', - '3gphd': '1', + 'flv': '0', 'flvhd': '0', + 'mp4hd2': '0', + 'mp4hd3': '0', + 'mp4': '1', 'mp4hd': '1', - 'mp4hd2': '1' + '3gphd': '1', + 'hd2': '2', + 'hd3': '3' } return hd_id_dict[fm] def parse_ext_l(self, fm): ext_dict = { 'flv': 'flv', - 'mp4': 'mp4', - 'mp4hd': 'mp4', 'mp4hd2': 'flv', 'mp4hd3': 'flv', + 'flvhd': 'flv', 'hd2': 'flv', 'hd3': 'flv', '3gp': 'flv', - '3gphd': 'mp4', - 'flvhd': 'flv' + 'mp4': 'mp4', + 'mp4hd': 'mp4', + '3gphd': 'mp4' } return ext_dict[fm] def get_format_name(self, fm): _dict = { - '3gp': 'h6', - '3gphd': 'h5', - 'flvhd': 'h4', - 'flv': 'h4', - 'mp4': 'h3', - 'hd2': 'h2', 'hd3': 'h1', + 'hd2': 'h2', + 'mp4': 'h3', 'mp4hd': 'h3', + 'flv': 'h4', + 'flvhd': 'h4', + 'mp4hd2': 'h4', 'mp4hd3': 'h4', - 'mp4hd2': 'h4' + '3gphd': 'h5', + '3gp': 'h6' } return _dict[fm] @@ -244,6 +246,16 @@ class YoukuIE(InfoExtractor): # generate video_urls_dict video_urls_dict = self.construct_video_urls(data) + # get the number of segs of the longest video stream + def get_segs(streams): + times = max([stream.get('milliseconds_video') for stream in streams]) + seq = [] + for stream in streams: + if times == stream.get('milliseconds_video'): + 
seq.append(len(stream.get('segs'))) + return seq + + seq = get_segs(data['stream']) # construct info entries = [{ 'id': '%s_part%d' % (video_id, i + 1), @@ -251,12 +263,12 @@ class YoukuIE(InfoExtractor): 'formats': [], # some formats are not available for all parts, we have to detect # which one has all - } for i in range(max(len(v.get('segs')) for v in data['stream']))] - for stream in data['stream']: - fm = stream.get('stream_type') - video_urls = video_urls_dict[fm] - for video_url, seg, entry in zip(video_urls, stream['segs'], entries): - entry['formats'].append({ + } for i in range(min(seq))] + stream = data['stream'][seq.index(min(seq))] + fm = stream.get('stream_type') + video_urls = video_urls_dict[fm] + for video_url, seg, entry in zip(video_urls, stream['segs'], entries): + entry['formats'].append({ 'url': video_url, 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), From 5533e594e8621ffa599bafbef97880b93a9e0097 Mon Sep 17 00:00:00 2001 From: Celthi Date: Wed, 16 Dec 2015 12:22:20 +0800 Subject: [PATCH 2/7] youku -- check with flake8 --- youtube_dl/extractor/youku.py | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 94b7e2c8f..844d59e25 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import base64 -import json from .common import InfoExtractor from ..compat import ( @@ -89,9 +88,6 @@ class YoukuIE(InfoExtractor): oip = data['security']['ip'] # get fileid - string_ls = list( - 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890') - fileid_dict = {} for stream in data['stream']: format = stream.get('stream_type') @@ -122,13 +118,12 @@ class YoukuIE(InfoExtractor): format = stream.get('stream_type') video_urls = [] for dt in stream['segs']: - #n = str(int(dt['size'])) + # n = str(int(dt['size'])) n = 
str(stream['segs'].index(dt)) param = { 'K': dt['key'], 'hd': self.get_hd(format), 'myp': 0, - #'ts': dt['total_milliseconds_video'], 'ypp': 0, 'ctype': 12, 'ev': 1, @@ -139,7 +134,7 @@ class YoukuIE(InfoExtractor): video_url = \ 'http://k.youku.com/player/getFlvPath/' + \ 'sid/' + sid + \ - '_00'+ \ + '_00' + \ '/st/' + self.parse_ext_l(format) + \ '/fileid/' + get_fileid(format, n) + '?' + \ compat_urllib_parse.urlencode(param) @@ -198,24 +193,21 @@ class YoukuIE(InfoExtractor): video_id = self._match_id(url) def retrieve_data(req_url, note): - headers = { - 'Referer': req_url, - } - self._set_cookie('youku.com','xreferrer','http://www.youku.com') - req = sanitized_Request(req_url,headers=headers) + 'Referer': req_url, + } + self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com') + req = sanitized_Request(req_url, headers=headers) cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') if cn_verification_proxy: req.add_header('Ytdl-request-proxy', cn_verification_proxy) raw_data = self._download_json(req, video_id, note=note) - js = json.dumps(raw_data) return raw_data['data'] - video_password = self._downloader.params.get('videopassword', None) # request basic data @@ -239,10 +231,9 @@ class YoukuIE(InfoExtractor): msg += ': ' + error_note raise ExtractorError(msg) - #get video title + # get video title title = data['video']['title'] - # generate video_urls_dict video_urls_dict = self.construct_video_urls(data) @@ -263,17 +254,17 @@ class YoukuIE(InfoExtractor): 'formats': [], # some formats are not available for all parts, we have to detect # which one has all - } for i in range(min(seq))] + } for i in range(min(seq))] stream = data['stream'][seq.index(min(seq))] fm = stream.get('stream_type') video_urls = video_urls_dict[fm] for video_url, seg, entry in zip(video_urls, stream['segs'], entries): entry['formats'].append({ - 'url': video_url, - 'format_id': self.get_format_name(fm), - 'ext': self.parse_ext_l(fm), - 'filesize': 
int(seg['size']), - }) + 'url': video_url, + 'format_id': self.get_format_name(fm), + 'ext': self.parse_ext_l(fm), + 'filesize': int(seg['size']), + }) return { '_type': 'multi_video', From 44651b67ad41441204a317b028ef01737b2c106c Mon Sep 17 00:00:00 2001 From: Celthi Date: Wed, 16 Dec 2015 12:44:11 +0800 Subject: [PATCH 3/7] youku -- remove the redundant print --- youtube_dl/extractor/youku.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 844d59e25..8396d2f7b 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -140,7 +140,6 @@ class YoukuIE(InfoExtractor): compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls - print(video_urls) return video_urls_dict From b5d2c22d877e814c8c9076c702b8bfc8ba4da279 Mon Sep 17 00:00:00 2001 From: Celthi Date: Wed, 16 Dec 2015 11:36:21 +0800 Subject: [PATCH 4/7] youku --download the longest video in minimum segs --- youtube_dl/extractor/youku.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index d33caa79e..d09466eba 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -138,6 +138,7 @@ class YoukuIE(InfoExtractor): compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls + print(video_urls) return video_urls_dict @@ -164,10 +165,6 @@ class YoukuIE(InfoExtractor): 'flvhd': 'flv', 'mp4': 'mp4', 'mp4hd': 'mp4', - 'mp4hd2': 'flv', - 'mp4hd3': 'flv', - 'hd2': 'flv', - 'hd3': 'flv', } return ext_dict[fm] @@ -231,6 +228,16 @@ class YoukuIE(InfoExtractor): # generate video_urls_dict video_urls_dict = self.construct_video_urls(data) + # get the number of segs of the longest video stream + def get_segs(streams): + times = max([stream.get('milliseconds_video') for stream in streams]) + seq = [] + for stream in 
streams: + if times == stream.get('milliseconds_video'): + seq.append(len(stream.get('segs'))) + return seq + + seq = get_segs(data['stream']) # construct info entries = [{ 'id': '%s_part%d' % (video_id, i + 1), @@ -238,12 +245,12 @@ class YoukuIE(InfoExtractor): 'formats': [], # some formats are not available for all parts, we have to detect # which one has all - } for i in range(max(len(v.get('segs')) for v in data['stream']))] - for stream in data['stream']: - fm = stream.get('stream_type') - video_urls = video_urls_dict[fm] - for video_url, seg, entry in zip(video_urls, stream['segs'], entries): - entry['formats'].append({ + } for i in range(min(seq))] + stream = data['stream'][seq.index(min(seq))] + fm = stream.get('stream_type') + video_urls = video_urls_dict[fm] + for video_url, seg, entry in zip(video_urls, stream['segs'], entries): + entry['formats'].append({ 'url': video_url, 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), From 42c7b13f7530246f7429dd83c3522bd9ac3921c4 Mon Sep 17 00:00:00 2001 From: Celthi Date: Thu, 17 Dec 2015 09:27:53 +0800 Subject: [PATCH 5/7] [youku] rebase on master and resolve conflicts --- youtube_dl/extractor/youku.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index d09466eba..2e3385349 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -245,17 +245,17 @@ class YoukuIE(InfoExtractor): 'formats': [], # some formats are not available for all parts, we have to detect # which one has all - } for i in range(min(seq))] + } for i in range(min(seq))] stream = data['stream'][seq.index(min(seq))] fm = stream.get('stream_type') video_urls = video_urls_dict[fm] for video_url, seg, entry in zip(video_urls, stream['segs'], entries): entry['formats'].append({ - 'url': video_url, - 'format_id': self.get_format_name(fm), - 'ext': self.parse_ext_l(fm), - 'filesize': int(seg['size']), - }) + 'url': 
video_url, + 'format_id': self.get_format_name(fm), + 'ext': self.parse_ext_l(fm), + 'filesize': int(seg['size']), + }) return { '_type': 'multi_video', From d687cf46faa68e7ce0cc5742b8a8a7152b180766 Mon Sep 17 00:00:00 2001 From: Celthi Date: Wed, 16 Dec 2015 12:44:11 +0800 Subject: [PATCH 6/7] youku -- remove the redundant print --- youtube_dl/extractor/youku.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 2e3385349..30cd80611 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -138,7 +138,6 @@ class YoukuIE(InfoExtractor): compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls - print(video_urls) return video_urls_dict From 1d189d15c51c0a51e7e74c4babe06575f3f475e2 Mon Sep 17 00:00:00 2001 From: Celthi Date: Thu, 17 Dec 2015 10:25:32 +0800 Subject: [PATCH 7/7] [youku] adapt my modification to main repo --- youtube_dl/extractor/youku.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 24e3953bd..30cd80611 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -87,10 +87,6 @@ class YoukuIE(InfoExtractor): # get oip oip = data['security']['ip'] -<<<<<<< HEAD -======= - # get fileid ->>>>>>> githubMaster/youku_bugfix fileid_dict = {} for stream in data['stream']: format = stream.get('stream_type')