diff --git a/youtube_dl/extractor/qqvideo.py b/youtube_dl/extractor/qqvideo.py index efe7723e5..bc0e7fc52 100644 --- a/youtube_dl/extractor/qqvideo.py +++ b/youtube_dl/extractor/qqvideo.py @@ -2,12 +2,14 @@ from __future__ import unicode_literals import re +import os.path from .common import InfoExtractor from ..utils import ( ExtractorError, ) + class QqVideoIE(InfoExtractor): """ qq viedo extractor """ IE_NAME = 'qq' @@ -15,13 +17,14 @@ class QqVideoIE(InfoExtractor): # http://v.qq.com/page/9/n/6/9jWRYWGYvn6.html # http://v.qq.com/cover/o/oy8cl3wkrebcv8h.html?vid=x001970x491 # http://v.qq.com/cover/x/xfxd9mej2luhfoz/s00190fcjfl.html 连续剧 + # http://v.qq.com/cover/x/xfxd9mej2luhfoz # 普通流视频(完整视频) # http://vv.video.qq.com/geturl?vid=v00149uf4ir&otype=json # 高清视频(分段视频) # 1080P-fhd,超清-shd,高清-hd,标清-sd # http://vv.video.qq.com/getinfo?vids=v00149uf4ir&otype=json&charge=0&defaultfmt=shd - _VALID_URL = r'http://v\.qq\.com/(?:cover/.+?/(?P[\w\d_-]+)\.html(?:\?vid=(?P[\w\d_-]+))?' \ - r'|page/.+?/(?P[\w\d_-]+)\.html)' + _VALID_URL = r'http://v\.qq\.com/(?:cover/.+?/(?P[\w\d_-]+)\.html(?:\?vid=(?P[\w\d_-]+))?' \ + r'|page/.+?/(?P[\w\d_-]+)\.html)' _TESTS = [{ 'url': 'http://v.qq.com/page/9/n/6/9jWRYWGYvn6.html', 'info_dict': { @@ -49,64 +52,173 @@ class QqVideoIE(InfoExtractor): ] + def _multi_part_video(self, video_id): + platform = 11 + # hd + album_hd_xml = self._download_xml( + 'http://vv.video.qq.com/getinfo?vids={0}&platform={1}&charge=0&otype=xml&defaultfmt=hd'.format( + video_id, platform), + video_id, 'get hd video metadata: {0}'.format(video_id)) + # shd + album_shd_xml = self._download_xml( + 'http://vv.video.qq.com/getinfo?vids={0}&platform={1}&charge=0&otype=xml&defaultfmt=shd'.format( + video_id, platform), + video_id, 'get shd video metadata: {0}'.format(video_id)) + + title = album_hd_xml.find('./vl/vi/ti').text + + entries = [] + hd_vtypes = {v.find('./name').text: v.find('./id').text for v in album_hd_xml.findall('./fl/fi')} + hd_fclip = int(album_hd_xml.find('./vl/vi/cl/fc').text) + hd_filename = album_hd_xml.find('./vl/vi/fn').text + hd_base_url = album_hd_xml.findall('./vl/vi/ul/ui/url')[-1].text + ext = os.path.splitext(hd_filename)[1][1:] + + for i in range(hd_fclip): + clip_filename = '{0}.{1}.{2}'.format(hd_filename[:-4], i + 1, ext) + key_xml = self._download_xml( + 'http://vv.video.qq.com/getkey?otype=xml&format=10{0}&filename={1}&linkver=2&vid={2}&charge=0&platform={3}'.format( + int(hd_vtypes['hd']) % 10000, clip_filename, video_id, platform), + video_id, 'get {0} {1}{2} vkey with vid: {3}'.format(title, 'clip', i + 1, video_id)) + vkey = key_xml.find('./key').text + level = key_xml.find('./level').text + sp = key_xml.find('./sp').text + vbr = int(album_hd_xml.find('./vl/vi/br').text) + clipsize = int(album_hd_xml.findall('./vl/vi/cl/ci')[i].find('./cs').text) + video_url = '{0}{1}?sdtfrom=v1001&vkey={2}&type={3}&level={4}&platform={5}&br={6}&fmt=hd&sp={7}&size={8}'.format( + hd_base_url, clip_filename, vkey, ext, level, platform, vbr, sp, clipsize) + entries.append({ + 'id': '{0}_part{1}'.format(video_id, i + 1), + 'title': title, + 'formats': [{ + 'url': video_url, + 'ext': ext, + 'filesize': clipsize, + 'vbr': vbr, + 'width': int(album_hd_xml.find('./vl/vi/vw').text), + 'height': int(album_hd_xml.find('./vl/vi/vh').text), + 'quality': 0, + }] + }) + + shd_vtypes = {v.find('./name').text: v.find('./id').text for v in album_shd_xml.findall('./fl/fi')} + shd_fclip = album_shd_xml.find('./vl/vi/cl/fc').text + shd_filename = album_shd_xml.find('./vl/vi/fn').text + shd_base_url = album_shd_xml.findall('./vl/vi/ul/ui/url')[-1].text + for i in range(int(shd_fclip)): + clip_filename = '{0}.{1}.{2}'.format(shd_filename[:-4], i + 1, ext) + key_xml = self._download_xml( + 'http://vv.video.qq.com/getkey?otype=xml&format=10{0}&filename={1}&linkver=2&vid={2}&charge=0&platform={3}'.format( + int(shd_vtypes['shd']) % 10000, clip_filename, video_id, platform), + video_id, 'get {0} {1}{2} vkey with vid: {3}'.format(title, 'clip', i + 1, video_id)) + vkey = key_xml.find('./key').text + level = key_xml.find('./level').text + sp = key_xml.find('./sp').text + vbr = int(album_shd_xml.find('./vl/vi/br').text) + clipsize = int(album_shd_xml.findall('./vl/vi/cl/ci')[i].find('./cs').text) + video_url = '{0}{1}?sdtfrom=v1001&vkey={2}&type={3}&level={4}&platform={5}&br={6}&fmt=hd&sp={7}&size={8}'.format( + shd_base_url, clip_filename, vkey, ext, level, platform, vbr, sp, clipsize) + format_item = { + 'url': video_url, + 'ext': ext, + 'filesize': clipsize, + 'vbr': vbr, + 'width': int(album_shd_xml.find('./vl/vi/vw').text), + 'height': int(album_shd_xml.find('./vl/vi/vh').text), + 'quality': 1, + } + if i < hd_fclip: + entries[i]['formats'].append(format_item) + else: + entries.append({ + 'id': '{0}_part{1}'.format(video_id, i + 1), + 'title': title, + 'formats': [format_item] + }) + + return entries + + def _single_video(self, video_id): + mobile_xml = self._download_xml( + 'http://vv.video.qq.com/getinfo?vids={0}&platform=2' + '&charge=0&otype=xml&defaultfmt=msd'.format(video_id), + video_id, 'get video metadata of mobile resolution: {0}'.format(video_id)) + sd_xml = self._download_xml( + 'http://vv.video.qq.com/getinfo?vids={0}&platform=2' + '&charge=0&otype=xml&defaultfmt=mp4'.format(video_id), + video_id, 'get video metadata of sd resolution: {0}'.format(video_id)) + mobile_filename = mobile_xml.find('./vl/vi/fn').text + sd_filename = sd_xml.find('./vl/vi/fn').text + return { + 'id': '{0}'.format(video_id), + 'title': mobile_xml.find('./vl/vi/ti').text, + 'formats': [{ + 'url': '{0}{1}?vkey={2}'.format(mobile_xml.findall('./vl/vi/ul/ui/url')[-1].text, mobile_filename, + mobile_xml.find('./vl/vi/fvkey').text), + 'ext': os.path.splitext(mobile_filename)[1][1:], + 'filesize': int(mobile_xml.find('./vl/vi/fs').text), + 'vbr': int(mobile_xml.find('./vl/vi/br').text), + 'width': int(mobile_xml.find('./vl/vi/vw').text), + 'height': int(mobile_xml.find('./vl/vi/vh').text), + 'quality': -2, + }, { + 'url': '{0}{1}?vkey={2}'.format(sd_xml.findall('./vl/vi/ul/ui/url')[-1].text, sd_filename, + sd_xml.find('./vl/vi/fvkey').text), + 'ext': os.path.splitext(sd_filename)[1][1:], + 'filesize': int(sd_xml.find('./vl/vi/fs').text), + 'vbr': int(sd_xml.find('./vl/vi/br').text), + 'width': int(sd_xml.find('./vl/vi/vw').text), + 'height': int(sd_xml.find('./vl/vi/vh').text), + 'quality': -1, + }], + } + def _soap_extract(self, url, video_id): """ extract soap opera url of qq video,""" webpage = self._download_webpage(url, video_id, 'download web page: {0}'.format(url)) - album_list = [album.group('vid') for album in re.finditer(r'(?is)]+class="album_link"\s+id="(?P[\w\d\-_]+)"[^>]+>.*?', webpage)] + album_list = [album.group('vid') for album in + re.finditer(r'(?is)]+class="album_link"\s+id="(?P[\w\d\-_]+)"[^>]+>.*?', webpage)] if len(album_list) == 0: raise ExtractorError('invalid video id: {0}'.format(video_id)) elif video_id in album_list: album_list.clear() album_list.append(video_id) + # interface + # mobile + # http://vv.video.qq.com/getinfo?vids=s00190fcjfl&platform=2&charge=0&otype=xml + # sd + # http://vv.video.qq.com/getinfo?vids=s00190fcjfl&platform=2&charge=0&otype=xml&defaultfmt=mp4 + # getvinfo + # http://vv.video.qq.com/getinfo?vids=s00190fcjfl&platform=11&charge=0&otype=xml&defaultfmt=shd&defnpayver=1 + # getvkey + # http://vv.video.qq.com/getkey?otype=xml&format=10401&filename=h00197yddy9%2Ep401%2E1%2Emp4&linkver=2&vid=s00190fcjfl&charge=0&platform=11 + # video_url + # http://video.dispatch.tc.qq.com/h00197yddy9.p401.1.mp4?sdtfrom=v1001&type=mp4&vkey=C0E385B593A13951BBAF7F37D45730E4354E5F5FAE2265F8A395EB42ECE3F98D3EB4E0834B9E8BD3BE0660B774D0F41CE8E4476107D2C247056CCEAF1EC2E36CE8AF34BC1110269DA0B1A598001AE04D6CD90D56EF6EEDBA&level=0&platform=11&br=169&fmt=shd&sp=0&size=68593491 + entries = [] for album_index in range(len(album_list)): vid = album_list[album_index] - info_doc = self._download_xml( - 'http://vv.video.qq.com/getinfo?vid={0}&otype=xml&defaultfmt=shd'.format(vid), - vid, 'fetch video metadata: {0}'.format(vid)) - fclip = info_doc.find('./vl/vi/cl/fc').text - fn = info_doc.find('./vl/vi/fn').text - vtypes = {v.find('./name').text:v.find('./id').text for v in info_doc.findall('./fl/fi')} - base_url = info_doc.findall('./vl/vi/ul/ui/url')[-1].text - title = info_doc.find('./vl/vi/ti').text - for i in range(int(fclip)): - newfn = '{0}.{1}.{2}'.format(fn[:-4], i + 1, 'mp4') - qid = vtypes['sd'] - if 'fhd' in vtypes: - qid = vtypes['fhd'] - elif 'shd' in vtypes: - qid = vtypes['shd'] - elif 'hd' in vtypes: - qid = vtypes['hd'] - key_doc = self._download_xml( - 'http://vv.video.qq.com/getkey?format=10{0}&otype=xml&vid={1}&filename={2}'.format(int(qid) % 10000, vid, newfn), - vid, 'get {0} {1}{2} vkey with vid: {3}'.format(title, 'clip', i + 1, vid)) - vkey = key_doc.find('./key').text - video_url = '{0}{1}?vkey={2}&type={3}'.format(base_url, newfn, vkey, 'mp4') - entries.append({ - 'id': '{0}_part{1}'.format(vid, i + 1), - 'title': title, - 'formats': [{ - 'url': video_url, - 'ext': 'mp4' - }], - }) + # mobile and sd video + entries.append(self._single_video(vid)) + # hd and shd video + for v in self._multi_part_video(vid): + entries.append(v) return { '_type': 'multi_video', 'id': video_id, - 'title': title, + 'title': entries[0]['title'], 'entries': entries, } def _video_extract(self, url, video_id): """ extract normal qq video url """ video_url = self._download_xml( - 'http://vv.video.qq.com/geturl?vid={0}&otype=xml'.format(video_id), - video_id, 'fetch video url').find('./vd/vi/url').text + 'http://vv.video.qq.com/geturl?vid={0}&otype=xml'.format(video_id), + video_id, 'fetch video url').find('./vd/vi/url').text ext = self._search_regex('\.([\d\w]+)\?', video_url, '', '') title = self._download_xml( - 'http://vv.video.qq.com/getinfo?vid={0}&otype=xml&defaultfmt=shd'.format(video_id), + 'http://vv.video.qq.com/getinfo?vid={0}&otype=xml'.format(video_id), video_id, 'fetch video metadata').find('./vl/vi/ti').text return { 'id': video_id, @@ -118,8 +230,8 @@ class QqVideoIE(InfoExtractor): def _real_extract(self, url): """ extract qq video url """ mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') or mobj.group('vid') or mobj.group('pid') - if (mobj.group('pid') is not None): - return self._soap_extract(url, video_id) - else: + video_id = mobj.group('pid') or mobj.group('vid') or mobj.group('sid') + if (mobj.group('vid') or mobj.group('pid')): return self._video_extract(url, video_id) + else: + return self._soap_extract(url, video_id)