1
0
mirror of https://github.com/l1ving/youtube-dl synced 2026-06-05 14:33:39 +08:00
This commit is contained in:
ZhangXinYang
2016-08-28 17:11:58 +08:00
Unverified
parent 39efc6e3e0
commit 419bf5314f
+19 -54
View File
@@ -1,28 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
import urllib2
import calendar
import datetime
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
compat_str,
compat_parse_qs,
compat_xml_parse_error,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
xpath_text,
)
class BiliBiliIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
_TESTS = [{
@@ -85,75 +80,46 @@ class BiliBiliIE(InfoExtractor):
# BiliBili blocks keys from time to time. The current key is extracted from
# the Android client
# TODO: find the sign algorithm used in the flash player
_APP_KEY = '86385cdc024c0f6c'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
api = 'http://www.bilibili.com/m/html5?aid=%s&page=1' % video_id
info = json.loads(self._download_webpage(api,video_id))
url = info['src']
params = compat_parse_qs(self._search_regex(
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))
cid = params['cid'][0]
info_xml_str = self._download_webpage(
'http://interface.bilibili.com/v_cdn_play',
cid, query={'appkey': self._APP_KEY, 'cid': cid},
note='Downloading video info page')
err_msg = None
durls = None
info_xml = None
try:
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
except compat_xml_parse_error:
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
err_msg = (info_json or {}).get('error_text')
else:
err_msg = xpath_text(info_xml, './message')
if info_xml is not None:
durls = info_xml.findall('./durl')
if not durls:
if err_msg:
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
else:
raise ExtractorError('No videos found!')
response = urllib2.Request(url)
html = urllib2.urlopen(response)
size = html.headers['Content-Length']
entries = []
formats = [{
'url': url,
'filesize': int_or_none(size),
}]
for durl in durls:
size = xpath_text(durl, ['./filesize', './size'])
formats = [{
'url': durl.find('./url').text,
'filesize': int_or_none(size),
}]
for backup_url in durl.findall('./backup_url/url'):
formats.append({
'url': backup_url.text,
# backup URLs have lower priorities
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
})
self._sort_formats(formats)
self._sort_formats(formats)
entries.append({
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
'duration': int_or_none(xpath_text(durl, './length'), 1000),
entries.append({
'id': '%s_part' % cid,
'duration': int_or_none(size),
'formats': formats,
})
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
description = self._html_search_meta('description', webpage)
description = self._html_search_meta('description', webpage)
datetime_str = self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
timestamp = None
if datetime_str:
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
# TODO 'view_count' requires deobfuscating Javascript
info = {
'id': compat_str(cid),
@@ -161,9 +127,8 @@ class BiliBiliIE(InfoExtractor):
'description': description,
'timestamp': timestamp,
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
'duration': int_or_none(size),
}
uploader_mobj = re.search(
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
webpage)