commit

2026-06-05 14:33:39 +08:00 · 2016-08-28 17:11:58 +08:00
parent 39efc6e3e0
commit 419bf5314f
1 changed file with 19 additions and 54 deletions
@@ -1,28 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import urllib2 
 import calendar
 import datetime
 import re
+import json

 from .common import InfoExtractor
 from ..compat import (
-    compat_etree_fromstring,
    compat_str,
    compat_parse_qs,
-    compat_xml_parse_error,
 )
 from ..utils import (
-    ExtractorError,
    int_or_none,
-    float_or_none,
-    xpath_text,
 )


 class BiliBiliIE(InfoExtractor):
-    _WORKING = False
-
    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'

    _TESTS = [{
@@ -85,75 +80,46 @@ class BiliBiliIE(InfoExtractor):
    # BiliBili blocks keys from time to time. The current key is extracted from
    # the Android client
    # TODO: find the sign algorithm used in the flash player
-    _APP_KEY = '86385cdc024c0f6c'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
+	api = 'http://www.bilibili.com/m/html5?aid=%s&page=1' % video_id
+	info = json.loads(self._download_webpage(api,video_id))
+        url = info['src']

        params = compat_parse_qs(self._search_regex(
            [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
             r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
            webpage, 'player parameters'))
        cid = params['cid'][0]
-
-        info_xml_str = self._download_webpage(
-            'http://interface.bilibili.com/v_cdn_play',
-            cid, query={'appkey': self._APP_KEY, 'cid': cid},
-            note='Downloading video info page')
-
-        err_msg = None
-        durls = None
-        info_xml = None
-        try:
-            info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
-        except compat_xml_parse_error:
-            info_json = self._parse_json(info_xml_str, video_id, fatal=False)
-            err_msg = (info_json or {}).get('error_text')
-        else:
-            err_msg = xpath_text(info_xml, './message')
-
-        if info_xml is not None:
-            durls = info_xml.findall('./durl')
-        if not durls:
-            if err_msg:
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
-            else:
-                raise ExtractorError('No videos found!')
+   
+	response = urllib2.Request(url) 
+	html = urllib2.urlopen(response)
+	size = html.headers['Content-Length']

        entries = []
+        formats = [{
+            'url': url,
+            'filesize': int_or_none(size),
+        }]

-        for durl in durls:
-            size = xpath_text(durl, ['./filesize', './size'])
-            formats = [{
-                'url': durl.find('./url').text,
-                'filesize': int_or_none(size),
-            }]
-            for backup_url in durl.findall('./backup_url/url'):
-                formats.append({
-                    'url': backup_url.text,
-                    # backup URLs have lower priorities
-                    'preference': -2 if 'hd.mp4' in backup_url.text else -3,
-                })
+        self._sort_formats(formats)

-            self._sort_formats(formats)
-
-            entries.append({
-                'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
-                'duration': int_or_none(xpath_text(durl, './length'), 1000),
+        entries.append({
+            'id': '%s_part' % cid,
+                'duration': int_or_none(size),
                'formats': formats,
            })
-
        title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
-        description = self._html_search_meta('description', webpage)
+ 	description = self._html_search_meta('description', webpage)
        datetime_str = self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
        timestamp = None
        if datetime_str:
            timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
-
        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': compat_str(cid),
@@ -161,9 +127,8 @@ class BiliBiliIE(InfoExtractor):
            'description': description,
            'timestamp': timestamp,
            'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
-            'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
+            'duration': int_or_none(size),
        }
-
        uploader_mobj = re.search(
            r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
            webpage)