Add files via upload

2025-03-09 22:57:16 +08:00 · 2020-02-19 18:44:35 -05:00 · 2020-02-19 18:44:35 -05:00 · d723a946d7
commit d723a946d7
parent 97c822b3d5
1 changed files with 40 additions and 116 deletions
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -15,7 +15,6 @@ from ..utils import (
    float_or_none,
    parse_iso8601,
    smuggle_url,
-    str_or_none,
    strip_jsonp,
    unified_timestamp,
    unsmuggle_url,
@@ -24,9 +23,41 @@ from ..utils import (


 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)(?:/?\?p=(?P<page>\d+))?'

    _TESTS = [{
+        'url': 'https://www.bilibili.com/video/av41213189?p=1',
+        'md5': '166c3e684970fbb4f834f24ddd19b275',
+        'info_dict': {
+            'id': '41213189_p1',
+            'cid': '72383807',
+            'ext': 'flv',
+            'title': '【春晚鬼畜】宋丹丹：我就是念诗女王！【改革春风吹进门】_p1',
+            'description': 'md5:a29fb90e0aff106d062a38658b0b75e2',
+            'duration': 152.024,
+            'timestamp': 1548014429,
+            'upload_date': '20190120',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': '吃素的狮子',
+            'uploader_id': '808171',
+        },
+    }, {
+        'url': 'https://www.bilibili.com/video/av41213189?p=2',
+        'md5': 'bda0939f327f2ead942e89d7f028ecc3',
+        'info_dict': {
+            'id': '41213189_p2',
+            'cid': '72387898',
+            'ext': 'flv',
+            'title': '【春晚鬼畜】宋丹丹：我就是念诗女王！【改革春风吹进门】_p2',
+            'description': 'md5:a29fb90e0aff106d062a38658b0b75e2',
+            'duration': 152.024,
+            'timestamp': 1548014429,
+            'upload_date': '20190120',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': '吃素的狮子',
+            'uploader_id': '808171',
+        },
+    }, {
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
@@ -111,10 +142,14 @@ class BiliBiliIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        anime_id = mobj.group('anime_id')
+        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)

        if 'anime/' not in url:
            cid = self._search_regex(
+                r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
+                default=None
+            ) or self._search_regex(
                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
                default=None
            ) or compat_parse_qs(self._search_regex(
@@ -194,7 +229,7 @@ class BiliBiliIE(InfoExtractor):
        title = self._html_search_regex(
            ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
-            group='title')
+            group='title') + ('_p' + str(page_id) if page_id is not None else '')
        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
@@ -204,7 +239,8 @@ class BiliBiliIE(InfoExtractor):

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
-            'id': video_id,
+            'id': video_id if page_id is None else str(video_id) + '_p' + str(page_id),
+            'cid': cid,
            'title': title,
            'description': description,
            'timestamp': timestamp,
@@ -307,115 +343,3 @@ class BiliBiliBangumiIE(InfoExtractor):
        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))
-
-
-class BilibiliAudioBaseIE(InfoExtractor):
-    def _call_api(self, path, sid, query=None):
-        if not query:
-            query = {'sid': sid}
-        return self._download_json(
-            'https://www.bilibili.com/audio/music-service-c/web/' + path,
-            sid, query=query)['data']
-
-
-class BilibiliAudioIE(BilibiliAudioBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://www.bilibili.com/audio/au1003142',
-        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
-        'info_dict': {
-            'id': '1003142',
-            'ext': 'm4a',
-            'title': '【tsukimi】YELLOW / 神山羊',
-            'artist': 'tsukimi',
-            'comment_count': int,
-            'description': 'YELLOW的mp3版！',
-            'duration': 183,
-            'subtitles': {
-                'origin': [{
-                    'ext': 'lrc',
-                }],
-            },
-            'thumbnail': r're:^https?://.+\.jpg',
-            'timestamp': 1564836614,
-            'upload_date': '20190803',
-            'uploader': 'tsukimi-つきみぐー',
-            'view_count': int,
-        },
-    }
-
-    def _real_extract(self, url):
-        au_id = self._match_id(url)
-
-        play_data = self._call_api('url', au_id)
-        formats = [{
-            'url': play_data['cdns'][0],
-            'filesize': int_or_none(play_data.get('size')),
-        }]
-
-        song = self._call_api('song/info', au_id)
-        title = song['title']
-        statistic = song.get('statistic') or {}
-
-        subtitles = None
-        lyric = song.get('lyric')
-        if lyric:
-            subtitles = {
-                'origin': [{
-                    'url': lyric,
-                }]
-            }
-
-        return {
-            'id': au_id,
-            'title': title,
-            'formats': formats,
-            'artist': song.get('author'),
-            'comment_count': int_or_none(statistic.get('comment')),
-            'description': song.get('intro'),
-            'duration': int_or_none(song.get('duration')),
-            'subtitles': subtitles,
-            'thumbnail': song.get('cover'),
-            'timestamp': int_or_none(song.get('passtime')),
-            'uploader': song.get('uname'),
-            'view_count': int_or_none(statistic.get('play')),
-        }
-
-
-class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://www.bilibili.com/audio/am10624',
-        'info_dict': {
-            'id': '10624',
-            'title': '每日新曲推荐（每日11:00更新）',
-            'description': '每天11:00更新，为你推送最新音乐',
-        },
-        'playlist_count': 19,
-    }
-
-    def _real_extract(self, url):
-        am_id = self._match_id(url)
-
-        songs = self._call_api(
-            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
-
-        entries = []
-        for song in songs:
-            sid = str_or_none(song.get('id'))
-            if not sid:
-                continue
-            entries.append(self.url_result(
-                'https://www.bilibili.com/audio/au' + sid,
-                BilibiliAudioIE.ie_key(), sid))
-
-        if entries:
-            album_data = self._call_api('menu/info', am_id) or {}
-            album_title = album_data.get('title')
-            if album_title:
-                for entry in entries:
-                    entry['album'] = album_title
-                return self.playlist_result(
-                    entries, am_id, album_title, album_data.get('intro'))
-
-        return self.playlist_result(entries, am_id)