From d723a946d784cd98c6c9af8d90ca774365f90688 Mon Sep 17 00:00:00 2001 From: tomyang001 Date: Wed, 19 Feb 2020 18:44:35 -0500 Subject: [PATCH 1/4] Add files via upload --- youtube_dl/extractor/bilibili.py | 156 ++++++++----------------------- 1 file changed, 40 insertions(+), 116 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 80bd696e2..5eec78a7e 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -15,7 +15,6 @@ from ..utils import ( float_or_none, parse_iso8601, smuggle_url, - str_or_none, strip_jsonp, unified_timestamp, unsmuggle_url, @@ -24,9 +23,41 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P\d+)/play#)(?P\d+)' + _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P\d+)/play#)(?P\d+)(?:/?\?p=(?P\d+))?' _TESTS = [{ + 'url': 'https://www.bilibili.com/video/av41213189?p=1', + 'md5': '166c3e684970fbb4f834f24ddd19b275', + 'info_dict': { + 'id': '41213189_p1', + 'cid': '72383807', + 'ext': 'flv', + 'title': '【春晚鬼畜】宋丹丹:我就是念诗女王!【改革春风吹进门】_p1', + 'description': 'md5:a29fb90e0aff106d062a38658b0b75e2', + 'duration': 152.024, + 'timestamp': 1548014429, + 'upload_date': '20190120', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': '吃素的狮子', + 'uploader_id': '808171', + }, + }, { + 'url': 'https://www.bilibili.com/video/av41213189?p=2', + 'md5': 'bda0939f327f2ead942e89d7f028ecc3', + 'info_dict': { + 'id': '41213189_p2', + 'cid': '72387898', + 'ext': 'flv', + 'title': '【春晚鬼畜】宋丹丹:我就是念诗女王!【改革春风吹进门】_p2', + 'description': 'md5:a29fb90e0aff106d062a38658b0b75e2', + 'duration': 152.024, + 'timestamp': 1548014429, + 'upload_date': '20190120', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': '吃素的狮子', + 'uploader_id': '808171', + }, + }, { 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'info_dict': { @@ -111,10 +142,14 @@ class BiliBiliIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') anime_id = mobj.group('anime_id') + page_id = mobj.group('page') webpage = self._download_webpage(url, video_id) if 'anime/' not in url: cid = self._search_regex( + r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid', + default=None + ) or self._search_regex( r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', default=None ) or compat_parse_qs(self._search_regex( @@ -194,7 +229,7 @@ class BiliBiliIE(InfoExtractor): title = self._html_search_regex( (']+\btitle=(["\'])(?P(?:(?!\1).)+)\1', '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title', - group='title') + group='title') + ('_p' + str(page_id) if page_id is not None else '') description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', @@ -204,7 +239,8 @@ class BiliBiliIE(InfoExtractor): # TODO 'view_count' requires deobfuscating Javascript info = { - 'id': video_id, + 'id': video_id if page_id is None else str(video_id) + '_p' + str(page_id), + 'cid': cid, 'title': title, 'description': description, 'timestamp': timestamp, @@ -307,115 +343,3 @@ class BiliBiliBangumiIE(InfoExtractor): return self.playlist_result( entries, bangumi_id, season_info.get('bangumi_title'), season_info.get('evaluate')) - - -class BilibiliAudioBaseIE(InfoExtractor): - def _call_api(self, path, sid, query=None): - if not query: - query = {'sid': sid} - return self._download_json( - 'https://www.bilibili.com/audio/music-service-c/web/' + path, - sid, query=query)['data'] - - -class BilibiliAudioIE(BilibiliAudioBaseIE): - _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)' - _TEST = { - 'url': 'https://www.bilibili.com/audio/au1003142', - 'md5': 'fec4987014ec94ef9e666d4d158ad03b', - 'info_dict': { - 'id': '1003142', - 'ext': 'm4a', - 'title': '【tsukimi】YELLOW / 神山羊', - 'artist': 'tsukimi', - 'comment_count': int, - 'description': 'YELLOW的mp3版!', - 'duration': 183, - 'subtitles': { - 'origin': [{ - 'ext': 'lrc', - }], - }, - 'thumbnail': r're:^https?://.+\.jpg', - 'timestamp': 1564836614, - 'upload_date': '20190803', - 'uploader': 'tsukimi-つきみぐー', - 'view_count': int, - }, - } - - def _real_extract(self, url): - au_id = self._match_id(url) - - play_data = self._call_api('url', au_id) - formats = [{ - 'url': play_data['cdns'][0], - 'filesize': int_or_none(play_data.get('size')), - }] - - song = self._call_api('song/info', au_id) - title = song['title'] - statistic = song.get('statistic') or {} - - subtitles = None - lyric = song.get('lyric') - if lyric: - subtitles = { - 'origin': [{ - 'url': lyric, - }] - } - - return { - 'id': au_id, - 'title': title, - 'formats': formats, - 'artist': song.get('author'), - 'comment_count': int_or_none(statistic.get('comment')), - 'description': song.get('intro'), - 'duration': int_or_none(song.get('duration')), - 'subtitles': subtitles, - 'thumbnail': song.get('cover'), - 'timestamp': int_or_none(song.get('passtime')), - 'uploader': song.get('uname'), - 'view_count': int_or_none(statistic.get('play')), - } - - -class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): - _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)' - _TEST = { - 'url': 'https://www.bilibili.com/audio/am10624', - 'info_dict': { - 'id': '10624', - 'title': '每日新曲推荐(每日11:00更新)', - 'description': '每天11:00更新,为你推送最新音乐', - }, - 'playlist_count': 19, - } - - def _real_extract(self, url): - am_id = self._match_id(url) - - songs = self._call_api( - 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data'] - - entries = [] - for song in songs: - sid = str_or_none(song.get('id')) - if not sid: - continue - entries.append(self.url_result( - 'https://www.bilibili.com/audio/au' + sid, - BilibiliAudioIE.ie_key(), sid)) - - if entries: - album_data = self._call_api('menu/info', am_id) or {} - album_title = album_data.get('title') - if album_title: - for entry in entries: - entry['album'] = album_title - return self.playlist_result( - entries, am_id, album_title, album_data.get('intro')) - - return self.playlist_result(entries, am_id) From 82ee7c5946a482046daca13c2ed1904b4e5579d3 Mon Sep 17 00:00:00 2001 From: tomyang001 <wyh.aaron@gmail.com> Date: Wed, 19 Feb 2020 18:47:07 -0500 Subject: [PATCH 2/4] Create pythonapp.yml --- .github/workflows/pythonapp.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/pythonapp.yml diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml new file mode 100644 index 000000000..73e1394b6 --- /dev/null +++ b/.github/workflows/pythonapp.yml @@ -0,0 +1,30 @@ +name: Python application + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pip install pytest + pytest From 4a0a254b41f36c19d0b228e4a47dd1fb067faed7 Mon Sep 17 00:00:00 2001 From: tomyang001 <wyh.aaron@gmail.com> Date: Wed, 19 Feb 2020 19:10:13 -0500 Subject: [PATCH 3/4] Delete pythonapp.yml --- .github/workflows/pythonapp.yml | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 .github/workflows/pythonapp.yml diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml deleted file mode 100644 index 73e1394b6..000000000 --- a/.github/workflows/pythonapp.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Python application - -on: [push] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pip install pytest - pytest From 653c200e35b99ef6c2322af98eca2e9b6b766b73 Mon Sep 17 00:00:00 2001 From: tomyang001 <wyh.aaron@gmail.com> Date: Wed, 19 Feb 2020 19:30:00 -0500 Subject: [PATCH 4/4] Update bilibili.py --- youtube_dl/extractor/bilibili.py | 113 +++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5eec78a7e..8e8492a21 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -15,6 +15,7 @@ from ..utils import ( float_or_none, parse_iso8601, smuggle_url, + str_or_none, strip_jsonp, unified_timestamp, unsmuggle_url, @@ -343,3 +344,115 @@ class BiliBiliBangumiIE(InfoExtractor): return self.playlist_result( entries, bangumi_id, season_info.get('bangumi_title'), season_info.get('evaluate')) + + +class BilibiliAudioBaseIE(InfoExtractor): + def _call_api(self, path, sid, query=None): + if not query: + query = {'sid': sid} + return self._download_json( + 'https://www.bilibili.com/audio/music-service-c/web/' + path, + sid, query=query)['data'] + + +class BilibiliAudioIE(BilibiliAudioBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)' + _TEST = { + 'url': 'https://www.bilibili.com/audio/au1003142', + 'md5': 'fec4987014ec94ef9e666d4d158ad03b', + 'info_dict': { + 'id': '1003142', + 'ext': 'm4a', + 'title': '【tsukimi】YELLOW / 神山羊', + 'artist': 'tsukimi', + 'comment_count': int, + 'description': 'YELLOW的mp3版!', + 'duration': 183, + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + 'thumbnail': r're:^https?://.+\.jpg', + 'timestamp': 1564836614, + 'upload_date': '20190803', + 'uploader': 'tsukimi-つきみぐー', + 'view_count': int, + }, + } + + def _real_extract(self, url): + au_id = self._match_id(url) + + play_data = self._call_api('url', au_id) + formats = [{ + 'url': play_data['cdns'][0], + 'filesize': int_or_none(play_data.get('size')), + }] + + song = self._call_api('song/info', au_id) + title = song['title'] + statistic = song.get('statistic') or {} + + subtitles = None + lyric = song.get('lyric') + if lyric: + subtitles = { + 'origin': [{ + 'url': lyric, + }] + } + + return { + 'id': au_id, + 'title': title, + 'formats': formats, + 'artist': song.get('author'), + 'comment_count': int_or_none(statistic.get('comment')), + 'description': song.get('intro'), + 'duration': int_or_none(song.get('duration')), + 'subtitles': subtitles, + 'thumbnail': song.get('cover'), + 'timestamp': int_or_none(song.get('passtime')), + 'uploader': song.get('uname'), + 'view_count': int_or_none(statistic.get('play')), + } + + +class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)' + _TEST = { + 'url': 'https://www.bilibili.com/audio/am10624', + 'info_dict': { + 'id': '10624', + 'title': '每日新曲推荐(每日11:00更新)', + 'description': '每天11:00更新,为你推送最新音乐', + }, + 'playlist_count': 19, + } + + def _real_extract(self, url): + am_id = self._match_id(url) + + songs = self._call_api( + 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data'] + + entries = [] + for song in songs: + sid = str_or_none(song.get('id')) + if not sid: + continue + entries.append(self.url_result( + 'https://www.bilibili.com/audio/au' + sid, + BilibiliAudioIE.ie_key(), sid)) + + if entries: + album_data = self._call_api('menu/info', am_id) or {} + album_title = album_data.get('title') + if album_title: + for entry in entries: + entry['album'] = album_title + return self.playlist_result( + entries, am_id, album_title, album_data.get('intro')) + + return self.playlist_result(entries, am_id)