From 9a0458a4943101bb9ff66b5a2b52b8be57fad498 Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Sun, 9 Jul 2017 11:22:09 +0000 Subject: [PATCH 1/6] [vlive:playlist] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vlive.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b83c3aba5..68e8c1696 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1203,6 +1203,7 @@ from .vk import ( VKWallPostIE, ) from .vlive import ( + VLivePlaylistIE, VLiveIE, VLiveChannelIE ) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index e58940607..3180b5f59 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -261,3 +261,50 @@ class VLiveChannelIE(InfoExtractor): return self.playlist_result( entries, channel_code, channel_name) + + +class VLivePlaylistIE(InfoExtractor): + IE_NAME = 'vlive:playlist' + _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.vlive.tv/video/30824/playlist/30826', + 'info_dict': { + 'id': '30826', + 'title': 'TWICE TV5 - TWICE in SWITZERLAND' + }, + 'playlist_mincount': 20 + }, { + 'url': 'http://www.vlive.tv/video/22867/playlist/22912', + 'info_dict': { + 'id': '22912', + 'title': 'Valentine Day Message from TWICE' + }, + 'playlist_mincount': 9 + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + video_id = self._search_regex( + self._VALID_URL, url, 'video id', group='video_id') + + webpage = self._download_webpage( + 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) + + playlist_name = self._html_search_regex( + r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<a[^>]+>([^<]+)', + webpage, 'playlist name', fatal=False) + + item_ids = self._search_regex( + r'\bvar\s+playlistVideoSeqs\s*=\s*\[([^\]]+)\]', 
webpage, 'playlist item ids', default='') + + entries = [] + for item_id in re.split(r'\s*,\s*', item_ids): + item_id = compat_str(item_id) + entries.append( + self.url_result( + 'http://www.vlive.tv/video/%s' % item_id, + ie=VLiveIE.ie_key(), video_id=item_id)) + + return self.playlist_result( + entries, playlist_id, playlist_name) From 7acd0b11179667c3232910aecc0e262548b49163 Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Sun, 9 Jul 2017 13:23:43 +0000 Subject: [PATCH 2/6] [vlive:playlist] Prevent VLiveIE matching playlist URLs --- youtube_dl/extractor/vlive.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 3180b5f59..36e09850a 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -49,6 +49,10 @@ class VLiveIE(InfoExtractor): }, }] + @classmethod + def suitable(cls, url): + return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url) + def _real_extract(self, url): video_id = self._match_id(url) From 4dcd1206064f891c1c561a3213490d2cb6eb18b6 Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Sun, 9 Jul 2017 18:20:51 +0000 Subject: [PATCH 3/6] [vlive:playlist] Address PR comments --- youtube_dl/extractor/extractors.py | 4 ++-- youtube_dl/extractor/vlive.py | 24 ++++++++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 68e8c1696..b2cc6ccd5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1203,9 +1203,9 @@ from .vk import ( VKWallPostIE, ) from .vlive import ( - VLivePlaylistIE, VLiveIE, - VLiveChannelIE + VLiveChannelIE, + VLivePlaylistIE ) from .vodlocker import VodlockerIE from .vodpl import VODPlIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 36e09850a..f918917cf 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ 
-288,8 +288,20 @@ class VLivePlaylistIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - video_id = self._search_regex( - self._VALID_URL, url, 'video id', group='video_id') + video_id_match = re.match(self._VALID_URL, url) + assert video_id_match + video_id = compat_str(video_id_match.group('video_id')) + + video_url_format = 'http://www.vlive.tv/video/%s' + if self._downloader.params.get('noplaylist'): + self.to_screen( + 'Downloading just video %s because of --no-playlist' % video_id) + return self.url_result( + video_url_format % video_id, + ie=VLiveIE.ie_key(), video_id=video_id) + else: + self.to_screen( + 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) webpage = self._download_webpage( 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) @@ -299,15 +311,15 @@ class VLivePlaylistIE(InfoExtractor): webpage, 'playlist name', fatal=False) item_ids = self._search_regex( - r'\bvar\s+playlistVideoSeqs\s*=\s*\[([^\]]+)\]', - webpage, 'playlist item ids', default='') + r'\bvar\s+playlistVideoSeqs\s*=\s*\[([^]]+)\]', + webpage, 'playlist item ids') entries = [] - for item_id in re.split(r'\s*,\s*', item_ids): + for item_id in self._parse_json('[%s]' % item_ids, playlist_id): item_id = compat_str(item_id) entries.append( self.url_result( - 'http://www.vlive.tv/video/%s' % item_id, + video_url_format % item_id, ie=VLiveIE.ie_key(), video_id=item_id)) return self.playlist_result( From 500b0a90642a4144ec76980d37269dc7b58bf07c Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Sun, 9 Jul 2017 18:30:54 +0000 Subject: [PATCH 4/6] [vlive:playlist] Simplify playlist item ids extraction --- youtube_dl/extractor/vlive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index f918917cf..2fec2d4a1 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -311,11 +311,11 @@ class 
VLivePlaylistIE(InfoExtractor): webpage, 'playlist name', fatal=False) item_ids = self._search_regex( - r'\bvar\s+playlistVideoSeqs\s*=\s*\[([^]]+)\]', + r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage, 'playlist item ids') entries = [] - for item_id in self._parse_json('[%s]' % item_ids, playlist_id): + for item_id in self._parse_json(item_ids, playlist_id): item_id = compat_str(item_id) entries.append( self.url_result( From 673bfc293d8a41c3c0e7193918fe8655b59c3bf0 Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Sun, 9 Jul 2017 18:51:12 +0000 Subject: [PATCH 5/6] [vlive:playlist] Uppercase for constants and remove unnecessary 'else' --- youtube_dl/extractor/vlive.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 2fec2d4a1..6b72b8c8b 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -292,16 +292,16 @@ class VLivePlaylistIE(InfoExtractor): assert video_id_match video_id = compat_str(video_id_match.group('video_id')) - video_url_format = 'http://www.vlive.tv/video/%s' + VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s' if self._downloader.params.get('noplaylist'): self.to_screen( 'Downloading just video %s because of --no-playlist' % video_id) return self.url_result( - video_url_format % video_id, + VIDEO_URL_TEMPLATE % video_id, ie=VLiveIE.ie_key(), video_id=video_id) - else: - self.to_screen( - 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + + self.to_screen( + 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) webpage = self._download_webpage( 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) @@ -319,7 +319,7 @@ class VLivePlaylistIE(InfoExtractor): item_id = compat_str(item_id) entries.append( self.url_result( - video_url_format % item_id, + VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(), video_id=item_id)) return 
self.playlist_result( From ea4317aae973766278841baf2b8f0057b3ffdc32 Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Sun, 9 Jul 2017 19:32:19 +0000 Subject: [PATCH 6/6] [vlive:playlist] Remove a test --- youtube_dl/extractor/vlive.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 6b72b8c8b..f3825db5c 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -270,21 +270,14 @@ class VLiveChannelIE(InfoExtractor): class VLivePlaylistIE(InfoExtractor): IE_NAME = 'vlive:playlist' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.vlive.tv/video/30824/playlist/30826', - 'info_dict': { - 'id': '30826', - 'title': 'TWICE TV5 - TWICE in SWITZERLAND' - }, - 'playlist_mincount': 20 - }, { + _TEST = { 'url': 'http://www.vlive.tv/video/22867/playlist/22912', 'info_dict': { 'id': '22912', 'title': 'Valentine Day Message from TWICE' }, 'playlist_mincount': 9 - }] + } def _real_extract(self, url): playlist_id = self._match_id(url)