From 9ae76e8e6d3819f36c4e3558a9c9aa3010b3214b Mon Sep 17 00:00:00 2001 From: mtilbury Date: Mon, 8 Apr 2019 01:07:26 -0400 Subject: [PATCH 1/5] [Twitch] Add support for collections as per #20414 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitch.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cc19af5c4..76ad692d6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1241,6 +1241,7 @@ from .twitch import ( TwitchChapterIE, TwitchVodIE, TwitchProfileIE, + TwitchCollectionsIE, TwitchAllVideosIE, TwitchUploadsIE, TwitchPastBroadcastsIE, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8c87f6dd3..d602cf997 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -435,6 +435,62 @@ class TwitchVideosBaseIE(TwitchPlaylistBaseIE): _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type=' +class TwitchCollectionsIE(TwitchBaseIE): + IE_NAME = 'twitch:collections' + _VALID_URL = r'%s/collections/(?P<id>[\w\d]+)' % TwitchBaseIE._VALID_URL_BASE + + _TESTS = [{ + 'url': 'https://www.twitch.tv/collections/myIbIFkZphQSbQ', + 'info_dict': { + 'id': 'myIbIFkZphQSbQ', + 'title': 'Fanboys - LIVE every Tuesday at 1 pm PT ' + }, + 'playlist_mincount': 28, + }, { + 'url': 'https://m.twitch.tv/collections/KFti877JixXiHQ', + 'info_dict': { + 'id': 'KFti877JixXiHQ', + 'title': 'CORSAIR DreamLeague Season 11 - The Stockholm Major - Studio' + }, + 'playlist_mincount': 57, + }] + + def _extract_collection(self, collection_id): + # Get collection metadata + info = self._call_api( + 'kraken/collections/%s' % collection_id, + collection_id, 'Downloading collection info JSON', + headers={ + 'Accept': 'application/vnd.twitchtv.v5+json', + } + ) + + # Get collection items (videos) + collection = self._call_api( + 'kraken/collections/%s/items' % 
collection_id, + collection_id, 'Downloading collection items JSON', + headers={ + 'Accept': 'application/vnd.twitchtv.v5+json', + } + ) + + # Collection JSON contains item_id values, need to turn these into URLs + urls = [self._make_url_result(item.get('item_id')) for item in collection.get('items')] + + return self.playlist_result( + urls, collection_id, info.get('title') + ) + + def _make_url_result(self, item_id): + video_id = 'v%s' % item_id + url = 'https://www.twitch.tv/videos/%s' % item_id + return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id) + + def _real_extract(self, url): + collection_id = self._match_id(url) + return self._extract_collection(collection_id) + + class TwitchAllVideosIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:all' _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE From b2633030ce2bce4f4dffa9f2bb6f64db62e6e12d Mon Sep 17 00:00:00 2001 From: mtilbury Date: Mon, 8 Apr 2019 01:58:03 -0400 Subject: [PATCH 2/5] Explicitly stop collection URLs from matching TwitchStream pattern to prevent overlap --- youtube_dl/extractor/twitch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d602cf997..4373337f0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -575,6 +575,7 @@ class TwitchStreamIE(TwitchBaseIE): (?:(?:www|go|m)\.)?twitch\.tv/| player\.twitch\.tv/\?.*?\bchannel= ) + (?!collections) (?P<id>[^/#?]+) ''' From fb56a16673ed7ff43b7947936d2ac501e5ae3407 Mon Sep 17 00:00:00 2001 From: Michael Tilbury Date: Sun, 14 Apr 2019 13:14:07 -0400 Subject: [PATCH 3/5] Modify valid URL regex to allow hyphens in collection id --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4373337f0..d8b98803e 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -437,7 +437,7 @@ class 
TwitchVideosBaseIE(TwitchPlaylistBaseIE): class TwitchCollectionsIE(TwitchBaseIE): IE_NAME = 'twitch:collections' - _VALID_URL = r'%s/collections/(?P<id>[\w\d]+)' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'%s/collections/(?P<id>[\w\d\-]+)' % TwitchBaseIE._VALID_URL_BASE _TESTS = [{ 'url': 'https://www.twitch.tv/collections/myIbIFkZphQSbQ', From f9c9a53a0c44c95625f131ca9d3fcba245511e20 Mon Sep 17 00:00:00 2001 From: Michael Tilbury Date: Sun, 14 Apr 2019 13:19:02 -0400 Subject: [PATCH 4/5] Make lookahead in TwitchStream regex more specific --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d8b98803e..61d9c38c9 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -575,7 +575,7 @@ class TwitchStreamIE(TwitchBaseIE): (?:(?:www|go|m)\.)?twitch\.tv/| player\.twitch\.tv/\?.*?\bchannel= ) - (?!collections) + (?!collections/[\w\d\-]+) (?P<id>[^/#?]+) ''' From 97483c2ff1ea80ed39ca4bb7c604aa137928a87c Mon Sep 17 00:00:00 2001 From: Michael Tilbury Date: Sun, 14 Apr 2019 13:50:10 -0400 Subject: [PATCH 5/5] Add test for collection url --- youtube_dl/extractor/twitch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 61d9c38c9..b60257508 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -453,6 +453,9 @@ class TwitchCollectionsIE(TwitchBaseIE): 'title': 'CORSAIR DreamLeague Season 11 - The Stockholm Major - Studio' }, 'playlist_mincount': 57, + }, { + 'url': 'https://www.twitch.tv/collections/HgTD8zFrghUb-Q', + 'only_matching': True, }] def _extract_collection(self, collection_id):