From 74b57769db73e07f12e065e7b3a6d85309e934a9 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 11 Jun 2018 17:29:54 +0200 Subject: [PATCH 1/4] Improve the roosterteeth extractor by using the API This commit is here for improving the roosterteeth extractor by using the API and fix #16694 --- youtube_dl/extractor/roosterteeth.py | 81 ++++++++++++---------------- 1 file changed, 35 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 857434540..924c2017b 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -1,18 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, +) from ..utils import ( ExtractorError, int_or_none, - strip_or_none, - unescapeHTML, + str_or_none, urlencode_postdata, ) - class RoosterTeethIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P[^/?#&]+)' _LOGIN_URL = 'https://roosterteeth.com/login' @@ -29,7 +28,6 @@ class RoosterTeethIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', - 'comment_count': int, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -50,7 +48,7 @@ class RoosterTeethIE(InfoExtractor): }] def _login(self): - username, password = self._get_login_info() + (username, password) = self._get_login_info() if username is None: return @@ -90,51 +88,42 @@ class RoosterTeethIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - episode = strip_or_none(unescapeHTML(self._search_regex( - (r'videoTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - r'<title>(?P<title>[^<]+)'), webpage, 'title', - default=None, group='title'))) - - title = strip_or_none(self._og_search_title( - webpage, default=None)) or episode - - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?Phttp.+?\.m3u8.*?)\1', - webpage, 'm3u8 url', default=None, group='url') - - if not m3u8_url: - if re.search(r']+class=["\']non-sponsor', webpage): - self.raise_login_required( - '%s is only available for FIRST members' % display_id) - - if re.search(r']+class=["\']golive-gate', webpage): - self.raise_login_required('%s is not available yet' % display_id) + try: + json_m3u8 = self._download_json( + 'https://svod-be.roosterteeth.com/api/v1/episodes/%s/videos' % display_id, + display_id, 'Downloading JSON m3u8') + json_metadata = self._download_json( + 'https://svod-be.roosterteeth.com/api/v1/episodes/%s/' % display_id, + display_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_login_required('This video is only available for FIRST memebers') + raise + try: + m3u8_url = json_m3u8['data'][0]['attributes']['url'] + except: raise ExtractorError('Unable to extract m3u8 URL') formats = self._extract_m3u8_formats( m3u8_url, display_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) - - description = strip_or_none(self._og_search_description(webpage)) - thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) - - series = self._search_regex( - (r'

More ([^<]+)

', r']+>See All ([^<]+) Videos<'), - webpage, 'series', fatal=False) - - comment_count = int_or_none(self._search_regex( - r'>Comments \((\d+)\)<', webpage, - 'comment count', fatal=False)) - - video_id = self._search_regex( - (r'containerId\s*=\s*["\']episode-(\d+)\1', - r' Date: Mon, 11 Jun 2018 17:34:56 +0200 Subject: [PATCH 2/4] Remove unnecessary brackets --- youtube_dl/extractor/roosterteeth.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 924c2017b..0c1e9149e 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -27,7 +27,6 @@ class RoosterTeethIE(InfoExtractor): 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', - 'episode': 'Million Dollars, But... The Game Announcement', }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -48,7 +47,7 @@ class RoosterTeethIE(InfoExtractor): }] def _login(self): - (username, password) = self._get_login_info() + username, password = self._get_login_info() if username is None: return From f2f3689841789c4cfb06af8a0070157fec01adb0 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 11 Jun 2018 19:19:56 +0200 Subject: [PATCH 3/4] Fix test --- youtube_dl/extractor/roosterteeth.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 0c1e9149e..bc32e8759 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -18,14 +18,13 @@ class RoosterTeethIE(InfoExtractor): _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', - 'md5': 'e2bd7764732d785ef797700a2489f212', 'info_dict': { - 'id': '26576', + 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', - 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', + 'title': 'Million Dollars, But... The Game Announcement', 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', - 'thumbnail': r're:^https?://.*\.png$', + 'thumbnail': r'^https?://.*\.png$', 'series': 'Million Dollars, But...', }, }, { @@ -113,11 +112,10 @@ class RoosterTeethIE(InfoExtractor): json_attributes = json_body['attributes'] display_title = json_attributes['display_title'] - - title = str_or_none(self._search_regex(r': ([\w]+)$', display_title, 'title')) episode = int_or_none(self._search_regex(r':E([\d]+)', display_title, 'episode', fatal=False)) - season = int_or_none(self._search_regex(r'^V([\d]+):E', display_title, 'season', fatal=False)) + season = int_or_none(self._search_regex(r'^[\w]([\d]+)', display_title, 'season', fatal=False)) + title = json_attributes.get('title') video_id = str(json_body.get('id')) thumbnail = json_body['included']['images'][0]['attributes']['large'] description = json_attributes.get('description') From 54209e7d23e09e91bef201436afa78ee49a17704 Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Jun 2018 11:18:13 +0200 Subject: [PATCH 4/4] Improve API extraction and code convention Add : - Thumbnails - Duration - Uploader Fix test --- youtube_dl/extractor/roosterteeth.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index bc32e8759..a26f2aa06 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -21,11 +21,10 @@ class RoosterTeethIE(InfoExtractor): 'info_dict': { 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', - 'ext': 'mp4', 'title': 'Million Dollars, But... The Game Announcement', 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', - 'thumbnail': r'^https?://.*\.png$', 'series': 'Million Dollars, But...', + 'duration': 145, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -108,27 +107,39 @@ class RoosterTeethIE(InfoExtractor): entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) - json_body = json_metadata['data'][0] - json_attributes = json_body['attributes'] + json_body = json_metadata.get('data')[0] + json_attributes = json_body.get('attributes') - display_title = json_attributes['display_title'] + display_title = json_attributes.get('display_title') episode = int_or_none(self._search_regex(r':E([\d]+)', display_title, 'episode', fatal=False)) season = int_or_none(self._search_regex(r'^[\w]([\d]+)', display_title, 'season', fatal=False)) title = json_attributes.get('title') video_id = str(json_body.get('id')) - thumbnail = json_body['included']['images'][0]['attributes']['large'] description = json_attributes.get('description') series = json_attributes.get('show_title') + uploader = json_attributes.get('channel_slug') + duration = json_attributes.get('length') + thumbnails = [] + thumbnails_attributes = json_body.get('included').get('images')[0].get('attributes') + if thumbnails_attributes: + for img_name in ('large', 'medium', 'small', 'thumb'): + thumbnails.append({ + 'url': thumbnails_attributes.get(img_name), + 'id': img_name, + }) + return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'duration': duration, + 'thumbnails': thumbnails, 'series': series, 'season': season, 'episode': episode, + 'uploader': uploader, 'formats': formats, }