[^<]+)</p>\s*</a>') for sec_title, part in reversed(sections): episodes = part.split('</li>') for ep_part in episodes: if ep_part.strip() == '': continue ep = EP_RE.search(ep_part) if not ep: raise ExtractorError("Failed to parse an episode of season %s! (%s, %s)" % (sec_title or '0', playlist_id, ep_part)) url = clean_html(ep.group('url')) if sec_title: # Pass the season title to the video extractor. url += '#;' + compat_urllib_parse.urlencode({'season': sec_title}) res = self.url_result(url, 'Roosterteeth') res['season'] = sec_title else: res = self.url_result(url, 'Roosterteeth') if self._match_filter(res, ep_filter): results.append(res) if len(sections) == 1 and sections[0][0] is None: # If the page didn't contain sections, then the episodes are in reverse order. results = list(reversed(results)) return self.playlist_result(results, playlist_id, title, description) def _match_filter(self, item, filter_rules): for k, v in filter_rules.items(): if isinstance(v, list) and len(v) > 1: # A list of acceptable values if item.get(k) not in v: return False else: if not re.match(v[0], item.get(k)): return False return True class RoosterteethIE(InfoExtractor): _VALID_URL = r'http://(?P<domain>(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus))/episode/(?P<id>[^/]+)' _TESTS = [ { 'params': { # Without this parameter ytdl downloads the whole file. 'hls_prefer_native': True }, 'url': 'http://achievementhunter.com/episode/rage-quit-season-1-episode-199', 'md5': '828fe30ccdddf5d85e444e33686d531a', 'info_dict': { 'id': 'rage-quit-season-1-episode-199', 'ext': 'mp4', 'title': 'Rage Quit - No Time to Explain', 'description': 'There\'s no time to explain this video.', 'thumbnail': r're:^http://s3\.amazonaws\.com/cdn\.roosterteeth\.com/uploads/images/[a-f0-9-]+/md/[a-z0-9-]+\.jpeg$', 'protocol': 'm3u8', 'url': r're:^http://[a-zA-Z0-9.]+\.taucdn\.net/[0-9a-zA-Z]+/video/uploads/videos/[0-9a-f-]+/[0-9A-Z]+\.m3u8$', } }, { 'url': 'http://roosterteeth.com/episode/red-vs-blue-season-1-episode-1', 'md5': '80277833f3ed946b553d13cf8e27443d', 'info_dict': { 'id': 'red-vs-blue-season-1-episode-1', 'ext': 'mp4', 'title': 'Why Are We Here? - Episode 1 - Red vs. Blue Season 1', 'thumbnail': r're:^https://i\.ytimg\.com/vi/[0-9a-zA-Z]+/maxresdefault\.jpg$', 'url': r're:^https://[0-9a-z-]+\.googlevideo\.com/videoplayback', 'upload_date': '20150306', 'uploader_id': 'UCII0hP2Ycmhh5j8lS4cexBQ', 'uploader': 'Red vs. Blue', 'description': 'The first episode of Red vs. Blue introduces the main characters, and poses the all-important question, why are we here?' } } ] _NETRC_MACHINE = 'roosterteeth' _authed = None _sponsor = None def _real_initialize(self): self._authed = {} def _real_extract(self, url): if '#;' in url: url, params = url.split('#;') params = compat_parse_qs(params) else: params = {} video_id = self._match_id(url) html = self._download_webpage(url, video_id) if html.find('Unfortunately, this is sponsor-only.') > -1: domain = compat_urllib_parse_urlparse(url).netloc release = re.search(r'<p>[^<]+ Releases ([0-9]+ [a-zA-Z]+) from now</p>', html) if release: release = ' The video will be public in %s.' % release.group(1) else: release = '' if not self._login(domain): raise ExtractorError("This video is sponsor-only. You didn't provide your credentials or the login failed.%s" % release, expected=True) # Try again. html = self._download_webpage(url, video_id) if html.find('Unfortunately, this is sponsor-only.') > -1: if not self._is_sponsor(domain): raise ExtractorError('This video is sponsor-only but you are not a sponsor.%s' % release, expected=True) else: raise ExtractorError('This is a sponsor-only video and although I tried to login, it did not work.') js = self._html_search_regex(r'<script src="https?://roosterteeth\.com/scripts/lib/(?:jwplayer|youtube)\.min\.js"></script>\s*<script>\s*([^<]+)\s*</script>', html, 'video info') info = re.search(r'RT\.(?P<player>youtube|jwplayer)\.player$(?P<json>\{(?:[^}]|\}(?!$;))+\})\);', js) if not info: raise ExtractorError("Can't parse the video metadata! (%s)" % js) player = info.group('player') meta = self._parse_json(js_to_json(info.group('json')), video_id) if player == 'jwplayer': # Make sure that all values are there. for attr in ('containerId', 'videoImage', 'videoTitle', 'manifest'): if attr not in meta: raise ExtractorError('Unexpected video info! Attribute %s is missing.' % attr) video_image = meta['videoImage'] if video_image.startswith('//'): video_image = 'http:' + video_image res = { 'id': video_id, 'title': meta['videoTitle'].strip(), 'formats': self._extract_m3u8_formats(meta['manifest'], video_id, ext='mp4'), 'thumbnail': video_image } elif player == 'youtube': if 'youtubeKey' not in meta: raise ExtractorError('Invalid metadata for youtube video!') res = self.url_result('https://youtube.com/watch?v=' + meta['youtubeKey']) res['_type'] = 'url_transparent' res['id'] = video_id else: raise ExtractorError('Unknown player type %s!' % player) if 'season' in params: res['season'] = params['season'][0] desc = self._og_search_description(html) if desc: res['description'] = desc.strip() return res def _login(self, domain='roosterteeth.com'): """ Attempt to log in to RoosterTeeth (or Achievement Hunter). NOTE: RT is planning to implement SSO which will probably change how this works. """ if domain in self._authed: return self._authed[domain] (username, password) = self._get_login_info() # No authentication to be performed if username is None: return False LOGIN_URL = 'http://%s/login' % domain login_page, hdl = self._download_webpage_handle( LOGIN_URL, None, note='Downloading login page', errnote='unable to fetch login page', fatal=False) if login_page is False: return False if hdl.geturl() != LOGIN_URL: # We were redirected which means that we're already logged in. self._authed[domain] = True return True token = self._search_regex(r'(?s)<input.+?name="_token".+?value="(.+?)"', login_page, 'Login token') # Log in login_form_strs = { '_token': token, 'username': username, 'password': password } # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode # chokes on unicode login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') req = compat_urllib_request.Request(LOGIN_URL, login_data, {'Content-Type': 'application/x-www-form-urlencoded'}) login_results = self._download_webpage( req, None, note='Logging in', errnote='unable to log in', fatal=False) if login_results is False: return False if login_results.find('Error in exception handler.') > -1 or login_results.find('Authentication failed. Please check and try again, or reset your password') > -1: self.report_warning('unable to log in: bad username or password') self._authed[domain] = False return False self._authed[domain] = True return True def _is_sponsor(self, domain='roosterteeth.com'): if self._sponsor is None: username, _ = self._get_login_info() profile_page = 'http://%s/user/%s' % (domain, compat_urllib_parse.quote(username)) html = self._download_webpage( profile_page, None, note='Checking user profile...', errnote='unable to access user profile', fatal=False) if not html: return False user_info = self._search_regex( r'<div class="sidebar-profile-header">\s*<p[^>]+>\s*<a href="%s">[^<]+</a>\s*<span>((?:[^<]|<(?!/span>))+)</span>' % (profile_page), html, 'user status', fatal=False) if not user_info: return False self._sponsor = '<i class="icon ion-star"></i>' in user_info return self.

" start = html.find(start_piece) if start == -1: raise ExtractorError("Can't find the episodes!") html = html[start + len(start_piece):].lstrip() sections = [] if html.startswith('

") end = section.find("

(?:[^<]|<(?!p class="name"))+

(?P[^<]+)</p>\s*</a>') for sec_title, part in reversed(sections): episodes = part.split('</li>') for ep_part in episodes: if ep_part.strip() == '': continue ep = EP_RE.search(ep_part) if not ep: raise ExtractorError("Failed to parse an episode of season %s! (%s, %s)" % (sec_title or '0', playlist_id, ep_part)) url = clean_html(ep.group('url')) if sec_title: # Pass the season title to the video extractor. url += '#;' + compat_urllib_parse.urlencode({'season': sec_title}) res = self.url_result(url, 'Roosterteeth') res['season'] = sec_title else: res = self.url_result(url, 'Roosterteeth') if self._match_filter(res, ep_filter): results.append(res) if len(sections) == 1 and sections[0][0] is None: # If the page didn't contain sections, then the episodes are in reverse order. results = list(reversed(results)) return self.playlist_result(results, playlist_id, title, description) def _match_filter(self, item, filter_rules): for k, v in filter_rules.items(): if isinstance(v, list) and len(v) > 1: # A list of acceptable values if item.get(k) not in v: return False else: if not re.match(v[0], item.get(k)): return False return True class RoosterteethIE(InfoExtractor): _VALID_URL = r'http://(?P<domain>(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus))/episode/(?P<id>[^/]+)' _TESTS = [ { 'params': { # Without this parameter ytdl downloads the whole file. 'hls_prefer_native': True }, 'url': 'http://achievementhunter.com/episode/rage-quit-season-1-episode-199', 'md5': '828fe30ccdddf5d85e444e33686d531a', 'info_dict': { 'id': 'rage-quit-season-1-episode-199', 'ext': 'mp4', 'title': 'Rage Quit - No Time to Explain', 'description': 'There\'s no time to explain this video.', 'thumbnail': r're:^http://s3\.amazonaws\.com/cdn\.roosterteeth\.com/uploads/images/[a-f0-9-]+/md/[a-z0-9-]+\.jpeg$', 'protocol': 'm3u8', 'url': r're:^http://[a-zA-Z0-9.]+\.taucdn\.net/[0-9a-zA-Z]+/video/uploads/videos/[0-9a-f-]+/[0-9A-Z]+\.m3u8$', } }, { 'url': 'http://roosterteeth.com/episode/red-vs-blue-season-1-episode-1', 'md5': '80277833f3ed946b553d13cf8e27443d', 'info_dict': { 'id': 'red-vs-blue-season-1-episode-1', 'ext': 'mp4', 'title': 'Why Are We Here? - Episode 1 - Red vs. Blue Season 1', 'thumbnail': r're:^https://i\.ytimg\.com/vi/[0-9a-zA-Z]+/maxresdefault\.jpg$', 'url': r're:^https://[0-9a-z-]+\.googlevideo\.com/videoplayback', 'upload_date': '20150306', 'uploader_id': 'UCII0hP2Ycmhh5j8lS4cexBQ', 'uploader': 'Red vs. Blue', 'description': 'The first episode of Red vs. Blue introduces the main characters, and poses the all-important question, why are we here?' } } ] _NETRC_MACHINE = 'roosterteeth' _authed = None _sponsor = None def _real_initialize(self): self._authed = {} def _real_extract(self, url): if '#;' in url: url, params = url.split('#;') params = compat_parse_qs(params) else: params = {} video_id = self._match_id(url) html = self._download_webpage(url, video_id) if html.find('Unfortunately, this is sponsor-only.') > -1: domain = compat_urllib_parse_urlparse(url).netloc release = re.search(r'<p>[^<]+ Releases ([0-9]+ [a-zA-Z]+) from now</p>', html) if release: release = ' The video will be public in %s.' % release.group(1) else: release = '' if not self._login(domain): raise ExtractorError("This video is sponsor-only. You didn't provide your credentials or the login failed.%s" % release, expected=True) # Try again. html = self._download_webpage(url, video_id) if html.find('Unfortunately, this is sponsor-only.') > -1: if not self._is_sponsor(domain): raise ExtractorError('This video is sponsor-only but you are not a sponsor.%s' % release, expected=True) else: raise ExtractorError('This is a sponsor-only video and although I tried to login, it did not work.') js = self._html_search_regex(r'<script src="https?://roosterteeth\.com/scripts/lib/(?:jwplayer|youtube)\.min\.js"></script>\s*<script>\s*([^<]+)\s*</script>', html, 'video info') info = re.search(r'RT\.(?P<player>youtube|jwplayer)\.player$(?P<json>\{(?:[^}]|\}(?!$;))+\})\);', js) if not info: raise ExtractorError("Can't parse the video metadata! (%s)" % js) player = info.group('player') meta = self._parse_json(js_to_json(info.group('json')), video_id) if player == 'jwplayer': # Make sure that all values are there. for attr in ('containerId', 'videoImage', 'videoTitle', 'manifest'): if attr not in meta: raise ExtractorError('Unexpected video info! Attribute %s is missing.' % attr) video_image = meta['videoImage'] if video_image.startswith('//'): video_image = 'http:' + video_image res = { 'id': video_id, 'title': meta['videoTitle'].strip(), 'formats': self._extract_m3u8_formats(meta['manifest'], video_id, ext='mp4'), 'thumbnail': video_image } elif player == 'youtube': if 'youtubeKey' not in meta: raise ExtractorError('Invalid metadata for youtube video!') res = self.url_result('https://youtube.com/watch?v=' + meta['youtubeKey']) res['_type'] = 'url_transparent' res['id'] = video_id else: raise ExtractorError('Unknown player type %s!' % player) if 'season' in params: res['season'] = params['season'][0] desc = self._og_search_description(html) if desc: res['description'] = desc.strip() return res def _login(self, domain='roosterteeth.com'): """ Attempt to log in to RoosterTeeth (or Achievement Hunter). NOTE: RT is planning to implement SSO which will probably change how this works. """ if domain in self._authed: return self._authed[domain] (username, password) = self._get_login_info() # No authentication to be performed if username is None: return False LOGIN_URL = 'http://%s/login' % domain login_page, hdl = self._download_webpage_handle( LOGIN_URL, None, note='Downloading login page', errnote='unable to fetch login page', fatal=False) if login_page is False: return False if hdl.geturl() != LOGIN_URL: # We were redirected which means that we're already logged in. self._authed[domain] = True return True token = self._search_regex(r'(?s)<input.+?name="_token".+?value="(.+?)"', login_page, 'Login token') # Log in login_form_strs = { '_token': token, 'username': username, 'password': password } # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode # chokes on unicode login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') req = compat_urllib_request.Request(LOGIN_URL, login_data, {'Content-Type': 'application/x-www-form-urlencoded'}) login_results = self._download_webpage( req, None, note='Logging in', errnote='unable to log in', fatal=False) if login_results is False: return False if login_results.find('Error in exception handler.') > -1 or login_results.find('Authentication failed. Please check and try again, or reset your password') > -1: self.report_warning('unable to log in: bad username or password') self._authed[domain] = False return False self._authed[domain] = True return True def _is_sponsor(self, domain='roosterteeth.com'): if self._sponsor is None: username, _ = self._get_login_info() profile_page = 'http://%s/user/%s' % (domain, compat_urllib_parse.quote(username)) html = self._download_webpage( profile_page, None, note='Checking user profile...', errnote='unable to access user profile', fatal=False) if not html: return False user_info = self._search_regex( r'<div class="sidebar-profile-header">\s*<p[^>]+>\s*<a href="%s">[^<]+</a>\s*<span>((?:[^<]|<(?!/span>))+)</span>' % (profile_page), html, 'user status', fatal=False) if not user_info: return False self._sponsor = '<i class="icon ion-star"></i>' in user_info return self._sponsor