1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 02:27:23 +08:00

[roosterteeth] Updated extractor

This commit is contained in:
ngld 2015-10-02 00:46:33 +02:00
parent 1b36d0bb5e
commit 00fe5a46ca

View File

@@ -110,7 +110,7 @@ class RoosterteethShowIE(InfoExtractor):
class RoosterteethIE(InfoExtractor): class RoosterteethIE(InfoExtractor):
_VALID_URL = r'http://(?P<domain>(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus))/episode/(?P<id>[^/]+)' _VALID_URL = r'https?://(?P<domain>(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus))/episode/(?P<id>[^/]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://achievementhunter.com/episode/rage-quit-season-1-episode-199', 'url': 'http://achievementhunter.com/episode/rage-quit-season-1-episode-199',
@@ -174,29 +174,27 @@ class RoosterteethIE(InfoExtractor):
else: else:
raise ExtractorError('This is a sponsor-only video and although I tried to login, it did not work.') raise ExtractorError('This is a sponsor-only video and although I tried to login, it did not work.')
js = self._html_search_regex(r'<script src="https?://(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus)/scripts/lib/(?:jwplayer|youtube)\.min\.js"></script>\s*<script>\s*([^<]+)\s*</script>', html, 'video info') p = re.search(r'<script src="https?://(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus)/scripts/lib/(?P<player>jwplayer|youtube)\.(?:min\.)?js"></script>\s*<script>\s*(?P<script>[^<]+)\s*</script>', html)
info = re.search(r'RT\.(?P<player>youtube|jwplayer)\.player\((?P<json>\{(?:[^}]|\}(?!\);))+\})\);', js) if not p:
if not info: raise ExtractorError("Can't parse the video metadata! (%s)" % video_id)
raise ExtractorError("Can't parse the video metadata! (%s)" % js)
player = info.group('player') player = p.group('player')
meta = self._parse_json(js_to_json(info.group('json')), video_id)
if player == 'jwplayer': if player == 'jwplayer':
# Make sure that all values are there. video_image = self._search_regex(r"var videoImage = '([^']+)';", p.group('script'), 'video image')
for attr in ('containerId', 'videoImage', 'videoTitle', 'manifest'):
if attr not in meta:
raise ExtractorError('Unexpected video info! Attribute %s is missing.' % attr)
video_image = meta['videoImage']
if video_image.startswith('//'): if video_image.startswith('//'):
video_image = 'http:' + video_image video_image = 'http:' + video_image
manifest = self._search_regex(r"RT\.jwplayer\.player\([^\{]+\{\s*file: '([^']+)',", p.group('script'), 'manifest')
res = { res = {
'id': video_id, 'id': video_id,
'formats': self._extract_m3u8_formats(meta['manifest'], video_id, ext='mp4', entry_protocol='m3u8_native'), 'formats': self._extract_m3u8_formats(manifest, video_id, ext='mp4'),
'thumbnail': video_image 'thumbnail': video_image
} }
elif player == 'youtube': elif player == 'youtube':
info = self._html_search_regex(r'RT\.(?:youtube|jwplayer)\.player\((\{(?:[^}]|\}(?!\);))+\})\);', p.group('script'), 'video metadata')
meta = self._parse_json(js_to_json(info), video_id)
if 'youtubeKey' not in meta: if 'youtubeKey' not in meta:
raise ExtractorError('Invalid metadata for youtube video!') raise ExtractorError('Invalid metadata for youtube video!')