From 8b2e4e87e0d3b11d9d60d8111280cee7bcaa3455 Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 15:53:13 +0200 Subject: [PATCH 1/7] [roosterteeth] added --- youtube_dl/extractor/__init__.py | 4 + youtube_dl/extractor/roosterteeth.py | 335 +++++++++++++++++++++++++++ 2 files changed, 339 insertions(+) create mode 100644 youtube_dl/extractor/roosterteeth.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a73a1317e..becc51a3d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -498,6 +498,10 @@ from .restudy import RestudyIE from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE +from .roosterteeth import ( + RoosterteethIE, + RoosterteethShowIE +) from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py new file mode 100644 index 000000000..7d976579f --- /dev/null +++ b/youtube_dl/extractor/roosterteeth.py @@ -0,0 +1,335 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + js_to_json, + ExtractorError, + compat_urllib_parse_urlparse, + compat_urllib_parse, + compat_urllib_request +) + + +class RoosterteethShowIE(InfoExtractor): + _VALID_URL = r'http://(?P(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus))/show/(?P[^/]+)(?:/season)?' + _TESTS = [{ + 'url': 'http://roosterteeth.com/show/screen-play', + 'info_dict': { + 'id': 'screen-play', + 'description': 'A Rooster Teeth podcast focusing on all things Film and TV. Listen to our pop culture geeks chat about TV premieres and finales, blockbuster franchises, indie darlings, casting rumors and spotlight a film to discuss in their weekly "Movie Book Club" segment. So pop some popcorn, grab a good seat and enjoy the show.', + 'title': 'Screen Play', + }, + 'playlist_count': 23 + }, { + 'url': 'http://roosterteeth.com/show/red-vs-blue#;season=.* 1$', + 'info_dict': { + 'id': 'red-vs-blue', + 'description': 'In the distant future, two groups of soldiers battle for control of the least desirable piece of real estate in the known universe - a box canyon in the middle of nowhere.', + 'title': 'Red vs. Blue', + }, + 'playlist_count': 24 + }, { + 'url': 'http://roosterteeth.com/show/red-vs-blue', + 'info_dict': { + 'id': 'red-vs-blue', + 'description': 'In the distant future, two groups of soldiers battle for control of the least desirable piece of real estate in the known universe - a box canyon in the middle of nowhere.', + 'title': 'Red vs. Blue', + }, + + 'playlist_mincount': 380 + }] + + def _real_extract(self, url): + ep_filter = {} + + if '#;' in url: + url, params = url.split('#;') + ep_filter = compat_urllib_parse.parse_qs(params) + + playlist_id = self._match_id(url) + html = self._download_webpage(url, playlist_id) + + title = self._html_search_regex(r'
\s*

([^<]+)

\s*
', html, 'show title') + description = self._html_search_regex(r'
((?:[^<]|<(?!/section>))+)
', html, 'show description') + + start_piece = "
" + start = html.find(start_piece) + if start == -1: + raise ExtractorError("Can't find the episodes!") + + html = html[start + len(start_piece):].lstrip() + sections = [] + if html.startswith('