# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor from ..utils import ( clean_html, js_to_json, ExtractorError, compat_parse_qs, compat_urllib_parse_urlparse, compat_urllib_parse, compat_urllib_request ) class RoosterteethShowIE(InfoExtractor): _VALID_URL = r'http://(?P(?:www\.)?(?:roosterteeth\.com|achievementhunter\.com|fun\.haus))/show/(?P[^/]+)(?:/season)?' _TESTS = [{ 'url': 'http://roosterteeth.com/show/screen-play', 'info_dict': { 'id': 'screen-play', 'description': 'A Rooster Teeth podcast focusing on all things Film and TV. Listen to our pop culture geeks chat about TV premieres and finales, blockbuster franchises, indie darlings, casting rumors and spotlight a film to discuss in their weekly "Movie Book Club" segment. So pop some popcorn, grab a good seat and enjoy the show.', 'title': 'Screen Play', }, 'playlist_count': 23 }, { 'url': 'http://roosterteeth.com/show/red-vs-blue#;season=.* 1$', 'info_dict': { 'id': 'red-vs-blue', 'description': 'In the distant future, two groups of soldiers battle for control of the least desirable piece of real estate in the known universe - a box canyon in the middle of nowhere.', 'title': 'Red vs. Blue', }, 'playlist_count': 24 }, { 'url': 'http://roosterteeth.com/show/red-vs-blue', 'info_dict': { 'id': 'red-vs-blue', 'description': 'In the distant future, two groups of soldiers battle for control of the least desirable piece of real estate in the known universe - a box canyon in the middle of nowhere.', 'title': 'Red vs. Blue', }, 'playlist_mincount': 380 }] def _real_extract(self, url): ep_filter = {} if '#;' in url: url, params = url.split('#;') ep_filter = compat_parse_qs(params) playlist_id = self._match_id(url) html = self._download_webpage(url, playlist_id) title = self._html_search_regex(r'
\s*

([^<]+)

\s*
', html, 'show title') description = self._html_search_regex(r'
((?:[^<]|<(?!/section>))+)
', html, 'show description') start_piece = "
" start = html.find(start_piece) if start == -1: raise ExtractorError("Can't find the episodes!") html = html[start + len(start_piece):].lstrip() sections = [] if html.startswith('