# youtube_dl/extractor/prosiebensat1.py
#
# The extractor is registered in youtube_dl/extractor/__init__.py with:
#     from .prosiebensat1 import ProsiebenSat1IE
import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
)


class ProsiebenSat1IE(InfoExtractor):
    """Information Extractor for ProsiebenSat1.

    The video page is only used to read the show artist and the title of
    the first clip.  Those two values are turned into a search query for
    the TV category of myvideo.de, and the first search hit is handed over
    to the ``MyVideo`` extractor through ``url_result``.
    """

    # The named group ``url`` captures everything after the optional
    # ``www.`` prefix; ``_real_extract`` reads it back with
    # ``mobj.group('url')``.
    _VALID_URL = (
        r'http://(?:www\.)?'
        r'(?P<url>'
        r'(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)'
        r'\.de/([^/]+/)*?videos?/.+)'
    )
    _TESTS = [
        {
            'url': 'http://www.prosieben.de/tv/got-to-dance/video/11-first-look-team-recycled-clip',
            'file': '9160151.flv',
            'params': {
                'skip_download': True,
            }
        }
    ]

    @staticmethod
    def _unescape(text):
        """Decode backslash escape sequences (e.g. ``\\u00e4``) in *text*.

        ``backslashreplace`` first turns every raw non-ASCII character into
        an escape sequence of its own, so the ``unicode_escape`` decoding
        restores it instead of dropping it (umlauts are common in the
        titles used for the search query).
        """
        return text.encode('ascii', 'backslashreplace').decode('unicode_escape')

    def _real_extract(self, url):
        """Resolve a ProSiebenSat.1 video page to a myvideo.de watch URL.

        Returns the ``url_result`` for the first myvideo.de search hit,
        to be processed by the ``MyVideo`` extractor.
        """
        mobj = re.match(self._VALID_URL, url)

        # Normalise to the ``www.`` host before downloading the page.
        url = u'http://www.' + mobj.group('url')
        webpage = self._download_webpage(url, None)

        # Both values are read from JSON-looking data embedded in the page;
        # the last argument is the field name used in error messages.
        artist = self._unescape(self._search_regex(
            r'"show_artist":"([^"]+)', webpage, 'artist'))
        title = self._unescape(self._search_regex(
            r'"clipList":\[{"title":"([^"]+)', webpage, 'title'))

        # Encode with utf-8 for python2 compatibility
        query = (title + u' - ' + artist).encode('utf-8')
        search_url = (
            u'http://www.myvideo.de/search?q=%s&category=TV'
            % compat_urllib_parse.quote_plus(query))
        search_webpage = self._download_webpage(
            search_url, None, note='Searching video')

        # The first search hit is taken as the matching clip.
        result_url = u'http://www.myvideo.de' + self._search_regex(
            r'search-video" href="(/watch/[^"]+)', search_webpage, 'result_url')

        return self.url_result(result_url, 'MyVideo')