Add preliminary support for ProsiebenSat1

2025-02-10 20:02:55 +08:00 · 2014-02-26 18:14:58 +01:00 · 2014-02-26 18:14:58 +01:00 · 7820f82418
commit 7820f82418
parent 2acea5c03d
2 changed files with 38 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -175,6 +175,7 @@ from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
+from .prosiebensat1 import ProsiebenSat1IE
 from .pyvideo import PyvideoIE
 from .radiofrance import RadioFranceIE
 from .rbmaradio import RBMARadioIE
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -0,0 +1,37 @@
+import re
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+class ProsiebenSat1IE(InfoExtractor):
+    """Map a ProSiebenSat.1 site video page to a MyVideo watch URL via a title search."""
+    # Group 'url' is host + path (no scheme, no 'www.') of a supported site's video(s) page.
+    _VALID_URL = r'http://(?:www\.)?(?P<url>(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.de/([^/]+/)*?videos?/.+)'
+    _TESTS = [{
+        'url': 'http://www.prosieben.de/tv/got-to-dance/video/11-first-look-team-recycled-clip',
+        'file': '9160151.flv',
+        'params': {'skip_download': True},
+    }]
+
+    def _real_extract(self, url):
+        """Search myvideo.de for '<title> - <artist>' and hand the found watch URL to 'MyVideo'."""
+        def unescape(s):
+            # Decode backslash escapes; 'backslashreplace' keeps literal non-ASCII text intact.
+            return s.encode('ascii', 'backslashreplace').decode('unicode_escape')
+
+        mobj = re.match(self._VALID_URL, url)
+        # Always fetch from the www. host, whether or not the input URL had it.
+        url = u'http://www.' + mobj.group('url')
+        webpage = self._download_webpage(url, None)
+        artist = unescape(self._search_regex(r'"show_artist":"([^"]+)', webpage, 'artist'))
+        title = unescape(self._search_regex(r'"clipList":\[{"title":"([^"]+)', webpage, 'title'))
+
+        # Encode with utf-8 for python2 compatibility
+        search_url = u'http://www.myvideo.de/search?q=%s&category=TV' % compat_urllib_parse.quote_plus((title + u' - ' + artist).encode('utf-8'))
+        search_webpage = self._download_webpage(search_url, None, note='Searching video')
+
+        # NOTE(review): takes the first /watch/ link matched; assumes it is the wanted clip.
+        result_url = u'http://www.myvideo.de' + self._search_regex(r'search-video" href="(/watch/[^"]+)', search_webpage, 'result_url')
+
+        return self.url_result(result_url, 'MyVideo')