- added vol.at extractor prototype

2025-02-03 01:43:31 +08:00 · 2018-02-22 02:05:42 +01:00 · 2018-02-22 02:05:42 +01:00 · 58b499c989
commit 58b499c989
parent 15e5d19a9b
1 changed files with 19 additions and 11 deletions
--- a/youtube_dl/extractor/volat.py
+++ b/youtube_dl/extractor/volat.py
@@ -6,28 +6,36 @@ from .common import InfoExtractor

 class VolAtIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vol\.at/[^?#]*?/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.vol.at/blue-man-group/5593454',
-        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+        'md5': '0e4b19b0467a3af136e63cd2fa6cbfde',
        'info_dict': {
            'id': '5593454',
            'ext': 'mp4',
            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
        }
-    }
+    },{
+        'url': 'http://www.vol.at/umbenennung-lustenauer-reichshofstadion-das-sagen-die-lustenauer/5678401',
+        'md5': '2e256451e94d661e0eca9af9f3349460',
+        'info_dict': {
+            'id': '5678401',
+            'ext': 'mp4',
+            'title': 'Umbenennung Lustenauer Reichshofstadion: Das sagen die Lustenauer!',
+        }
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        # TODO more code goes here, for example ...
        title =  self._og_search_title(webpage)
+
+        video_url_embedded = self._html_search_regex(r'iframe\s*class\s*="vodl-video__iframe"\s*src=\s*"([^"]+)"', webpage, 'videoInfo', fatal=True)
+        webpage_embedded = self._download_webpage(video_url_embedded, video_id)
+        video_url = self._search_regex(
+            r'(?s)file:\s*"([^"]+?(?=\.mp4)\.mp4)[^"]+"',
+            webpage_embedded, 'url')
        return {
            'id': video_id,
-            'title': title
-            # TODO more properties (see youtube_dl/extractor/common.py)
+            'title': title,
+            'url': video_url
        }