From 15e5d19a9be129d53066b8af0b9d73c67354381a Mon Sep 17 00:00:00 2001 From: carsten demming Date: Tue, 20 Feb 2018 23:51:15 +0100 Subject: [PATCH 1/3] - WIP - successfully extracted title --- youtube_dl/extractor/volat.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/volat.py b/youtube_dl/extractor/volat.py index 8afe6616c..76193f105 100644 --- a/youtube_dl/extractor/volat.py +++ b/youtube_dl/extractor/volat.py @@ -5,8 +5,7 @@ from .common import InfoExtractor class VolAtIE(InfoExtractor): - print("wtf") - _VALID_URL = r'https?://(?:www\.)?vol\.at/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?vol\.at/[^?#]*?/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.vol.at/blue-man-group/5593454', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', @@ -14,7 +13,6 @@ class VolAtIE(InfoExtractor): 'id': '5593454', 'ext': 'mp4', 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview', - 'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -24,17 +22,12 @@ class VolAtIE(InfoExtractor): } def _real_extract(self, url): - print("hello test") video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... - title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title') - + title = self._og_search_title(webpage) return { 'id': video_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + 'title': title # TODO more properties (see youtube_dl/extractor/common.py) } From 58b499c9896dc7108a65a9f1b249a187c91bda7d Mon Sep 17 00:00:00 2001 From: carsten demming Date: Thu, 22 Feb 2018 02:05:42 +0100 Subject: [PATCH 2/3] - added vol.at extractor prototype --- youtube_dl/extractor/volat.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/volat.py b/youtube_dl/extractor/volat.py index 76193f105..abd6c24cf 100644 --- a/youtube_dl/extractor/volat.py +++ b/youtube_dl/extractor/volat.py @@ -6,28 +6,36 @@ from .common import InfoExtractor class VolAtIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vol\.at/[^?#]*?/(?P<id>[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.vol.at/blue-man-group/5593454', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'md5': '0e4b19b0467a3af136e63cd2fa6cbfde', 'info_dict': { 'id': '5593454', 'ext': 'mp4', 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } - } + },{ + 'url': 'http://www.vol.at/umbenennung-lustenauer-reichshofstadion-das-sagen-die-lustenauer/5678401', + 'md5': '2e256451e94d661e0eca9af9f3349460', + 'info_dict': { + 'id': '5678401', + 'ext': 'mp4', + 'title': 'Umbenennung Lustenauer Reichshofstadion: Das sagen die Lustenauer!', + } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... 
title = self._og_search_title(webpage) + + video_url_embedded = self._html_search_regex(r'iframe\s*class\s*="vodl-video__iframe"\s*src=\s*"([^"]+)"', webpage, 'videoInfo', fatal=True) + webpage_embedded = self._download_webpage(video_url_embedded, video_id) + video_url = self._search_regex( + r'(?s)file:\s*"([^"]+?(?=\.mp4)\.mp4)[^"]+"', + webpage_embedded, 'url') return { 'id': video_id, - 'title': title - # TODO more properties (see youtube_dl/extractor/common.py) + 'title': title, + 'url': video_url } From 60cf3a0ee4f58e5d43ca5e47e3a1db09b411d4f0 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Thu, 22 Feb 2018 02:08:10 +0100 Subject: [PATCH 3/3] - flake 8 improvement --- youtube_dl/extractor/volat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/volat.py b/youtube_dl/extractor/volat.py index abd6c24cf..2e0146656 100644 --- a/youtube_dl/extractor/volat.py +++ b/youtube_dl/extractor/volat.py @@ -14,7 +14,7 @@ class VolAtIE(InfoExtractor): 'ext': 'mp4', 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview', } - },{ + }, { 'url': 'http://www.vol.at/umbenennung-lustenauer-reichshofstadion-das-sagen-die-lustenauer/5678401', 'md5': '2e256451e94d661e0eca9af9f3349460', 'info_dict': { @@ -27,7 +27,7 @@ class VolAtIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + title = self._og_search_title(webpage) video_url_embedded = self._html_search_regex(r'iframe\s*class\s*="vodl-video__iframe"\s*src=\s*"([^"]+)"', webpage, 'videoInfo', fatal=True) webpage_embedded = self._download_webpage(video_url_embedded, video_id)