From 86f237e6492ef5fc90c4450cf7f2cffbf902e5c7 Mon Sep 17 00:00:00 2001 From: Sebastian Leske Date: Wed, 25 Oct 2017 14:59:57 +0200 Subject: [PATCH 1/4] [wdr] Refactoring: extract _extract_jsonp_url --- youtube_dl/extractor/wdr.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 621de1e1e..2d36672b6 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -16,7 +16,7 @@ from ..utils import ( class WDRBaseIE(InfoExtractor): - def _extract_wdr_video(self, webpage, display_id): + def _extract_jsonp_url(self, webpage, display_id): # for wdr.de the data-extension is in a tag with the class "mediaLink" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" # for wdrmaus, in a tag with the class "videoButton" (previously a link @@ -35,8 +35,9 @@ class WDRBaseIE(InfoExtractor): media_link_obj = self._parse_json(json_metadata, display_id, transform_source=js_to_json) - jsonp_url = media_link_obj['mediaObj']['url'] + return media_link_obj['mediaObj']['url'] + def _extract_wdr_video(self, jsonp_url, display_id): metadata = self._download_json( jsonp_url, display_id, transform_source=strip_jsonp) @@ -206,7 +207,8 @@ class WDRIE(WDRBaseIE): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - info_dict = self._extract_wdr_video(webpage, display_id) + jsonp_url = self._extract_jsonp_url(webpage, display_id) + info_dict = self._extract_wdr_video(jsonp_url, display_id) if not info_dict: entries = [ From 6bbc101aed4535f7bf921b2b4fe16541d2e7758c Mon Sep 17 00:00:00 2001 From: Sebastian Leske Date: Wed, 25 Oct 2017 15:00:31 +0200 Subject: [PATCH 2/4] [wdr]: Add extractor for "Sendung mit dem Elefanten" The homepage of "Sendung mit dem Elefanten" (a children's show) at http://www.wdrmaus.de/elefantenseite/ offers various videos. 
All videos use the same URL, but with different fragments, such as http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015 . The new extractor WDRElefantIE supports these URLs; it downloads the site's internal table of contents (a JSON document) to look up the video. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wdr.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 18350810b..78de9c9b7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1275,6 +1275,7 @@ from .watchbox import WatchBoxIE from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, + WDRElefantIE, WDRMobileIE, ) from .webcaster import ( diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 2d36672b6..b2e73ce39 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -241,6 +241,59 @@ class WDRIE(WDRBaseIE): return info_dict +class WDRElefantIE(WDRBaseIE): + _VALID_URL = r'https?://(?:www\.)wdrmaus.de/elefantenseite/#(?P<display_id>.+)' + IE_NAME = 'wdr:elefant' + + _TESTS = [ + { + 'url': 'http://www.wdrmaus.de/elefantenseite/#lieder_geburtstagslied', + 'info_dict': { + 'title': 'Ich bin schon 1-2-3', + 'id': 'mdb-1008774', + 'ext': 'mp4', + 'age_limit': None, + 'upload_date': '20091119' + }, + }, + { + 'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015', + 'info_dict': { + 'title': 'Folge Oster-Spezial 2015', + 'id': 'mdb-1088195', + 'ext': 'mp4', + 'age_limit': None, + 'upload_date': '20150406' + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + # Table of Contents seems to always be at this address, so fetch it directly. + # The website fetches configurationJS.php5, which links to tableOfContentsJS.php5. 
+ table_of_contents = self._download_json( + 'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5', display_id) + if display_id not in table_of_contents: + raise ExtractorError( + 'No entry in site\'s table of contents for this URL. ' + 'Is the fragment part of the URL (after the #) correct?', + expected=True) + xml_metadata_path = table_of_contents[display_id]['xmlPath'] + xml_metadata = self._download_xml( + 'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path, display_id) + zmdb_url_element = xml_metadata.find('./movie/zmdb_url') + if zmdb_url_element is None: + raise ExtractorError( + 'The URL looks valid, but no video was found. Note that download only works ' + 'on pages showing a single video, not on video selection pages.', + expected=True) + info_dict = self._extract_wdr_video(zmdb_url_element.text, display_id) + return info_dict + + class WDRMobileIE(InfoExtractor): _VALID_URL = r'''(?x) https?://mobile-ondemand\.wdr\.de/ From 17b4c3b9a782e0f7f54b6fcee39d1b7d601b7334 Mon Sep 17 00:00:00 2001 From: Sebastian Leske Date: Thu, 4 Jan 2018 09:02:12 +0100 Subject: [PATCH 3/4] [wdr] skip_download for tests in WDRElefantIE --- youtube_dl/extractor/wdr.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index b2e73ce39..ce041642f 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -253,7 +253,10 @@ class WDRElefantIE(WDRBaseIE): 'id': 'mdb-1008774', 'ext': 'mp4', 'age_limit': None, - 'upload_date': '20091119' + 'upload_date': '20091119', + }, + 'params': { + 'skip_download' : True, }, }, { @@ -265,6 +268,9 @@ class WDRElefantIE(WDRBaseIE): 'age_limit': None, 'upload_date': '20150406' }, + 'params': { + 'skip_download' : True, + }, }, ] From ee95166630bd1e253fb8de75d9b32b2890f2f0e3 Mon Sep 17 00:00:00 2001 From: Sebastian Leske Date: Mon, 8 Jan 2018 22:28:44 +0100 Subject: [PATCH 4/4] [wdr] Remove one test for WDRElefantIE --- 
youtube_dl/extractor/wdr.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index ce041642f..4871ae92b 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -246,19 +246,6 @@ class WDRElefantIE(WDRBaseIE): IE_NAME = 'wdr:elefant' _TESTS = [ - { - 'url': 'http://www.wdrmaus.de/elefantenseite/#lieder_geburtstagslied', - 'info_dict': { - 'title': 'Ich bin schon 1-2-3', - 'id': 'mdb-1008774', - 'ext': 'mp4', - 'age_limit': None, - 'upload_date': '20091119', - }, - 'params': { - 'skip_download' : True, - }, - }, { 'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015', 'info_dict': {