From cd17a1c6d0e64fc5a76fe68991c5f0217044d2b5 Mon Sep 17 00:00:00 2001 From: Jan 'Yenda' Trmal Date: Fri, 10 Jan 2020 10:14:18 +0100 Subject: [PATCH 1/5] Fixes for NOVA TV --- youtube_dl/extractor/nova.py | 55 +++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 901f44b54..427f3e81f 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -40,9 +40,14 @@ class NovaEmbedIE(InfoExtractor): QUALITIES = ('lq', 'mq', 'hq', 'hd') quality_key = qualities(QUALITIES) + formats = [] for format_id, format_list in bitrates.items(): + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_list, video_id, ext='mp4', m3u8_id='hls', fatal=False)) + if not isinstance(format_list, list): continue for format_url in format_list: @@ -91,7 +96,7 @@ class NovaIE(InfoExtractor): _VALID_URL = r'https?://(?:[^.]+\.)?(?Ptv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P[^/]+?)(?:\.html|/|$)' _TESTS = [{ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', - 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', + #'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 'info_dict': { 'id': '1757139', 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', @@ -100,6 +105,21 @@ class NovaIE(InfoExtractor): 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 'thumbnail': r're:^https?://.*\.(?:jpg)', } + }, { + 'url': 'https://novaplus.nova.cz/porad/televizni-noviny/epizoda/41099-televizni-noviny-8-1-2020', + 'info_dict': { + 'id': 'LWVmgbBh2tR', + 'ext': 'mp4', + 'title': '2020-01-08 Televizní noviny', + #'description': 're:.*Sportovní noviny, Počasí Pořad je opatřen audiodeskripcí.*', + 'thumbnail': r're:https?://.*\.jpg(\?.*)?', + #'upload_date': '20200108' + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'add_ie': [NovaEmbedIE.ie_key()], }, { 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 'info_dict': { @@ -152,14 +172,31 @@ class NovaIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + + description = clean_html(self._og_search_description(webpage, default=None)) + if site == 'novaplus': + upload_date = unified_strdate(self._search_regex( + r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None)) + elif site == 'fanda': + upload_date = unified_strdate(self._search_regex( + r'(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None)) + else: + upload_date = None + # novaplus embed_id = self._search_regex( - r']+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', + r']+\bsrc=[\"\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', webpage, 'embed url', default=None) + if embed_id: - return self.url_result( + info = { + 'description': description, + 'upload_date': upload_date + } + info.update(self.url_result( 'https://media.cms.nova.cz/embed/%s' % embed_id, - ie=NovaEmbedIE.ie_key(), video_id=embed_id) + ie=NovaEmbedIE.ie_key(), video_id=embed_id)) + return info video_id = self._search_regex( [r"(?:media|video_id)\s*:\s*'(\d+)'", @@ -233,18 +270,8 @@ class NovaIE(InfoExtractor): self._sort_formats(formats) title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) - description = clean_html(self._og_search_description(webpage, default=None)) thumbnail = config.get('poster') - if site == 'novaplus': - upload_date = unified_strdate(self._search_regex( - r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None)) - elif site == 'fanda': - upload_date = unified_strdate(self._search_regex( - r'(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None)) - else: - upload_date = None - return { 'id': video_id, 'display_id': display_id, From a29fb83452031feef2f74f1625003b44f6a04f01 Mon Sep 17 00:00:00 2001 From: Jan 'Yenda' Trmal Date: Fri, 10 Jan 2020 10:21:40 +0100 Subject: [PATCH 2/5] flake8 fixes --- youtube_dl/extractor/nova.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 427f3e81f..7cd045a69 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -40,7 +40,6 @@ class NovaEmbedIE(InfoExtractor): QUALITIES = ('lq', 'mq', 'hq', 'hd') quality_key = qualities(QUALITIES) - formats = [] for format_id, format_list in bitrates.items(): @@ -96,7 +95,7 @@ class NovaIE(InfoExtractor): _VALID_URL = r'https?://(?:[^.]+\.)?(?Ptv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P[^/]+?)(?:\.html|/|$)' _TESTS = [{ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', - #'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', + 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 'info_dict': { 'id': '1757139', 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', @@ -111,9 +110,9 @@ class NovaIE(InfoExtractor): 'id': 'LWVmgbBh2tR', 'ext': 'mp4', 'title': '2020-01-08 Televizní noviny', - #'description': 're:.*Sportovní noviny, Počasí Pořad je opatřen audiodeskripcí.*', + # 'description': 're:.*Sportovní noviny, Počasí Pořad je opatřen audiodeskripcí.*', 'thumbnail': r're:https?://.*\.jpg(\?.*)?', - #'upload_date': '20200108' + # 'upload_date': '20200108' }, 'params': { # rtmp download @@ -172,7 +171,6 @@ class NovaIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - description = clean_html(self._og_search_description(webpage, default=None)) if site == 'novaplus': upload_date = unified_strdate(self._search_regex( @@ -187,7 +185,7 @@ class NovaIE(InfoExtractor): embed_id = self._search_regex( r']+\bsrc=[\"\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', webpage, 'embed url', default=None) - + if embed_id: info = { 'description': description, From 457efa7e8f11215dee5bdcf25baa21db9b73a4da Mon Sep 17 00:00:00 2001 From: Jan 'Yenda' Trmal Date: Sat, 11 Jan 2020 17:47:11 +0100 Subject: [PATCH 3/5] address review feedback --- youtube_dl/extractor/nova.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 7cd045a69..631f24632 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -110,9 +110,9 @@ class NovaIE(InfoExtractor): 'id': 'LWVmgbBh2tR', 'ext': 'mp4', 'title': '2020-01-08 Televizní noviny', - # 'description': 're:.*Sportovní noviny, Počasí Pořad je opatřen audiodeskripcí.*', + 'description': 're:.*Sportovní noviny, Počasí Pořad je opatřen audiodeskripcí.*', 'thumbnail': r're:https?://.*\.jpg(\?.*)?', - # 'upload_date': '20200108' + 'upload_date': '20200108' }, 'params': { # rtmp download @@ -183,17 +183,18 @@ class NovaIE(InfoExtractor): # novaplus embed_id = self._search_regex( - r']+\bsrc=[\"\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', + r']+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', webpage, 'embed url', default=None) if embed_id: info = { + '_type': 'url_transparent', + 'url' : 'https://media.cms.nova.cz/embed/%s' % embed_id, + 'ie_key' : NovaEmbedIE.ie_key(), + 'id' : embed_id, 'description': description, 'upload_date': upload_date } - info.update(self.url_result( - 'https://media.cms.nova.cz/embed/%s' % embed_id, - ie=NovaEmbedIE.ie_key(), video_id=embed_id)) return info video_id = self._search_regex( From d68e0c2e6dbbcdf506bbb50389de500c39a6d2a7 Mon Sep 17 00:00:00 2001 From: Jan 'Yenda' Trmal Date: Tue, 14 Jan 2020 20:16:21 +0100 Subject: [PATCH 4/5] validate url --- youtube_dl/extractor/nova.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 631f24632..9083be424 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -44,8 +44,11 @@ class NovaEmbedIE(InfoExtractor): formats = [] for format_id, format_list in bitrates.items(): if format_id == 'hls': + m3u8_url = url_or_none(format_list) + if not m3u8_url: + continue formats.extend(self._extract_m3u8_formats( - format_list, video_id, ext='mp4', m3u8_id='hls', fatal=False)) + m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) if not isinstance(format_list, list): continue From c36e603a88da22c17a451a4c44034b8d170f1cec Mon Sep 17 00:00:00 2001 From: Jan 'Yenda' Trmal Date: Sun, 26 Jan 2020 20:49:53 +0100 Subject: [PATCH 5/5] fixing flake8 error --- youtube_dl/extractor/nova.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 9083be424..fb05631fb 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -192,9 +192,9 @@ class NovaIE(InfoExtractor): if embed_id: info = { '_type': 'url_transparent', - 'url' : 'https://media.cms.nova.cz/embed/%s' % embed_id, - 'ie_key' : NovaEmbedIE.ie_key(), - 'id' : embed_id, + 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id, + 'ie_key': NovaEmbedIE.ie_key(), + 'id': embed_id, 'description': description, 'upload_date': upload_date }