From 095f9df2ad3b6e61185872795d21826ca16d5bf2 Mon Sep 17 00:00:00 2001 From: thePanz Date: Fri, 25 Oct 2019 15:22:14 +0200 Subject: [PATCH 1/3] RaiPlay: Handle multiple arrays of subtitles --- youtube_dl/extractor/rai.py | 49 ++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 207a6c247..16a1e6612 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -120,6 +120,19 @@ class RaiBaseIE(InfoExtractor): }) return subtitles + @staticmethod + def _extract_subtitles_from_list(subtitle_array): + subtitles = {} + if isinstance(subtitle_array, list): + for item in subtitle_array: + lang = item.get('language') + url = item.get('url') + if isinstance(url, compat_str) and '' != url and isinstance(lang, compat_str) and '' != lang: + subtitles[lang.lower()] = [{ + 'ext': url[-3:], + 'url': url, + }] + return subtitles class RaiPlayIE(RaiBaseIE): _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/.+?-(?P%s)\.html)' % RaiBaseIE._UUID_RE @@ -142,21 +155,25 @@ class RaiPlayIE(RaiBaseIE): 'season': '2016', }, }, { - 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'url': 'https://www.raiplay.it/video/2019/10/Report-del-21102019-La-fabbrica-della-paura-825ce3a7-8573-46c8-80d2-cde1b519fd01.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { - 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', - 'ext': 'mp4', - 'title': 'Report del 07/04/2014', - 'alt_title': 'S2013/14 - Puntata del 07/04/2014', + "id": "825ce3a7-8573-46c8-80d2-cde1b519fd01", + "title": "Report - La fabbrica della paura", + "alt_title": "St 2019/20 - La fabbrica della paura - 21/10/2019 ", 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Rai 5', - 'creator': 'Rai 5', - 'duration': 6160, - 'series': 'Report', - 'season_number': 5, - 'season': '2013/14', + "ext": "mp4", + "series": "Report", + "season_number": 7, + "season": "2019/20", + "subtitles": { + "it": [ + { + "ext": "srt", + "url": "http://creativemedia4-rai-it.akamaized.net/infocdn/raitre/report/Report_EP_Puntate/11217587.srt" + } + ] + }, }, 'params': { 'skip_download': True, @@ -191,8 +208,12 @@ class RaiPlayIE(RaiBaseIE): timestamp = unified_timestamp(try_get( media, lambda x: x['availabilities'][0]['start'], compat_str)) - subtitles = self._extract_subtitles(url, video.get('subtitles')) - + subtitles = {} + if '' != video.get('subtitles'): + subtitles = self._extract_subtitles(url, video.get('subtitles')) + else: + if video.get('subtitlesArray'): + subtitles = self._extract_subtitles_from_list(video.get('subtitlesArray')) info = { 'id': video_id, 'title': self._live_title(title) if relinker_info.get( From 8815646ce36565ef3a9ed1aa7bfe2b7f1d2e995f Mon Sep 17 00:00:00 2001 From: thePanz Date: Fri, 8 Nov 2019 12:15:45 +0100 Subject: [PATCH 2/3] Fix JSON fetching and subtitles validation --- youtube_dl/extractor/rai.py | 45 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 16a1e6612..15f0f22d3 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -15,12 +15,14 @@ from ..utils import ( GeoRestrictedError, int_or_none, parse_duration, + str_or_none, strip_or_none, try_get, unescapeHTML, unified_strdate, unified_timestamp, update_url_query, + url_or_none, urljoin, xpath_text, ) @@ -102,8 +104,7 @@ class RaiBaseIE(InfoExtractor): }.items() if v is not None) @staticmethod - def _extract_subtitles(url, subtitle_url): - subtitles = {} + def _extract_subtitles(url, subtitle_url, subtitles): if subtitle_url and isinstance(subtitle_url, compat_str): subtitle_url = urljoin(url, subtitle_url) STL_EXT = '.stl' @@ -121,13 +122,12 @@ class RaiBaseIE(InfoExtractor): return subtitles @staticmethod - def _extract_subtitles_from_list(subtitle_array): - subtitles = {} + def _extract_subtitles_from_list(subtitle_array, subtitles): if isinstance(subtitle_array, list): for item in subtitle_array: lang = item.get('language') url = item.get('url') - if isinstance(url, compat_str) and '' != url and isinstance(lang, compat_str) and '' != lang: + if url_or_none(url) and str_or_none(lang): subtitles[lang.lower()] = [{ 'ext': url[-3:], 'url': url, @@ -158,21 +158,16 @@ class RaiPlayIE(RaiBaseIE): 'url': 'https://www.raiplay.it/video/2019/10/Report-del-21102019-La-fabbrica-della-paura-825ce3a7-8573-46c8-80d2-cde1b519fd01.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { - "id": "825ce3a7-8573-46c8-80d2-cde1b519fd01", - "title": "Report - La fabbrica della paura", - "alt_title": "St 2019/20 - La fabbrica della paura - 21/10/2019 ", + 'id': '825ce3a7-8573-46c8-80d2-cde1b519fd01', + 'title': 'Report - La fabbrica della paura', + 'alt_title': 'St 2019/20 - La fabbrica della paura - 21/10/2019 ', 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', - "ext": "mp4", - "series": "Report", - "season_number": 7, - "season": "2019/20", - "subtitles": { - "it": [ - { - "ext": "srt", - "url": "http://creativemedia4-rai-it.akamaized.net/infocdn/raitre/report/Report_EP_Puntate/11217587.srt" - } - ] + 'ext': 'mp4', + 'series': 'Report', + 'season_number': 7, + 'season': '2019/20', + 'subtitles': { + 'it': [{'ext': 'srt'}] }, }, 'params': { @@ -187,14 +182,13 @@ class RaiPlayIE(RaiBaseIE): mobj = re.match(self._VALID_URL, url) url, video_id = mobj.group('url', 'id') - media = self._download_json( - '%s?json' % url, video_id, 'Downloading video JSON') + media = self._download_json(url.replace('.html', '.json'), video_id, 'Downloading video JSON') title = media['name'] video = media['video'] - relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) + relinker_info = self._extract_relinker_info(video['content_url'], video_id) self._sort_formats(relinker_info['formats']) thumbnails = [] @@ -210,10 +204,9 @@ class RaiPlayIE(RaiBaseIE): subtitles = {} if '' != video.get('subtitles'): - subtitles = self._extract_subtitles(url, video.get('subtitles')) - else: - if video.get('subtitlesArray'): - subtitles = self._extract_subtitles_from_list(video.get('subtitlesArray')) + subtitles = self._extract_subtitles(url, video.get('subtitles'), subtitles) + if video.get('subtitlesArray'): + subtitles = self._extract_subtitles_from_list(video.get('subtitlesArray'), subtitles) info = { 'id': video_id, 'title': self._live_title(title) if relinker_info.get( From bc478ec1e3467f383bf409857c83e2e2d6a48bcd Mon Sep 17 00:00:00 2001 From: thePanz Date: Wed, 18 Dec 2019 19:41:38 +0100 Subject: [PATCH 3/3] Handle relative subtitle URLs --- youtube_dl/extractor/rai.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 15f0f22d3..93cadd53c 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -125,12 +125,17 @@ class RaiBaseIE(InfoExtractor): def _extract_subtitles_from_list(subtitle_array, subtitles): if isinstance(subtitle_array, list): for item in subtitle_array: - lang = item.get('language') - url = item.get('url') - if url_or_none(url) and str_or_none(lang): - subtitles[lang.lower()] = [{ - 'ext': url[-3:], - 'url': url, + subtitle_lang = item.get('language') + subtitle_url = item.get('url') + + # Handle relative subtitles URL + if None == url_or_none(subtitle_url): + subtitle_url = 'https://www.raiplay.it'+subtitle_url + + if url_or_none(subtitle_url) and str_or_none(subtitle_lang): + subtitles[subtitle_lang.lower()] = [{ + 'ext': subtitle_url[-3:], + 'url': subtitle_url, }] return subtitles @@ -183,7 +188,6 @@ class RaiPlayIE(RaiBaseIE): url, video_id = mobj.group('url', 'id') media = self._download_json(url.replace('.html', '.json'), video_id, 'Downloading video JSON') - title = media['name'] video = media['video']