From 8e5cc10774db459c8af9031f7e72db3e070dd70e Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 27 Aug 2018 21:14:54 +0200 Subject: [PATCH 1/2] [RTBF] Get full title and description Get the full title and description of RTBF by downloading the real webpage and searching og metadata. --- youtube_dl/extractor/rtbf.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index 3b0f3080b..1e6aa5bd9 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -69,9 +69,20 @@ class RTBFIE(InfoExtractor): def _real_extract(self, url): live, media_id = re.match(self._VALID_URL, url).groups() + + webpage = self._download_webpage(url, media_id) + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage, default=None) + + # Remove date from title and description + title = re.sub(r'(?P\(\d{1,}\/\d{1,}\) - \d{2}\/\d{2}\/\d{4})$', '', title) + if description: + description = re.sub(r'(?P\(\d{1,}\/\d{1,} du \d{2}\/\d{2}\/\d{4}\))$', '', description) + embed_page = self._download_webpage( 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), - media_id, query={'id': media_id}) + media_id, query={'id': media_id}, note='Downloading embed webpage') data = self._parse_json(self._html_search_regex( r'data-media="([^"]+)"', embed_page, 'media data'), media_id) @@ -83,7 +94,6 @@ class RTBFIE(InfoExtractor): if provider in self._PROVIDERS: return self.url_result(data['url'], self._PROVIDERS[provider]) - title = data['title'] is_live = data.get('isLive') if is_live: title = self._live_title(title) @@ -151,7 +161,7 @@ class RTBFIE(InfoExtractor): 'id': media_id, 'formats': formats, 'title': title, - 'description': strip_or_none(data.get('description')), + 'description': description, 'thumbnail': data.get('thumbnail'), 'duration': float_or_none(data.get('realDuration')), 'timestamp': int_or_none(data.get('liveFrom')), From 4ccb83efe755826acd15d9c93a228a1b42ded914 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 27 Aug 2018 22:20:41 +0200 Subject: [PATCH 2/2] [RTBF] Improve fail-safe --- youtube_dl/extractor/rtbf.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index 1e6aa5bd9..cc3ace546 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -71,12 +71,16 @@ class RTBFIE(InfoExtractor): live, media_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, media_id) + + if not webpage: + raise ExtractorError('%s said: failed to download the webpage' % self.IE_NAME, expected=True) - title = self._og_search_title(webpage) + title = self._og_search_title(webpage, default=None) description = self._og_search_description(webpage, default=None) # Remove date from title and description - title = re.sub(r'(?P\(\d{1,}\/\d{1,}\) - \d{2}\/\d{2}\/\d{4})$', '', title) + if title: + title = re.sub(r'(?P\(\d{1,}\/\d{1,}\) - \d{2}\/\d{2}\/\d{4})$', '', title) if description: description = re.sub(r'(?P\(\d{1,}\/\d{1,} du \d{2}\/\d{2}\/\d{4}\))$', '', description)