From ad544260716d733de04b41db7a0e5cef7573df43 Mon Sep 17 00:00:00 2001 From: Vincent Olivier Date: Thu, 17 Nov 2016 18:23:20 -0500 Subject: [PATCH 1/2] [radiocanada] Extractor Enhancements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/radiocanada.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9107f0b96..a55db0928 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -738,6 +738,7 @@ from .r7 import ( from .radiocanada import ( RadioCanadaIE, RadioCanadaAudioVideoIE, + RadioCanadaArticleIE ) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 321917ad0..2133c332e 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -14,6 +14,7 @@ from ..utils import ( ExtractorError, determine_protocol, unsmuggle_url, + unescapeHTML ) @@ -168,3 +169,30 @@ class RadioCanadaAudioVideoIE(InfoExtractor): def _real_extract(self, url): return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) + + + +class RadioCanadaArticleIE(InfoExtractor): + 'radiocanada:article' + _VALID_URL = r'https?://ici\.radio-canada\.ca/(?P<id>[^?#&]+)' + + @classmethod + def suitable(cls, url): + return False if RadioCanadaAudioVideoIE.suitable(url) else super(RadioCanadaArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + display_id.replace('/', '-') + + webpage = unescapeHTML(self._download_webpage(url, display_id)) + + + entries = [ + self.url_result( + 'radiocanada:medianet:%s' % mobj.group('id'), + ie=RadioCanadaIE.ie_key(), video_id=mobj.group('id')) + for mobj in re.finditer( + r'\"idMedia\"\s*:\s*\"(?P<id>\d+)\"', webpage)] + + return self.playlist_result(entries, display_id) + From f17227520e1ba465ff8e00c35109102f66cd5f77 Mon Sep 17 
00:00:00 2001 From: Vincent Olivier Date: Fri, 18 Nov 2016 14:40:40 -0500 Subject: [PATCH 2/2] [radiocanada] changes requested in pr #11224 --- youtube_dl/extractor/radiocanada.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 2133c332e..cb735ca27 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -171,10 +171,20 @@ class RadioCanadaAudioVideoIE(InfoExtractor): return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) - class RadioCanadaArticleIE(InfoExtractor): - 'radiocanada:article' _VALID_URL = r'https?://ici\.radio-canada\.ca/(?P<id>[^?#&]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/nouvelle/1000657/victimes-richard-henry-bain-abandonnees-ivac', + 'info_dict': { + 'id': '7633340', + 'ext': 'mp4', + 'title': 'La sentence de Bain connue vendredi', + 'upload_date': '20161118', + }, + 'params': { + 'skip_download': True, + }, + } @classmethod def suitable(cls, url): @@ -186,13 +196,11 @@ class RadioCanadaArticleIE(InfoExtractor): webpage = unescapeHTML(self._download_webpage(url, display_id)) - entries = [ self.url_result( 'radiocanada:medianet:%s' % mobj.group('id'), ie=RadioCanadaIE.ie_key(), video_id=mobj.group('id')) for mobj in re.finditer( - r'\"idMedia\"\s*:\s*\"(?P<id>\d+)\"', webpage)] + r'"idMedia"\s*:\s*"(?P<id>\d+)"', webpage)] return self.playlist_result(entries, display_id) -