From 9deb3bea04233aeed6e1a7cd2421f3cc7d6c1879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20L=C3=B3pez=20Juan?= Date: Sat, 23 Dec 2017 23:03:15 +0100 Subject: [PATCH 1/4] [ccma] Fix pattern to support video collection URLs --- youtube_dl/extractor/ccma.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index bec0a825a..d0c74050f 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -13,7 +13,7 @@ from ..utils import ( class CCMAIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?P<channel>tv3|catradio|[^/*])(?:[^/]+/)*?(?P<type>audio|video|[^/]*)/(?P<id>\d+)/?$' _TESTS = [{ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', 'md5': '7296ca43977c8ea4469e719c609b0871', @@ -36,10 +36,33 @@ class CCMAIE(InfoExtractor): 'upload_date': '20171205', 'timestamp': 1512507300, } + }, { + 'url': 'http://www.ccma.cat/tv3/alacarta/e17-tots-els-videos-de-les-eleccions-del-21d/arrimadas-cs-hem-guanyat-les-eleccions-al-parlament-de-catalunya/coleccio/10970/5711075/', + 'md5': '4cab47c2c3eb1312ab17771b1848c1ad', + 'info_dict': { + 'id': '5711075', + 'ext': 'mp4', + 'description': 'md5:feca2bcac2bace0c37395f625ea4065e', + 'title': 'Arrimadas (Cs): "Hem guanyat les eleccions al Parlament de Catalunya"' + } }] def _real_extract(self, url): - media_type, media_id = re.match(self._VALID_URL, url).groups() + m = re.match(self._VALID_URL, url) + url_channel = m.group('channel') + url_type = m.group('type') + # Heuristics to guess media type + if url_type == 'video': + media_type = 'video' + elif url_type == 'audio': + media_type = 'audio' + elif url_channel == 'tv3': + media_type = 'video' + elif url_channel == 'catradio': + media_type = 'audio' + else: + media_type = 'video' + media_id = m.group('id') media_data 
= {} formats = [] profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc'] From 96b31b9b84156b0e867399e787d1d4bed93dc4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20L=C3=B3pez=20Juan?= Date: Sat, 23 Dec 2017 23:39:15 +0100 Subject: [PATCH 2/4] [ccma] Separate pattern for collection URLs --- youtube_dl/extractor/ccma.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index d0c74050f..056b3d0a3 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -13,7 +13,7 @@ from ..utils import ( class CCMAIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?P<channel>tv3|catradio|[^/*])(?:[^/]+/)*?(?P<type>audio|video|[^/]*)/(?P<id>\d+)/?$' + _VALID_URL = r'^https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id1>\d+).*$|^https?://(?:www\.)?ccma\.cat/(?P<channel>tv3|catradio)/(?:[^/]+/)*?(?P<id2>\d+)/?$' _TESTS = [{ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', 'md5': '7296ca43977c8ea4469e719c609b0871', @@ -49,20 +49,13 @@ class CCMAIE(InfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url) - url_channel = m.group('channel') - url_type = m.group('type') - # Heuristics to guess media type - if url_type == 'video': - media_type = 'video' - elif url_type == 'audio': - media_type = 'audio' - elif url_channel == 'tv3': - media_type = 'video' - elif url_channel == 'catradio': - media_type = 'audio' - else: - media_type = 'video' - media_id = m.group('id') + if m.group('type'): + media_type = m.group('type') + media_id = m.group('id1') + elif m.group('channel'): + channel_to_type = {'tv3':'video','catradio':'audio'} + media_type = channel_to_type[m.group('channel')] + media_id = m.group('id2') media_data = {} formats = [] profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc'] From b6aef5870b11505232c7bb849a9c3a029b549f84 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?V=C3=ADctor=20L=C3=B3pez=20Juan?= Date: Sat, 23 Dec 2017 23:46:44 +0100 Subject: [PATCH 3/4] [ccma] Additional test, shorter URL regex --- youtube_dl/extractor/ccma.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 056b3d0a3..4397e9626 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -13,7 +13,7 @@ from ..utils import ( class CCMAIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id1>\d+).*$|^https?://(?:www\.)?ccma\.cat/(?P<channel>tv3|catradio)/(?:[^/]+/)*?(?P<id2>\d+)/?$' + _VALID_URL = r'^https?://(?:www\.)?ccma\.cat/((?:[^/]+/)*?(?P<type>video|audio)/(?P<id1>\d+)|(?P<channel>tv3|catradio)/(?:[^/]+/)*?(?P<id2>\d+)/?$)' _TESTS = [{ 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', 'md5': '7296ca43977c8ea4469e719c609b0871', @@ -45,6 +45,17 @@ class CCMAIE(InfoExtractor): 'description': 'md5:feca2bcac2bace0c37395f625ea4065e', 'title': 'Arrimadas (Cs): "Hem guanyat les eleccions al Parlament de Catalunya"' } + }, { + 'url': 'http://www.ccma.cat/catradio/alacarta/lendema-del-21d/sabria-erc-no-ens-podem-entretenir-ni-un-moment-per-formar-govern/coleccio/11011/986031/', + 'md5': '471586ce88bcbbdd031afafe75ec72e0', + 'info_dict': { + 'id': '986031', + 'ext': 'mp3', + 'upload_date': '20181210', + 'title': 'Sabrià (ERC): "No ens podem entretenir ni un moment per formar govern"', + 'description': 'md5:faf8ec9faf2115fbf462ad3f7ad175df', + 'timestamp': 1544424300, + } }] def _real_extract(self, url): From f75b67a1a1793f83fc26625b5558b6e1d56c20be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20L=C3=B3pez=20Juan?= Date: Sat, 23 Dec 2017 23:55:39 +0100 Subject: [PATCH 4/4] [ccma] flake8 passing --- youtube_dl/extractor/ccma.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 
4397e9626..61e858463 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -62,9 +62,9 @@ class CCMAIE(InfoExtractor): m = re.match(self._VALID_URL, url) if m.group('type'): media_type = m.group('type') - media_id = m.group('id1') + media_id = m.group('id1') elif m.group('channel'): - channel_to_type = {'tv3':'video','catradio':'audio'} + channel_to_type = {'tv3': 'video', 'catradio': 'audio'} media_type = channel_to_type[m.group('channel')] media_id = m.group('id2') media_data = {}