From 638a71d9be1afb4efcd695ab824901c89d2aaf6e Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 23 Oct 2015 07:09:41 +0100 Subject: [PATCH 1/3] [abc] add support for audio extraction --- youtube_dl/extractor/abc.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index f9a389f67..9153bbc72 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -12,7 +12,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'http://www\.abc\.net\.au/news/([^/]+/){1,2}(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', @@ -36,6 +36,15 @@ class ABCIE(InfoExtractor): 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill', }, 'add_ie': ['Youtube'], + }, { + 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080', + 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f', + 'info_dict': { + 'id': '6880080', + 'ext': 'mp3', + 'title': 'NAB lifts interest rates, following Westpac and CBA', + 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728', + }, }] def _real_extract(self, url): @@ -43,7 +52,7 @@ class ABCIE(InfoExtractor): webpage = self._download_webpage(url, video_id) mobj = re.search( - r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);', + r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);', webpage) if mobj is None: raise ExtractorError('Unable to extract video urls') @@ -58,13 +67,22 @@ class ABCIE(InfoExtractor): return self.playlist_result([ self.url_result(url_info['url']) for url_info in urls_info]) - formats = [{ - 'url': url_info['url'], - 'width': int_or_none(url_info.get('width')), - 'height': int_or_none(url_info.get('height')), - 'tbr': int_or_none(url_info.get('bitrate')), 
- 'filesize': int_or_none(url_info.get('filesize')), - } for url_info in urls_info] + if mobj.group('type') == 'Video': + formats = [{ + 'url': url_info['url'], + 'vcodec': url_info.get('codec'), + 'width': int_or_none(url_info.get('width')), + 'height': int_or_none(url_info.get('height')), + 'tbr': int_or_none(url_info.get('bitrate')), + 'filesize': int_or_none(url_info.get('filesize')), + } for url_info in urls_info] + else: + formats = [{ + 'url': url_info['url'], + 'vcodec': 'none', + 'tbr': int_or_none(url_info.get('bitrate')), + 'filesize': int_or_none(url_info.get('filesize')), + } for url_info in urls_info] self._sort_formats(formats) return { From a79ddbf62b51c198d42c8ac42e64b6512e65bbd2 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 23 Oct 2015 07:16:48 +0100 Subject: [PATCH 2/3] [abc] do not capture a regex group --- youtube_dl/extractor/abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 9153bbc72..04695806b 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -12,7 +12,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'http://www\.abc\.net\.au/news/([^/]+/){1,2}(?P<id>\d+)' + _VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', From dded39165056baf1cde60e9e7e9fe5bce417e31d Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 24 Oct 2015 04:51:15 +0100 Subject: [PATCH 3/3] [abc] simplify and add a test --- youtube_dl/extractor/abc.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 04695806b..c0e5d1abf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -45,6 +45,9 @@ class ABCIE(InfoExtractor): 'title': 'NAB lifts interest 
rates, following Westpac and CBA', 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728', }, + }, { + 'url': 'http://www.abc.net.au/news/2015-10-19/6866214', + 'only_matching': True, }] def _real_extract(self, url): @@ -67,22 +70,15 @@ class ABCIE(InfoExtractor): return self.playlist_result([ self.url_result(url_info['url']) for url_info in urls_info]) - if mobj.group('type') == 'Video': - formats = [{ - 'url': url_info['url'], - 'vcodec': url_info.get('codec'), - 'width': int_or_none(url_info.get('width')), - 'height': int_or_none(url_info.get('height')), - 'tbr': int_or_none(url_info.get('bitrate')), - 'filesize': int_or_none(url_info.get('filesize')), - } for url_info in urls_info] - else: - formats = [{ - 'url': url_info['url'], - 'vcodec': 'none', - 'tbr': int_or_none(url_info.get('bitrate')), - 'filesize': int_or_none(url_info.get('filesize')), - } for url_info in urls_info] + formats = [{ + 'url': url_info['url'], + 'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none', + 'width': int_or_none(url_info.get('width')), + 'height': int_or_none(url_info.get('height')), + 'tbr': int_or_none(url_info.get('bitrate')), + 'filesize': int_or_none(url_info.get('filesize')), + } for url_info in urls_info] + self._sort_formats(formats) return {