From 84a5e9bf1b013e4168b43a17dcbcd331324dd217 Mon Sep 17 00:00:00 2001 From: pulpe Date: Fri, 14 Feb 2014 17:51:49 +0100 Subject: [PATCH 1/3] [pbs] attempt to support m3u8 #2375 #1598 --- youtube_dl/extractor/pbs.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index e7e0042fb..856aeece6 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import compat_urllib_request class PBSIE(InfoExtractor): @@ -57,11 +58,35 @@ class PBSIE(InfoExtractor): info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id info = self._download_json(info_url, display_id) + redir_url = compat_urllib_request.urlopen(info['recommended_encoding']['url']).geturl() + base_url = '/'.join(redir_url.split('/')[0:len(redir_url.split('/'))-1]) + + m3u8 = self._download_webpage(redir_url, display_id, note='Downloading m3u8 playlist') + + splitted_m3u8 = m3u8.splitlines() + + formats = [] + for line in splitted_m3u8: + if line.startswith('#EXT-X-STREAM-INF'): + bandwidth = self._search_regex(r'BANDWIDTH=(\d+)', line, 'bandwidth') + codecs = self._search_regex(r'CODECS="(.+?)"', line, 'codecs') + filename = splitted_m3u8[splitted_m3u8.index(line)+1] + + formats.append({ + 'format_id': re.sub(r'(.*)000', r'\1k', bandwidth), + 'url': base_url+'/'+filename, + 'protocol': 'm3u8', + 'ext': 'mp4', + 'format_note': codecs, + 'quality': int(bandwidth), + }) + + self._sort_formats(formats) + return { 'id': video_id, 'title': info['title'], - 'url': info['alternate_encoding']['url'], - 'ext': 'mp4', + 'formats': formats, 'description': info['program'].get('description'), 'thumbnail': info.get('image_url'), 'duration': info.get('duration'), From a800076037f5db4a730846b892f38bf30afad611 Mon Sep 17 00:00:00 2001 From: pulpe Date: Fri, 14 Feb 2014 18:10:11 +0100 Subject: [PATCH 2/3] [pbs] fix test --- youtube_dl/extractor/pbs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 856aeece6..d9bafeee7 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -20,7 +20,6 @@ class PBSIE(InfoExtractor): _TEST = { 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', - 'md5': 'ce1888486f0908d555a8093cac9a7362', 'info_dict': { 'id': '2365006249', 'ext': 'mp4', @@ -28,6 +27,9 @@ class PBSIE(InfoExtractor): 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', 'duration': 3190, }, + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): From ebc903353e2e9a8ede0fbbab1db671c772bac3c6 Mon Sep 17 00:00:00 2001 From: pulpe Date: Fri, 14 Feb 2014 19:38:23 +0100 Subject: [PATCH 3/3] [pbs] specify audio only format --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index d9bafeee7..436c2b9a6 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -79,7 +79,7 @@ class PBSIE(InfoExtractor): 'url': base_url+'/'+filename, 'protocol': 'm3u8', 'ext': 'mp4', - 'format_note': codecs, + 'format_note': 'Audio only' if codecs.split('.')[0] == 'mp4a' else 'Video', 'quality': int(bandwidth), })