From 30787f7259c4e6a08f691cc691f14fa0c8fe4b87 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 3 Oct 2015 19:28:48 +0100 Subject: [PATCH 01/18] [cspan] correct the clip info extraction --- youtube_dl/extractor/cspan.py | 58 ++++++++++++++++------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index fbefd37d0..994e080d5 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -18,22 +18,21 @@ class CSpanIE(InfoExtractor): IE_DESC = 'C-SPAN' _TESTS = [{ 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', - 'md5': '8e44ce11f0f725527daccc453f553eb0', + 'md5': '067803f994e049b455a58b16e5aab442', 'info_dict': { 'id': '315139', 'ext': 'mp4', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - 'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', + 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. 
Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.', }, 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - # For whatever reason, the served video alternates between - # two different ones + 'md5': '4eafd1e91a75d2b1e6a3cbd0995816a2', 'info_dict': { - 'id': '340723', + 'id': 'c4486943', 'ext': 'mp4', - 'title': 'International Health Care Models', + 'title': 'CSPAN - International Health Care Models', 'description': 'md5:7a985a2d595dba00af3d9c9f0783c967', } }, { @@ -44,7 +43,7 @@ class CSpanIE(InfoExtractor): 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', 'duration': 14848, - 'description': 'md5:70c7c3b8fa63fa60d42772440596034c' + 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3' }, }, { # Video from senate.gov @@ -57,36 +56,33 @@ class CSpanIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - page_id = mobj.group('id') - webpage = self._download_webpage(url, page_id) - video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage) + if matches: + video_type, video_id = matches.groups() + if video_type == 'prog': + video_type = 'program' + else: + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + title = self._og_search_title(webpage) + surl = smuggle_url(senate_isvp_url, {'force_title': title}) + return self.url_result(surl, 'SenateISVP', video_id, title) - description = self._html_search_regex( - [ - # The full description - r'
(.*?)(.*?)

' - ], - webpage, 'description', flags=re.DOTALL, default=None) - - info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id - data = self._download_json(info_url, video_id) + data = self._download_json( + 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), + video_id) doc = self._download_xml( - 'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id, + 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id), video_id) + description = self._html_search_meta('description', webpage) + title = find_xpath_attr(doc, './/string', 'name', 'title').text thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text - senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) - if senate_isvp_url: - surl = smuggle_url(senate_isvp_url, {'force_title': title}) - return self.url_result(surl, 'SenateISVP', video_id, title) - files = data['video']['files'] try: capfile = data['video']['capfile']['#text'] @@ -112,12 +108,12 @@ class CSpanIE(InfoExtractor): if len(entries) == 1: entry = dict(entries[0]) - entry['id'] = video_id + entry['id'] = 'c' + video_id if video_type == 'clip' else video_id return entry else: return { '_type': 'playlist', 'entries': entries, 'title': title, - 'id': video_id, + 'id': 'c' + video_id if video_type == 'clip' else video_id, } From 355c7ad361aa3c8a57ff83e3f702a496dce59e65 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 17 Oct 2015 21:30:38 +0100 Subject: [PATCH 02/18] [cspan] handle error messages and extract qualities --- youtube_dl/extractor/cspan.py | 67 +++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 994e080d5..c74b35fd9 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -9,16 +9,21 @@ from ..utils import ( 
find_xpath_attr, smuggle_url, determine_ext, + ExtractorError, ) from .senateisvp import SenateISVPIE +def get_text_attr(d, attr): + return d.get(attr, {}).get('#text') + + class CSpanIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P[0-9a-f]+)' IE_DESC = 'C-SPAN' _TESTS = [{ 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', - 'md5': '067803f994e049b455a58b16e5aab442', + 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 'info_dict': { 'id': '315139', 'ext': 'mp4', @@ -28,7 +33,7 @@ class CSpanIE(InfoExtractor): 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - 'md5': '4eafd1e91a75d2b1e6a3cbd0995816a2', + 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', 'info_dict': { 'id': 'c4486943', 'ext': 'mp4', @@ -37,7 +42,7 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'md5': '446562a736c6bf97118e389433ed88d4', + 'md5': '2ae5051559169baadba13fc35345ae74', 'info_dict': { 'id': '342759', 'ext': 'mp4', @@ -71,8 +76,10 @@ class CSpanIE(InfoExtractor): return self.url_result(surl, 'SenateISVP', video_id, title) data = self._download_json( - 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), - video_id) + 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), + video_id)['video'] + if data['@status'] != 'Success': + raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True) doc = self._download_xml( 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id), @@ -83,28 +90,36 @@ class CSpanIE(InfoExtractor): title = find_xpath_attr(doc, './/string', 'name', 'title').text thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text - files = data['video']['files'] - try: - capfile = 
data['video']['capfile']['#text'] - except KeyError: - capfile = None + files = data['files'] + capfile = get_text_attr(data, 'capfile') - entries = [{ - 'id': '%s_%d' % (video_id, partnum + 1), - 'title': ( - title if len(files) == 1 else - '%s part %d' % (title, partnum + 1)), - 'url': unescapeHTML(f['path']['#text']), - 'description': description, - 'thumbnail': thumbnail, - 'duration': int_or_none(f.get('length', {}).get('#text')), - 'subtitles': { - 'en': [{ - 'url': capfile, - 'ext': determine_ext(capfile, 'dfxp') - }], - } if capfile else None, - } for partnum, f in enumerate(files)] + entries = [] + for partnum, f in enumerate(files): + formats = [] + for quality in f['qualities']: + formats.append({ + 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), + 'url': unescapeHTML(get_text_attr(quality, 'file')), + 'height': int_or_none(get_text_attr(quality, 'height')), + 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), + }) + self._sort_formats(formats) + entries.append({ + 'id': '%s_%d' % (video_id, partnum + 1), + 'title': ( + title if len(files) == 1 else + '%s part %d' % (title, partnum + 1)), + 'formats': formats, + 'description': description, + 'thumbnail': thumbnail, + 'duration': int_or_none(get_text_attr(f, 'length')), + 'subtitles': { + 'en': [{ + 'url': capfile, + 'ext': determine_ext(capfile, 'dfxp') + }], + } if capfile else None, + }) if len(entries) == 1: entry = dict(entries[0]) From 4bf56141950f3c24000381403417d20095f04460 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 20 Oct 2015 07:43:39 +0100 Subject: [PATCH 03/18] [cspan] move get_text_attr to CSpanIE --- youtube_dl/extractor/cspan.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index c74b35fd9..388460a32 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -14,10 +14,6 @@ from ..utils import ( from 
.senateisvp import SenateISVPIE -def get_text_attr(d, attr): - return d.get(attr, {}).get('#text') - - class CSpanIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P[0-9a-f]+)' IE_DESC = 'C-SPAN' @@ -60,6 +56,9 @@ class CSpanIE(InfoExtractor): } }] + def get_text_attr(self, d, attr): + return d.get(attr, {}).get('#text') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -79,7 +78,7 @@ class CSpanIE(InfoExtractor): 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), video_id)['video'] if data['@status'] != 'Success': - raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True) + raise ExtractorError('%s said: %s' % (self.IE_NAME, self.get_text_attr(data, 'error')), expected=True) doc = self._download_xml( 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id), @@ -91,17 +90,17 @@ class CSpanIE(InfoExtractor): thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text files = data['files'] - capfile = get_text_attr(data, 'capfile') + capfile = self.get_text_attr(data, 'capfile') entries = [] for partnum, f in enumerate(files): formats = [] for quality in f['qualities']: formats.append({ - 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), - 'url': unescapeHTML(get_text_attr(quality, 'file')), - 'height': int_or_none(get_text_attr(quality, 'height')), - 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), + 'format_id': '%s-%sp' % (self.get_text_attr(quality, 'bitrate'), self.get_text_attr(quality, 'height')), + 'url': unescapeHTML(self.get_text_attr(quality, 'file')), + 'height': int_or_none(self.get_text_attr(quality, 'height')), + 'tbr': int_or_none(self.get_text_attr(quality, 'bitrate')), }) self._sort_formats(formats) entries.append({ @@ -112,7 +111,7 @@ class CSpanIE(InfoExtractor): 'formats': 
formats, 'description': description, 'thumbnail': thumbnail, - 'duration': int_or_none(get_text_attr(f, 'length')), + 'duration': int_or_none(self.get_text_attr(f, 'length')), 'subtitles': { 'en': [{ 'url': capfile, From 2a776f978849e0c66f70133747e7fd244f516f7f Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 28 Nov 2015 20:22:31 +0100 Subject: [PATCH 04/18] [cspan] change into a function --- youtube_dl/extractor/cspan.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 388460a32..7b685d157 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -56,9 +56,6 @@ class CSpanIE(InfoExtractor): } }] - def get_text_attr(self, d, attr): - return d.get(attr, {}).get('#text') - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -74,11 +71,14 @@ class CSpanIE(InfoExtractor): surl = smuggle_url(senate_isvp_url, {'force_title': title}) return self.url_result(surl, 'SenateISVP', video_id, title) + def get_text_attr(d, attr): + return d.get(attr, {}).get('#text') + data = self._download_json( 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), video_id)['video'] if data['@status'] != 'Success': - raise ExtractorError('%s said: %s' % (self.IE_NAME, self.get_text_attr(data, 'error')), expected=True) + raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True) doc = self._download_xml( 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id), @@ -90,17 +90,17 @@ class CSpanIE(InfoExtractor): thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text files = data['files'] - capfile = self.get_text_attr(data, 'capfile') + capfile = get_text_attr(data, 'capfile') entries = [] for partnum, f in enumerate(files): formats = [] for quality in f['qualities']: 
formats.append({ - 'format_id': '%s-%sp' % (self.get_text_attr(quality, 'bitrate'), self.get_text_attr(quality, 'height')), - 'url': unescapeHTML(self.get_text_attr(quality, 'file')), - 'height': int_or_none(self.get_text_attr(quality, 'height')), - 'tbr': int_or_none(self.get_text_attr(quality, 'bitrate')), + 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), + 'url': unescapeHTML(get_text_attr(quality, 'file')), + 'height': int_or_none(get_text_attr(quality, 'height')), + 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), }) self._sort_formats(formats) entries.append({ @@ -111,7 +111,7 @@ class CSpanIE(InfoExtractor): 'formats': formats, 'description': description, 'thumbnail': thumbnail, - 'duration': int_or_none(self.get_text_attr(f, 'length')), + 'duration': int_or_none(get_text_attr(f, 'length')), 'subtitles': { 'en': [{ 'url': capfile, From 87f0e62d94e0486598d123e26db3173e6f1d18e6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 Nov 2015 12:42:50 +0800 Subject: [PATCH 05/18] [utils] Separate codes for handling Youtubedl-* headers --- youtube_dl/utils.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d7b737e21..653a49055 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -663,6 +663,15 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): return hc +def handle_youtubedl_headers(headers): + if 'Youtubedl-no-compression' in headers: + filtered_headers = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding') + del filtered_headers['Youtubedl-no-compression'] + return filtered_headers + + return headers + + class YoutubeDLHandler(compat_urllib_request.HTTPHandler): """Handler for HTTP requests and responses. 
@@ -731,10 +740,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): # The dict keys are capitalized because of this bug by urllib if h.capitalize() not in req.headers: req.add_header(h, v) - if 'Youtubedl-no-compression' in req.headers: - if 'Accept-encoding' in req.headers: - del req.headers['Accept-encoding'] - del req.headers['Youtubedl-no-compression'] + + req.headers = handle_youtubedl_headers(req.headers) if sys.version_info < (2, 7) and '#' in req.get_full_url(): # Python 2.6 is brain-dead when it comes to fragments From 94e8c8047353eb541fa20dcf55819cc6ee6d3303 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 Nov 2015 12:43:59 +0800 Subject: [PATCH 06/18] [downloader/hls] Respect Youtubedl-* headers --- youtube_dl/downloader/hls.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 92765a3f9..b5a3e1167 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,6 +13,7 @@ from ..utils import ( encodeArgument, encodeFilename, sanitize_open, + handle_youtubedl_headers, ) @@ -33,9 +34,10 @@ class HlsFD(FileDownloader): if info_dict['http_headers'] and re.match(r'^https?://', url): # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. 
+ headers = handle_youtubedl_headers(info_dict['http_headers']) args += [ '-headers', - ''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items() if key.lower() != 'accept-encoding')] + ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc'] From ac5a69af45307b583a9a6088abe5939bec18d562 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 Nov 2015 12:44:24 +0800 Subject: [PATCH 07/18] [youtube] Disable compression for live streams --- youtube_dl/extractor/youtube.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1c2420a33..52f4fe36d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1475,6 +1475,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): manifest_url = video_info['hlsvp'][0] url_map = self._extract_from_m3u8(manifest_url, video_id) formats = _map_to_format_list(url_map) + # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming + for a_format in formats: + if 'http_headers' not in a_format: + a_format['http_headers'] = {} + a_format['http_headers']['Youtubedl-no-compression'] = True else: raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') From 0424ec307bb920a2a7c217a741241f3d2af84efa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 Nov 2015 12:46:04 +0800 Subject: [PATCH 08/18] [utils] Correct docstring of YoutubeDLHandler --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 653a49055..c43e9e3a1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -679,7 +679,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): the standard headers to every HTTP request and handles gzipped and deflated responses from web servers. 
If compression is to be avoided in a particular request, the original request in the program code only has - to include the HTTP header "Youtubedl-No-Compression", which will be + to include the HTTP header "Youtubedl-no-compression", which will be removed before making the real request. Part of this code was copied from: From 8639f89f516c5bd1e4fda38c40e2a5a9b940ad85 Mon Sep 17 00:00:00 2001 From: Ryan Schmidt Date: Sat, 28 Nov 2015 22:56:24 -0600 Subject: [PATCH 09/18] Always use PYTHON env var in Makefile --- Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 0636fc4cb..337a2eefb 100644 --- a/Makefile +++ b/Makefile @@ -60,34 +60,34 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py chmod a+x youtube-dl README.md: youtube_dl/*.py youtube_dl/*/*.py - COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py + COLUMNS=80 $(PYTHON) youtube_dl/__main__.py --help | $(PYTHON) devscripts/make_readme.py CONTRIBUTING.md: README.md - python devscripts/make_contributing.py README.md CONTRIBUTING.md + $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md supportedsites: - python devscripts/make_supportedsites.py docs/supportedsites.md + $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md README.txt: README.md pandoc -f markdown -t plain README.md -o README.txt youtube-dl.1: README.md - python devscripts/prepare_manpage.py >youtube-dl.1.temp.md + $(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 rm -f youtube-dl.1.temp.md youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in - python devscripts/bash-completion.py + $(PYTHON) devscripts/bash-completion.py bash-completion: youtube-dl.bash-completion youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in - python devscripts/zsh-completion.py + $(PYTHON) 
devscripts/zsh-completion.py zsh-completion: youtube-dl.zsh youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in - python devscripts/fish-completion.py + $(PYTHON) devscripts/fish-completion.py fish-completion: youtube-dl.fish From 992fc9d6e124b910ff3d720e252ef9aad99b2a8b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 Nov 2015 12:58:29 +0800 Subject: [PATCH 10/18] [utils] Refactor handle_youtubedl_headers for future extension --- youtube_dl/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c43e9e3a1..d0606b4bc 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -664,12 +664,13 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): def handle_youtubedl_headers(headers): - if 'Youtubedl-no-compression' in headers: - filtered_headers = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding') - del filtered_headers['Youtubedl-no-compression'] - return filtered_headers + filtered_headers = headers - return headers + if 'Youtubedl-no-compression' in filtered_headers: + filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding') + del filtered_headers['Youtubedl-no-compression'] + + return filtered_headers class YoutubeDLHandler(compat_urllib_request.HTTPHandler): From bf2c8c8f82ff54f9594673c48e531661a72dbdcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Nov 2015 17:03:33 +0600 Subject: [PATCH 11/18] [spiegel] Fix extraction (Closes #7693) --- youtube_dl/extractor/spiegel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 5bd3c0087..39a7aaf9d 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -58,7 +58,8 @@ class SpiegelIE(InfoExtractor): description = self._html_search_meta('description', 
webpage, 'description') base_url = self._search_regex( - r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL') + [r'server\s*:\s*(["\'])(?P.+?)\1', r'var\s+server\s*=\s*"(?P[^"]+)\"'], + webpage, 'server URL', group='url') xml_url = base_url + video_id + '.xml' idoc = self._download_xml(xml_url, video_id) From 049d71d8745014bf5ec23e25e51d6b92556baa8c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 Nov 2015 19:52:48 +0800 Subject: [PATCH 12/18] [youtube] Simplify and make sure header values are strings --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 52f4fe36d..4f375e2c8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1477,9 +1477,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats = _map_to_format_list(url_map) # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming for a_format in formats: - if 'http_headers' not in a_format: - a_format['http_headers'] = {} - a_format['http_headers']['Youtubedl-no-compression'] = True + a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' else: raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') From 040ac686798fdc922157cca64d654933e3f6d096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Nov 2015 21:01:59 +0600 Subject: [PATCH 13/18] [youtube] Extend _VALID_URL (Closes #7694) --- youtube_dl/extractor/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4f375e2c8..55a06eb68 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -258,7 +258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): |(?: # or the v= param in all its forms (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? 
# preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! - (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx) + (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY) v= ) )) @@ -730,6 +730,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', + 'only_matching': True, + } ] def __init__(self, *args, **kwargs): From 2e1b92854000662e554413df0c34c1cbc0d7ffc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Nov 2015 21:04:11 +0600 Subject: [PATCH 14/18] [youtube:playlist] Extend _VALID_URL --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 55a06eb68..032691e7f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1566,7 +1566,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract youtube\.com/ (?: (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) - \? (?:.*?&)*? (?:p|a|list)= + \? (?:.*?[&;])*? 
(?:p|a|list)= | p/ ) ( From d53a4af1a49413a38d639aeb7f522c4ebff8f5c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Nov 2015 03:47:01 +0600 Subject: [PATCH 15/18] [pornhub:playlist] Allow alphanumeric viewkeys (Closes #7695) --- youtube_dl/extractor/pornhub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 965940a4b..08275687d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -147,7 +147,8 @@ class PornHubPlaylistIE(InfoExtractor): entries = [ self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') - for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage)) + for video_url in set(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) ] playlist = self._parse_json( From af284305d58a9915a8ef00d056484b3a59548dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Nov 2015 03:58:39 +0600 Subject: [PATCH 16/18] [vodlocker] Capture file not found error (Closes #7696) --- youtube_dl/extractor/vodlocker.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index be0a2780f..357594a11 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -3,11 +3,14 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse -from ..utils import sanitized_Request +from ..utils import ( + ExtractorError, + sanitized_Request, +) class VodlockerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P[0-9a-zA-Z]+)(?:\..*?)?' + _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P[0-9a-zA-Z]+)(?:\..*?)?' 
_TESTS = [{ 'url': 'http://vodlocker.com/e8wvyzz4sl42', @@ -24,6 +27,12 @@ class VodlockerIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if any(p in webpage for p in ( + '>THIS FILE WAS DELETED<', + '>File Not Found<', + 'The file you were looking for could not be found, sorry for any inconvenience.<')): + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + fields = self._hidden_inputs(webpage) if fields['op'] == 'download1': From 59ee8a86471af488c2ee16dcacf7a913636f0150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Nov 2015 20:10:09 +0600 Subject: [PATCH 17/18] [facebook] Make alternative title optional (Closes #7700) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index fd854411b..321eec59e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -164,7 +164,7 @@ class FacebookIE(InfoExtractor): if not video_title: video_title = self._html_search_regex( r'(?s)(.*?)', - webpage, 'alternative title', fatal=False) + webpage, 'alternative title', default=None) video_title = limit_length(video_title, 80) if not video_title: video_title = 'Facebook video #%s' % video_id From 4c6b4764f0260808d321cfb6cca1daa5e3eb13d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Nov 2015 20:42:05 +0600 Subject: [PATCH 18/18] [youtube] Clarify itag 272 possible resolutions (#7699) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 032691e7f..9b39505ba 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -346,6 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': 
-40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},