From 52bb437e412726a37d585cf782c88bc8c8a042a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 20:40:36 +0600 Subject: [PATCH 001/128] [options] Add --fragment-retries option --- youtube_dl/__init__.py | 17 ++++++++++++----- youtube_dl/options.py | 4 ++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 79b389840..737f6545d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -144,14 +144,20 @@ def _real_main(argv=None): if numeric_limit is None: parser.error('invalid max_filesize specified') opts.max_filesize = numeric_limit - if opts.retries is not None: - if opts.retries in ('inf', 'infinite'): - opts_retries = float('inf') + + def parse_retries(retries): + if retries in ('inf', 'infinite'): + parsed_retries = float('inf') else: try: - opts_retries = int(opts.retries) + parsed_retries = int(retries) except (TypeError, ValueError): parser.error('invalid retry count specified') + return parsed_retries + if opts.retries is not None: + opts.retries = parse_retries(opts.retries) + if opts.fragment_retries is not None: + opts.fragment_retries = parse_retries(opts.fragment_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -299,7 +305,8 @@ def _real_main(argv=None): 'force_generic_extractor': opts.force_generic_extractor, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, - 'retries': opts_retries, + 'retries': opts.retries, + 'fragment_retries': opts.fragment_retries, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 9dd7a8034..822728afc 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -399,6 +399,10 @@ def parseOpts(overrideArguments=None): '-R', '--retries', dest='retries', metavar='RETRIES', default=10, help='Number 
of retries (default is %default), or "infinite".') + downloader.add_option( + '--fragment-retries', + dest='fragment_retries', metavar='RETRIES', default=10, + help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', From 721f26b8211a22648523f51c80f9b81d1eaa3b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 20:41:24 +0600 Subject: [PATCH 002/128] [downloader/fragment] Add report_retry_fragment --- youtube_dl/downloader/fragment.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index a5bae9669..df66c35f0 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -21,6 +21,11 @@ class FragmentFD(FileDownloader): A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). """ + def report_retry_fragment(self, fragment_name, count, retries): + self.to_screen( + '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %.0f)...' + % (fragment_name, count, retries)) + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) From e33baba0dd6584475f75badec2186a7d86b88a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 20:42:23 +0600 Subject: [PATCH 003/128] [downloader/dash] Add fragment retry capability YouTube may often return 404 HTTP error for a fragment causing the whole download to fail. However if the same fragment is immediately retried with the same request data this usually succeeds (1-2 attemps is usually enough) thus allowing to download the whole file successfully. So, we will retry all fragments that fail with 404 HTTP error for now. 
--- youtube_dl/downloader/dash.py | 42 ++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8b1b17c6e..8bbab9dbc 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -4,6 +4,7 @@ import os import re from .fragment import FragmentFD +from ..compat import compat_urllib_error from ..utils import ( sanitize_open, encodeFilename, @@ -36,20 +37,41 @@ class DashSegmentsFD(FragmentFD): segments_filenames = [] - def append_url_to_file(target_url, target_filename): - success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) - if not success: + fragment_retries = self.params.get('fragment_retries', 0) + + def append_url_to_file(target_url, tmp_filename, segment_name): + target_filename = '%s-%s' % (tmp_filename, segment_name) + count = 0 + while count <= fragment_retries: + try: + success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) + if not success: + return False + down, target_sanitized = sanitize_open(target_filename, 'rb') + ctx['dest_stream'].write(down.read()) + down.close() + segments_filenames.append(target_sanitized) + break + except (compat_urllib_error.HTTPError, ) as err: + # YouTube may often return 404 HTTP error for a fragment causing the + # whole download to fail. However if the same fragment is immediately + # retried with the same request data this usually succeeds (1-2 attemps + # is usually enough) thus allowing to download the whole file successfully. + # So, we will retry all fragments that fail with 404 HTTP error for now. 
+ if err.code != 404: + raise + # Retry fragment + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(segment_name, count, fragment_retries) + if count > fragment_retries: + self.report_error('giving up after %s fragment retries' % fragment_retries) return False - down, target_sanitized = sanitize_open(target_filename, 'rb') - ctx['dest_stream'].write(down.read()) - down.close() - segments_filenames.append(target_sanitized) if initialization_url: - append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init') + append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init') for i, segment_url in enumerate(segment_urls): - segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i) - append_url_to_file(segment_url, segment_filename) + append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i) self._finish_frag_download(ctx) From 617e58d85063b68fb9736355e8354b05e82b1147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 20:51:30 +0600 Subject: [PATCH 004/128] [downloader/{common,fragment}] Fix total retries reporting on python 2.6 --- youtube_dl/downloader/common.py | 8 +++++++- youtube_dl/downloader/fragment.py | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index f39db58f6..1dba9f49a 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -115,6 +115,10 @@ class FileDownloader(object): return '%10s' % '---b/s' return '%10s' % ('%s/s' % format_bytes(speed)) + @staticmethod + def format_retries(retries): + return 'inf' if retries == float('inf') else '%.0f' % retries + @staticmethod def best_block_size(elapsed_time, bytes): new_min = max(bytes / 2.0, 1.0) @@ -297,7 +301,9 @@ class FileDownloader(object): def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" - self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' 
% (count, retries)) + self.to_screen( + '[download] Got server HTTP error. Retrying (attempt %d of %s)...' + % (count, self.format_retries(retries))) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index df66c35f0..c2671e6d2 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -23,8 +23,8 @@ class FragmentFD(FileDownloader): def report_retry_fragment(self, fragment_name, count, retries): self.to_screen( - '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %.0f)...' - % (fragment_name, count, retries)) + '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...' + % (fragment_name, count, self.format_retries(retries))) def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) From 16a8b7986b88572aea12c0f80c499e6e8085f1cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 20:54:21 +0600 Subject: [PATCH 005/128] [downloader/fragment] Document fragment_retries --- youtube_dl/downloader/fragment.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index c2671e6d2..ba903ae10 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -19,6 +19,10 @@ class HttpQuietDownloader(HttpFD): class FragmentFD(FileDownloader): """ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). 
+ + Available options: + + fragment_retries: Number of times to retry a fragment for HTTP error (DASH only) """ def report_retry_fragment(self, fragment_name, count, retries): From 3aec71766da38478740437c901514e666a39dbb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 22:30:48 +0600 Subject: [PATCH 006/128] [safari:api] Separate extractor (Closes #8871) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/safari.py | 52 +++++++++++++++++++------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 529051a93..b3bc38916 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -628,6 +628,7 @@ from .ruutu import RuutuIE from .sandia import SandiaIE from .safari import ( SafariIE, + SafariApiIE, SafariCourseIE, ) from .sapo import SapoIE diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 256396bb8..6ba91f202 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor): class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = r'''(?x)https?:// - (?:www\.)?safaribooksonline\.com/ - (?: - library/view/[^/]+| - api/v1/book - )/ - (?P[^/]+)/ - (?:chapter(?:-content)?/)? 
- (?Ppart\d+)\.html - ''' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P[^/]+)/(?Ppart\d+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -97,9 +88,6 @@ class SafariIE(SafariBaseIE): 'upload_date': '20150724', 'uploader_id': 'stork', }, - }, { - 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', - 'only_matching': True, }, { # non-digits in course id 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', @@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - course_id = mobj.group('course_id') - part = mobj.group('part') + video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) - webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) - reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') - partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') - ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') + webpage = self._download_webpage(url, video_id) + reference_id = self._search_regex( + r'data-reference-id=(["\'])(?P.+?)\1', + webpage, 'kaltura reference id', group='id') + partner_id = self._search_regex( + r'data-partner-id=(["\'])(?P.+?)\1', + webpage, 'kaltura widget id', group='id') + ui_id = self._search_regex( + r'data-ui-id=(["\'])(?P.+?)\1', + webpage, 'kaltura uiconf id', group='id') query = { 'wid': '_%s' % partner_id, @@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE): if self.LOGGED_IN: kaltura_session = self._download_json( '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), - course_id, 'Downloading kaltura session JSON', + video_id, 'Downloading kaltura session JSON', 'Unable to download kaltura session JSON', fatal=False) if 
kaltura_session: session = kaltura_session.get('session') @@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE): 'Kaltura') +class SafariApiIE(SafariBaseIE): + IE_NAME = 'safari:api' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P[^/]+)/chapter(?:-content)?/(?Ppart\d+)\.html' + + _TEST = { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', + 'only_matching': True, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + part = self._download_json( + url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), + 'Downloading part JSON') + return self.url_result(part['web_url'], SafariIE.ie_key()) + + class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' @@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE): 'No chapters found for course %s' % course_id, expected=True) entries = [ - self.url_result(chapter, 'Safari') + self.url_result(chapter, SafariApiIE.ie_key()) for chapter in course_json['chapters']] course_title = course_json['title'] From 8b0d7a66ef5451556bb8ae5b085c7bef4c992f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Wed, 9 Mar 2016 20:55:27 +0100 Subject: [PATCH 007/128] [cda] Add new extractor for cda.pl Fixes #8760 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cda.py | 96 ++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100755 youtube_dl/extractor/cda.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b3bc38916..5f5eca42b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -108,6 +108,7 @@ from .cbsnews import ( ) from .cbssports import CBSSportsIE from .ccc import CCCIE +from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .chaturbate import ChaturbateIE diff --git a/youtube_dl/extractor/cda.py 
b/youtube_dl/extractor/cda.py new file mode 100755 index 000000000..4c53b8dda --- /dev/null +++ b/youtube_dl/extractor/cda.py @@ -0,0 +1,96 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + decode_packed_codes, + ExtractorError, + parse_duration +) + + +class CDAIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|ebd)\.)?cda\.pl/(?:video|[0-9]+x[0-9]+)/(?P[0-9a-z]+)' + _TESTS = [ + { + 'url': 'http://www.cda.pl/video/5749950c', + 'md5': '6f844bf51b15f31fae165365707ae970', + 'info_dict': { + 'id': '5749950c', + 'ext': 'mp4', + 'height': 720, + 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', + 'duration': 39 + } + }, + { + 'url': 'http://www.cda.pl/video/57413289', + 'md5': 'a88828770a8310fc00be6c95faf7f4d5', + 'info_dict': { + 'id': '57413289', + 'ext': 'mp4', + 'title': 'Lądowanie na lotnisku na Maderze', + 'duration': 137 + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) + + if 'Ten film jest dostępny dla użytkowników premium' in webpage: + raise ExtractorError('This video is only available for premium users.', expected=True) + + title = self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False) + + def _get_format(page, version=''): + unpacked = decode_packed_codes(page) + duration = self._search_regex(r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False) + format_id = None + height = None + + m = re.search(r'(?P[0-9]+)p<\/a>', page) + if m: + format_id = m.group('format_id') + height = int(m.group('height')) + + url = self._search_regex(r"url:\\'(.+?)\\'", unpacked, version + ' url', fatal=False) + if url is None: + return None + + return { + 'format_id': format_id, + 'height': height, + 'url': url + }, parse_duration(duration) + + formats = [] + + format_desc, duration = _get_format(webpage) or (None, None) + if format_desc is not None: + 
formats.append(format_desc) + + pattern = re.compile(r'([0-9]+p)<\/a>') + for version in re.findall(pattern, webpage): + webpage = self._download_webpage(version[0], video_id, 'Downloading %s version information' % version[1], fatal=False) + if not webpage: + # Manually report warning because empty page is returned when invalid version is requested. + self.report_warning('Unable to download %s version information' % version[1]) + continue + + format_desc, duration_ = _get_format(webpage, version[1]) or (None, None) + duration = duration or duration_ + if format_desc is not None: + formats.append(format_desc) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'duration': duration + } From f1ced6df51e4d81523e9051cadb6e4f5ceac19f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 23:17:14 +0600 Subject: [PATCH 008/128] [cda] Improve and simplify (Closes #8805) --- youtube_dl/extractor/cda.py | 132 ++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 4c53b8dda..498d2c0d8 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -12,30 +12,30 @@ from ..utils import ( class CDAIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|ebd)\.)?cda\.pl/(?:video|[0-9]+x[0-9]+)/(?P[0-9a-z]+)' - _TESTS = [ - { - 'url': 'http://www.cda.pl/video/5749950c', - 'md5': '6f844bf51b15f31fae165365707ae970', - 'info_dict': { - 'id': '5749950c', - 'ext': 'mp4', - 'height': 720, - 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', - 'duration': 39 - } - }, - { - 'url': 'http://www.cda.pl/video/57413289', - 'md5': 'a88828770a8310fc00be6c95faf7f4d5', - 'info_dict': { - 'id': '57413289', - 'ext': 'mp4', - 'title': 'Lądowanie na lotnisku na Maderze', - 'duration': 137 - } + _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' + 
_TESTS = [{ + 'url': 'http://www.cda.pl/video/5749950c', + 'md5': '6f844bf51b15f31fae165365707ae970', + 'info_dict': { + 'id': '5749950c', + 'ext': 'mp4', + 'height': 720, + 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', + 'duration': 39 } - ] + }, { + 'url': 'http://www.cda.pl/video/57413289', + 'md5': 'a88828770a8310fc00be6c95faf7f4d5', + 'info_dict': { + 'id': '57413289', + 'ext': 'mp4', + 'title': 'Lądowanie na lotnisku na Maderze', + 'duration': 137 + } + }, { + 'url': 'http://ebd.cda.pl/0x0/5749950c', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -44,53 +44,53 @@ class CDAIE(InfoExtractor): if 'Ten film jest dostępny dla użytkowników premium' in webpage: raise ExtractorError('This video is only available for premium users.', expected=True) - title = self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False) - - def _get_format(page, version=''): - unpacked = decode_packed_codes(page) - duration = self._search_regex(r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False) - format_id = None - height = None - - m = re.search(r'(?P[0-9]+)p<\/a>', page) - if m: - format_id = m.group('format_id') - height = int(m.group('height')) - - url = self._search_regex(r"url:\\'(.+?)\\'", unpacked, version + ' url', fatal=False) - if url is None: - return None - - return { - 'format_id': format_id, - 'height': height, - 'url': url - }, parse_duration(duration) + title = self._html_search_regex(r'(.+?)', webpage, 'title') formats = [] - format_desc, duration = _get_format(webpage) or (None, None) - if format_desc is not None: - formats.append(format_desc) - - pattern = re.compile(r'([0-9]+p)<\/a>') - for version in re.findall(pattern, webpage): - webpage = self._download_webpage(version[0], video_id, 'Downloading %s version information' % version[1], fatal=False) - if not webpage: - # Manually report warning because empty page is returned when invalid version is requested. 
- self.report_warning('Unable to download %s version information' % version[1]) - continue - - format_desc, duration_ = _get_format(webpage, version[1]) or (None, None) - duration = duration or duration_ - if format_desc is not None: - formats.append(format_desc) - - self._sort_formats(formats) - - return { + info_dict = { 'id': video_id, 'title': title, 'formats': formats, - 'duration': duration + 'duration': None, } + + def extract_format(page, version): + unpacked = decode_packed_codes(page) + format_url = self._search_regex( + r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False) + if not format_url: + return + f = { + 'url': format_url, + } + m = re.search( + r']+data-quality="(?P[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P[0-9]+)p', + page) + if m: + f.update({ + 'format_id': m.group('format_id'), + 'height': int(m.group('height')), + }) + info_dict['formats'].append(f) + if not info_dict['duration']: + info_dict['duration'] = parse_duration(self._search_regex( + r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)) + + extract_format(webpage, 'default') + + for href, resolution in re.findall( + r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', + webpage): + webpage = self._download_webpage( + href, video_id, 'Downloading %s version information' % resolution, fatal=False) + if not webpage: + # Manually report warning because empty page is returned when + # invalid version is requested. 
+ self.report_warning('Unable to download %s version information' % resolution) + continue + extract_format(webpage, resolution) + + self._sort_formats(formats) + + return info_dict From 9261e347ccf63b31bd2035996279b0ad1a45247a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Mar 2016 23:18:04 +0600 Subject: [PATCH 009/128] Credit @kasper93 for cda (#8805) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index aa48cd5a6..6c9747913 100644 --- a/AUTHORS +++ b/AUTHORS @@ -163,3 +163,4 @@ Patrick Griffis Aidan Rowe mutantmonkey Ben Congdon +Kacper Michajłow From e36f4aa72b01b3f6a322edc094cdf1c20b071367 Mon Sep 17 00:00:00 2001 From: jjatria Date: Wed, 28 Oct 2015 18:31:52 +0000 Subject: [PATCH 010/128] [biobiotv] Add extractor --- youtube_dl/extractor/biobiotv.py | 75 ++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 youtube_dl/extractor/biobiotv.py diff --git a/youtube_dl/extractor/biobiotv.py b/youtube_dl/extractor/biobiotv.py new file mode 100644 index 000000000..aae0588ef --- /dev/null +++ b/youtube_dl/extractor/biobiotv.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class BioBioTVIE(InfoExtractor): + _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?P\d{4})/\d{2}/\d{2}/(?P[\w-]+)(?:\.shtml)?' 
+ + _TESTS = [{ + 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', + 'md5': '26f51f03cf580265defefb4518faec09', + 'info_dict': { + 'id': 'col_c266', + 'display_id': 'sobre-camaras-y-camarillas-parlamentarias', + 'ext': 'mp4', + 'title': 'Sobre Cámaras y camarillas parlamentarias - BioBioChile TV', + 'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/atria-2010-730x350.jpg', + 'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c266.mp4', + 'uploader': 'Fernando Atria', + } + }, { + 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', + 'md5': 'a8c868e6b5f6c17d56873d5633204f84', + 'info_dict': { + 'id': 'col_c270', + 'display_id': 'ninos-transexuales-de-quien-es-la-decision', + 'ext': 'mp4', + 'title': 'Niños transexuales: ¿De quién es la decisión? - BioBioChile TV', + 'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/samantha-2210-730x350.jpg', + 'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c270.mp4', + 'uploader': 'Samantha Morán', + } + }, { + 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml', + 'md5': 'c8369b50d42ff0a4f6b969fbd1a7c32d', + 'info_dict': { + 'id': 'Keno_Pinto', + 'display_id': 'exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo', + 'ext': 'mp4', + 'title': 'Exclusivo: Héctor Pinto, formador de “Chupete”, revela versión del ex delantero albo - BioBioChile TV', + 'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/pinto-730x350.jpg', + 'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/Keno_Pinto.mp4', + 'uploader': 'Juan Pablo Echenique', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + year = mobj.group('year') + + webpage = self._download_webpage(url, display_id) + + title = 
self._html_search_meta( + 'og:title', webpage, 'title', fatal=True) + + thumbnail = self._html_search_meta( + 'og:image', webpage, 'thumbnail', fatal=True) + + video_id = self._html_search_regex( + r'loadFWPlayerVideo\(\"player_0\", \"\d{4}/(.+)\.mp4\"\)', webpage, 'title') + + url = 'http://unlimited2-cl.digitalproserver.com/bbtv/' + year + '/' + video_id + '.mp4' + + return { + 'id': video_id, + 'title': title, + 'url': url, + 'display_id': display_id, + 'thumbnail': thumbnail, + 'uploader': self._search_regex(r'biobiochile\.cl/author[^"]+"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + } From fa023ccb2c00f393c78ae4cbbabec7a8ec7b3ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 01:31:55 +0600 Subject: [PATCH 011/128] [biobiochiletv] Fix extraction, extract m3u8 formats and overall improve (Closes #7314) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/biobiochiletv.py | 86 +++++++++++++++++++++++++++ youtube_dl/extractor/biobiotv.py | 75 ----------------------- 3 files changed, 87 insertions(+), 75 deletions(-) create mode 100644 youtube_dl/extractor/biobiochiletv.py delete mode 100644 youtube_dl/extractor/biobiotv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5f5eca42b..b773edb3d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -72,6 +72,7 @@ from .bet import BetIE from .bigflix import BigflixIE from .bild import BildIE from .bilibili import BiliBiliIE +from .biobiochiletv import BioBioChileTVIE from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py new file mode 100644 index 000000000..133228133 --- /dev/null +++ b/youtube_dl/extractor/biobiochiletv.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class 
BioBioChileTVIE(InfoExtractor): + _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P[^/]+)\.shtml' + + _TESTS = [{ + 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', + 'md5': '26f51f03cf580265defefb4518faec09', + 'info_dict': { + 'id': 'sobre-camaras-y-camarillas-parlamentarias', + 'ext': 'mp4', + 'title': 'Sobre Cámaras y camarillas parlamentarias', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Fernando Atria', + }, + }, { + # different uploader layout + 'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml', + 'md5': 'edc2e6b58974c46d5b047dea3c539ff3', + 'info_dict': { + 'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades', + 'ext': 'mp4', + 'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Piangella Obrador', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') + + file_url = self._search_regex( + r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P.+?)\1', + webpage, 'file url', group='url') + + base_url = self._search_regex( + r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*fileURL', webpage, + 'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/', + group='url') + + formats = self._extract_m3u8_formats( + '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4', 
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + f = { + 'url': '%s%s' % (base_url, file_url), + 'format_id': 'http', + 'protocol': 'http', + 'preference': 1, + } + if formats: + f_copy = formats[-1].copy() + f_copy.update(f) + f = f_copy + formats.append(f) + self._sort_formats(formats) + + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._html_search_regex( + r']+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)', + webpage, 'uploader', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'formats': formats, + } diff --git a/youtube_dl/extractor/biobiotv.py b/youtube_dl/extractor/biobiotv.py deleted file mode 100644 index aae0588ef..000000000 --- a/youtube_dl/extractor/biobiotv.py +++ /dev/null @@ -1,75 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class BioBioTVIE(InfoExtractor): - _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?P\d{4})/\d{2}/\d{2}/(?P[\w-]+)(?:\.shtml)?' - - _TESTS = [{ - 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', - 'md5': '26f51f03cf580265defefb4518faec09', - 'info_dict': { - 'id': 'col_c266', - 'display_id': 'sobre-camaras-y-camarillas-parlamentarias', - 'ext': 'mp4', - 'title': 'Sobre Cámaras y camarillas parlamentarias - BioBioChile TV', - 'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/atria-2010-730x350.jpg', - 'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c266.mp4', - 'uploader': 'Fernando Atria', - } - }, { - 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', - 'md5': 'a8c868e6b5f6c17d56873d5633204f84', - 'info_dict': { - 'id': 'col_c270', - 'display_id': 'ninos-transexuales-de-quien-es-la-decision', - 'ext': 'mp4', - 'title': 'Niños transexuales: ¿De quién es la decisión? 
- BioBioChile TV', - 'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/samantha-2210-730x350.jpg', - 'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c270.mp4', - 'uploader': 'Samantha Morán', - } - }, { - 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml', - 'md5': 'c8369b50d42ff0a4f6b969fbd1a7c32d', - 'info_dict': { - 'id': 'Keno_Pinto', - 'display_id': 'exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo', - 'ext': 'mp4', - 'title': 'Exclusivo: Héctor Pinto, formador de “Chupete”, revela versión del ex delantero albo - BioBioChile TV', - 'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/pinto-730x350.jpg', - 'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/Keno_Pinto.mp4', - 'uploader': 'Juan Pablo Echenique', - } - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - year = mobj.group('year') - - webpage = self._download_webpage(url, display_id) - - title = self._html_search_meta( - 'og:title', webpage, 'title', fatal=True) - - thumbnail = self._html_search_meta( - 'og:image', webpage, 'thumbnail', fatal=True) - - video_id = self._html_search_regex( - r'loadFWPlayerVideo\(\"player_0\", \"\d{4}/(.+)\.mp4\"\)', webpage, 'title') - - url = 'http://unlimited2-cl.digitalproserver.com/bbtv/' + year + '/' + video_id + '.mp4' - - return { - 'id': video_id, - 'title': title, - 'url': url, - 'display_id': display_id, - 'thumbnail': thumbnail, - 'uploader': self._search_regex(r'biobiochile\.cl/author[^"]+"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - } From 94dcade8f892f27f1cdbee29be2e06c08442976e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 01:36:20 +0600 Subject: [PATCH 012/128] Credit @jjatria for biobiochiletv (#7314) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS 
b/AUTHORS index 6c9747913..e507686f2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -164,3 +164,4 @@ Aidan Rowe mutantmonkey Ben Congdon Kacper Michajłow +José Joaquín Atria From d95114dd8359d10c6a0ca5eaddbbd94806173957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 02:34:02 +0600 Subject: [PATCH 013/128] [91porn] Unquote final URL (Closes #8881) --- youtube_dl/extractor/porn91.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 3e15533e9..a6dd2694c 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -1,7 +1,10 @@ # encoding: utf-8 from __future__ import unicode_literals -from ..compat import compat_urllib_parse +from ..compat import ( + compat_urllib_parse, + compat_urllib_parse_unquote, +) from .common import InfoExtractor from ..utils import ( parse_duration, @@ -55,7 +58,8 @@ class Porn91IE(InfoExtractor): info_cn = self._download_webpage( 'http://91porn.com/getfile.php?' 
+ url_params, video_id, 'get real video url') - video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url') + video_url = compat_urllib_parse_unquote(self._search_regex( + r'file=([^&]+)&', info_cn, 'url')) duration = parse_duration(self._search_regex( r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) From 298c04b46497b924b1cbb2f031c5d73d09d2933d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 02:35:48 +0600 Subject: [PATCH 014/128] [91porn] Use common messages' wording --- youtube_dl/extractor/porn91.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index a6dd2694c..63ce87ee3 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -31,9 +31,10 @@ class Porn91IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id self._set_cookie('91porn.com', 'language', 'cn_CN') - webpage = self._download_webpage(url, video_id, 'get HTML content') + + webpage = self._download_webpage( + 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id) if '作为游客,你每天只可观看10个视频' in webpage: raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) @@ -57,7 +58,7 @@ class Porn91IE(InfoExtractor): }) info_cn = self._download_webpage( 'http://91porn.com/getfile.php?' 
+ url_params, video_id, - 'get real video url') + 'Downloading real video url') video_url = compat_urllib_parse_unquote(self._search_regex( r'file=([^&]+)&', info_cn, 'url')) From 5c7cd37ebd6dfb7d5809d2798d0188decce42914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 19 Mar 2016 21:50:16 +0100 Subject: [PATCH 015/128] tox.ini: Exclude test_iqiyi_sdk_interpreter.py --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 48504329f..2d7134005 100644 --- a/tox.ini +++ b/tox.ini @@ -8,6 +8,6 @@ deps = passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py - --exclude test_youtube_lists.py + --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html # test.test_download:TestDownload.test_NowVideo From 3ff63fb3657fee9e0c2df9d5bb96ae5827f257cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 19 Mar 2016 21:51:13 +0100 Subject: [PATCH 016/128] Makefile: make it compatible with bmake It's the portable version of BSD make: http://crufty.net/help/sjg/bmake.html The syntax for conditionals is different in GNU make and BSD make, so we use the shell --- Makefile | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Makefile b/Makefile index e98806791..6689ec06f 100644 --- a/Makefile +++ b/Makefile @@ -12,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -ifeq ($(PREFIX),/usr) - SYSCONFDIR=/etc -else - ifeq ($(PREFIX),/usr/local) - SYSCONFDIR=/etc - else - SYSCONFDIR=$(PREFIX)/etc - endif -endif +SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi 
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) From 40025ee2a339d1a357869fd8d8718a737d250d9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 04:12:34 +0600 Subject: [PATCH 017/128] [postprocessort/ffmpeg] Allow embedding webvtt into webm (Closes #8874) --- youtube_dl/options.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 9dd7a8034..755ed6540 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -720,7 +720,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='Embed subtitles in the video (only for mkv and mp4 videos)') + help='Embed subtitles in the video (only for mp4, webm and mkv videos)') postproc.add_option( '--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index a8819f258..06b8c0548 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -331,17 +331,34 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): def run(self, information): - if information['ext'] not in ['mp4', 'mkv']: - self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files') + if information['ext'] not in ('mp4', 'webm', 'mkv'): + self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files') return [], information subtitles = information.get('requested_subtitles') if not subtitles: self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') return [], information - sub_langs = list(subtitles.keys()) filename = information['filepath'] - sub_filenames = 
[subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] + + ext = information['ext'] + sub_langs = [] + sub_filenames = [] + webm_vtt_warn = False + + for lang, sub_info in subtitles.items(): + sub_ext = sub_info['ext'] + if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': + sub_langs.append(lang) + sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) + else: + if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': + webm_vtt_warn = True + self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') + + if not sub_langs: + return [], information + input_files = [filename] + sub_filenames opts = [ From 96a9f22d983e414583f52eafece7902b1248377b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 10:26:58 +0600 Subject: [PATCH 018/128] [discovery] Relax _VALID_URL (Closes #8903) --- youtube_dl/extractor/discovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index ce680a9f3..fdce1429a 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -9,7 +9,7 @@ from ..compat import compat_str class DiscoveryIE(InfoExtractor): - _VALID_URL = r'''(?x)http://(?:www\.)?(?: + _VALID_URL = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| discoverylife| From db264e3cc3bbab191972bbe1c4efb526ff8bfc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Mar 2016 12:44:04 +0600 Subject: [PATCH 019/128] [francetvinfo] Add support for france3-regions and strip title (Closes #7673) --- youtube_dl/extractor/francetv.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3f4ac3093..7db5fb418 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -82,6 +82,7 @@ 
class FranceTVBaseInfoExtractor(InfoExtractor): subtitle = info.get('sous_titre') if subtitle: title += ' - %s' % subtitle + title = title.strip() subtitles = {} subtitles_list = [{ @@ -125,7 +126,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor): IE_NAME = 'francetvinfo.fr' - _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P.+)\.html' + _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _TESTS = [{ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', @@ -160,6 +161,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'title': 'Les entreprises familiales : le secret de la réussite', 'thumbnail': 're:^https?://.*\.jpe?g$', } + }, { + 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html', + 'md5': 'f485bda6e185e7d15dbc69b72bae993e', + 'info_dict': { + 'id': 'NI_657393', + 'ext': 'flv', + 'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"', + 'description': 'md5:a3264114c9d29aeca11ced113c37b16c', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'timestamp': 1458300695, + 'upload_date': '20160318', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -172,7 +188,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): return self.url_result(dmcloud_url, 'DailymotionCloud') video_id, catalogue = self._search_regex( - r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@') + (r'id-video=([^@]+@[^"]+)', + r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), + webpage, 'video id').split('@') return self._extract_video(video_id, catalogue) From 3c20208eff29fcd3e5d4b13f3d4ffa1be7c56309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Mar 2016 13:00:46 +0600 Subject: [PATCH 020/128] [francetv] Improve formats 
extraction --- youtube_dl/extractor/francetv.py | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 7db5fb418..ad94e31f3 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -60,22 +60,24 @@ class FranceTVBaseInfoExtractor(InfoExtractor): video_id, 'Downloading f4m manifest token', fatal=False) if f4m_url: formats.extend(self._extract_f4m_formats( - f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id)) + f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', + video_id, f4m_id=format_id, fatal=False)) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, 'format_id': 'rtmp-%s' % format_id, 'ext': 'flv', - 'preference': 1, }) else: - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'preference': -1, - }) + if self._is_valid_url(video_url, video_id, format_id): + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) self._sort_formats(formats) title = info['titre'] @@ -132,7 +134,7 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'info_dict': { 'id': '84981923', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Soir 3', 'upload_date': '20130826', 'timestamp': 1377548400, @@ -140,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'fr': 'mincount:2', }, }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, }, { 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', 
'info_dict': { @@ -156,17 +162,23 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html', 'md5': 'f485bda6e185e7d15dbc69b72bae993e', 'info_dict': { - 'id': '556e03339473995ee145930c', + 'id': 'NI_173343', 'ext': 'mp4', 'title': 'Les entreprises familiales : le secret de la réussite', 'thumbnail': 're:^https?://.*\.jpe?g$', - } + 'timestamp': 1433273139, + 'upload_date': '20150602', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, }, { 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html', 'md5': 'f485bda6e185e7d15dbc69b72bae993e', 'info_dict': { 'id': 'NI_657393', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"', 'description': 'md5:a3264114c9d29aeca11ced113c37b16c', 'thumbnail': 're:^https?://.*\.jpe?g$', From 664bcd80b99ac84c3cc7a08e8284abc024a1e58c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 20 Mar 2016 15:45:31 +0800 Subject: [PATCH 021/128] [tudou] Use InAdvancePagedList (closes #8884) --- youtube_dl/extractor/tudou.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index f56b66d06..9892e8a62 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + InAdvancePagedList, float_or_none, unescapeHTML, ) @@ -75,15 +76,16 @@ class TudouIE(InfoExtractor): quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), key=lambda k: int(k))[-1] parts = segments[quality] - result = [] len_parts = len(parts) if len_parts > 1: self.to_screen('%s: found %s parts' % (video_id, len_parts)) - for part in parts: + + def 
part_func(partnum): + part = parts[partnum] part_id = part['k'] final_url = self._url_for_id(part_id, quality) ext = (final_url.split('?')[0]).split('.')[-1] - part_info = { + return [{ 'id': '%s' % part_id, 'url': final_url, 'ext': ext, @@ -97,12 +99,13 @@ class TudouIE(InfoExtractor): 'http_headers': { 'Referer': self._PLAYER_URL, }, - } - result.append(part_info) + }] + + entries = InAdvancePagedList(part_func, len_parts, 1) return { '_type': 'multi_video', - 'entries': result, + 'entries': entries, 'id': video_id, 'title': title, } From 2bfeee69b976fe049761dd3012e30b637ee05a58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 20 Mar 2016 15:54:58 +0800 Subject: [PATCH 022/128] [openload] Add new extractor (closes #8489) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/openload.py | 92 ++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 youtube_dl/extractor/openload.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b773edb3d..ee792bbe0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -535,6 +535,7 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) +from .openload import OpenloadIE from .ora import OraTVIE from .orf import ( ORFTVthekIE, diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py new file mode 100644 index 000000000..71021d573 --- /dev/null +++ b/youtube_dl/extractor/openload.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_chr +from ..utils import encode_base_n + + +class OpenloadIE(InfoExtractor): + _VALID_URL = r'https://openload.co/f/(?P<id>[a-zA-Z0-9]+)' + + _TEST = { + 'url': 'https://openload.co/f/kUEfGclsU9o', + 'md5': 'bf1c059b004ebc7a256f89408e65c36e', + 'info_dict': { + 'id': 'kUEfGclsU9o', + 'ext': 'mp4', + 'title': 'skyrim_no-audio_1080.mp4', + }, + } 
+ + @staticmethod + def openload_level2_debase(m): + radix, num = int(m.group(1)) + 27, int(m.group(2)) + return '"' + encode_base_n(num, radix) + '"' + + @classmethod + def openload_level2(cls, txt): + # The function name is ǃ \u01c3 + # Using escaped unicode literals does not work in Python 3.2 + return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, re.UNICODE).replace('"+"', '') + + # Openload uses a variant of aadecode + # openload_decode and related functions are originally written by + # vitas@matfyz.cz and released with public domain + # See https://github.com/rg3/youtube-dl/issues/8489 + @classmethod + def openload_decode(cls, txt): + symbol_table = [ + ('_', '(゚Д゚) [゚Θ゚]'), + ('a', '(゚Д゚) [゚ω゚ノ]'), + ('b', '(゚Д゚) [゚Θ゚ノ]'), + ('c', '(゚Д゚) [\'c\']'), + ('d', '(゚Д゚) [゚ー゚ノ]'), + ('e', '(゚Д゚) [゚Д゚ノ]'), + ('f', '(゚Д゚) [1]'), + + ('o', '(゚Д゚) [\'o\']'), + ('u', '(o゚ー゚o)'), + ('c', '(゚Д゚) [\'c\']'), + + ('7', '((゚ー゚) + (o^_^o))'), + ('6', '((o^_^o) +(o^_^o) +(c^_^o))'), + ('5', '((゚ー゚) + (゚Θ゚))'), + ('4', '(-~3)'), + ('3', '(-~-~1)'), + ('2', '(-~1)'), + ('1', '(-~0)'), + ('0', '((c^_^o)-(c^_^o))'), + ] + delim = '(゚Д゚)[゚ε゚]+' + ret = '' + for aachar in txt.split(delim): + for val, pat in symbol_table: + aachar = aachar.replace(pat, val) + aachar = aachar.replace('+ ', '') + m = re.match(r'^\d+', aachar) + if m: + ret += compat_chr(int(m.group(0), 8)) + else: + m = re.match(r'^u([\da-f]+)', aachar) + if m: + ret += compat_chr(int(m.group(1), 16)) + return cls.openload_level2(ret) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + code = self._search_regex( + r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>', + webpage, 'JS code') + + video_url = self._search_regex( + r'return\s+"(https?://[^"]+)"', self.openload_decode(code), 'video URL') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'url': video_url, + } From 9e3c2f1d741acc4dd576f77c185e99cfd6bb2ea4 Mon 
Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 20 Mar 2016 16:49:44 +0800 Subject: [PATCH 023/128] [openload] Misc improvements * Add thumbnail * Detect errors (#6469) * Match more (#6469, #8489) --- youtube_dl/extractor/openload.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 71021d573..4468f31fc 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -5,21 +5,31 @@ import re from .common import InfoExtractor from ..compat import compat_chr -from ..utils import encode_base_n +from ..utils import ( + encode_base_n, + ExtractorError, +) class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.co/f/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)' - _TEST = { + _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', 'md5': 'bf1c059b004ebc7a256f89408e65c36e', 'info_dict': { 'id': 'kUEfGclsU9o', 'ext': 'mp4', 'title': 'skyrim_no-audio_1080.mp4', + 'thumbnail': 're:^https?://.*\.jpg$', }, - } + }, { + 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', + 'only_matching': True, + }, { + 'url': 'https://openload.io/f/ZAn6oz-VZGE/', + 'only_matching': True, + }] @staticmethod def openload_level2_debase(m): @@ -78,6 +88,10 @@ class OpenloadIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + + if 'File not found' in webpage: + raise ExtractorError('File not found', expected=True) + code = self._search_regex( r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>', webpage, 'JS code') @@ -88,5 +102,6 @@ class OpenloadIE(InfoExtractor): return { 'id': video_id, 'title': self._og_search_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), 'url': video_url, } From 920d318d3cf70ee0b80cc67e7d3f85b5d45a20a7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 20 Mar 2016 10:55:14 +0100 Subject: [PATCH 024/128] README: document that BSD make is also supported (#8902) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 68db546ef..fcc12d2b3 100644 --- a/README.md +++ b/README.md @@ -831,7 +831,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make +* make (both GNU make and BSD make are supported) * pandoc * zip * nosetests From 2648918c814773e746c6d26da834d32eac952ffa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 20 Mar 2016 18:14:02 +0800 Subject: [PATCH 025/128] [vlive] Fix creator extraction (closes #8814) --- youtube_dl/extractor/vlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 9e2aa58bd..bd5545173 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -64,7 +64,7 @@ class VLiveIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) creator = self._html_search_regex( - r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>', + r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)', webpage, 'creator', fatal=False) view_count = int_or_none(playinfo.get('meta', {}).get('count')) From 7caae128a72596e22cdfb538272c38a37d095db2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 20 Mar 2016 19:11:02 +0800 Subject: [PATCH 026/128] Credit @vitstradal for the key algorithm in OpenloadIE (#8489) [ci skip] --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index e507686f2..51dfc8ddd 100644 --- a/AUTHORS +++ b/AUTHORS @@ -165,3 +165,4 @@ mutantmonkey Ben Congdon Kacper Michajłow José Joaquín Atria +Viťas Strádal From 
daef04a4e75ccd2ff5e2d2495baa0ac9bcf75724 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 20 Mar 2016 20:17:56 +0800 Subject: [PATCH 027/128] [kwuo] Fix KuwoChartIE and KuwoSingerIE and accept new URL forms --- youtube_dl/extractor/kuwo.py | 52 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index f94804d06..45d65e61f 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -import itertools from .common import InfoExtractor from ..utils import ( get_element_by_id, clean_html, ExtractorError, + InAdvancePagedList, remove_start, ) @@ -55,7 +55,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/' + _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { @@ -80,6 +80,9 @@ class KuwoIE(KuwoBaseIE): 'params': { 'format': 'mp3-320' }, + }, { + 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', + 'only_matching': True, }] def _real_extract(self, url): @@ -172,8 +175,6 @@ class KuwoChartIE(InfoExtractor): 'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm', 'info_dict': { 'id': '香港中文龙虎榜', - 'title': '香港中文龙虎榜', - 'description': 're:\d{4}第\d{2}期', }, 'playlist_mincount': 10, } @@ -184,17 +185,11 @@ class KuwoChartIE(InfoExtractor): url, chart_id, note='Download chart info', errnote='Unable to get chart info') - chart_name = self._html_search_regex( - r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name') - - chart_desc = self._html_search_regex( - r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc') - entries = [ self.url_result(song_url, 'Kuwo') for song_url in re.findall( - r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage) + 
r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage) ] - return self.playlist_result(entries, chart_id, chart_name, chart_desc) + return self.playlist_result(entries, chart_id) class KuwoSingerIE(InfoExtractor): @@ -207,7 +202,7 @@ class KuwoSingerIE(InfoExtractor): 'id': 'bruno+mars', 'title': 'Bruno Mars', }, - 'playlist_count': 10, + 'playlist_mincount': 329, }, { 'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm', 'info_dict': { @@ -218,6 +213,8 @@ class KuwoSingerIE(InfoExtractor): 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540 }] + PAGE_SIZE = 15 + def _real_extract(self, url): singer_id = self._match_id(url) webpage = self._download_webpage( @@ -225,25 +222,28 @@ class KuwoSingerIE(InfoExtractor): errnote='Unable to get singer info') singer_name = self._html_search_regex( - r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name' - ) + r'<h1>([^<]+)</h1>', webpage, 'singer name') - entries = [] - first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True - for page_num in itertools.count(1): + artist_id = self._html_search_regex( + r'data-artistid="(\d+)"', webpage, 'artist id') + + page_count = int(self._html_search_regex( + r'data-page="(\d+)"', webpage, 'page count')) + + def page_func(page_num): webpage = self._download_webpage( - 'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num), - singer_id, note='Download song list page #%d' % page_num, - errnote='Unable to get song list page #%d' % page_num) + 'http://www.kuwo.cn/artist/contentMusicsAjax', + singer_id, note='Download song list page #%d' % (page_num + 1), + errnote='Unable to get song list page #%d' % (page_num + 1), + query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE}) - entries.extend([ + return [ self.url_result(song_url, 'Kuwo') for song_url in re.findall( - r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/', + 
r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage) - ][:10 if first_page_only else None]) + ] - if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage): - break + entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE) return self.playlist_result(entries, singer_id, singer_name) From 3e8bb9a972a377442f5f433123ea70b332248f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Mar 2016 20:39:00 +0600 Subject: [PATCH 028/128] [animeondemand] Detect geo restriction --- youtube_dl/extractor/animeondemand.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 0158407f6..85306a69c 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -44,6 +44,10 @@ class AnimeOnDemandIE(InfoExtractor): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') + if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page: + self.raise_geo_restricted( + '%s is only available in German-speaking countries of Europe' % self.IE_NAME) + login_form = self._form_hidden_inputs('new_user', login_page) login_form.update({ From 3c5d183c19f29c1f52fe913ce7e7d47f6eebff2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Mar 2016 21:51:22 +0600 Subject: [PATCH 029/128] [animeondemand] Extract all formats (Closes #8906) --- youtube_dl/extractor/animeondemand.py | 85 ++++++++++++++++++++------- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 85306a69c..4352525e2 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -3,10 +3,14 @@ from __future__ import unicode_literals import re from .common import 
InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_str, +) from ..utils import ( determine_ext, encode_dict, + extract_attributes, ExtractorError, sanitized_Request, urlencode_postdata, @@ -34,6 +38,10 @@ class AnimeOnDemandIE(InfoExtractor): # Episodes without titles 'url': 'https://www.anime-on-demand.de/anime/162', 'only_matching': True, + }, { + # ger/jap, Dub/OmU, account required + 'url': 'https://www.anime-on-demand.de/anime/169', + 'only_matching': True, }] def _login(self): @@ -130,33 +138,70 @@ class AnimeOnDemandIE(InfoExtractor): formats = [] - playlist_url = self._search_regex( - r'data-playlist=(["\'])(?P<url>.+?)\1', - episode_html, 'data playlist', default=None, group='url') - if playlist_url: - request = sanitized_Request( - compat_urlparse.urljoin(url, playlist_url), - headers={ - 'X-Requested-With': 'XMLHttpRequest', - 'X-CSRF-Token': csrf_token, - 'Referer': url, - 'Accept': 'application/json, text/javascript, */*; q=0.01', - }) + for input_ in re.findall( + r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html): + attributes = extract_attributes(input_) + playlist_urls = [] + for playlist_key in ('data-playlist', 'data-otherplaylist'): + playlist_url = attributes.get(playlist_key) + if isinstance(playlist_url, compat_str) and re.match( + r'/?[\da-zA-Z]+', playlist_url): + playlist_urls.append(attributes[playlist_key]) + if not playlist_urls: + continue - playlist = self._download_json( - request, video_id, 'Downloading playlist JSON', fatal=False) - if playlist: - playlist = playlist['playlist'][0] - title = playlist['title'] + lang = attributes.get('data-lang') + lang_note = attributes.get('value') + + for playlist_url in playlist_urls: + kind = self._search_regex( + r'videomaterialurl/\d+/([^/]+)/', + playlist_url, 'media kind', default=None) + format_id_list = [] + if lang: + format_id_list.append(lang) + if kind: + format_id_list.append(kind) + if not format_id_list: + 
format_id_list.append('hls') + format_id = '-'.join(format_id_list) + format_note = ', '.join(filter(None, (kind, lang_note))) + request = sanitized_Request( + compat_urlparse.urljoin(url, playlist_url), + headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRF-Token': csrf_token, + 'Referer': url, + 'Accept': 'application/json, text/javascript, */*; q=0.01', + }) + playlist = self._download_json( + request, video_id, 'Downloading %s playlist JSON' % format_id, + fatal=False) + if not playlist: + continue + playlist = playlist.get('playlist') + if not playlist or not isinstance(playlist, list): + continue + playlist = playlist[0] + title = playlist.get('title') + if not title: + continue description = playlist.get('description') for source in playlist.get('sources', []): file_ = source.get('file') if file_ and determine_ext(file_) == 'm3u8': - formats = self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( file_, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + entry_protocol='m3u8_native', m3u8_id=format_id) + for f in m3u8_formats: + f.update({ + 'language': lang, + 'format_note': format_note, + }) + formats.extend(m3u8_formats) if formats: + self._sort_formats(formats) f = common_info.copy() f.update({ 'title': title, From 9016d76f71b30bd61d69f80dc88fa53f978cf99c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Mar 2016 22:01:45 +0600 Subject: [PATCH 030/128] [YoutubeDL] Improve _format_note --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 93b6ca54d..29d7a3106 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1836,7 +1836,7 @@ class YoutubeDL(object): if fdict.get('language'): if res: res += ' ' - res += '[%s]' % fdict['language'] + res += '[%s] ' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if 
fdict.get('tbr') is not None: From 12af4beb3e28f986170ed00488b48e2e8bcd4e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 21:17:29 +0600 Subject: [PATCH 031/128] [mailru] Add support for https (Closes #8920) --- youtube_dl/extractor/mailru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 71085f279..46eb00492 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -13,7 +13,7 @@ from ..utils import ( class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' + _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' _TESTS = [ { From 0cef27ad255b5cb994b1fa0e80a04bd09514925a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 21:22:37 +0600 Subject: [PATCH 032/128] Add missing r prefix for _VALID_URLs --- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/nova.py | 2 +- youtube_dl/extractor/tv2.py | 4 ++-- youtube_dl/extractor/vgtv.py | 4 ++-- youtube_dl/extractor/wdr.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index e62b3860e..c3176700a 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -942,7 +942,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC articles' diff --git a/youtube_dl/extractor/nova.py 
b/youtube_dl/extractor/nova.py index 3f9c776ef..72f72b803 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -12,7 +12,7 @@ from ..utils import ( class NovaIE(InfoExtractor): IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' - _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' + _VALID_URL = r'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _TESTS = [{ 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus', 'info_dict': { diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index 1457e524e..535d0d361 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -14,7 +14,7 @@ from ..utils import ( class TV2IE(InfoExtractor): - _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' + _VALID_URL = r'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' _TEST = { 'url': 'http://www.tv2.no/v/916509/', 'info_dict': { @@ -100,7 +100,7 @@ class TV2IE(InfoExtractor): class TV2ArticleIE(InfoExtractor): - _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' + _VALID_URL = r'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', 'info_dict': { diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index e148b1ef5..77d8978d4 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -214,7 +214,7 @@ class VGTVIE(XstreamIE): class BTArticleIE(InfoExtractor): IE_NAME = 'bt:article' IE_DESC = 'Bergens Tidende Articles' - _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' + _VALID_URL = 
r'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' _TEST = { 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', 'md5': '2acbe8ad129b3469d5ae51b1158878df', @@ -241,7 +241,7 @@ class BTArticleIE(InfoExtractor): class BTVestlendingenIE(InfoExtractor): IE_NAME = 'bt:vestlendingen' IE_DESC = 'Bergens Tidende - Vestlendingen' - _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' + _VALID_URL = r'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588', 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index a851578e0..65cab4069 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -244,7 +244,7 @@ class WDRMobileIE(InfoExtractor): class WDRMausIE(InfoExtractor): - _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))' + _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))' IE_DESC = 'Sendung mit der Maus' _TESTS = [{ 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', From 5886b38d73c54239c85c3e0d8e7c1585d1bbb7da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 21:36:32 +0600 Subject: [PATCH 033/128] Add support for https for all extractors as preventive and future-proof measure --- youtube_dl/extractor/abc.py | 2 +- youtube_dl/extractor/addanime.py | 2 +- youtube_dl/extractor/aftonbladet.py | 2 +- youtube_dl/extractor/aljazeera.py | 2 +- youtube_dl/extractor/aol.py | 4 ++-- youtube_dl/extractor/arte.py | 2 +- youtube_dl/extractor/azubu.py | 2 +- youtube_dl/extractor/baidu.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/behindkink.py | 2 +- youtube_dl/extractor/bilibili.py | 2 +- youtube_dl/extractor/bokecc.py | 2 +- 
youtube_dl/extractor/bpb.py | 2 +- youtube_dl/extractor/breakcom.py | 2 +- youtube_dl/extractor/camdemy.py | 4 ++-- youtube_dl/extractor/cbsnews.py | 4 ++-- youtube_dl/extractor/cbssports.py | 2 +- youtube_dl/extractor/cliphunter.py | 2 +- youtube_dl/extractor/clipsyndicate.py | 2 +- youtube_dl/extractor/clubic.py | 2 +- youtube_dl/extractor/comcarcoff.py | 2 +- youtube_dl/extractor/condenast.py | 2 +- youtube_dl/extractor/cspan.py | 2 +- youtube_dl/extractor/ctsnews.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- youtube_dl/extractor/defense.py | 2 +- youtube_dl/extractor/douyutv.py | 2 +- youtube_dl/extractor/dplay.py | 2 +- youtube_dl/extractor/dreisat.py | 2 +- youtube_dl/extractor/dvtv.py | 2 +- youtube_dl/extractor/echomsk.py | 2 +- youtube_dl/extractor/exfm.py | 2 +- youtube_dl/extractor/fc2.py | 2 +- youtube_dl/extractor/firstpost.py | 2 +- youtube_dl/extractor/firsttv.py | 2 +- youtube_dl/extractor/fktv.py | 2 +- youtube_dl/extractor/footyroom.py | 2 +- youtube_dl/extractor/foxgay.py | 2 +- youtube_dl/extractor/franceinter.py | 2 +- youtube_dl/extractor/freevideo.py | 2 +- youtube_dl/extractor/gamekings.py | 2 +- youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/gamestar.py | 2 +- youtube_dl/extractor/gametrailers.py | 2 +- youtube_dl/extractor/hotnewhiphop.py | 2 +- youtube_dl/extractor/hypem.py | 2 +- youtube_dl/extractor/imdb.py | 4 ++-- youtube_dl/extractor/iqiyi.py | 2 +- youtube_dl/extractor/jadorecettepub.py | 2 +- youtube_dl/extractor/jeuxvideo.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- youtube_dl/extractor/karrierevideos.py | 2 +- youtube_dl/extractor/kontrtube.py | 2 +- youtube_dl/extractor/ku6.py | 2 +- youtube_dl/extractor/kusi.py | 2 +- youtube_dl/extractor/kuwo.py | 12 ++++++------ youtube_dl/extractor/leeco.py | 4 ++-- youtube_dl/extractor/lifenews.py | 4 ++-- youtube_dl/extractor/limelight.py | 6 +++--- youtube_dl/extractor/m6.py | 2 +- youtube_dl/extractor/metacafe.py | 2 +- youtube_dl/extractor/mit.py | 2 +- 
youtube_dl/extractor/mitele.py | 2 +- youtube_dl/extractor/mooshare.py | 2 +- youtube_dl/extractor/motherless.py | 2 +- youtube_dl/extractor/motorsport.py | 2 +- youtube_dl/extractor/myspass.py | 2 +- youtube_dl/extractor/myvideo.py | 2 +- youtube_dl/extractor/myvidster.py | 2 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/nbc.py | 4 ++-- youtube_dl/extractor/nextmedia.py | 6 +++--- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/normalboots.py | 2 +- youtube_dl/extractor/nova.py | 2 +- youtube_dl/extractor/npr.py | 2 +- youtube_dl/extractor/ntvru.py | 2 +- youtube_dl/extractor/orf.py | 6 +++--- youtube_dl/extractor/philharmoniedeparis.py | 2 +- youtube_dl/extractor/photobucket.py | 2 +- youtube_dl/extractor/pornhd.py | 2 +- youtube_dl/extractor/pornovoisines.py | 2 +- youtube_dl/extractor/pyvideo.py | 2 +- youtube_dl/extractor/qqmusic.py | 10 +++++----- youtube_dl/extractor/rai.py | 4 ++-- youtube_dl/extractor/redtube.py | 2 +- youtube_dl/extractor/ringtv.py | 2 +- youtube_dl/extractor/rtve.py | 4 ++-- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/rutube.py | 6 +++--- youtube_dl/extractor/screenjunkies.py | 2 +- youtube_dl/extractor/senateisvp.py | 2 +- youtube_dl/extractor/shared.py | 2 +- youtube_dl/extractor/sport5.py | 2 +- youtube_dl/extractor/ssa.py | 2 +- youtube_dl/extractor/sztvhu.py | 2 +- youtube_dl/extractor/teamcoco.py | 2 +- youtube_dl/extractor/tele13.py | 2 +- youtube_dl/extractor/tf1.py | 2 +- youtube_dl/extractor/thvideo.py | 2 +- youtube_dl/extractor/tinypic.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/toypics.py | 2 +- youtube_dl/extractor/traileraddict.py | 2 +- youtube_dl/extractor/trollvids.py | 2 +- youtube_dl/extractor/tumblr.py | 2 +- youtube_dl/extractor/tv2.py | 4 ++-- youtube_dl/extractor/tvc.py | 4 ++-- youtube_dl/extractor/tvplay.py | 2 +- youtube_dl/extractor/ubu.py | 2 +- youtube_dl/extractor/unistra.py | 2 +- youtube_dl/extractor/vbox7.py | 2 +- 
youtube_dl/extractor/veoh.py | 2 +- youtube_dl/extractor/vesti.py | 2 +- youtube_dl/extractor/vgtv.py | 4 ++-- youtube_dl/extractor/videott.py | 2 +- youtube_dl/extractor/viidea.py | 2 +- youtube_dl/extractor/vube.py | 2 +- youtube_dl/extractor/vuclip.py | 2 +- youtube_dl/extractor/walla.py | 2 +- youtube_dl/extractor/wat.py | 2 +- youtube_dl/extractor/wdr.py | 2 +- youtube_dl/extractor/weiqitv.py | 2 +- youtube_dl/extractor/wimp.py | 2 +- youtube_dl/extractor/xbef.py | 2 +- youtube_dl/extractor/yam.py | 2 +- youtube_dl/extractor/ynet.py | 2 +- 127 files changed, 156 insertions(+), 156 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 6a29e587f..b584277be 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -12,7 +12,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' + _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index e3e6d2113..fb1cc02e1 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -16,7 +16,7 @@ from ..utils import ( class AddAnimeIE(InfoExtractor): - _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' + _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' _TESTS = [{ 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', 'md5': '72954ea10bc979ab5e2eb288b21425a0', diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index e0518cf26..d548592fe 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -6,7 +6,7 @@ from ..utils import int_or_none 
class AftonbladetIE(InfoExtractor): - _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)' + _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)' _TEST = { 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', 'info_dict': { diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index cddcaa489..b081695d8 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html' + _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html' _TEST = { 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index b761b2cc4..95a99c6b0 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)' _TESTS = [{ 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', @@ -25,7 +25,7 @@ class AolIE(InfoExtractor): class AolFeaturesIE(InfoExtractor): IE_NAME = 'features.aol.com' - _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts', diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 3e119e21b..ae0f27dcb 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -23,7 +23,7 @@ from ..utils import ( class 
ArteTvIE(InfoExtractor): - _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html' + _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html' IE_NAME = 'arte.tv' def _real_extract(self, url): diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 011edf128..1805b7312 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -98,7 +98,7 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$' + _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$' _TEST = { 'url': 'http://www.azubu.tv/MarsTVMDLen', diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index 76b21e596..234a661d3 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -9,7 +9,7 @@ from ..utils import unescapeHTML class BaiduVideoIE(InfoExtractor): IE_DESC = '百度视频' - _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm' + _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm' _TESTS = [{ 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6', 'info_dict': { diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index c3176700a..2dfcee98d 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -942,7 +942,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = r'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC articles' diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index 1bdc25812..9bca853b3 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -8,7 +8,7 @@ from ..utils import url_basename class BehindKinkIE(InfoExtractor): - 
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' + _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' _TEST = { 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', 'md5': '507b57d8fdcd75a41a9a7bdb7989c762', diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 59beb11bc..8baff2041 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -14,7 +14,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): - _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' + _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py index 122a1cbb6..86a7f4d7d 100644 --- a/youtube_dl/extractor/bokecc.py +++ b/youtube_dl/extractor/bokecc.py @@ -33,7 +33,7 @@ class BokeCCBaseIE(InfoExtractor): class BokeCCIE(BokeCCBaseIE): _IE_DESC = 'CC视频' - _VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' + _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _TESTS = [{ 'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index c28e72927..6ad45a1e6 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -12,7 +12,7 @@ from ..utils import ( class BpbIE(InfoExtractor): IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/' + _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/' _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', diff 
--git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index aa08051b1..725859b4d 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -11,7 +11,7 @@ from ..utils import ( class BreakIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', 'info_dict': { diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 897f3a104..dd4d96cec 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -16,7 +16,7 @@ from ..utils import ( class CamdemyIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' _TESTS = [{ # single file 'url': 'http://www.camdemy.com/media/5181/', @@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor): class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)' + _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)' _TESTS = [{ # links with trailing slash 'url': 'http://www.camdemy.com/folder/450', diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 8ddcc5097..f23bac9a1 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -11,7 +11,7 @@ from ..utils import ( class CBSNewsIE(ThePlatformIE): IE_DESC = 'CBS News' - _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' _TESTS = [ { @@ -96,7 +96,7 @@ class CBSNewsIE(ThePlatformIE): class CBSNewsLiveVideoIE(InfoExtractor): IE_DESC = 'CBS News Live Videos' - _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' + _VALID_URL = 
r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' _TEST = { 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index ae47e74cc..549ae32f3 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class CBSSportsIE(InfoExtractor): - _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' + _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' _TEST = { 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 2996b6b09..19f8b397e 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -19,7 +19,7 @@ def _decode(s): class CliphunterIE(InfoExtractor): IE_NAME = 'cliphunter' - _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/ + _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/ (?P<id>[0-9]+)/ (?P<seo>.+?)(?:$|[#\?]) ''' diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index 8306d6fb7..0b6ad895f 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -8,7 +8,7 @@ from ..utils import ( class ClipsyndicateIE(InfoExtractor): - _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' + _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 1dfa7c12e..2fba93543 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -12,7 +12,7 @@ from ..utils import ( class 
ClubicIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' _TESTS = [{ 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 7dff68492..e697d1410 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -11,7 +11,7 @@ from ..utils import ( class ComCarCoffIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)' + _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)' _TESTS = [{ 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', 'info_dict': { diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 6f92ae2ed..054978ff2 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor): 'wmagazine': 'W Magazine', } - _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) + _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' 
% '|'.join(_SITES.keys()) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index b8b9d058d..84b36f44c 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE class CSpanIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' IE_DESC = 'C-SPAN' _TESTS = [{ 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index 45049bf37..1622fc844 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' # https connection failed (Connection reset) - _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' + _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' _TESTS = [{ 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', 'md5': 'a9875cb790252b08431186d741beaabe', diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index aa2c09eb6..9099f5046 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -6,7 +6,7 @@ from ..compat import compat_str class DctpTvIE(InfoExtractor): - _VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' + _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'info_dict': { diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index 98e3aedfd..9fe144e14 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class DefenseGouvFrIE(InfoExtractor): IE_NAME = 'defense.gouv.fr' - _VALID_URL = 
r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' + _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' _TEST = { 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index bdc768c78..bcb670945 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring) class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index a638c827c..1e7dcada6 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -10,7 +10,7 @@ from ..utils import int_or_none class DPlayIE(InfoExtractor): - _VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 028144f20..0040e70d4 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -7,7 +7,7 @@ from .zdf import ZDFIE class DreiSatIE(ZDFIE): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TESTS = [ { 'url': 
'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index c1a4bc757..974c69dbc 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor): IE_NAME = 'dvtv' IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' + _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' _TESTS = [{ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', diff --git a/youtube_dl/extractor/echomsk.py b/youtube_dl/extractor/echomsk.py index d2d94049d..6b7cc652f 100644 --- a/youtube_dl/extractor/echomsk.py +++ b/youtube_dl/extractor/echomsk.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class EchoMskIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' _TEST = { 'url': 'http://www.echo.msk.ru/sounds/1464134.html', 'md5': '2e44b3b78daff5b458e4dbc37f191f7c', diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index 0c0fe6d65..09ed4f2b5 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class ExfmIE(InfoExtractor): IE_NAME = 'exfm' IE_DESC = 'ex.fm' - _VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' _TESTS = [ { diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 9580f5c0c..508684d2e 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -17,7 +17,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' + 
_VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' IE_NAME = 'fc2' _NETRC_MACHINE = 'fc2' _TESTS = [{ diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py index 298227d57..e8936cb24 100644 --- a/youtube_dl/extractor/firstpost.py +++ b/youtube_dl/extractor/firstpost.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class FirstpostIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 510d4b108..98b165143 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class FirstTVIE(InfoExtractor): IE_NAME = '1tv' IE_DESC = 'Первый канал' - _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' + _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' _TESTS = [{ 'url': 'http://www.1tv.ru/videoarchive/73390', diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 5f6e65dae..a3a291599 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -10,7 +10,7 @@ from ..utils import ( class FKTVIE(InfoExtractor): IE_NAME = 'fernsehkritik.tv' - _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?' + _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?' 
_TEST = { 'url': 'http://fernsehkritik.tv/folge-1', diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index 370fd006f..d2503ae2e 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class FootyRoomIE(InfoExtractor): - _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' + _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', 'info_dict': { diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py index 08b8ea362..70c1a815d 100644 --- a/youtube_dl/extractor/foxgay.py +++ b/youtube_dl/extractor/foxgay.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class FoxgayIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' + _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' _TEST = { 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', 'md5': '80d72beab5d04e1655a56ad37afe6841', diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 0388ba00c..2369f868d 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class FranceInterIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', 'md5': '4764932e466e6f6c79c317d2e74f6884', diff --git a/youtube_dl/extractor/freevideo.py b/youtube_dl/extractor/freevideo.py index c7bec027b..cd8423a6f 100644 --- a/youtube_dl/extractor/freevideo.py +++ b/youtube_dl/extractor/freevideo.py @@ -5,7 +5,7 @@ from ..utils import ExtractorError class FreeVideoIE(InfoExtractor): - _VALID_URL = 
r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])' + _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])' _TEST = { 'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html', diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index f6b9046f9..cbcddcb7c 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -10,7 +10,7 @@ from .youtube import YoutubeIE class GamekingsIE(InfoExtractor): - _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' + _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' _TESTS = [{ # YouTube embed video 'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index b3f1bafcc..4ffdd7515 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' + _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' 
_TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 590ccf526..69058a583 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -13,7 +13,7 @@ from ..utils import ( class GameStarIE(InfoExtractor): - _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index c3f031d9c..1e7948ab8 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -9,7 +9,7 @@ from ..utils import ( class GametrailersIE(InfoExtractor): - _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' + _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' _TEST = { 'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 31e219945..efc3e8429 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -12,7 +12,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index b3706fe6d..e0ab31802 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ 
-12,7 +12,7 @@ from ..utils import ( class HypemIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/' _TEST = { 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', 'md5': 'b9cc91b5af8995e9f0c1cee04c575828', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index b61b2dc4e..8bed8ccd0 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -12,7 +12,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' _TEST = { 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -70,7 +70,7 @@ class ImdbIE(InfoExtractor): class ImdbListIE(InfoExtractor): IE_NAME = 'imdb:list' IE_DESC = 'Internet Movie Database lists' - _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' _TEST = { 'url': 'http://www.imdb.com/list/JFs9NWw6XI0', 'info_dict': { diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index e7c0cb3f6..1a4c64713 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' - _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' + _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html' _NETRC_MACHINE = 'iqiyi' diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py index 063e86de4..158c09a33 100644 --- a/youtube_dl/extractor/jadorecettepub.py +++ b/youtube_dl/extractor/jadorecettepub.py @@ -9,7 +9,7 @@ from .youtube import YoutubeIE class JadoreCettePubIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html' + _VALID_URL = 
r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html' _TEST = { 'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html', diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 137db873c..1a4227f6b 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class JeuxVideoIE(InfoExtractor): - _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' + _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' _TESTS = [{ 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index 06daf5a89..b4c30b7f3 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -9,7 +9,7 @@ from ..utils import ( class KaraoketvIE(InfoExtractor): - _VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)' _TEST = { 'url': 'http://karaoketv.co.il/?container=songs&id=171568', 'info_dict': { diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index bed94bc93..2cb04e533 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -12,7 +12,7 @@ from ..utils import ( class KarriereVideosIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin', 'info_dict': { diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index a59c529f4..704bd7b34 100644 --- 
a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -13,7 +13,7 @@ from ..utils import ( class KontrTubeIE(InfoExtractor): IE_NAME = 'kontrtube' IE_DESC = 'KontrTube.ru - Труба зовёт' - _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' _TEST = { 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py index a602980a1..a574408e5 100644 --- a/youtube_dl/extractor/ku6.py +++ b/youtube_dl/extractor/ku6.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class Ku6IE(InfoExtractor): - _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' + _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' _TEST = { 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', 'md5': '01203549b9efbb45f4b87d55bdea1ed1', diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py index 931f34c9b..12cc56e44 100644 --- a/youtube_dl/extractor/kusi.py +++ b/youtube_dl/extractor/kusi.py @@ -16,7 +16,7 @@ from ..utils import ( class KUSIIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' + _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' _TESTS = [{ 'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold', 'md5': 'f926e7684294cf8cb7bdf8858e1b3988', diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 45d65e61f..a586308b2 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -55,7 +55,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)' + _VALID_URL = 
r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { @@ -134,7 +134,7 @@ class KuwoIE(KuwoBaseIE): class KuwoAlbumIE(InfoExtractor): IE_NAME = 'kuwo:album' IE_DESC = '酷我音乐 - 专辑' - _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/' + _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { @@ -170,7 +170,7 @@ class KuwoAlbumIE(InfoExtractor): class KuwoChartIE(InfoExtractor): IE_NAME = 'kuwo:chart' IE_DESC = '酷我音乐 - 排行榜' - _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' + _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' _TEST = { 'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm', 'info_dict': { @@ -195,7 +195,7 @@ class KuwoChartIE(InfoExtractor): class KuwoSingerIE(InfoExtractor): IE_NAME = 'kuwo:singer' IE_DESC = '酷我音乐 - 歌手' - _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' + _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { @@ -251,7 +251,7 @@ class KuwoSingerIE(InfoExtractor): class KuwoCategoryIE(InfoExtractor): IE_NAME = 'kuwo:category' IE_DESC = '酷我音乐 - 分类' - _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' + _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' _TEST = { 'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm', 'info_dict': { @@ -288,7 +288,7 @@ class KuwoCategoryIE(InfoExtractor): class KuwoMvIE(KuwoBaseIE): IE_NAME = 'kuwo:mv' IE_DESC = '酷我音乐 - MV' - _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/' + _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/mv/6480076/', 'info_dict': { diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index df47e88ba..462b752dd 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -28,7 +28,7 @@ from ..utils 
import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' + _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -196,7 +196,7 @@ class LeIE(InfoExtractor): class LePlaylistIE(InfoExtractor): - _VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' + _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' _TESTS = [{ 'url': 'http://www.le.com/tv/46177.html', diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index a8fd639cc..ba2f80a75 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -17,7 +17,7 @@ from ..utils import ( class LifeNewsIE(InfoExtractor): IE_NAME = 'lifenews' IE_DESC = 'LIFE | NEWS' - _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' + _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' _TESTS = [{ # single video embedded via video/source @@ -159,7 +159,7 @@ class LifeNewsIE(InfoExtractor): class LifeEmbedIE(InfoExtractor): IE_NAME = 'life:embed' - _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})' + _VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})' _TEST = { 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 1a0625ac3..2599d45c3 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' _TESTS = [{ 'url': 
'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { @@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { @@ -207,7 +207,7 @@ class LimelightChannelIE(LimelightBaseIE): class LimelightChannelListIE(LimelightBaseIE): IE_NAME = 'limelight:channel_list' - _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', 'info_dict': { diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index 7e025831b..d5945ad66 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class M6IE(InfoExtractor): IE_NAME = 'm6' - _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html' + _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html' _TEST = { 'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html', diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 67d6271e1..c31e8798a 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -17,7 +17,7 @@ from ..utils import ( class 
MetacafeIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' + _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = 'metacafe' diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 819c1b90b..1aea78d11 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -91,7 +91,7 @@ class MITIE(TechTVMITIE): class OCWMITIE(InfoExtractor): IE_NAME = 'ocw.mit.edu' - _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' + _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' _BASE_URL = 'http://ocw.mit.edu/' _TESTS = [ diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index c595f2077..9e584860a 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -14,7 +14,7 @@ from ..utils import ( class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' - _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' + _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 7cc7f054f..f010f52d5 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -13,7 +13,7 @@ from ..utils import ( class MooshareIE(InfoExtractor): IE_NAME = 'mooshare' IE_DESC = 'Mooshare.biz' - _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})' + _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})' _TESTS = [ { diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 0b4787c1d..ad04b12cd 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ 
-12,7 +12,7 @@ from ..utils import ( class MotherlessIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)' _TESTS = [{ 'url': 'http://motherless.com/AC3FFE1', 'md5': '310f62e325a9fafe64f68c0bccb6e75f', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index c1a482dba..370328b36 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -9,7 +9,7 @@ from ..compat import ( class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' + _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', 'info_dict': { diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index f936b92bb..1ca7b1a9e 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,7 +11,7 @@ from ..utils import ( class MySpassIE(InfoExtractor): - _VALID_URL = r'http://www\.myspass\.de/.*' + _VALID_URL = r'https?://www\.myspass\.de/.*' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 1e21cf98a..c83a1eab5 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -20,7 +20,7 @@ from ..utils import ( class MyVideoIE(InfoExtractor): _WORKING = False - _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*' + _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*' IE_NAME = 'myvideo' _TEST = { 'url': 
'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py index a94ab8358..731c24542 100644 --- a/youtube_dl/extractor/myvidster.py +++ b/youtube_dl/extractor/myvidster.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class MyVidsterIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' + _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' _TEST = { 'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making', diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 7ce8d9b18..d5e53365c 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -8,7 +8,7 @@ from ..utils import ( class NationalGeographicIE(InfoExtractor): - _VALID_URL = r'http://video\.nationalgeographic\.com/.*?' + _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?' 
_TESTS = [ { diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index bb0817e34..a622f2212 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -115,7 +115,7 @@ class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsIE(InfoExtractor): # Does not include https because its certificate is invalid - _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', @@ -295,7 +295,7 @@ class NBCNewsIE(ThePlatformIE): class MSNBCIE(InfoExtractor): # https URLs redirect to corresponding http ones - _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' + _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' _TEST = { 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index d1688457f..aae7aeeeb 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601 class NextMediaIE(InfoExtractor): IE_DESC = '蘋果日報' - _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', 'md5': 'dff9fad7009311c421176d1ac90bfe4f', @@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor): class NextMediaActionNewsIE(NextMediaIE): IE_DESC = '蘋果日報 - 動新聞' - _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' + _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' _TESTS = [{ 'url': 
'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index d440313d5..ec7317a2f 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -22,7 +22,7 @@ from ..utils import ( class NocoIE(InfoExtractor): - _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' _LOGIN_URL = 'http://noco.tv/do.php' _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' _SUB_LANG_TEMPLATE = '&sub_lang=%s' diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 5952d136f..77e091072 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -9,7 +9,7 @@ from ..utils import ( class NormalbootsIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' + _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' _TEST = { 'url': 'http://normalboots.com/video/home-alone-games-jontron/', 'md5': '8bf6de238915dd501105b44ef5f1e0f6', diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 72f72b803..17671ad39 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -12,7 +12,7 @@ from ..utils 
import ( class NovaIE(InfoExtractor): IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' - _VALID_URL = r'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' + _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _TESTS = [{ 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus', 'info_dict': { diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index 125c7010b..a3f0abb4e 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -9,7 +9,7 @@ from ..utils import ( class NprIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', 'info_dict': { diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index 2cd924d05..0895d7ea4 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -11,7 +11,7 @@ from ..utils import ( class NTVRuIE(InfoExtractor): IE_NAME = 'ntv.ru' - _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)' + _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)' _TESTS = [ { diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 958eb398b..66c75f8b3 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -137,7 +137,7 @@ class ORFTVthekIE(InfoExtractor): class ORFOE1IE(InfoExtractor): IE_NAME = 'orf:oe1' IE_DESC = 'Radio Österreich 1' - _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)' + _VALID_URL = 
r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)' # Audios on ORF radio are only available for 7 days, so we can't add tests. _TEST = { @@ -171,7 +171,7 @@ class ORFOE1IE(InfoExtractor): class ORFFM4IE(InfoExtractor): IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' - _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' + _VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' _TEST = { 'url': 'http://fm4.orf.at/player/20160110/IS/', @@ -222,7 +222,7 @@ class ORFFM4IE(InfoExtractor): class ORFIPTVIE(InfoExtractor): IE_NAME = 'orf:iptv' IE_DESC = 'iptv.ORF.at' - _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' + _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' _TEST = { 'url': 'http://iptv.orf.at/stories/2275236/', diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index 6e60e5fe9..f1008ae51 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -12,7 +12,7 @@ from ..utils import ( class PhilharmonieDeParisIE(InfoExtractor): IE_DESC = 'Philharmonie de Paris' - _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', 'info_dict': { diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index 788411ccc..6c8bbe1d9 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -8,7 +8,7 @@ from ..compat import compat_urllib_parse_unquote class PhotobucketIE(InfoExtractor): - _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' + _VALID_URL = 
r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' _TEST = { 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 57c78ba52..39b53ecf6 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -12,7 +12,7 @@ from ..utils import ( class PornHdIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' + _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' _TEST = { 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'md5': '956b8ca569f7f4d8ec563e2c41598441', diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 1a53fd71c..6b51e5c54 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -13,7 +13,7 @@ from ..utils import ( class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 30a5f2de4..cc0416cb8 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class PyvideoIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' + _VALID_URL = 
r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' _TESTS = [ { diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 45a3c41c5..ff0af9543 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -18,7 +18,7 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', @@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' - _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _TEST = { 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', 'info_dict': { @@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', @@ -260,7 +260,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' - _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=toplist&p=global_123', @@ -314,7 +314,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): class QQMusicPlaylistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:playlist' IE_DESC = 'QQ音乐 - 歌单' - _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' + _VALID_URL = 
r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=taoge&id=3462654915', diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index a4dc5c335..e36ce1aa1 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -18,7 +18,7 @@ from ..utils import ( class RaiTVIE(InfoExtractor): - _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' + _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' _TESTS = [ { 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', @@ -175,7 +175,7 @@ class RaiTVIE(InfoExtractor): class RaiIE(InfoExtractor): - _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' + _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' _TESTS = [ { 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index d6054d717..7ba41ba59 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -5,7 +5,7 @@ from ..utils import ExtractorError class RedTubeIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.redtube.com/66418', 'md5': '7b8c22b5e7098a3e1c09709df1126d2d', diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py index 508758075..2c2c707bd 100644 --- a/youtube_dl/extractor/ringtv.py +++ 
b/youtube_dl/extractor/ringtv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class RingTVIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30', 'md5': 'd25945f5df41cdca2d2587165ac28720', diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 8a8c5d2a0..08cd1ae6c 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -62,7 +62,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -179,7 +179,7 @@ class RTVEInfantilIE(InfoExtractor): class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' - _VALID_URL = r'http://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 0e470e73f..1f7c26299 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class RUHDIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)' _TEST = { 'url': 'http://www.ruhd.ru/play.php?vid=207', 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', diff --git a/youtube_dl/extractor/rutube.py 
b/youtube_dl/extractor/rutube.py index c5c47d01e..9ca4ae147 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -122,7 +122,7 @@ class RutubeEmbedIE(InfoExtractor): class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' - _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' + _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' _TESTS = [{ 'url': 'http://rutube.ru/tags/video/1800/', 'info_dict': { @@ -156,7 +156,7 @@ class RutubeChannelIE(InfoExtractor): class RutubeMovieIE(RutubeChannelIE): IE_NAME = 'rutube:movie' IE_DESC = 'Rutube movies' - _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' + _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' _TESTS = [] _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' @@ -174,7 +174,7 @@ class RutubeMovieIE(RutubeChannelIE): class RutubePersonIE(RutubeChannelIE): IE_NAME = 'rutube:person' IE_DESC = 'Rutube person videos' - _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' + _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' _TESTS = [{ 'url': 'http://rutube.ru/video/person/313878/', 'info_dict': { diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index f2af15f6b..dd0a6ba19 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -11,7 +11,7 @@ from ..utils import ( class ScreenJunkiesIE(InfoExtractor): - _VALID_URL = r'http://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', 'md5': '5c2b686bec3d43de42bde9ec047536b0', diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 4d3b58522..c5f474dd1 100644 --- a/youtube_dl/extractor/senateisvp.py +++ 
b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ['arch', '', 'http://ussenate-f.akamaihd.net/'] ] _IE_NAME = 'senate.gov' - _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)' + _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 8eda3c864..96fe0b90d 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -13,7 +13,7 @@ from ..utils import ( class SharedIE(InfoExtractor): IE_DESC = 'shared.sx and vivo.sx' - _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})' + _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})' _TESTS = [{ 'url': 'http://shared.sx/0060718775', diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py index dfe50ed45..7e6783306 100644 --- a/youtube_dl/extractor/sport5.py +++ b/youtube_dl/extractor/sport5.py @@ -8,7 +8,7 @@ from ..utils import ExtractorError class Sport5IE(InfoExtractor): - _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' _TESTS = [ { 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py index 13101c714..54d1843f2 100644 --- a/youtube_dl/extractor/ssa.py +++ b/youtube_dl/extractor/ssa.py @@ -8,7 +8,7 @@ from ..utils import ( class SSAIE(InfoExtractor): - _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)' + _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)' _TEST = { 'url': 'http://ssa.nls.uk/film/3561', 'info_dict': { diff --git a/youtube_dl/extractor/sztvhu.py 
b/youtube_dl/extractor/sztvhu.py index aa5964acb..f562aa6d3 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class SztvHuIE(InfoExtractor): - _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)' _TEST = { 'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909', 'md5': 'a6df607b11fb07d0e9f2ad94613375cb', diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index d1b7264b4..b49ab5f5b 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -16,7 +16,7 @@ from ..compat import compat_ord class TeamcocoIE(InfoExtractor): - _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' + _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' _TESTS = [ { 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py index 4e860db0a..a29a64b6d 100644 --- a/youtube_dl/extractor/tele13.py +++ b/youtube_dl/extractor/tele13.py @@ -11,7 +11,7 @@ from ..utils import ( class Tele13IE(InfoExtractor): - _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)' + _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)' _TESTS = [ { 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 9ee844684..3f54b2744 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = 
r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html' _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py index 496f15d80..406f4a826 100644 --- a/youtube_dl/extractor/thvideo.py +++ b/youtube_dl/extractor/thvideo.py @@ -10,7 +10,7 @@ from ..utils import ( class THVideoIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' _TEST = { 'url': 'http://thvideo.tv/v/th1987/', 'md5': 'fa107b1f73817e325e9433505a70db50', diff --git a/youtube_dl/extractor/tinypic.py b/youtube_dl/extractor/tinypic.py index e036b8cdf..c43cace24 100644 --- a/youtube_dl/extractor/tinypic.py +++ b/youtube_dl/extractor/tinypic.py @@ -9,7 +9,7 @@ from ..utils import ExtractorError class TinyPicIE(InfoExtractor): IE_NAME = 'tinypic' IE_DESC = 'tinypic.com videos' - _VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+' + _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+' _TESTS = [ { diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 17add9543..abad3ff64 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -9,7 +9,7 @@ from ..compat import compat_parse_qs class TlcDeIE(InfoExtractor): IE_NAME = 'tlc.de' - _VALID_URL = r'http://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' + _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' 
_TEST = { 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 2756f56d3..2579ba8c6 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -41,7 +41,7 @@ class ToypicsIE(InfoExtractor): class ToypicsUserIE(InfoExtractor): IE_DESC = 'Toypics user profile' - _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])' + _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])' _TEST = { 'url': 'http://videos.toypics.net/Mikey', 'info_dict': { diff --git a/youtube_dl/extractor/traileraddict.py b/youtube_dl/extractor/traileraddict.py index 0e01b15fc..747370d12 100644 --- a/youtube_dl/extractor/traileraddict.py +++ b/youtube_dl/extractor/traileraddict.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class TrailerAddictIE(InfoExtractor): _WORKING = False - _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' + _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' _TEST = { 'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer', 'md5': '41365557f3c8c397d091da510e73ceb4', diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index d239949a6..657705623 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -7,7 +7,7 @@ from .nuevo import NuevoBaseIE class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'http://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' IE_NAME = 'trollvids' _TEST = { 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', diff --git a/youtube_dl/extractor/tumblr.py 
b/youtube_dl/extractor/tumblr.py index 4f844706d..cea117c79 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class TumblrIE(InfoExtractor): - _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' + _VALID_URL = r'https?://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', 'md5': '479bb068e5b16462f5176a6828829767', diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index 535d0d361..86bb7915d 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -14,7 +14,7 @@ from ..utils import ( class TV2IE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' _TEST = { 'url': 'http://www.tv2.no/v/916509/', 'info_dict': { @@ -100,7 +100,7 @@ class TV2IE(InfoExtractor): class TV2ArticleIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', 'info_dict': { diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 3a4f393fc..4065354dd 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -11,7 +11,7 @@ from ..utils import ( class TVCIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)' _TEST = { 'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702', 'md5': 'bbc5ff531d1e90e856f60fc4b3afd708', @@ -64,7 +64,7 @@ class TVCIE(InfoExtractor): class 
TVCArticleIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)' _TESTS = [{ 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/', 'info_dict': { diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index b4683de54..df70a6b23 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -13,7 +13,7 @@ from ..utils import ( class TVPlayIE(InfoExtractor): IE_DESC = 'TV3Play and related services' - _VALID_URL = r'''(?x)http://(?:www\.)? + _VALID_URL = r'''(?x)https?://(?:www\.)? (?:tvplay\.lv/parraides| tv3play\.lt/programos| play\.tv3\.lt/programos| diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py index d50237758..1d52cbc98 100644 --- a/youtube_dl/extractor/ubu.py +++ b/youtube_dl/extractor/ubu.py @@ -10,7 +10,7 @@ from ..utils import ( class UbuIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' + _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' _TEST = { 'url': 'http://ubu.com/film/her_noise.html', 'md5': '138d5652618bf0f03878978db9bef1ee', diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 594bee4f9..66d9f1bf3 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -7,7 +7,7 @@ from ..utils import qualities class UnistraIE(InfoExtractor): - _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)' + _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)' _TESTS = [ { diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 3794bcded..b755dda90 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -13,7 +13,7 @@ from ..utils import ( class Vbox7IE(InfoExtractor): - _VALID_URL = 
r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)' _TEST = { 'url': 'http://vbox7.com/play:249bb972c2', 'md5': '99f65c0c9ef9b682b97313e052734c3f', diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 9633f7ffe..23ce0a0d1 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -12,7 +12,7 @@ from ..utils import ( class VeohIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)' _TESTS = [ { diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index a0c59a2e0..cb64ae0bd 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -10,7 +10,7 @@ from .rutv import RUTVIE class VestiIE(InfoExtractor): IE_DESC = 'Вести.Ru' - _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' + _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P<id>.+)' _TESTS = [ { diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 77d8978d4..b11cd254c 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -214,7 +214,7 @@ class VGTVIE(XstreamIE): class BTArticleIE(InfoExtractor): IE_NAME = 'bt:article' IE_DESC = 'Bergens Tidende Articles' - _VALID_URL = r'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' + _VALID_URL = r'https?://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' _TEST = { 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', 'md5': '2acbe8ad129b3469d5ae51b1158878df', @@ -241,7 +241,7 @@ class BTArticleIE(InfoExtractor): class BTVestlendingenIE(InfoExtractor): IE_NAME = 'bt:vestlendingen' IE_DESC = 'Bergens Tidende - Vestlendingen' - _VALID_URL = r'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' + _VALID_URL = 
r'https?://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588', 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index 2cd36508a..0f798711b 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -14,7 +14,7 @@ class VideoTtIE(InfoExtractor): _WORKING = False ID_NAME = 'video.tt' IE_DESC = 'video.tt - Your True Tube' - _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})' + _VALID_URL = r'https?://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})' _TESTS = [{ 'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8', diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 315984bf9..03b9f1353 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -15,7 +15,7 @@ from ..utils import ( class ViideaIE(InfoExtractor): - _VALID_URL = r'''(?x)http://(?:www\.)?(?: + _VALID_URL = r'''(?x)https?://(?:www\.)?(?: videolectures\.net| flexilearn\.viidea\.net| presentations\.ocwconsortium\.org| diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 149e36467..10ca6acb1 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -15,7 +15,7 @@ from ..utils import ( class VubeIE(InfoExtractor): IE_NAME = 'vube' IE_DESC = 'Vube.com' - _VALID_URL = r'http://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b' + _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b' _TESTS = [ { diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index a6d9b5fee..eaa888f00 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -14,7 +14,7 @@ from ..utils import ( class VuClipIE(InfoExtractor): - _VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)' + _VALID_URL = 
r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)' _TEST = { 'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html', diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py index 24efbd6e6..8b9488340 100644 --- a/youtube_dl/extractor/walla.py +++ b/youtube_dl/extractor/walla.py @@ -11,7 +11,7 @@ from ..utils import ( class WallaIE(InfoExtractor): - _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' + _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' _TEST = { 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', 'info_dict': { diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 37cf3d309..5227bb5ad 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -12,7 +12,7 @@ from ..utils import ( class WatIE(InfoExtractor): - _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)' + _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)' IE_NAME = 'wat.tv' _TESTS = [ { diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 65cab4069..31c904303 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -244,7 +244,7 @@ class WDRMobileIE(InfoExtractor): class WDRMausIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))' + _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))' IE_DESC = 'Sendung mit der Maus' _TESTS = [{ 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index e333ae345..3dafbeec2 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -6,7 +6,7 @@ from 
.common import InfoExtractor class WeiqiTVIE(InfoExtractor): IE_DESC = 'WQTV' - _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index fb0accac7..828c03dc3 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -5,7 +5,7 @@ from .youtube import YoutubeIE class WimpIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.wimp.com/maruexhausted/', 'md5': 'ee21217ffd66d058e8b16be340b74883', diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index 4ff99e5ca..e4a2baad2 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -5,7 +5,7 @@ from ..compat import compat_urllib_parse_unquote class XBefIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking', 'md5': 'a478b565baff61634a98f5e5338be995', diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 001ee17b6..63bbc0634 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -15,7 +15,7 @@ from ..utils import ( class YamIE(InfoExtractor): IE_DESC = '蕃薯藤yam天空部落' - _VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)' + _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)' _TESTS = [{ # An audio hosted on Yam diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 869f3e819..2522551dc 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -9,7 +9,7 @@ from ..compat import 
compat_urllib_parse_unquote_plus class YnetIE(InfoExtractor): - _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' + _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' _TESTS = [ { 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', From 1600ed1ff9edb33ef901bda2c4b2732df7e0e4e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 21:46:49 +0600 Subject: [PATCH 034/128] [rutv] Improve flash version pattern (Closes #8911) --- youtube_dl/extractor/rutv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index f7fe1fece..a2379eb04 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -14,7 +14,7 @@ class RUTVIE(InfoExtractor): IE_DESC = 'RUTV.RU' _VALID_URL = r'''(?x) https?://player\.(?:rutv\.ru|vgtrk\.com)/ - (?P<path>flash2v/container\.swf\?id= + (?P<path>flash\d+v/container\.swf\?id= |iframe/(?P<type>swf|video|live)/id/ |index/iframe/cast_id/) (?P<id>\d+)''' @@ -109,7 +109,7 @@ class RUTVIE(InfoExtractor): return mobj.group('url') mobj = re.search( - r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)', + r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', webpage) if mobj: return mobj.group('url') @@ -119,7 +119,7 @@ class RUTVIE(InfoExtractor): video_id = mobj.group('id') video_path = mobj.group('path') - if video_path.startswith('flash2v'): + if re.match(r'flash\d+v', video_path): video_type = 'video' elif video_path.startswith('iframe'): video_type = mobj.group('type') @@ -168,7 +168,7 @@ class RUTVIE(InfoExtractor): 'play_path': mobj.group('playpath'), 'app': mobj.group('app'), 'page_url': 'http://player.rutv.ru', - 
'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', + 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', 'rtmp_live': True, 'ext': 'flv', 'vbr': int(quality), From 065c4b27bfc4488758b357e023e9b6b1679c9641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 22:07:34 +0600 Subject: [PATCH 035/128] [xhamster:embed] Extract vars (Closes #8912) --- youtube_dl/extractor/xhamster.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index fd43e8854..b3547174d 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + dict_get, float_or_none, int_or_none, unified_strdate, @@ -170,6 +171,12 @@ class XHamsterEmbedIE(InfoExtractor): video_url = self._search_regex( r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, - webpage, 'xhamster url') + webpage, 'xhamster url', default=None) + + if not video_url: + vars = self._parse_json( + self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'), + video_id) + video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')) return self.url_result(video_url, 'XHamster') From ff5873b72de16854ae8d506d5648148a54828243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 22:24:42 +0600 Subject: [PATCH 036/128] [motherless] Detect friends only videos --- youtube_dl/extractor/motherless.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index ad04b12cd..5e1a8a71a 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -69,6 +69,9 @@ class MotherlessIE(InfoExtractor): ">The page you're looking for cannot be found.<")): raise ExtractorError('Video %s does 
not exist' % video_id, expected=True) + if '>The content you are trying to view is for friends only.' in webpage: + raise ExtractorError('Video %s is for friends only' % video_id, expected=True) + title = self._html_search_regex( r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') video_url = self._html_search_regex( From 5c69f7a479936a8fc429228c8259dc5fcbf428b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 23:31:40 +0600 Subject: [PATCH 037/128] [animeondemand] Respect startvideo (Closes #8923) --- youtube_dl/extractor/animeondemand.py | 37 +++++++++++++++++++-------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 4352525e2..81a843035 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -163,7 +163,7 @@ class AnimeOnDemandIE(InfoExtractor): if kind: format_id_list.append(kind) if not format_id_list: - format_id_list.append('hls') + format_id_list.append(compat_str(num)) format_id = '-'.join(format_id_list) format_note = ', '.join(filter(None, (kind, lang_note))) request = sanitized_Request( @@ -179,26 +179,41 @@ class AnimeOnDemandIE(InfoExtractor): fatal=False) if not playlist: continue + start_video = playlist.get('startvideo', 0) playlist = playlist.get('playlist') if not playlist or not isinstance(playlist, list): continue - playlist = playlist[0] + playlist = playlist[start_video] title = playlist.get('title') if not title: continue description = playlist.get('description') for source in playlist.get('sources', []): file_ = source.get('file') - if file_ and determine_ext(file_) == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( + if not file_: + continue + ext = determine_ext(file_) + format_id_list = [lang, kind] + if ext == 'm3u8': + format_id_list.append('hls') + elif source.get('type') == 'video/dash' or ext == 'mpd': + 
format_id_list.append('dash') + format_id = '-'.join(filter(None, format_id_list)) + if ext == 'm3u8': + file_formats = self._extract_m3u8_formats( file_, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id=format_id) - for f in m3u8_formats: - f.update({ - 'language': lang, - 'format_note': format_note, - }) - formats.extend(m3u8_formats) + entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) + elif source.get('type') == 'video/dash' or ext == 'mpd': + file_formats = self._extract_mpd_formats( + file_, video_id, mpd_id=format_id, fatal=False) + else: + continue + for f in file_formats: + f.update({ + 'language': lang, + 'format_note': format_note, + }) + formats.extend(file_formats) if formats: self._sort_formats(formats) From 85c637b7376f0426e5e0a6812da2a72b2ca28680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 23:35:50 +0600 Subject: [PATCH 038/128] [animeondemand] Extract teaser when no full episode available (#8923) --- youtube_dl/extractor/animeondemand.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 81a843035..a47697738 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -225,16 +225,18 @@ class AnimeOnDemandIE(InfoExtractor): }) entries.append(f) - m = re.search( - r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', - episode_html) - if m: - f = common_info.copy() - f.update({ - 'id': '%s-teaser' % f['id'], - 'title': m.group('title'), - 'url': compat_urlparse.urljoin(url, m.group('href')), - }) - entries.append(f) + # Extract teaser only when full episode is not available + if not formats: + m = re.search( + r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', + episode_html) + if m: + f = common_info.copy() + f.update({ + 'id': 
'%s-teaser' % f['id'], + 'title': m.group('title'), + 'url': compat_urlparse.urljoin(url, m.group('href')), + }) + entries.append(f) return self.playlist_result(entries, anime_id, anime_title, anime_description) From bc5d16b3023ab29216d868bbfe8838b5e92e72f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Mar 2016 23:37:39 +0600 Subject: [PATCH 039/128] [animeondemand] Skip dash for now --- youtube_dl/extractor/animeondemand.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index a47697738..2cede55a7 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -204,6 +204,7 @@ class AnimeOnDemandIE(InfoExtractor): file_, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) elif source.get('type') == 'video/dash' or ext == 'mpd': + continue file_formats = self._extract_mpd_formats( file_, video_id, mpd_id=format_id, fatal=False) else: From cc7397b04d4a21b5ac680858ee2600e3b3bfb569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Mar 2016 21:12:29 +0600 Subject: [PATCH 040/128] [ceskatelevize] Make m3u8 formats extraction non fatal (Closes #8933) --- youtube_dl/extractor/ceskatelevize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b27b4e670..b355111cb 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -129,7 +129,8 @@ class CeskaTelevizeIE(InfoExtractor): formats = [] for format_id, stream_url in item['streamUrls'].items(): formats.extend(self._extract_m3u8_formats( - stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native')) + stream_url, playlist_id, 'mp4', + entry_protocol='m3u8_native', fatal=False)) self._sort_formats(formats) item_id = item.get('id') or item['assetId'] 
From 2beeb286e179a00bc0c76fc55ca5c8d19e74ca41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Mar 2016 22:32:59 +0600 Subject: [PATCH 041/128] [laola1tv] Add support for livestreams (Closes #8934) --- youtube_dl/extractor/laola1tv.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 5d8ebbeb3..41d80bc12 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -19,7 +19,7 @@ from ..utils import ( class Laola1TvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { @@ -33,7 +33,7 @@ class Laola1TvIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie', 'info_dict': { @@ -47,12 +47,28 @@ class Laola1TvIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, + }, { + 'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde', + 'info_dict': { + 'id': '487850', + 'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde', + 'ext': 'flv', + 'title': 'Belogorie BELGOROD - TRENTINO Diatec', + 'upload_date': '20160322', + 'uploader': 'CEV - Europäischer Volleyball Verband', + 'is_live': True, + 'categories': ['Volleyball'], + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('slug') + kind = mobj.group('kind') lang = mobj.group('lang') portal = mobj.group('portal') @@ -85,12 +101,17 @@ class Laola1TvIE(InfoExtractor): _v = lambda x, **k: 
xpath_text(hd_doc, './/video/' + x, **k) title = _v('title', fatal=True) + VS_TARGETS = { + 'video': '2', + 'livestream': '17', + } + req = sanitized_Request( 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' % compat_urllib_parse.urlencode({ 'videoId': video_id, - 'target': '2', - 'label': 'laola1tv', + 'target': VS_TARGETS.get(kind, '2'), + 'label': _v('label'), 'area': _v('area'), }), urlencode_postdata( From c6ca11f1b31a292413cab03012654fd0021814a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Mar 2016 23:48:05 +0600 Subject: [PATCH 042/128] [once] Prevent ads from embedding into m3u8 playlists (Closes #8893) --- youtube_dl/extractor/once.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 080045d4c..5db949b17 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -20,6 +20,10 @@ class OnceIE(InfoExtractor): media_item_id, 'mp4', m3u8_id='hls', fatal=False) progressive_formats = [] for adaptive_format in formats: + # Prevent advertisement from embedding into m3u8 playlist (see + # https://github.com/rg3/youtube-dl/issues/8893#issuecomment-199912684) + adaptive_format['url'] = re.sub( + r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url']) rendition_id = self._search_regex( r'/now/media/playlist/[^/]+/[^/]+/([^/]+)', adaptive_format['url'], 'redition id', default=None) From 7da2c87119db8beda1bdc979fad38c08fc1252e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 22 Mar 2016 22:17:59 +0100 Subject: [PATCH 043/128] Add extractor for thescene.com (closes #8929) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/thescene.py | 48 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/thescene.py diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ee792bbe0..8f7df4d12 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -743,6 +743,7 @@ from .theplatform import ( ThePlatformIE, ThePlatformFeedIE, ) +from .thescene import TheSceneIE from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py new file mode 100644 index 000000000..08d666eaf --- /dev/null +++ b/youtube_dl/extractor/thescene.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import compat_urllib_parse +from ..utils import qualities + + +class TheSceneIE(InfoExtractor): + _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' + + _TEST = { + 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', + 'info_dict': { + 'id': '520e8faac2b4c00e3c6e5f43', + 'ext': 'mp4', + 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', + 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + player_url = compat_urllib_parse.urljoin( + url, + self._html_search_regex( + r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url')) + + self.to_screen(player_url) + player = self._download_webpage(player_url, player_url) + info = self._parse_json(self._search_regex(r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), display_id) + + qualities_order = qualities(['low', 'high']) + formats = [{ + 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), + 'url': f['src'], + 'quality': qualities_order(f['quality']), + } for f in info['sources'][0]] + self._sort_formats(formats) + + return { + 'id': info['id'], + 'title': info['title'], + 'formats': formats, + 
'thumbnail': info.get('poster_frame'), + 'display_id': display_id, + } From efbed08dc20c530fe428256e4dcbea4dc4423d0d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 23 Mar 2016 22:24:52 +0800 Subject: [PATCH 044/128] [utils] Encode hostnames before passing to urllib With IDN (Internationalized Domain Name) and a proxy, non-ascii URLs are passed down to urllib/urllib2, causing UnicodeEncodeError Fixes #8890 --- test/test_http.py | 10 ++++++++++ youtube_dl/utils.py | 1 + 2 files changed, 11 insertions(+) diff --git a/test/test_http.py b/test/test_http.py index fc59b1aed..15e0ad369 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 from __future__ import unicode_literals # Allow direct execution @@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase): response = ydl.urlopen(req).read().decode('utf-8') self.assertEqual(response, 'cn: {0}'.format(url)) + def test_proxy_with_idn(self): + ydl = YoutubeDL({ + 'proxy': 'localhost:{0}'.format(self.port), + }) + url = 'http://中文.tw/' + response = ydl.urlopen(url).read().decode('utf-8') + # b'xn--fiq228c' is '中文'.encode('idna') + self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 067b8a184..03bb7782f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1746,6 +1746,7 @@ def escape_url(url): """Escape URL as suggested by RFC 3986""" url_parsed = compat_urllib_parse_urlparse(url) return url_parsed._replace( + netloc=url_parsed.netloc.encode('idna').decode('ascii'), path=escape_rfc3986(url_parsed.path), params=escape_rfc3986(url_parsed.params), query=escape_rfc3986(url_parsed.query), From 882c6992967914c245e086ddaacde9d595cd6ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Mar 2016 20:45:39 +0600 Subject: [PATCH 045/128] [tunein] Fix stream data extraction (Closes 
#8899, closes #8924) --- youtube_dl/extractor/tunein.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py index 8322cc14d..ae4cfaec2 100644 --- a/youtube_dl/extractor/tunein.py +++ b/youtube_dl/extractor/tunein.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -import json +import re from .common import InfoExtractor from ..utils import ExtractorError @@ -27,10 +27,9 @@ class TuneInBaseIE(InfoExtractor): if not streams_url.startswith('http://'): streams_url = compat_urlparse.urljoin(url, streams_url) - stream_data = self._download_webpage( - streams_url, content_id, note='Downloading stream data') - streams = json.loads(self._search_regex( - r'\((.*)\);', stream_data, 'stream info'))['Streams'] + streams = self._download_json( + streams_url, content_id, note='Downloading stream data', + transform_source=lambda s: re.sub(r'^\s*\((.*)\);\s*$', r'\1', s))['Streams'] is_live = None formats = [] From 2d60465e44c3290fa1ee4239fe18eb9d0c69d9e9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 23 Mar 2016 23:20:28 +0800 Subject: [PATCH 046/128] [test/test_utils] Update for escape_url change --- test/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 325b870cc..8ba531542 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -577,7 +577,7 @@ class TestUtil(unittest.TestCase): ) self.assertEqual( escape_url('http://тест.рф/фрагмент'), - 'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' ) self.assertEqual( escape_url('http://тест.рф/абв?абв=абв#абв'), From 81f36eba886349475235dbacef29d6b23b40538f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 23 Mar 2016 23:23:26 +0800 Subject: [PATCH 047/128] [test/test_utils] Update for escape_url 
change (again) --- test/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8ba531542..a35debfe1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -581,7 +581,7 @@ class TestUtil(unittest.TestCase): ) self.assertEqual( escape_url('http://тест.рф/абв?абв=абв#абв'), - 'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') From 088e1aac5970ea2c24fa902873a5e0b984b37595 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 23 Mar 2016 23:55:08 +0800 Subject: [PATCH 048/128] [generic] Support Vine embeds (#8817) --- youtube_dl/extractor/generic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 26de27a7e..93e0563b5 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1909,6 +1909,14 @@ class GenericIE(InfoExtractor): self._proto_relative_url(unescapeHTML(mobj.group(1))), 'AdobeTVVideo') + # Look for Vine embeds + mobj = re.search( + r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))', + webpage) + if mobj is not None: + return self.url_result( + self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From fc27ea94642a8e2e9b0fcfdcc0c370ec7484c971 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 23 Mar 2016 23:55:52 +0800 Subject: [PATCH 049/128] [tumblr] Support Vine embeds (#8817) --- youtube_dl/extractor/tumblr.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index cea117c79..584716986 
100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -67,6 +67,22 @@ class TumblrIE(InfoExtractor): 'uploader_id': 'user32021558', }, 'add_ie': ['Vimeo'], + }, { + 'url': 'http://sutiblr.tumblr.com/post/139638707273', + 'md5': '2dd184b3669e049ba40563a7d423f95c', + 'info_dict': { + 'id': 'ir7qBEIKqvq', + 'ext': 'mp4', + 'title': 'Vine by sutiblr', + 'alt_title': 'Vine by sutiblr', + 'uploader': 'sutiblr', + 'uploader_id': '1198993975374495744', + 'upload_date': '20160220', + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + 'add_ie': ['Vine'], }] def _real_extract(self, url): From c4096e8aeaa373159e350a3674b0ce18b6c519e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Mar 2016 16:29:33 +0800 Subject: [PATCH 050/128] [instagram] Extract embed videos (#8817) --- youtube_dl/extractor/instagram.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index ed3e07118..e8b27b379 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + get_element_by_attribute, int_or_none, limit_length, ) @@ -38,6 +39,18 @@ class InstagramIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_embed_url(webpage): + blockquote_el = get_element_by_attribute( + 'class', 'instagram-media', webpage) + if blockquote_el is None: + return + + mobj = re.search( + r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el) + if mobj: + return mobj.group('link') + def _real_extract(self, url): video_id = self._match_id(url) From 87696e78d7203cf47bdd27773d06ba15af7c819d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Mar 2016 16:30:01 +0800 Subject: [PATCH 051/128] [instagram] Unescape description (#8817) --- youtube_dl/extractor/instagram.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index e8b27b379..4e62098b0 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -7,6 +7,7 @@ from ..utils import ( get_element_by_attribute, int_or_none, limit_length, + lowercase_escape, ) @@ -59,6 +60,8 @@ class InstagramIE(InfoExtractor): webpage, 'uploader id', fatal=False) desc = self._search_regex( r'"caption":"(.+?)"', webpage, 'description', default=None) + if desc is not None: + desc = lowercase_escape(desc) return { 'id': video_id, From 5a51775a58b901c63bb0b59e44a113ad16ceb236 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Mar 2016 16:32:27 +0800 Subject: [PATCH 052/128] [generic] Extract Instagram embeds (#8817) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 93e0563b5..12f2309fc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -59,6 +59,7 @@ from .videomore import VideomoreIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE +from .instagram import InstagramIE class GenericIE(InfoExtractor): @@ -1917,6 +1918,11 @@ class GenericIE(InfoExtractor): return self.url_result( self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine') + # Look for Instagram embeds + instagram_embed_url = InstagramIE._extract_embed_url(webpage) + if instagram_embed_url is not None: + return self.url_result(instagram_embed_url, InstagramIE.ie_key()) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From 32d88410eb2ef0ca25ab770af8f2ca2326c0aca7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Mar 2016 16:32:53 +0800 Subject: [PATCH 053/128] [tumblr] Add a test with Instagram embed Closes #8817 --- youtube_dl/extractor/tumblr.py | 12 ++++++++++++ 1 file 
changed, 12 insertions(+) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 584716986..e5bcf7798 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -83,6 +83,18 @@ class TumblrIE(InfoExtractor): 'repost_count': int, }, 'add_ie': ['Vine'], + }, { + 'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or', + 'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72', + 'info_dict': { + 'id': '-7LnUPGlSo', + 'ext': 'mp4', + 'title': 'Video by victoriassecret', + 'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat', + 'uploader_id': 'victoriassecret', + 'thumbnail': 're:^https?://.*\.jpg' + }, + 'add_ie': ['Instagram'], }] def _real_extract(self, url): From 622d19160bd81161e18c6ce868c359549a4d0413 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Mar 2016 18:06:15 +0800 Subject: [PATCH 054/128] [utils] Clarify Python versions affected by buggy struct module --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 03bb7782f..b6e1dc809 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1756,7 +1756,8 @@ def escape_url(url): try: struct.pack('!I', 0) except TypeError: - # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument + # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument + # See https://bugs.python.org/issue19099 def struct_pack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') From 5767b4eeae997a4ef75e348b46489cbb55126414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Mar 2016 22:23:31 +0600 Subject: [PATCH 055/128] [mtv] Fix description extraction (Closes #8962) --- youtube_dl/extractor/mtv.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/mtv.py 
b/youtube_dl/extractor/mtv.py index ed068365d..824bbcb4e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -17,6 +17,7 @@ from ..utils import ( unescapeHTML, url_basename, RegexNotFoundError, + xpath_text, ) @@ -130,11 +131,7 @@ class MTVServicesInfoExtractor(InfoExtractor): message += item.text raise ExtractorError(message, expected=True) - description_node = itemdoc.find('description') - if description_node is not None: - description = description_node.text.strip() - else: - description = None + description = xpath_text(itemdoc, 'description') title_el = None if title_el is None: From 369e7e3ff02201210864b4e20af2893c40894ddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Mar 2016 22:54:26 +0600 Subject: [PATCH 056/128] [iprima] Fix extraction (Closes #8953) --- youtube_dl/extractor/iprima.py | 44 ++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 61a0de472..788bbe0d5 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,6 +6,8 @@ import time from .common import InfoExtractor from ..utils import ( + determine_ext, + js_to_json, sanitized_Request, ) @@ -30,8 +32,7 @@ class IPrimaIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -43,9 +44,42 @@ class IPrimaIE(InfoExtractor): req.add_header('Referer', url) playerpage = self._download_webpage(req, video_id, note='Downloading player') - m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url') + formats = [] - formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + def extract_formats(format_url, 
format_key=None, lang=None): + ext = determine_ext(format_url) + new_formats = [] + if format_key == 'hls' or ext == 'm3u8': + new_formats = self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) + elif format_key == 'dash' or ext == 'mpd': + return + new_formats = self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False) + if lang: + for f in new_formats: + if not f.get('language'): + f['language'] = lang + formats.extend(new_formats) + + options = self._parse_json( + self._search_regex( + r'(?s)var\s+playerOptions\s*=\s*({.+?});', + playerpage, 'player options', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + if options: + for key, tracks in options.get('tracks', {}).items(): + if not isinstance(tracks, list): + continue + for track in tracks: + src = track.get('src') + if src: + extract_formats(src, key.lower(), track.get('lang')) + + if not formats: + for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage): + extract_formats(src) self._sort_formats(formats) From 993271da0a70d6d5c194a10e48d43f3aa2abc956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Mar 2016 23:28:24 +0600 Subject: [PATCH 057/128] [nytimes] Tolerate missing metadata (Closes #8952) --- youtube_dl/extractor/nytimes.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 7f254b867..681683e86 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -18,8 +18,9 @@ class NYTimesBaseIE(InfoExtractor): description = video_data.get('summary') duration = float_or_none(video_data.get('duration'), 1000) - uploader = video_data['byline'] - timestamp = parse_iso8601(video_data['publication_date'][:-8]) + uploader = video_data.get('byline') + publication_date = video_data.get('publication_date') + timestamp = 
parse_iso8601(publication_date[:-8]) if publication_date else None def get_file_size(file_size): if isinstance(file_size, int): @@ -37,7 +38,7 @@ class NYTimesBaseIE(InfoExtractor): 'width': int_or_none(video.get('width')), 'height': int_or_none(video.get('height')), 'filesize': get_file_size(video.get('fileSize')), - } for video in video_data['renditions'] + } for video in video_data['renditions'] if video.get('url') ] self._sort_formats(formats) @@ -46,7 +47,7 @@ class NYTimesBaseIE(InfoExtractor): 'url': 'http://www.nytimes.com/%s' % image['url'], 'width': int_or_none(image.get('width')), 'height': int_or_none(image.get('height')), - } for image in video_data['images'] + } for image in video_data.get('images', []) if image.get('url') ] return { From f07e276a04292c3fa87f703931bad9b716e7ccdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 01:18:14 +0600 Subject: [PATCH 058/128] [youtube:live] Add extractor (Closes #8959) --- youtube_dl/extractor/youtube.py | 48 ++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 466f5da2e..96fa3b5aa 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1911,7 +1911,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @classmethod def suitable(cls, url): - return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url) + return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url) + else super(YoutubeChannelIE, cls).suitable(url)) def _real_extract(self, url): channel_id = self._match_id(url) @@ -1986,6 +1987,51 @@ class YoutubeUserIE(YoutubeChannelIE): return super(YoutubeUserIE, cls).suitable(url) +class YoutubeLiveIE(YoutubeBaseInfoExtractor): + IE_DESC = 'YouTube.com live streams' + _VALID_URL = 
r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live' + IE_NAME = 'youtube:live' + + _TESTS = [{ + 'url': 'http://www.youtube.com/user/TheYoungTurks/live', + 'info_dict': { + 'id': 'a48o2S1cPoo', + 'ext': 'mp4', + 'title': 'The Young Turks - Live Main Show', + 'uploader': 'The Young Turks', + 'uploader_id': 'TheYoungTurks', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', + 'upload_date': '20150715', + 'license': 'Standard YouTube License', + 'description': 'md5:438179573adcdff3c97ebb1ee632b891', + 'categories': ['News & Politics'], + 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + base_url = mobj.group('base_url') + webpage = self._download_webpage(url, channel_id, fatal=False) + if webpage: + page_type = self._og_search_property( + 'type', webpage, 'page type', default=None) + video_id = self._html_search_meta( + 'videoId', webpage, 'video id', default=None) + if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id): + return self.url_result(video_id, YoutubeIE.ie_key()) + return self.url_result(base_url) + + class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com user/channel playlists' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists' From d041a736741e37e75e94cddf9c8258de8f08b501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 01:39:25 +0600 Subject: [PATCH 059/128] [extractor/__init__] Add youtube:live and sort youtube extractors alphabetically --- youtube_dl/extractor/__init__.py | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8f7df4d12..d99873419 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -960,7 +960,9 @@ from .youtube import ( YoutubeChannelIE, YoutubeFavouritesIE, YoutubeHistoryIE, + YoutubeLiveIE, YoutubePlaylistIE, + YoutubePlaylistsIE, YoutubeRecommendedIE, YoutubeSearchDateIE, YoutubeSearchIE, @@ -970,7 +972,6 @@ from .youtube import ( YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, - YoutubePlaylistsIE, YoutubeWatchLaterIE, ) from .zapiks import ZapiksIE From ff9d5d093854a974afdd0191d331d817e7e4c2a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 02:26:46 +0600 Subject: [PATCH 060/128] [udemy] Improve course enrolling --- youtube_dl/extractor/udemy.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 74cc36ece..a5634ece9 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -5,6 +5,7 @@ from ..compat import ( compat_HTTPError, compat_urllib_parse, compat_urllib_request, + compat_urlparse, ) from ..utils import ( ExtractorError, @@ -35,7 +36,7 @@ class UdemyIE(InfoExtractor): 'skip': 'Requires udemy account credentials', }] - def _enroll_course(self, webpage, course_id): + def _enroll_course(self, base_url, webpage, course_id): checkout_url = unescapeHTML(self._search_regex( r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1', webpage, 'checkout url', group='url', default=None)) @@ -45,9 +46,11 @@ class UdemyIE(InfoExtractor): 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True) enroll_url = unescapeHTML(self._search_regex( - r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1', + 
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1', webpage, 'enroll url', group='url', default=None)) if enroll_url: + if not enroll_url.startswith('http'): + enroll_url = compat_urlparse.urljoin(base_url, enroll_url) webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') if '>You have enrolled in' in webpage: self.to_screen('%s: Successfully enrolled in the course' % course_id) @@ -152,7 +155,7 @@ class UdemyIE(InfoExtractor): except ExtractorError as e: # Error could possibly mean we are not enrolled in the course if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self._enroll_course(webpage, course_id) + self._enroll_course(url, webpage, course_id) lecture = self._download_lecture(course_id, lecture_id) else: raise @@ -244,7 +247,7 @@ class UdemyCourseIE(UdemyIE): course_id = response['id'] course_title = response.get('title') - self._enroll_course(webpage, course_id) + self._enroll_course(url, webpage, course_id) response = self._download_json( 'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, From f0e83681d97db52af9dc73d1c8e51d671503a222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 02:27:13 +0600 Subject: [PATCH 061/128] [udemy] Extract formats from outputs --- youtube_dl/extractor/udemy.py | 78 +++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index a5634ece9..2b886d6c8 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -180,39 +180,57 @@ class UdemyIE(InfoExtractor): video_id = asset['id'] thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url') duration = float_or_none(asset.get('data', {}).get('duration')) - outputs = asset.get('data', {}).get('outputs', {}) formats = [] - for format_ in asset.get('download_urls', {}).get('Video', []): - video_url = 
format_.get('file') - if not video_url: - continue - format_id = format_.get('label') - f = { - 'url': format_['file'], - 'height': int_or_none(format_id), + + def extract_output_format(src): + return { + 'url': src['url'], + 'format_id': '%sp' % (src.get('label') or format_id), + 'width': int_or_none(src.get('width')), + 'height': int_or_none(src.get('height')), + 'vbr': int_or_none(src.get('video_bitrate_in_kbps')), + 'vcodec': src.get('video_codec'), + 'fps': int_or_none(src.get('frame_rate')), + 'abr': int_or_none(src.get('audio_bitrate_in_kbps')), + 'acodec': src.get('audio_codec'), + 'asr': int_or_none(src.get('audio_sample_rate')), + 'tbr': int_or_none(src.get('total_bitrate_in_kbps')), + 'filesize': int_or_none(src.get('file_size_in_bytes')), } - if format_id: - # Some videos contain additional metadata (e.g. - # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) - output = outputs.get(format_id) - if isinstance(output, dict): - f.update({ - 'format_id': '%sp' % (output.get('label') or format_id), - 'width': int_or_none(output.get('width')), - 'height': int_or_none(output.get('height')), - 'vbr': int_or_none(output.get('video_bitrate_in_kbps')), - 'vcodec': output.get('video_codec'), - 'fps': int_or_none(output.get('frame_rate')), - 'abr': int_or_none(output.get('audio_bitrate_in_kbps')), - 'acodec': output.get('audio_codec'), - 'asr': int_or_none(output.get('audio_sample_rate')), - 'tbr': int_or_none(output.get('total_bitrate_in_kbps')), - 'filesize': int_or_none(output.get('file_size_in_bytes')), - }) - else: - f['format_id'] = '%sp' % format_id - formats.append(f) + + outputs = asset.get('data', {}).get('outputs') + if not isinstance(outputs, dict): + outputs = {} + + for format_id, output in outputs.items(): + if isinstance(output, dict) and output.get('url'): + formats.append(extract_output_format(output)) + + download_urls = asset.get('download_urls') + if isinstance(download_urls, dict): + video = download_urls.get('Video') + if 
isinstance(video, list): + for format_ in video: + video_url = format_.get('file') + if not video_url: + continue + format_id = format_.get('label') + f = { + 'url': format_['file'], + 'height': int_or_none(format_id), + } + if format_id: + # Some videos contain additional metadata (e.g. + # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) + output = outputs.get(format_id) + if isinstance(output, dict): + output_format = extract_output_format(output) + output_format.update(f) + f = output_format + else: + f['format_id'] = '%sp' % format_id + formats.append(f) self._sort_formats(formats) From 5eb7db4ee96c97d98a87d8d7df46e8c6a607b682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 02:28:39 +0600 Subject: [PATCH 062/128] [udemy] Add support for new URL schema --- youtube_dl/extractor/udemy.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 2b886d6c8..a9046b865 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -18,7 +18,16 @@ from ..utils import ( class UdemyIE(InfoExtractor): IE_NAME = 'udemy' - _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + www\.udemy\.com/ + (?: + [^#]+\#/lecture/| + lecture/view/?\?lectureId=| + [^/]+/learn/v4/t/lecture/ + ) + (?P<id>\d+) + ''' _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1' _ORIGIN_URL = 'https://www.udemy.com' _NETRC_MACHINE = 'udemy' @@ -34,6 +43,10 @@ class UdemyIE(InfoExtractor): 'duration': 579.29, }, 'skip': 'Requires udemy account credentials', + }, { + # new URL schema + 'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906', + 'only_matching': True, }] def _enroll_course(self, base_url, webpage, course_id): From e0317686666f9de4a6eca3fc26ede32e664f2bec Mon Sep 
17 00:00:00 2001 From: Kagami Hiiragi <kagami@genshiken.org> Date: Thu, 24 Mar 2016 13:55:02 +0300 Subject: [PATCH 063/128] [mnet] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mnet.py | 76 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/mnet.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d99873419..1e4b078a4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -410,6 +410,7 @@ from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import MixcloudIE from .mlb import MLBIE +from .mnet import MnetIE from .mpora import MporaIE from .moevideo import MoeVideoIE from .mofosex import MofosexIE diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py new file mode 100644 index 000000000..8e83b1fc3 --- /dev/null +++ b/youtube_dl/extractor/mnet.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, +) + + +class MnetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.mnet.com/tv/vod/171008', + 'md5': '6abd7a837fa9fe56d22709a60b19bffb', + 'info_dict': { + 'id': '171008', + 'title': 'SS_이해인@히든박스', + 'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55', + 'duration': 88, + 'upload_date': '20151231', + 'timestamp': 1451564040, + 'age_limit': 0, + 'thumbnails': 'mincount:5', + 'ext': 'flv', + }, + }, + { + 'url': 'http://mnet.interest.me/tv/vod/172790', + 'only_matching': True, + }, + { + 'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + info_url = 
'http://content.api.mnet.com/player/vodConfig?id=%s' % video_id + info = self._download_json(info_url, video_id) + info = info['data']['info'] + + title = info['title'] + rtmp_info_url = info['cdn'] + 'CLIP' + rtmp_info = self._download_json(rtmp_info_url, video_id) + file_url = rtmp_info['serverurl'] + rtmp_info['fileurl'] + description = info.get('ment') + duration = parse_duration(info.get('time')) + timestamp = parse_iso8601(info.get('date'), delimiter=' ') + age_limit = info.get('adult') + if age_limit is not None: + age_limit = 0 if age_limit == 'N' else 18 + thumbnails = [ + { + 'id': thumb_format, + 'url': thumb['url'], + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + } + for (thumb_format, thumb) in info.get('cover', {}).items() + ] + + return { + 'id': video_id, + 'title': title, + 'url': file_url, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'age_limit': age_limit, + 'thumbnails': thumbnails, + 'ext': 'flv', + } From 98e68806fb8cfe2a81ee8a6ac6705bb3d61ed2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 03:26:29 +0600 Subject: [PATCH 064/128] [mnet] Improve (Closes #8958) --- youtube_dl/extractor/mnet.py | 85 +++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py index 8e83b1fc3..e3f42e7bd 100644 --- a/youtube_dl/extractor/mnet.py +++ b/youtube_dl/extractor/mnet.py @@ -11,66 +11,71 @@ from ..utils import ( class MnetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.mnet.com/tv/vod/171008', - 'md5': '6abd7a837fa9fe56d22709a60b19bffb', - 'info_dict': { - 'id': '171008', - 'title': 'SS_이해인@히든박스', - 'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55', - 'duration': 88, - 'upload_date': '20151231', - 
'timestamp': 1451564040, - 'age_limit': 0, - 'thumbnails': 'mincount:5', - 'ext': 'flv', - }, + _TESTS = [{ + 'url': 'http://www.mnet.com/tv/vod/171008', + 'info_dict': { + 'id': '171008', + 'title': 'SS_이해인@히든박스', + 'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55', + 'duration': 88, + 'upload_date': '20151231', + 'timestamp': 1451564040, + 'age_limit': 0, + 'thumbnails': 'mincount:5', + 'thumbnail': 're:^https?://.*\.jpg$', + 'ext': 'flv', }, - { - 'url': 'http://mnet.interest.me/tv/vod/172790', - 'only_matching': True, + 'params': { + # rtmp download + 'skip_download': True, }, - { - 'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=', - 'only_matching': True, - }, - ] + }, { + 'url': 'http://mnet.interest.me/tv/vod/172790', + 'only_matching': True, + }, { + 'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - info_url = 'http://content.api.mnet.com/player/vodConfig?id=%s' % video_id - info = self._download_json(info_url, video_id) - info = info['data']['info'] + + info = self._download_json( + 'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id, + video_id, 'Downloading vod config JSON')['data']['info'] title = info['title'] - rtmp_info_url = info['cdn'] + 'CLIP' - rtmp_info = self._download_json(rtmp_info_url, video_id) - file_url = rtmp_info['serverurl'] + rtmp_info['fileurl'] + + rtmp_info = self._download_json( + info['cdn'], video_id, 'Downloading vod cdn JSON') + + formats = [{ + 'url': rtmp_info['serverurl'] + rtmp_info['fileurl'], + 'ext': 'flv', + 'page_url': url, + 'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318', + }] + description = info.get('ment') duration = parse_duration(info.get('time')) timestamp = parse_iso8601(info.get('date'), delimiter=' ') age_limit = info.get('adult') if age_limit is not None: age_limit = 0 if age_limit == 'N' else 
18 - thumbnails = [ - { - 'id': thumb_format, - 'url': thumb['url'], - 'width': int_or_none(thumb.get('width')), - 'height': int_or_none(thumb.get('height')), - } - for (thumb_format, thumb) in info.get('cover', {}).items() - ] + thumbnails = [{ + 'id': thumb_format, + 'url': thumb['url'], + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + } for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')] return { 'id': video_id, 'title': title, - 'url': file_url, 'description': description, 'duration': duration, 'timestamp': timestamp, 'age_limit': age_limit, 'thumbnails': thumbnails, - 'ext': 'flv', + 'formats': formats, } From 3f15fec1d13cf4b18c093271bee7ad7586c97f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 03:56:27 +0600 Subject: [PATCH 065/128] Credit @Kagami for mnet (#8958) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 51dfc8ddd..ea8d39978 100644 --- a/AUTHORS +++ b/AUTHORS @@ -166,3 +166,4 @@ Ben Congdon Kacper Michajłow José Joaquín Atria Viťas Strádal +Kagami Hiiragi From 3bb33568121126809e965dfacf542828d3606c10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 25 Mar 2016 15:43:29 +0800 Subject: [PATCH 066/128] [douyutv] Extend _VALID_URL --- youtube_dl/extractor/douyutv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index bcb670945..3915cb182 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring) class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -60,6 +60,9 @@ class 
DouyuTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.douyu.com/xiaocang', + 'only_matching': True, }] def _real_extract(self, url): From d7f62b049a07d59265f679d13d736f5f8b096ce4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 25 Mar 2016 15:45:40 +0800 Subject: [PATCH 067/128] [iqiyi] Update enc_key --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 1a4c64713..ffcea30ad 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -501,7 +501,7 @@ class IqiyiIE(InfoExtractor): def get_enc_key(self, video_id): # TODO: automatic key extraction # last update at 2016-01-22 for Zombie::bite - enc_key = '8ed797d224d043e7ac23d95b70227d32' + enc_key = '4a1caba4b4465345366f28da7c117d20' return enc_key def _extract_playlist(self, webpage): From 0be8314dc86a2630863ea89c94ee827a4b97b846 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 25 Mar 2016 09:27:18 +0100 Subject: [PATCH 068/128] release 2016.03.25 --- CONTRIBUTING.md | 2 +- README.md | 6 ++++-- docs/supportedsites.md | 8 ++++++++ youtube_dl/version.py | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c996f03ab..0df6193fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make +* make (both GNU make and BSD make are supported) * pandoc * zip * nosetests diff --git a/README.md b/README.md index fcc12d2b3..52b2a85a3 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,8 @@ which means you can modify it, redistribute it or use it however you like. (e.g. 50K or 4.2M) -R, --retries RETRIES Number of retries (default is 10), or "infinite". 
+ --fragment-retries RETRIES Number of retries for a fragment (default + is 10), or "infinite" (DASH only) --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer @@ -376,8 +378,8 @@ which means you can modify it, redistribute it or use it however you like. --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs Embed subtitles in the video (only for mkv - and mp4 videos) + --embed-subs Embed subtitles in the video (only for mp4, + webm and mkv videos) --embed-thumbnail Embed thumbnail in the audio as cover art --add-metadata Write metadata to the video file --metadata-from-title FORMAT Parse additional metadata like song title / diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3415efc45..00b8c247c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,7 @@ - **Bigflix** - **Bild**: Bild.de - **BiliBili** + - **BioBioChileTV** - **BleacherReport** - **BleacherReportCMS** - **blinkx** @@ -100,6 +101,7 @@ - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** + - **CDA** - **CeskaTelevize** - **channel9**: Channel 9 - **Chaturbate** @@ -244,6 +246,7 @@ - **GPUTechConf** - **Groupon** - **Hark** + - **HBO** - **HearThisAt** - **Heise** - **HellPorno** @@ -344,6 +347,7 @@ - **MiTele**: mitele.es - **mixcloud** - **MLB** + - **Mnet** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **Mojvideo** @@ -440,6 +444,7 @@ - **OnionStudios** - **Ooyala** - **OoyalaExternal** + - **Openload** - **OraTV** - **orf:fm4**: radio FM4 - **orf:iptv**: iptv.ORF.at @@ -525,6 +530,7 @@ - **RUTV**: RUTV.RU - **Ruutu** - **safari**: safaribooksonline.com online video + - **safari:api** - **safari:course**: safaribooksonline.com online courses - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos @@ 
-618,6 +624,7 @@ - **TheOnion** - **ThePlatform** - **ThePlatformFeed** + - **TheScene** - **TheSixtyOne** - **TheStar** - **ThisAmericanLife** @@ -786,6 +793,7 @@ - **youtube:channel**: YouTube.com channels - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication) - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication) + - **youtube:live**: YouTube.com live streams - **youtube:playlist**: YouTube.com playlists - **youtube:playlists**: YouTube.com user/channel playlists - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6b2c5fac9..2291ed783 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.18' +__version__ = '2016.03.25' From 4db441de72415f8262279e48d0b15ebc9e1da369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 19:51:28 +0600 Subject: [PATCH 069/128] [once] Relax _VALID_URL (Closes #8976) --- youtube_dl/extractor/once.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 5db949b17..1bf96ea56 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class OnceIE(InfoExtractor): - _VALID_URL = r'https?://once\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)' + _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)' ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 
'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' From 2156f16ca7babde4c5fa813dbe4e7ac1a2f758d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Mar 2016 20:14:34 +0600 Subject: [PATCH 070/128] [thescene] Fix extraction and improve style (Closes #8978) --- youtube_dl/extractor/thescene.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 08d666eaf..3e4e14031 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urlparse from ..utils import qualities @@ -21,17 +21,21 @@ class TheSceneIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - player_url = compat_urllib_parse.urljoin( + + player_url = compat_urlparse.urljoin( url, self._html_search_regex( r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url')) - self.to_screen(player_url) - player = self._download_webpage(player_url, player_url) - info = self._parse_json(self._search_regex(r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), display_id) + player = self._download_webpage(player_url, display_id) + info = self._parse_json( + self._search_regex( + r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), + display_id) - qualities_order = qualities(['low', 'high']) + qualities_order = qualities(('low', 'high')) formats = [{ 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), 'url': f['src'], @@ -41,8 +45,8 @@ class TheSceneIE(InfoExtractor): return { 'id': info['id'], + 'display_id': display_id, 'title': info['title'], 'formats': formats, 'thumbnail': info.get('poster_frame'), - 'display_id': display_id, } From 
15707c7e024f1f29e7abd8ddaa362196ef2d4af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 01:46:57 +0600 Subject: [PATCH 071/128] [compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 --- youtube_dl/compat.py | 26 ++++++++++++++++++++ youtube_dl/extractor/addanime.py | 4 +-- youtube_dl/extractor/animeondemand.py | 3 +-- youtube_dl/extractor/atresplayer.py | 4 +-- youtube_dl/extractor/bambuser.py | 4 +-- youtube_dl/extractor/camdemy.py | 4 +-- youtube_dl/extractor/ceskatelevize.py | 4 +-- youtube_dl/extractor/cloudy.py | 4 +-- youtube_dl/extractor/comedycentral.py | 4 +-- youtube_dl/extractor/common.py | 4 +-- youtube_dl/extractor/condenast.py | 4 +-- youtube_dl/extractor/crunchyroll.py | 8 +++--- youtube_dl/extractor/daum.py | 6 ++--- youtube_dl/extractor/dcn.py | 8 +++--- youtube_dl/extractor/dramafever.py | 4 +-- youtube_dl/extractor/eroprofile.py | 4 +-- youtube_dl/extractor/fc2.py | 5 ++-- youtube_dl/extractor/fivemin.py | 4 +-- youtube_dl/extractor/flickr.py | 4 +-- youtube_dl/extractor/funimation.py | 5 ++-- youtube_dl/extractor/gdcvault.py | 4 +-- youtube_dl/extractor/hotnewhiphop.py | 4 +-- youtube_dl/extractor/hypem.py | 4 +-- youtube_dl/extractor/internetvideoarchive.py | 4 +-- youtube_dl/extractor/iqiyi.py | 8 +++--- youtube_dl/extractor/ivideon.py | 4 +-- youtube_dl/extractor/kaltura.py | 4 +-- youtube_dl/extractor/laola1tv.py | 6 ++--- youtube_dl/extractor/leeco.py | 8 +++--- youtube_dl/extractor/lynda.py | 6 ++--- youtube_dl/extractor/matchtv.py | 4 +-- youtube_dl/extractor/metacafe.py | 4 +-- youtube_dl/extractor/minhateca.py | 4 +-- youtube_dl/extractor/mitele.py | 5 ++-- youtube_dl/extractor/moevideo.py | 4 +-- youtube_dl/extractor/moniker.py | 4 
+-- youtube_dl/extractor/mooshare.py | 4 +-- youtube_dl/extractor/mtv.py | 4 +-- youtube_dl/extractor/muzu.py | 8 +++--- youtube_dl/extractor/myvideo.py | 4 +-- youtube_dl/extractor/naver.py | 6 ++--- youtube_dl/extractor/nba.py | 4 +-- youtube_dl/extractor/neteasemusic.py | 4 +-- youtube_dl/extractor/nextmovie.py | 4 +-- youtube_dl/extractor/nfb.py | 4 +-- youtube_dl/extractor/nhl.py | 6 ++--- youtube_dl/extractor/nick.py | 4 +-- youtube_dl/extractor/niconico.py | 7 +++--- youtube_dl/extractor/noco.py | 4 +-- youtube_dl/extractor/novamov.py | 3 +-- youtube_dl/extractor/npr.py | 4 +-- youtube_dl/extractor/ooyala.py | 4 +-- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/played.py | 4 +-- youtube_dl/extractor/playtvak.py | 4 +-- youtube_dl/extractor/pluralsight.py | 4 +-- youtube_dl/extractor/porn91.py | 4 +-- youtube_dl/extractor/primesharetv.py | 4 +-- youtube_dl/extractor/promptfile.py | 4 +-- youtube_dl/extractor/prosiebensat1.py | 10 +++----- youtube_dl/extractor/shahid.py | 4 +-- youtube_dl/extractor/shared.py | 4 +-- youtube_dl/extractor/sharesix.py | 4 +-- youtube_dl/extractor/sina.py | 4 +-- youtube_dl/extractor/smotri.py | 6 ++--- youtube_dl/extractor/sohu.py | 4 +-- youtube_dl/extractor/soundcloud.py | 12 ++++----- youtube_dl/extractor/streamcloud.py | 4 +-- youtube_dl/extractor/telecinco.py | 4 +-- youtube_dl/extractor/tubitv.py | 4 +-- youtube_dl/extractor/twitch.py | 9 +++---- youtube_dl/extractor/udemy.py | 6 ++--- youtube_dl/extractor/vbox7.py | 4 +-- youtube_dl/extractor/viddler.py | 4 +-- youtube_dl/extractor/vimeo.py | 13 +++++----- youtube_dl/extractor/vk.py | 4 +-- youtube_dl/extractor/vlive.py | 4 +-- youtube_dl/extractor/vodlocker.py | 4 +-- youtube_dl/extractor/xfileshare.py | 5 ++-- youtube_dl/extractor/yahoo.py | 3 ++- youtube_dl/extractor/yandexmusic.py | 4 +-- youtube_dl/extractor/youku.py | 4 +-- youtube_dl/extractor/youtube.py | 23 +++++++++-------- youtube_dl/utils.py | 14 +++-------- 84 files changed, 229 insertions(+), 
222 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index dbb91a6ef..76b6b0e38 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -169,6 +169,31 @@ except ImportError: # Python 2 string = string.replace('+', ' ') return compat_urllib_parse_unquote(string, encoding, errors) +try: + from urllib.parse import urlencode as compat_urllib_parse_urlencode +except ImportError: # Python 2 + # Python 2 will choke in urlencode on mixture of byte and unicode strings. + # Possible solutions are to either port it from python 3 with all + # the friends or manually ensure input query contains only byte strings. + # We will stick with latter thus recursively encoding the whole query. + def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'): + def encode_elem(e): + if isinstance(e, dict): + e = encode_dict(e) + elif isinstance(e, (list, tuple,)): + e = encode_list(e) + elif isinstance(e, compat_str): + e = e.encode(encoding) + return e + + def encode_dict(d): + return dict((encode_elem(k), encode_elem(v)) for k, v in d.items()) + + def encode_list(l): + return [encode_elem(e) for e in l] + + return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq) + try: from urllib.request import DataHandler as compat_urllib_request_DataHandler except ImportError: # Python < 3.4 @@ -588,6 +613,7 @@ __all__ = [ 'compat_urllib_parse_unquote', 'compat_urllib_parse_unquote_plus', 'compat_urllib_parse_unquote_to_bytes', + 'compat_urllib_parse_urlencode', 'compat_urllib_parse_urlparse', 'compat_urllib_request', 'compat_urllib_request_DataHandler', diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index fb1cc02e1..55a9322a7 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -6,7 +6,7 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( 
@@ -60,7 +60,7 @@ class AddAnimeIE(InfoExtractor): confirm_url = ( parsed_url.scheme + '://' + parsed_url.netloc + action + '?' + - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) self._download_webpage( confirm_url, video_id, diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 2cede55a7..9b01e38f5 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -9,7 +9,6 @@ from ..compat import ( ) from ..utils import ( determine_ext, - encode_dict, extract_attributes, ExtractorError, sanitized_Request, @@ -71,7 +70,7 @@ class AnimeOnDemandIE(InfoExtractor): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) request = sanitized_Request( - post_url, urlencode_postdata(encode_dict(login_form))) + post_url, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index b8f9ae005..f9568cb5b 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -8,7 +8,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( int_or_none, @@ -86,7 +86,7 @@ class AtresPlayerIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index da986e063..1a2eef48d 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,7 +5,7 @@ import 
itertools from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, ) from ..utils import ( @@ -58,7 +58,7 @@ class BambuserIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index dd4d96cec..6ffbeabd3 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -139,7 +139,7 @@ class CamdemyFolderIE(InfoExtractor): parsed_url = list(compat_urlparse.urlparse(url)) query = dict(compat_urlparse.parse_qsl(parsed_url[4])) query.update({'displayMode': 'list'}) - parsed_url[4] = compat_urllib_parse.urlencode(query) + parsed_url[4] = compat_urllib_parse_urlencode(query) final_url = compat_urlparse.urlunparse(parsed_url) page = self._download_webpage(final_url, folder_id) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b355111cb..d93108df5 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -5,8 +5,8 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -102,7 +102,7 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request( 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - data=compat_urllib_parse.urlencode(data)) + data=compat_urllib_parse_urlencode(data)) 
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 0fa720ee8..9e267e6c0 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_HTTPError, ) from ..utils import ( @@ -64,7 +64,7 @@ class CloudyIE(InfoExtractor): 'errorUrl': error_url, }) - data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form)) + data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form)) player_data = self._download_webpage( data_url, video_id, 'Downloading player data') data = compat_parse_qs(player_data) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 5b1b99675..0c59102e0 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -5,7 +5,7 @@ import re from .mtv import MTVServicesInfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): # Correct cc.com in uri uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri) - index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri})) + index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri})) idoc = self._download_xml( index_url, epTitle, 'Downloading show index', 'Unable to download episode index') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 770105a5b..b412fd030 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,7 +21,7 @@ from ..compat import ( compat_os_name, compat_str, 
compat_urllib_error, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -1300,7 +1300,7 @@ class InfoExtractor(object): 'plugin': 'flowplayer-3.2.0.1', } f4m_url += '&' if '?' in f4m_url else '?' - f4m_url += compat_urllib_parse.urlencode(f4m_params) + f4m_url += compat_urllib_parse_urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) continue diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 054978ff2..e8f2b5a07 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) @@ -97,7 +97,7 @@ class CondeNastIE(InfoExtractor): video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') - data = compat_urllib_parse.urlencode({'videoId': video_id, + data = compat_urllib_parse_urlencode({'videoId': video_id, 'playerId': player_id, 'target': target, }) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 85fa7a725..7746f1be3 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -11,8 +11,8 @@ from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) @@ -78,7 +78,7 @@ class CrunchyrollBaseIE(InfoExtractor): # See https://github.com/rg3/youtube-dl/issues/7202. 
qs['skip_wall'] = ['1'] return compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) class CrunchyrollIE(CrunchyrollBaseIE): @@ -308,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = sanitized_Request(playerdata_url) - playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) + playerdata_req.data = compat_urllib_parse_urlencode({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') @@ -322,7 +322,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text streamdata_req = sanitized_Request( 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' % (stream_id, stream_format, stream_quality), - compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8')) + compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata = self._download_xml( streamdata_req, video_id, diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index c84c51058..86024a745 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -8,8 +8,8 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -70,7 +70,7 @@ class DaumIE(InfoExtractor): def _real_extract(self, url): video_id = compat_urllib_parse_unquote(self._match_id(url)) - query = 
compat_urllib_parse.urlencode({'vid': video_id}) + query = compat_urllib_parse_urlencode({'vid': video_id}) movie_data = self._download_json( 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query, video_id, 'Downloading video formats info') @@ -86,7 +86,7 @@ class DaumIE(InfoExtractor): formats = [] for format_el in movie_data['output_list']['output_list']: profile = format_el['profile'] - format_query = compat_urllib_parse.urlencode({ + format_query = compat_urllib_parse_urlencode({ 'vid': video_id, 'profile': profile, }) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 15a1c40f7..982ed94ea 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -6,7 +6,7 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, ) from ..utils import ( @@ -106,7 +106,7 @@ class DCNVideoIE(DCNBaseIE): webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -133,7 +133,7 @@ class DCNLiveIE(DCNBaseIE): webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -174,7 +174,7 @@ class DCNSeasonIE(InfoExtractor): data['show_id'] = show_id request = sanitized_Request( 'http://admin.mangomolo.com/analytics/index.php/plus/show', - compat_urllib_parse.urlencode(data), + compat_urllib_parse_urlencode(data), { 'Origin': 'http://www.dcndigital.ae', 'Content-Type': 'application/x-www-form-urlencoded' diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index d35e88881..2101acaaf 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -6,7 +6,7 @@ import itertools from .amp import AMPIE from ..compat import ( compat_HTTPError, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -50,7 +50,7 @@ class DramaFeverBaseIE(AMPIE): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 7fcd0151d..297f8a6f5 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, unescapeHTML @@ -43,7 +43,7 @@ class EroProfileIE(InfoExtractor): if username is None: return - query = compat_urllib_parse.urlencode({ + query = compat_urllib_parse_urlencode({ 'username': username, 'password': password, 'url': 'http://www.eroprofile.com/', diff --git 
a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 508684d2e..cacf61973 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -5,12 +5,11 @@ import hashlib from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, sanitized_Request, ) @@ -57,7 +56,7 @@ class FC2IE(InfoExtractor): 'Submit': ' Login ', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') + login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') request = sanitized_Request( 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 67d50a386..6b8345416 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -4,8 +4,8 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_parse_qs, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) @@ -109,7 +109,7 @@ class FiveMinIE(InfoExtractor): response = self._download_json( 'https://syn.5min.com/handlers/SenseHandler.ashx?' 
+ - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'func': 'GetResults', 'playlist': video_id, 'sid': sid, diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 18f439df9..0a3de1498 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -42,7 +42,7 @@ class FlickrIE(InfoExtractor): } if secret: query['secret'] = secret - data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note) + data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note) if data['stat'] != 'ok': raise ExtractorError(data['message']) return data diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 0f37ed786..1eb528f31 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..utils import ( clean_html, determine_ext, - encode_dict, int_or_none, sanitized_Request, ExtractorError, @@ -54,10 +53,10 @@ class FunimationIE(InfoExtractor): (username, password) = self._get_login_info() if username is None: return - data = urlencode_postdata(encode_dict({ + data = urlencode_postdata({ 'email_field': username, 'password_field': password, - })) + }) login_request = sanitized_Request('http://www.funimation.com/login', data, headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0', 'Content-Type': 'application/x-www-form-urlencoded' diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 3befd3e7b..cc8fa45d2 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,7 +3,7 @@ from __future__ 
import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( remove_end, HEADRequest, @@ -123,7 +123,7 @@ class GDCVaultIE(InfoExtractor): 'password': password, } - request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form)) + request = sanitized_Request(login_url, compat_urllib_parse_urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(request, display_id, 'Logging in') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index efc3e8429..152d2a98a 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, HEADRequest, @@ -35,7 +35,7 @@ class HotNewHipHopIE(InfoExtractor): r'"contentUrl" content="(.*?)"', webpage, 'content URL') return self.url_result(video_url, ie='Youtube') - reqdata = compat_urllib_parse.urlencode([ + reqdata = compat_urllib_parse_urlencode([ ('mediaType', 's'), ('mediaId', video_id), ]) diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index e0ab31802..f7c913054 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -4,7 +4,7 @@ import json import time from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -28,7 +28,7 @@ class HypemIE(InfoExtractor): track_id = self._match_id(url) data = {'ax': 1, 'ts': time.time()} - request = sanitized_Request(url + '?' 
+ compat_urllib_parse.urlencode(data)) + request = sanitized_Request(url + '?' + compat_urllib_parse_urlencode(data)) response, urlh = self._download_webpage_handle( request, track_id, 'Downloading webpage with the url') diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 483cc6f9e..e60145b3d 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( xpath_with_ns, @@ -38,7 +38,7 @@ class InternetVideoArchiveIE(InfoExtractor): # Other player ids return m3u8 urls cleaned_dic['playerid'] = '247' cleaned_dic['videokbrate'] = '100000' - return compat_urllib_parse.urlencode(cleaned_dic) + return compat_urllib_parse_urlencode(cleaned_dic) def _real_extract(self, url): query = compat_urlparse.urlparse(url).query diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ffcea30ad..9e8c9432a 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -14,7 +14,7 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -322,7 +322,7 @@ class IqiyiIE(InfoExtractor): 'bird_t': timestamp, } validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse.urlencode(validation_params), None, + 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None, note='Validate credentials', errnote='Unable to validate credentials') MSG_MAP = { @@ -456,7 +456,7 @@ class IqiyiIE(InfoExtractor): 'QY00001': auth_result['data']['u'], }) api_video_url += '?' if '?' 
not in api_video_url else '&' - api_video_url += compat_urllib_parse.urlencode(param) + api_video_url += compat_urllib_parse_urlencode(param) js = self._download_json( api_video_url, video_id, note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) @@ -494,7 +494,7 @@ class IqiyiIE(InfoExtractor): } api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ - compat_urllib_parse.urlencode(param) + compat_urllib_parse_urlencode(param) raw_data = self._download_json(api_url, video_id) return raw_data diff --git a/youtube_dl/extractor/ivideon.py b/youtube_dl/extractor/ivideon.py index 617dc8c07..3ca824f79 100644 --- a/youtube_dl/extractor/ivideon.py +++ b/youtube_dl/extractor/ivideon.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import qualities @@ -62,7 +62,7 @@ class IvideonIE(InfoExtractor): quality = qualities(self._QUALITIES) formats = [{ - 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse.urlencode({ + 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse_urlencode({ 'server': server_id, 'camera': camera_id, 'sessionId': 'demo', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 44d7c84a1..a65697ff5 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -6,7 +6,7 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, compat_parse_qs, ) @@ -71,7 +71,7 @@ class KalturaIE(InfoExtractor): for k, v in a.items(): params['%d:%s' % (i, k)] = v - query = compat_urllib_parse.urlencode(params) + query = compat_urllib_parse_urlencode(params) url = self._API_BASE + query data = self._download_json(url, video_id, *args, **kwargs) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 
41d80bc12..d9dc067d2 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -90,7 +90,7 @@ class Laola1TvIE(InfoExtractor): hd_doc = self._download_xml( 'http://www.laola1.tv/server/hd_video.php?%s' - % compat_urllib_parse.urlencode({ + % compat_urllib_parse_urlencode({ 'play': video_id, 'partner': partner_id, 'portal': portal, @@ -108,7 +108,7 @@ class Laola1TvIE(InfoExtractor): req = sanitized_Request( 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' % - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'videoId': video_id, 'target': VS_TARGETS.get(kind, '2'), 'label': _v('label'), diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 462b752dd..375fdaed1 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -11,7 +11,7 @@ from .common import InfoExtractor from ..compat import ( compat_ord, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -122,7 +122,7 @@ class LeIE(InfoExtractor): 'domain': 'www.le.com' } play_json_req = sanitized_Request( - 'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) + 'http://api.le.com/mms/out/video/playJson?' 
+ compat_urllib_parse_urlencode(params) ) cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') if cn_verification_proxy: @@ -151,7 +151,7 @@ class LeIE(InfoExtractor): for format_id in formats: if format_id in dispatch: media_url = playurl['domain'][0] + dispatch[format_id][0] - media_url += '&' + compat_urllib_parse.urlencode({ + media_url += '&' + compat_urllib_parse_urlencode({ 'm3v': 1, 'format': 1, 'expect': 3, @@ -305,7 +305,7 @@ class LetvCloudIE(InfoExtractor): } self.sign_data(data) return self._download_json( - 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data), + 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data), media_id, 'Downloading playJson data for type %s' % cf) play_json = get_play_json(cf, time.time()) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index d4e1ae99d..df50cb655 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -6,7 +6,7 @@ import json from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -36,7 +36,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false' } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) login_page = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -65,7 +65,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false', } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(confirm_form).encode('utf-8')) login_page = self._download_webpage( request, None, 'Confirming log in and log out from another device') diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index 28e0dfe63..e33bfde3b 100644 
--- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import random from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( sanitized_Request, xpath_text, @@ -29,7 +29,7 @@ class MatchTVIE(InfoExtractor): def _real_extract(self, url): video_id = 'matchtv-live' request = sanitized_Request( - 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({ + 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse_urlencode({ 'ts': '', 'quality': 'SD', 'contentId': '561d2c0df7159b37178b4567', diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index c31e8798a..0e4865446 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -5,8 +5,8 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) + request = sanitized_Request(self._FILTER_POST, compat_urllib_parse_urlencode(disclaimer_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self.report_age_confirmation() self._download_webpage(request, None, False, 'Unable to confirm age') diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py index e46b23a6f..6ec53c303 100644 --- a/youtube_dl/extractor/minhateca.py +++ b/youtube_dl/extractor/minhateca.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( 
int_or_none, parse_duration, @@ -39,7 +39,7 @@ class MinhatecaIE(InfoExtractor): ] req = sanitized_Request( 'http://minhateca.com.br/action/License/Download', - data=compat_urllib_parse.urlencode(token_data)) + data=compat_urllib_parse_urlencode(token_data)) req.add_header('Content-Type', 'application/x-www-form-urlencoded') data = self._download_json( req, video_id, note='Downloading metadata') diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 9e584860a..76ced7928 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -2,11 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - encode_dict, get_element_by_attribute, int_or_none, ) @@ -60,7 +59,7 @@ class MiTeleIE(InfoExtractor): 'sta': '0', } media = self._download_json( - '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data))), + '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)), display_id, 'Downloading %s JSON' % location['loc']) file_ = media.get('file') if not file_: diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index d930b9634..89cdd4600 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,7 +5,7 @@ import json import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -77,7 +77,7 @@ class MoeVideoIE(InfoExtractor): ], ] r_json = json.dumps(r) - post = compat_urllib_parse.urlencode({'r': r_json}) + post = compat_urllib_parse_urlencode({'r': r_json}) req = sanitized_Request(self._API_URL, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index f6bf94f2f..c5ce693f1 100644 
--- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,7 +5,7 @@ import os.path import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, remove_start, @@ -88,7 +88,7 @@ class MonikerIE(InfoExtractor): fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) - post = compat_urllib_parse.urlencode(data) + post = compat_urllib_parse_urlencode(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index f010f52d5..ee3947f43 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -58,7 +58,7 @@ class MooshareIE(InfoExtractor): } request = sanitized_Request( - 'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form)) + 'http://mooshare.biz/%s' % video_id, compat_urllib_parse_urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._sleep(5, video_id) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 824bbcb4e..640ee3d93 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -4,7 +4,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, ) from ..utils import ( @@ -171,7 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor): data = {'uri': uri} if self._LANG: data['lang'] = self._LANG - return compat_urllib_parse.urlencode(data) + return compat_urllib_parse_urlencode(data) def _get_videos_info(self, 
uri): video_id = self._id_from_uri(uri) diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py index 1e9cf8de9..cbc800481 100644 --- a/youtube_dl/extractor/muzu.py +++ b/youtube_dl/extractor/muzu.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_urlencode class MuzuTVIE(InfoExtractor): @@ -25,7 +23,7 @@ class MuzuTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - info_data = compat_urllib_parse.urlencode({ + info_data = compat_urllib_parse_urlencode({ 'format': 'json', 'url': url, }) @@ -41,7 +39,7 @@ class MuzuTVIE(InfoExtractor): if video_info.get('v%s' % quality): break - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'ai': video_id, # Even if each time you watch a video the hash changes, # it seems to work for different videos, and it will work diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index c83a1eab5..6d447a493 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -9,8 +9,8 @@ import json from .common import InfoExtractor from ..compat import ( compat_ord, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -112,7 +112,7 @@ class MyVideoIE(InfoExtractor): encxml = compat_urllib_parse_unquote(b) if not params.get('domain'): params['domain'] = 'www.myvideo.de' - xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) + xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params)) if 'flash_playertype=MTV' in xmldata_url: self._downloader.report_warning('avoiding MTV player') xmldata_url = ( diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 1f5fc2145..6d6f69b44 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -5,7 
+5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -53,8 +53,8 @@ class NaverIE(InfoExtractor): raise ExtractorError('couldn\'t extract vid and key') vid = m_id.group(1) key = m_id.group(2) - query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, }) - query_urls = compat_urllib_parse.urlencode({ + query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, }) + query_urls = compat_urllib_parse_urlencode({ 'masterVid': vid, 'protocol': 'p2p', 'inKey': key, diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 3e2b3e599..d896b0d04 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -97,7 +97,7 @@ class NBAIE(InfoExtractor): _PAGE_SIZE = 30 def _fetch_page(self, team, video_id, page): - search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse.urlencode({ + search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({ 'type': 'teamvideo', 'start': page * self._PAGE_SIZE + 1, 'npp': (page + 1) * self._PAGE_SIZE + 1, diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index 7830616f8..0d36474fa 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -8,7 +8,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, compat_itertools_count, ) @@ -153,7 +153,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'ids': '[%s]' % song_id } info = self.query_api( - 'song/detail?' + compat_urllib_parse.urlencode(params), + 'song/detail?' 
+ compat_urllib_parse_urlencode(params), song_id, 'Downloading song info')['songs'][0] formats = self.extract_formats(info) diff --git a/youtube_dl/extractor/nextmovie.py b/youtube_dl/extractor/nextmovie.py index 657ae77a0..9ccd7d774 100644 --- a/youtube_dl/extractor/nextmovie.py +++ b/youtube_dl/extractor/nextmovie.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class NextMovieIE(MTVServicesInfoExtractor): @@ -20,7 +20,7 @@ class NextMovieIE(MTVServicesInfoExtractor): }] def _get_feed_query(self, uri): - return compat_urllib_parse.urlencode({ + return compat_urllib_parse_urlencode({ 'feed': '1505', 'mgid': uri, }) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 5bd15f7a7..ba1eefafc 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import sanitized_Request @@ -40,7 +40,7 @@ class NFBIE(InfoExtractor): request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, - compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) + compat_urllib_parse_urlencode({'getConfig': 'true'}).encode('ascii')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 8d5ce46ad..c1dea8b6c 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -7,7 +7,7 @@ import os from .common import InfoExtractor from ..compat import ( compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse ) from ..utils import ( @@ -38,7 +38,7 @@ 
class NHLBaseInfoExtractor(InfoExtractor): parsed_url = compat_urllib_parse_urlparse(initial_video_url) filename, ext = os.path.splitext(parsed_url.path) path = '%s_sd%s' % (filename, ext) - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'type': 'fvod', 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]) }) @@ -211,7 +211,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor): r'tab0"[^>]*?>(.*?)</td>', webpage, 'playlist title', flags=re.DOTALL).lower().capitalize() - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'cid': cat_id, # This is the default value 'count': 12, diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index b62819ae5..ce065f2b0 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class NickIE(MTVServicesInfoExtractor): @@ -54,7 +54,7 @@ class NickIE(MTVServicesInfoExtractor): }] def _get_feed_query(self, uri): - return compat_urllib_parse.urlencode({ + return compat_urllib_parse_urlencode({ 'feed': 'nick_arc_player_prime', 'mgid': uri, }) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 586e52a4a..688f0a124 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -7,11 +7,10 @@ import datetime from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, parse_duration, @@ -101,7 +100,7 @@ class NiconicoIE(InfoExtractor): 'mail': username, 'password': password, } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') + login_data = 
compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') request = sanitized_Request( 'https://secure.nicovideo.jp/secure/login', login_data) login_results = self._download_webpage( @@ -141,7 +140,7 @@ class NiconicoIE(InfoExtractor): r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') # Get flv info - flv_info_data = compat_urllib_parse.urlencode({ + flv_info_data = compat_urllib_parse_urlencode({ 'k': thumb_play_key, 'v': video_id }) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index ec7317a2f..8f4b69a6f 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -8,7 +8,7 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -75,7 +75,7 @@ class NocoIE(InfoExtractor): 'username': username, 'password': password, } - request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse_urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') login = self._download_json(request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index d68c1ad79..a131f7dbd 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -7,7 +7,6 @@ from ..compat import compat_urlparse from ..utils import ( ExtractorError, NO_DEFAULT, - encode_dict, sanitized_Request, urlencode_postdata, ) @@ -73,7 +72,7 @@ class NovaMovIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(url, post_url) request = sanitized_Request( - post_url, urlencode_postdata(encode_dict(fields))) + post_url, urlencode_postdata(fields)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Referer', post_url) webpage = 
self._download_webpage( diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index a3f0abb4e..1777aa10b 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( int_or_none, qualities, @@ -38,7 +38,7 @@ class NprIE(InfoExtractor): playlist_id = self._match_id(url) config = self._download_json( - 'http://api.npr.org/query?%s' % compat_urllib_parse.urlencode({ + 'http://api.npr.org/query?%s' % compat_urllib_parse_urlencode({ 'id': playlist_id, 'fields': 'titles,audio,show', 'format': 'json', diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 20b984288..16f040191 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -9,7 +9,7 @@ from ..utils import ( ExtractorError, unsmuggle_url, ) -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): @@ -35,7 +35,7 @@ class OoyalaBaseIE(InfoExtractor): for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): auth_data = self._download_json( self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'domain': domain, 'supportedFormats': supported_format }), diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index ec8876c28..229750665 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -65,7 +65,7 @@ class PatreonIE(InfoExtractor): request = sanitized_Request( 'https://www.patreon.com/processLogin', - compat_urllib_parse.urlencode(login_form).encode('utf-8') + compat_urllib_parse_urlencode(login_form).encode('utf-8') ) login_page = self._download_webpage(request, None, note='Logging in as %s' % username) diff --git 
a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 2856af96f..63065622b 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,7 +5,7 @@ import re import os.path from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -40,7 +40,7 @@ class PlayedIE(InfoExtractor): self._sleep(2, video_id) - post = compat_urllib_parse.urlencode(data) + post = compat_urllib_parse_urlencode(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index e360404f7..1e8096a25 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor): }) info_url = compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) json_info = self._download_json( info_url, video_id, diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 12e1c2862..575775f09 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -8,7 +8,7 @@ import collections from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -76,7 +76,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) request = sanitized_Request( - post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + post_url, 
compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 63ce87ee3..9894f3262 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -2,8 +2,8 @@ from __future__ import unicode_literals from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from .common import InfoExtractor from ..utils import ( @@ -50,7 +50,7 @@ class Porn91IE(InfoExtractor): r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code') max_vid = self._search_regex( r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid') - url_params = compat_urllib_parse.urlencode({ + url_params = compat_urllib_parse_urlencode({ 'VID': file_id, 'mp4': '1', 'seccode': sec_code, diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 85aae9576..188f08826 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -42,7 +42,7 @@ class PrimeShareTVIE(InfoExtractor): self._sleep(wait_time, video_id) req = sanitized_Request( - url, compat_urllib_parse.urlencode(fields), headers) + url, compat_urllib_parse_urlencode(fields), headers) video_page = self._download_webpage( req, video_id, 'Downloading video page') diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index d5357283a..67312016c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import 
compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( determine_ext, ExtractorError, @@ -34,7 +34,7 @@ class PromptFileIE(InfoExtractor): expected=True) fields = self._hidden_inputs(webpage) - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 670e6950f..07d49d489 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,9 +5,7 @@ import re from hashlib import sha1 from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, determine_ext, @@ -235,7 +233,7 @@ class ProSiebenSat1IE(InfoExtractor): client_name = 'kolibri-2.0.19-splec4' client_location = url - videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({ + videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({ 'access_token': access_token, 'client_location': client_location, 'client_name': client_name, @@ -256,7 +254,7 @@ class ProSiebenSat1IE(InfoExtractor): client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) .encode('utf-8')).hexdigest() - sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({ + sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({ 'access_token': access_token, 'client_id': client_id, 'client_location': client_location, @@ -270,7 +268,7 @@ class ProSiebenSat1IE(InfoExtractor): client_location, source_ids_str, g, client_name]) .encode('utf-8')).hexdigest() - 
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({ + url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({ 'access_token': access_token, 'client_id': client_id, 'client_location': client_location, diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 1178b7a27..b4433a689 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -81,7 +81,7 @@ class ShahidIE(InfoExtractor): video = self._download_json( '%s/%s/%s?%s' % ( api_vars['url'], api_vars['playerType'], api_vars['id'], - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'apiKey': 'sh@hid0nlin3', 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', })), diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 96fe0b90d..e66441997 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -45,7 +45,7 @@ class SharedIE(InfoExtractor): download_form = self._hidden_inputs(webpage) request = sanitized_Request( - url, compat_urllib_parse.urlencode(download_form)) + url, compat_urllib_parse_urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video_page = self._download_webpage( diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py index f1ea9bdb2..61dc1c235 100644 --- a/youtube_dl/extractor/sharesix.py +++ 
b/youtube_dl/extractor/sharesix.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( parse_duration, sanitized_Request, @@ -47,7 +47,7 @@ class ShareSixIE(InfoExtractor): fields = { 'method_free': 'Free' } - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index b2258a0f6..d03f1b1d4 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import sanitized_Request @@ -39,7 +39,7 @@ class SinaIE(InfoExtractor): ] def _extract_video(self, video_id): - data = compat_urllib_parse.urlencode({'vid': video_id}) + data = compat_urllib_parse_urlencode({'vid': video_id}) url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, video_id, 'Downloading video url') image_page = self._download_webpage( diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 015ef75f3..b4c6d5bbf 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,7 +7,7 @@ import hashlib import uuid from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -175,7 +175,7 @@ class SmotriIE(InfoExtractor): video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() request = sanitized_Request( - 'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form)) + 
'http://smotri.com/video/view/url/bot/', compat_urllib_parse_urlencode(video_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video = self._download_json(request, video_id, 'Downloading video JSON') @@ -338,7 +338,7 @@ class SmotriBroadcastIE(InfoExtractor): } request = sanitized_Request( - broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) + broadcast_url + '/?no_redirect=1', compat_urllib_parse_urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') broadcast_page = self._download_webpage( request, broadcast_id, 'Logging in and confirming age') diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index ea8fc258d..49e5d09ae 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -170,7 +170,7 @@ class SohuIE(InfoExtractor): if retries > 0: download_note += ' (retry #%d)' % retries part_info = self._parse_json(self._download_webpage( - 'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)), + 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)), video_id, download_note), video_id) video_url = part_info['url'] diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1efb2b980..2bca8fa3a 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -11,10 +11,9 @@ from .common import ( from ..compat import ( compat_str, compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, unified_strdate, @@ -393,7 +392,7 @@ class SoundcloudUserIE(SoundcloudIE): query = COMMON_QUERY.copy() query['offset'] = 0 - next_href = base_url + '?' 
+ compat_urllib_parse.urlencode(query) + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) entries = [] for i in itertools.count(): @@ -424,7 +423,7 @@ class SoundcloudUserIE(SoundcloudIE): qs = compat_urlparse.parse_qs(parsed_next_href.query) qs.update(COMMON_QUERY) next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) return { '_type': 'playlist', @@ -460,7 +459,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): if token: data_dict['secret_token'] = token - data = compat_urllib_parse.urlencode(data_dict) + data = compat_urllib_parse_urlencode(data_dict) data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') @@ -500,7 +499,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): query['client_id'] = self._CLIENT_ID query['linked_partitioning'] = '1' query['offset'] = 0 - data = compat_urllib_parse.urlencode(encode_dict(query)) + data = compat_urllib_parse_urlencode(query) next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) collected_results = 0 diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 77841b946..b17779e4b 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import sanitized_Request @@ -35,7 +35,7 @@ class StreamcloudIE(InfoExtractor): (?:id="[^"]+"\s+)?
value="([^"]*)" ''', orig_webpage) - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) self._sleep(12, video_id) headers = { diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2c8e9b941..d6b2560f8 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -5,8 +5,8 @@ import json from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -74,7 +74,7 @@ class TelecincoIE(InfoExtractor): info_el = self._download_xml(info_url, episode).find('./video/info') video_link = info_el.find('videoUrl/link').text - token_query = compat_urllib_parse.urlencode({'id': video_link}) + token_query = compat_urllib_parse_urlencode({'id': video_link}) token_info = self._download_json( embed_data['flashvars']['ov_tk'] + '?' + token_query, episode, diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 6d78b5dfe..50ed15163 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -5,7 +5,7 @@ import codecs import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -41,7 +41,7 @@ class TubiTvIE(InfoExtractor): 'username': username, 'password': password, } - payload = compat_urllib_parse.urlencode(form_data).encode('utf-8') + payload = compat_urllib_parse_urlencode(form_data).encode('utf-8') request = sanitized_Request(self._LOGIN_URL, payload) request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_page = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d4169ec6d..c92dcc7b9 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -9,12 +9,11 @@ from .common import 
InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, orderedSet, @@ -82,7 +81,7 @@ class TwitchBaseIE(InfoExtractor): post_url = compat_urlparse.urljoin(redirect_url, post_url) request = sanitized_Request( - post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8')) + post_url, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Referer', redirect_url) response = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -250,7 +249,7 @@ class TwitchVodIE(TwitchItemBaseIE): formats = self._extract_m3u8_formats( '%s/vod/%s?%s' % ( self._USHER_BASE, item_id, - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'allow_source': 'true', 'allow_audio_only': 'true', 'allow_spectre': 'true', @@ -442,7 +441,7 @@ class TwitchStreamIE(TwitchBaseIE): } formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)), + % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)), channel_id, 'mp4') self._prefer_source(formats) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index a9046b865..6adfb2cee 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) @@ -71,7 +71,7 @@ class UdemyIE(InfoExtractor): def _download_lecture(self, course_id, lecture_id): return self._download_json( 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % ( - course_id, lecture_id, compat_urllib_parse.urlencode({ + course_id, lecture_id, 
compat_urllib_parse_urlencode({ 'video_only': '', 'auto_play': '', 'fields[lecture]': 'title,description,asset', @@ -139,7 +139,7 @@ class UdemyIE(InfoExtractor): }) request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Referer', self._ORIGIN_URL) request.add_header('Origin', self._ORIGIN_URL) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index b755dda90..77bb200e9 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -48,7 +48,7 @@ class Vbox7IE(InfoExtractor): webpage, 'title').split('/')[0].strip() info_url = 'http://vbox7.com/play/magare.do' - data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}) + data = compat_urllib_parse_urlencode({'as3': '1', 'vid': video_id}) info_request = sanitized_Request(info_url, data) info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 6bfbd4d85..8d92aee87 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -93,7 +93,7 @@ class ViddlerIE(InfoExtractor): headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} request = sanitized_Request( 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s' - % 
compat_urllib_parse.urlencode(query), None, headers) + % compat_urllib_parse_urlencode(query), None, headers) data = self._download_json(request, video_id)['video'] formats = [] diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 71c30d2cd..707a5735a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -12,7 +12,6 @@ from ..compat import ( ) from ..utils import ( determine_ext, - encode_dict, ExtractorError, InAdvancePagedList, int_or_none, @@ -42,13 +41,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): self.report_login() webpage = self._download_webpage(self._LOGIN_URL, None, False) token, vuid = self._extract_xsrft_and_vuid(webpage) - data = urlencode_postdata(encode_dict({ + data = urlencode_postdata({ 'action': 'login', 'email': username, 'password': password, 'service': 'vimeo', 'token': token, - })) + }) login_request = sanitized_Request(self._LOGIN_URL, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Referer', self._LOGIN_URL) @@ -255,10 +254,10 @@ class VimeoIE(VimeoBaseInfoExtractor): if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) token, vuid = self._extract_xsrft_and_vuid(webpage) - data = urlencode_postdata(encode_dict({ + data = urlencode_postdata({ 'password': password, 'token': token, - })) + }) if url.startswith('http://'): # vimeo only supports https now, but the user can give an http url url = url.replace('http://', 'https://') @@ -274,7 +273,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') - data = urlencode_postdata(encode_dict({'password': password})) + data = urlencode_postdata({'password': password}) pass_url = url + '/check-password' password_request = 
sanitized_Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') @@ -575,7 +574,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): token, vuid = self._extract_xsrft_and_vuid(webpage) fields['token'] = token fields['password'] = password - post = urlencode_postdata(encode_dict(fields)) + post = urlencode_postdata(fields) password_path = self._search_regex( r'action="([^"]+)"', login_form, 'password URL') password_url = compat_urlparse.urljoin(page_url, password_path) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d560a4b5e..458099a4a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -7,7 +7,7 @@ import json from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -204,7 +204,7 @@ class VKIE(InfoExtractor): request = sanitized_Request( 'https://login.vk.com/?act=login', - compat_urllib_parse.urlencode(login_form).encode('utf-8')) + compat_urllib_parse_urlencode(login_form).encode('utf-8')) login_page = self._download_webpage( request, None, note='Logging in as %s' % username) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index bd5545173..baf39bb2c 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -7,7 +7,7 @@ from ..utils import ( float_or_none, int_or_none, ) -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): @@ -43,7 +43,7 @@ class VLiveIE(InfoExtractor): playinfo = self._download_json( 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s' - % compat_urllib_parse.urlencode({ + % compat_urllib_parse_urlencode({ 'videoId': long_video_id, 'key': key, 'ptc': 'http', diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index a97995a6d..f1abca4d9 100644 --- 
a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, NO_DEFAULT, @@ -38,7 +38,7 @@ class VodlockerIE(InfoExtractor): if fields['op'] == 'download1': self._sleep(3, video_id) # they do detect when requests happen too fast! - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 94abdb4f3..4e35e1f44 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -4,10 +4,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, - encode_dict, int_or_none, sanitized_Request, ) @@ -109,7 +108,7 @@ class XFileShareIE(InfoExtractor): if countdown: self._sleep(countdown, video_id) - post = compat_urllib_parse.urlencode(encode_dict(fields)) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 4c6142927..b2d8f4b48 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -8,6 +8,7 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -303,7 +304,7 @@ class YahooIE(InfoExtractor): region = self._search_regex( r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', 
webpage, 'region', fatal=False, default='US') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'protocol': 'http', 'region': region, }) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index e699e663f..158f3ea68 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -7,7 +7,7 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -170,7 +170,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids) request = sanitized_Request( 'https://music.yandex.ru/handlers/track-entries.jsx', - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'entries': ','.join(missing_track_ids), 'lang': mu.get('settings', {}).get('lang', 'en'), 'external-domain': 'music.yandex.ru', diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 900eb2aba..fd7eb5a6d 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -8,7 +8,7 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_ord, ) from ..utils import ( @@ -138,7 +138,7 @@ class YoukuIE(InfoExtractor): '_00' + \ '/st/' + self.parse_ext_l(format) + \ '/fileid/' + get_fileid(format, n) + '?' 
+ \ - compat_urllib_parse.urlencode(param) + compat_urllib_parse_urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 96fa3b5aa..83b5840f7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -17,16 +17,15 @@ from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, compat_str, ) from ..utils import ( clean_html, - encode_dict, error_to_compat_str, ExtractorError, float_or_none, @@ -116,7 +115,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii') + login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii') req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -149,7 +148,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii') + tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii') tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( @@ -1007,7 +1006,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': lang, 'v': video_id, 'fmt': ext, @@ -1056,7 +1055,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if caption_url: timestamp = args['timestamp'] # We get the available subtitles - list_params = compat_urllib_parse.urlencode({ + list_params = compat_urllib_parse_urlencode({ 'type': 'list', 'tlangs': 1, 'asrs': 1, @@ -1075,7 +1074,7 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): sub_lang = lang_node.attrib['lang_code'] sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': original_lang, 'tlang': sub_lang, 'fmt': ext, @@ -1094,7 +1093,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): caption_tracks = args['caption_tracks'] caption_translation_languages = args['caption_translation_languages'] caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] - parsed_caption_url = compat_urlparse.urlparse(caption_url) + parsed_caption_url = compat_urllib_parse_urlparse(caption_url) caption_qs = compat_parse_qs(parsed_caption_url.query) sub_lang_list = {} @@ -1110,7 +1109,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fmt': [ext], }) sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace( - query=compat_urllib_parse.urlencode(caption_qs, True))) + query=compat_urllib_parse_urlencode(caption_qs, True))) sub_formats.append({ 'url': sub_url, 'ext': ext, @@ -1140,7 +1139,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'cpn': [cpn], }) playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) self._download_webpage( playback_url, video_id, 'Marking watched', @@ -1225,7 +1224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # this can be viewed without login into Youtube url = proto + '://www.youtube.com/embed/%s' % video_id embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'sts': self._search_regex( @@ -2085,7 +2084,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): 'spf': 'navigate', } url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 
'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b6e1dc809..eacd81bf9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_str, compat_urllib_error, compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, @@ -1315,7 +1316,7 @@ def shell_quote(args): def smuggle_url(url, data): """ Pass additional data in a URL for internal use. """ - sdata = compat_urllib_parse.urlencode( + sdata = compat_urllib_parse_urlencode( {'__youtubedl_smuggle': json.dumps(data)}) return url + '#' + sdata @@ -1789,22 +1790,15 @@ def read_batch_urls(batch_fd): def urlencode_postdata(*args, **kargs): - return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii') + return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii') def update_url_query(url, query): parsed_url = compat_urlparse.urlparse(url) qs = compat_parse_qs(parsed_url.query) qs.update(query) - qs = encode_dict(qs) return compat_urlparse.urlunparse(parsed_url._replace( - query=compat_urllib_parse.urlencode(qs, True))) - - -def encode_dict(d, encoding='utf-8'): - def encode(v): - return v.encode(encoding) if isinstance(v, compat_basestring) else v - return dict((encode(k), encode(v)) for k, v in d.items()) + query=compat_urllib_parse_urlencode(qs, True))) def dict_get(d, key_or_keys, default=None, skip_false_values=True): From 6e6bc8dae577c29c072ffc5c25078b5668435435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 02:19:24 +0600 Subject: [PATCH 072/128] Use urlencode_postdata across the codebase --- youtube_dl/extractor/atresplayer.py | 16 +++++++--------- 
youtube_dl/extractor/bambuser.py | 10 ++++------ youtube_dl/extractor/ceskatelevize.py | 4 ++-- youtube_dl/extractor/crunchyroll.py | 2 +- youtube_dl/extractor/dcn.py | 3 ++- youtube_dl/extractor/dramafever.py | 4 ++-- youtube_dl/extractor/fc2.py | 4 ++-- youtube_dl/extractor/gdcvault.py | 4 ++-- youtube_dl/extractor/hotnewhiphop.py | 4 ++-- youtube_dl/extractor/lynda.py | 10 ++++------ youtube_dl/extractor/metacafe.py | 4 ++-- youtube_dl/extractor/minhateca.py | 4 ++-- youtube_dl/extractor/moevideo.py | 4 ++-- youtube_dl/extractor/moniker.py | 4 ++-- youtube_dl/extractor/mooshare.py | 4 ++-- youtube_dl/extractor/nfb.py | 8 +++++--- youtube_dl/extractor/niconico.py | 3 ++- youtube_dl/extractor/noco.py | 4 ++-- youtube_dl/extractor/played.py | 4 ++-- youtube_dl/extractor/pluralsight.py | 4 ++-- youtube_dl/extractor/primesharetv.py | 4 ++-- youtube_dl/extractor/promptfile.py | 4 ++-- youtube_dl/extractor/shared.py | 4 ++-- youtube_dl/extractor/sharesix.py | 4 ++-- youtube_dl/extractor/smotri.py | 6 +++--- youtube_dl/extractor/soundcloud.py | 1 - youtube_dl/extractor/streamcloud.py | 8 +++++--- youtube_dl/extractor/tubitv.py | 4 ++-- youtube_dl/extractor/twitch.py | 3 ++- youtube_dl/extractor/udemy.py | 3 ++- youtube_dl/extractor/vbox7.py | 8 +++----- youtube_dl/extractor/vk.py | 8 +++----- youtube_dl/extractor/vodlocker.py | 4 ++-- youtube_dl/extractor/xfileshare.py | 4 ++-- youtube_dl/extractor/yandexmusic.py | 10 ++++------ youtube_dl/extractor/youtube.py | 5 +++-- 36 files changed, 90 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index f9568cb5b..d2f388964 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -6,16 +6,14 @@ import hashlib import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( - int_or_none, - float_or_none, - 
sanitized_Request, - xpath_text, ExtractorError, + float_or_none, + int_or_none, + sanitized_Request, + urlencode_postdata, + xpath_text, ) @@ -86,7 +84,7 @@ class AtresPlayerIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 1a2eef48d..0eb1930c2 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -4,15 +4,13 @@ import re import itertools from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_str, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -58,7 +56,7 @@ class BambuserIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index d93108df5..6652c8e42 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -6,13 +6,13 @@ import re from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( ExtractorError, float_or_none, sanitized_Request, + urlencode_postdata, ) @@ -102,7 +102,7 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request( 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - 
data=compat_urllib_parse_urlencode(data)) + data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 7746f1be3..8ae3f2890 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -308,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = sanitized_Request(playerdata_url) - playerdata_req.data = compat_urllib_parse_urlencode({'current_page': webpage_url}) + playerdata_req.data = urlencode_postdata({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 982ed94ea..5deff5f30 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -15,6 +15,7 @@ from ..utils import ( sanitized_Request, smuggle_url, unsmuggle_url, + urlencode_postdata, ) @@ -174,7 +175,7 @@ class DCNSeasonIE(InfoExtractor): data['show_id'] = show_id request = sanitized_Request( 'http://admin.mangomolo.com/analytics/index.php/plus/show', - compat_urllib_parse_urlencode(data), + urlencode_postdata(data), { 'Origin': 'http://www.dcndigital.ae', 'Content-Type': 'application/x-www-form-urlencoded' diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 2101acaaf..3b6529f4b 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -6,7 +6,6 @@ import itertools from .amp import AMPIE from ..compat import ( compat_HTTPError, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -14,6 +13,7 @@ from 
..utils import ( clean_html, int_or_none, sanitized_Request, + urlencode_postdata ) @@ -50,7 +50,7 @@ class DramaFeverBaseIE(AMPIE): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index cacf61973..c7d69ff1f 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -5,13 +5,13 @@ import hashlib from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -56,7 +56,7 @@ class FC2IE(InfoExtractor): 'Submit': ' Login ', } - login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') + login_data = urlencode_postdata(login_form_strs) request = sanitized_Request( 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index cc8fa45d2..59ed4c38f 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( remove_end, HEADRequest, sanitized_Request, + urlencode_postdata, ) @@ -123,7 +123,7 @@ class GDCVaultIE(InfoExtractor): 'password': password, } - request = sanitized_Request(login_url, compat_urllib_parse_urlencode(login_form)) + request = sanitized_Request(login_url, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(request, display_id, 'Logging in') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') 
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 152d2a98a..9db565209 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, HEADRequest, sanitized_Request, + urlencode_postdata, ) @@ -35,7 +35,7 @@ class HotNewHipHopIE(InfoExtractor): r'"contentUrl" content="(.*?)"', webpage, 'content URL') return self.url_result(video_url, ie='Youtube') - reqdata = compat_urllib_parse_urlencode([ + reqdata = urlencode_postdata([ ('mediaType', 's'), ('mediaId', video_id), ]) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index df50cb655..71fd55ade 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -4,15 +4,13 @@ import re import json from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( ExtractorError, clean_html, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -36,7 +34,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false' } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) login_page = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -65,7 +63,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false', } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(confirm_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(confirm_form)) login_page = self._download_webpage( request, None, 'Confirming log in and log out from another device') diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 0e4865446..61dadb7a7 100644 --- 
a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -6,13 +6,13 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = sanitized_Request(self._FILTER_POST, compat_urllib_parse_urlencode(disclaimer_form)) + request = sanitized_Request(self._FILTER_POST, urlencode_postdata(disclaimer_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self.report_age_confirmation() self._download_webpage(request, None, False, 'Unable to confirm age') diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py index 6ec53c303..e6730b75a 100644 --- a/youtube_dl/extractor/minhateca.py +++ b/youtube_dl/extractor/minhateca.py @@ -2,12 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( int_or_none, parse_duration, parse_filesize, sanitized_Request, + urlencode_postdata, ) @@ -39,7 +39,7 @@ class MinhatecaIE(InfoExtractor): ] req = sanitized_Request( 'http://minhateca.com.br/action/License/Download', - data=compat_urllib_parse_urlencode(token_data)) + data=urlencode_postdata(token_data)) req.add_header('Content-Type', 'application/x-www-form-urlencoded') data = self._download_json( req, video_id, note='Downloading metadata') diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 89cdd4600..978d5d5bf 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,11 +5,11 @@ import json import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, 
sanitized_Request, + urlencode_postdata, ) @@ -77,7 +77,7 @@ class MoeVideoIE(InfoExtractor): ], ] r_json = json.dumps(r) - post = compat_urllib_parse_urlencode({'r': r_json}) + post = urlencode_postdata({'r': r_json}) req = sanitized_Request(self._API_URL, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index c5ce693f1..b208820fe 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,11 +5,11 @@ import os.path import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, remove_start, sanitized_Request, + urlencode_postdata, ) @@ -88,7 +88,7 @@ class MonikerIE(InfoExtractor): fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) - post = compat_urllib_parse_urlencode(data) + post = urlencode_postdata(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index ee3947f43..a85109a89 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -3,10 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -58,7 +58,7 @@ class MooshareIE(InfoExtractor): } request = sanitized_Request( - 'http://mooshare.biz/%s' % video_id, compat_urllib_parse_urlencode(download_form)) + 'http://mooshare.biz/%s' % video_id, urlencode_postdata(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._sleep(5, video_id) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index ba1eefafc..51e4a34f7 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,8 +1,10 
@@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + sanitized_Request, + urlencode_postdata, +) class NFBIE(InfoExtractor): @@ -40,7 +42,7 @@ class NFBIE(InfoExtractor): request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, - compat_urllib_parse_urlencode({'getConfig': 'true'}).encode('ascii')) + urlencode_postdata({'getConfig': 'true'})) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 688f0a124..dd75a48af 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -18,6 +18,7 @@ from ..utils import ( sanitized_Request, xpath_text, determine_ext, + urlencode_postdata, ) @@ -100,7 +101,7 @@ class NiconicoIE(InfoExtractor): 'mail': username, 'password': password, } - login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') + login_data = urlencode_postdata(login_form_strs) request = sanitized_Request( 'https://secure.nicovideo.jp/secure/login', login_data) login_results = self._download_webpage( diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 8f4b69a6f..06f2bda07 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -8,7 +8,6 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -18,6 +17,7 @@ from ..utils import ( float_or_none, parse_iso8601, sanitized_Request, + urlencode_postdata, ) @@ -75,7 +75,7 @@ class NocoIE(InfoExtractor): 'username': username, 'password': password, } - request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse_urlencode(login_form)) + request = 
sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') login = self._download_json(request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 63065622b..57c875ef0 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,10 +5,10 @@ import re import os.path from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -40,7 +40,7 @@ class PlayedIE(InfoExtractor): self._sleep(2, video_id) - post = compat_urllib_parse_urlencode(data) + post = urlencode_postdata(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 575775f09..bc66f7a9d 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -8,7 +8,6 @@ import collections from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -17,6 +16,7 @@ from ..utils import ( parse_duration, qualities, sanitized_Request, + urlencode_postdata, ) @@ -76,7 +76,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) request = sanitized_Request( - post_url, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + post_url, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 188f08826..0c1024772 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals from .common import 
InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -42,7 +42,7 @@ class PrimeShareTVIE(InfoExtractor): self._sleep(wait_time, video_id) req = sanitized_Request( - url, compat_urllib_parse_urlencode(fields), headers) + url, urlencode_postdata(fields), headers) video_page = self._download_webpage( req, video_id, 'Downloading video page') diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 67312016c..f93bd19ff 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( determine_ext, ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -34,7 +34,7 @@ class PromptFileIE(InfoExtractor): expected=True) fields = self._hidden_inputs(webpage) - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index e66441997..e7e5f653e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -45,7 +45,7 @@ class SharedIE(InfoExtractor): download_form = self._hidden_inputs(webpage) request = sanitized_Request( - url, compat_urllib_parse_urlencode(download_form)) + url, urlencode_postdata(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video_page = self._download_webpage( diff --git 
a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py index 61dc1c235..9cce5ceb4 100644 --- a/youtube_dl/extractor/sharesix.py +++ b/youtube_dl/extractor/sharesix.py @@ -4,10 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( parse_duration, sanitized_Request, + urlencode_postdata, ) @@ -47,7 +47,7 @@ class ShareSixIE(InfoExtractor): fields = { 'method_free': 'Free' } - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index b4c6d5bbf..5c3fd0fec 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,12 +7,12 @@ import hashlib import uuid from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, unified_strdate, + urlencode_postdata, ) @@ -175,7 +175,7 @@ class SmotriIE(InfoExtractor): video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() request = sanitized_Request( - 'http://smotri.com/video/view/url/bot/', compat_urllib_parse_urlencode(video_form)) + 'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video = self._download_json(request, video_id, 'Downloading video JSON') @@ -338,7 +338,7 @@ class SmotriBroadcastIE(InfoExtractor): } request = sanitized_Request( - broadcast_url + '/?no_redirect=1', compat_urllib_parse_urlencode(login_form)) + broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') broadcast_page = self._download_webpage( request, broadcast_id, 'Logging in and confirming age') diff --git 
a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2bca8fa3a..194dabc71 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -500,7 +500,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): query['linked_partitioning'] = '1' query['offset'] = 0 data = compat_urllib_parse_urlencode(query) - data = compat_urllib_parse_urlencode(query) next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) collected_results = 0 diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index b17779e4b..712359885 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + sanitized_Request, + urlencode_postdata, +) class StreamcloudIE(InfoExtractor): @@ -35,7 +37,7 @@ class StreamcloudIE(InfoExtractor): (?:id="[^"]+"\s+)? 
value="([^"]*)" ''', orig_webpage) - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) self._sleep(12, video_id) headers = { diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 50ed15163..306ee4e15 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -5,11 +5,11 @@ import codecs import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -41,7 +41,7 @@ class TubiTvIE(InfoExtractor): 'username': username, 'password': password, } - payload = compat_urllib_parse_urlencode(form_data).encode('utf-8') + payload = urlencode_postdata(form_data) request = sanitized_Request(self._LOGIN_URL, payload) request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_page = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index c92dcc7b9..36ee1adff 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -20,6 +20,7 @@ from ..utils import ( parse_duration, parse_iso8601, sanitized_Request, + urlencode_postdata, ) @@ -81,7 +82,7 @@ class TwitchBaseIE(InfoExtractor): post_url = compat_urlparse.urljoin(redirect_url, post_url) request = sanitized_Request( - post_url, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + post_url, urlencode_postdata(login_form)) request.add_header('Referer', redirect_url) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 6adfb2cee..be6f3be5e 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, sanitized_Request, unescapeHTML, + urlencode_postdata, ) @@ -139,7 +140,7 @@ class UdemyIE(InfoExtractor): }) request = sanitized_Request( - 
self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._ORIGIN_URL) request.add_header('Origin', self._ORIGIN_URL) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 77bb200e9..dff1bb702 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -2,13 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -48,7 +46,7 @@ class Vbox7IE(InfoExtractor): webpage, 'title').split('/')[0].strip() info_url = 'http://vbox7.com/play/magare.do' - data = compat_urllib_parse_urlencode({'as3': '1', 'vid': video_id}) + data = urlencode_postdata({'as3': '1', 'vid': video_id}) info_request = sanitized_Request(info_url, data) info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 458099a4a..67220f1b7 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -5,10 +5,7 @@ import re import json from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -17,6 +14,7 @@ from ..utils import ( str_to_int, unescapeHTML, unified_strdate, + urlencode_postdata, ) from .vimeo import VimeoIE from .pladform import PladformIE @@ -204,7 +202,7 @@ class VKIE(InfoExtractor): request = sanitized_Request( 'https://login.vk.com/?act=login', - compat_urllib_parse_urlencode(login_form).encode('utf-8')) + urlencode_postdata(login_form)) login_page = self._download_webpage( request, None, 
note='Logging in as %s' % username) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index f1abca4d9..a938a4007 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, NO_DEFAULT, sanitized_Request, + urlencode_postdata, ) @@ -38,7 +38,7 @@ class VodlockerIE(InfoExtractor): if fields['op'] == 'download1': self._sleep(3, video_id) # they do detect when requests happen too fast! - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 4e35e1f44..2d1504eaa 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -108,7 +108,7 @@ class XFileShareIE(InfoExtractor): if countdown: self._sleep(countdown, video_id) - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 158f3ea68..025716958 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -5,15 +5,13 @@ import re import hashlib from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( 
ExtractorError, int_or_none, float_or_none, sanitized_Request, + urlencode_postdata, ) @@ -170,14 +168,14 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids) request = sanitized_Request( 'https://music.yandex.ru/handlers/track-entries.jsx', - compat_urllib_parse_urlencode({ + urlencode_postdata({ 'entries': ','.join(missing_track_ids), 'lang': mu.get('settings', {}).get('lang', 'en'), 'external-domain': 'music.yandex.ru', 'overembed': 'false', 'sign': mu.get('authData', {}).get('user', {}).get('sign'), 'strict': 'true', - }).encode('utf-8')) + })) request.add_header('Referer', url) request.add_header('X-Requested-With', 'XMLHttpRequest') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 83b5840f7..8c321f1fc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -44,6 +44,7 @@ from ..utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + urlencode_postdata, ISO3166Utils, ) @@ -115,7 +116,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii') + login_data = urlencode_postdata(login_form_strs) req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -148,7 +149,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii') + tfa_data = urlencode_postdata(tfa_form_strs) tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( From e289d6d62cae85ded46ad6e92b33385f221b8370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 02:38:33 +0600 Subject: [PATCH 073/128] [test_compat] Add tests for compat_urllib_parse_urlencode --- test/test_compat.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/test/test_compat.py b/test/test_compat.py index b6bfad05e..cc105807a 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -19,6 +19,7 @@ from youtube_dl.compat import ( compat_str, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, ) @@ -70,6 +71,12 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def') self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def') + def test_compat_urllib_parse_urlencode(self): + self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') + def test_compat_shlex_split(self): self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) From 62cdb96f51eca4226b4d499e292d1ea1f9babb72 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 26 Mar 2016 08:58:03 +0100 Subject: [PATCH 074/128] release 2016.03.26 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2291ed783..d4cf099f5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.25' +__version__ = '2016.03.26' From 5964b598ff536c32198181e5027610f3d9a474bb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 25 Mar 2016 16:17:54 +0800 Subject: [PATCH 075/128] [brightcove] Support alternative BrightcoveExperience layout The full URL lays in the `data` attribute of <object> (#8862) --- youtube_dl/extractor/brightcove.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py 
index 59e8008f9..afe081d82 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -136,13 +136,16 @@ class BrightcoveLegacyIE(InfoExtractor): else: flashvars = {} + data_url = object_doc.attrib.get('data', '') + data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query) + def find_param(name): if name in flashvars: return flashvars[name] node = find_xpath_attr(object_doc, './param', 'name', name) if node is not None: return node.attrib['value'] - return None + return data_url_params.get(name) params = {} From d6c340cae5c1e5704d6e709eefb7009fcda6e213 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 26 Mar 2016 18:21:07 +0800 Subject: [PATCH 076/128] [brightcove] Extract more formats (#8862) --- youtube_dl/extractor/brightcove.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index afe081d82..c9e43a275 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -297,7 +297,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'uploader': video_info.get('publisherName'), } - renditions = video_info.get('renditions') + renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) if renditions: formats = [] for rend in renditions: @@ -319,13 +319,23 @@ class BrightcoveLegacyIE(InfoExtractor): if ext is None: ext = determine_ext(url) size = rend.get('size') - formats.append({ + a_format = { 'url': url, 'ext': ext, 'height': rend.get('frameHeight'), 'width': rend.get('frameWidth'), 'filesize': size if size != 0 else None, - }) + } + + # m3u8 manifests with remote == false are media playlists + # Not calling _extract_m3u8_formats here to save network traffic + if ext == 'm3u8': + a_format.update({ + 'ext': 'mp4', + 'protocol': 'm3u8', + }) + + formats.append(a_format) self._sort_formats(formats) info['formats'] = formats elif 
video_info.get('FLVFullLengthURL') is not None: From d10fe8358c064325349469a20be952ba794566d4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 26 Mar 2016 18:30:43 +0800 Subject: [PATCH 077/128] [generic] Add a test case for brightcove embed Closes #8862 --- youtube_dl/extractor/generic.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 12f2309fc..ea4009b41 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1124,7 +1124,23 @@ class GenericIE(InfoExtractor): # m3u8 downloads 'skip_download': True, } - } + }, + # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions' + # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm + { + 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html', + 'info_dict': { + 'id': '4785848093001', + 'ext': 'mp4', + 'title': 'The Cardinal Pell Interview', + 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ', + 'uploader': 'GlobeCast Australia - GlobeStream', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + }, ] def report_following_redirect(self, new_url): From e68d3a010fcf34455c7922b28a05ccc012381729 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 26 Mar 2016 18:34:51 +0800 Subject: [PATCH 078/128] [twitter] Fix extraction (closes #8966) HLS and DASH formats are no longer appeared in test cases. I keep them for fear of triggering new errors. 
--- youtube_dl/extractor/twitter.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index e70b2ab3c..602538e5c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -110,10 +110,9 @@ class TwitterCardIE(TwitterBaseIE): 'height': int(m.group('height')), }) - playlist = config.get('playlist') - if playlist: - video_url = playlist[0]['source'] + video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') + if video_url: f = { 'url': video_url, } @@ -185,7 +184,6 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'thumbnail': 're:^https?://.*\.jpg', - 'duration': 12.922, 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', From b5a5bbf3764a3912a1d07816b6e91560fe1d8a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 19:15:32 +0600 Subject: [PATCH 079/128] [mailru] Extend _VALID_URL (Closes #8990) --- youtube_dl/extractor/mailru.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 46eb00492..9a7098c43 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -13,7 +13,7 @@ from ..utils import ( class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' + _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' _TESTS = [ { @@ -61,6 +61,10 @@ class MailRuIE(InfoExtractor): 
'duration': 6001, }, 'skip': 'Not accessible from Travis CI server', + }, + { + 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html', + 'only_matching': True, } ] From 17bcc626bf67453cc5ab67e56684b6c6e33f4cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 19:33:57 +0600 Subject: [PATCH 080/128] [utils] Extract sanitize_url routine --- youtube_dl/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index eacd81bf9..6d27b80c0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -417,9 +417,12 @@ def sanitize_path(s): # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of # unwanted failures due to missing protocol +def sanitize_url(url): + return 'http:%s' % url if url.startswith('//') else url + + def sanitized_Request(url, *args, **kwargs): - return compat_urllib_request.Request( - 'http:%s' % url if url.startswith('//') else url, *args, **kwargs) + return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs) def orderedSet(iterable): From dcf77cf1a74ebcf7def71aecf55b8641e4645835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 19:37:41 +0600 Subject: [PATCH 081/128] [YoutubeDL] Sanitize final URLs (Closes #8991) --- youtube_dl/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 29d7a3106..33c269f9c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -65,6 +65,7 @@ from .utils import ( SameFileError, sanitize_filename, sanitize_path, + sanitize_url, sanitized_Request, std_headers, subtitles_filename, @@ -1229,6 +1230,7 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): + t['url'] = sanitize_url(t['url']) if t.get('width') and 
t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: @@ -1263,6 +1265,7 @@ class YoutubeDL(object): if subtitles: for _, subtitle in subtitles.items(): for subtitle_format in subtitle: + subtitle_format['url'] = sanitize_url(subtitle_format['url']) if 'ext' not in subtitle_format: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() @@ -1292,6 +1295,8 @@ class YoutubeDL(object): if 'url' not in format: raise ExtractorError('Missing "url" key in result (index %d)' % i) + format['url'] = sanitize_url(format['url']) + if format.get('format_id') is None: format['format_id'] = compat_str(i) else: From eedb7ba5364213b5f9dc773f70403ea028a44ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 19:40:33 +0600 Subject: [PATCH 082/128] [YoutubeDL] Sort imports --- youtube_dl/YoutubeDL.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 33c269f9c..53a36c145 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -39,6 +39,8 @@ from .compat import ( compat_urllib_request_DataHandler, ) from .utils import ( + age_restricted, + args_to_str, ContentTooShortError, date_from_str, DateRange, @@ -58,10 +60,12 @@ from .utils import ( PagedList, parse_filesize, PerRequestProxyHandler, - PostProcessingError, platform_name, + PostProcessingError, preferredencoding, + prepend_extension, render_table, + replace_extension, SameFileError, sanitize_filename, sanitize_path, @@ -76,10 +80,6 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, - prepend_extension, - replace_extension, - args_to_str, - age_restricted, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractors From 6dee688e6d8992913bbdbcc65a413cd9897dd489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 20:42:18 
+0600 Subject: [PATCH 083/128] [youtube:playlistsbase] Restrict playlist regex (Closes #8986) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8c321f1fc..28355bf46 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -234,7 +234,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): - for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): + for playlist_id in orderedSet(re.findall( + r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', + content)): yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') From efcba804f646dfc4eda2f9df2baf3ebed0f1bbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 23:42:34 +0600 Subject: [PATCH 084/128] [udemy] Extract formats from view_html (Closes #8979) --- youtube_dl/extractor/udemy.py | 41 +++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index be6f3be5e..da2d542ec 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_HTTPError, @@ -8,6 +10,8 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, + extract_attributes, ExtractorError, float_or_none, int_or_none, @@ -73,11 +77,8 @@ class UdemyIE(InfoExtractor): return self._download_json( 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % ( course_id, lecture_id, 
compat_urllib_parse_urlencode({ - 'video_only': '', - 'auto_play': '', - 'fields[lecture]': 'title,description,asset', + 'fields[lecture]': 'title,description,view_html,asset', 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data', - 'instructorPreviewMode': 'False', })), lecture_id, 'Downloading lecture JSON') @@ -246,6 +247,38 @@ class UdemyIE(InfoExtractor): f['format_id'] = '%sp' % format_id formats.append(f) + view_html = lecture.get('view_html') + if view_html: + view_html_urls = set() + for source in re.findall(r'<source[^>]+>', view_html): + attributes = extract_attributes(source) + src = attributes.get('src') + if not src: + continue + res = attributes.get('data-res') + height = int_or_none(res) + if src in view_html_urls: + continue + view_html_urls.add(src) + if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) + for f in m3u8_formats: + m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url']) + if m: + if not f.get('height'): + f['height'] = int(m.group('height')) + if not f.get('tbr'): + f['tbr'] = int(m.group('tbr')) + formats.extend(m3u8_formats) + else: + formats.append({ + 'url': src, + 'format_id': '%dp' % height if height else None, + 'height': height, + }) + self._sort_formats(formats) return { From 48dce58ca907921f5013367a7b22235b3a3a05df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 23:42:46 +0600 Subject: [PATCH 085/128] [udemy] Use custom sorting --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index da2d542ec..100db4dd0 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -279,7 +279,7 @@ class UdemyIE(InfoExtractor): 'height': height, }) - 
self._sort_formats(formats) + self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) return { 'id': video_id, From 02d7634d24b704a099e17224e3dc71906ccc92a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 23:43:25 +0600 Subject: [PATCH 086/128] [udemy] Fix outputs' formats format_id --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 100db4dd0..89e713285 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -201,7 +201,7 @@ class UdemyIE(InfoExtractor): def extract_output_format(src): return { 'url': src['url'], - 'format_id': '%sp' % (src.get('label') or format_id), + 'format_id': '%sp' % (src.get('height') or format_id), 'width': int_or_none(src.get('width')), 'height': int_or_none(src.get('height')), 'vbr': int_or_none(src.get('video_bitrate_in_kbps')), From 62f55aa68a5409c25457a14289cc859fdd73cc1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 23:54:12 +0600 Subject: [PATCH 087/128] [udemy] Add outputs metadata to view_html formats --- youtube_dl/extractor/udemy.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89e713285..5a6de9982 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -218,6 +218,16 @@ class UdemyIE(InfoExtractor): if not isinstance(outputs, dict): outputs = {} + def add_output_format_meta(f, key, format_id): + output = outputs.get(key) + if isinstance(output, dict): + output_format = extract_output_format(output) + output_format.update(f) + return output_format + else: + f['format_id'] = format_id + return f + for format_id, output in outputs.items(): if isinstance(output, dict) and output.get('url'): 
formats.append(extract_output_format(output)) @@ -238,13 +248,7 @@ class UdemyIE(InfoExtractor): if format_id: # Some videos contain additional metadata (e.g. # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) - output = outputs.get(format_id) - if isinstance(output, dict): - output_format = extract_output_format(output) - output_format.update(f) - f = output_format - else: - f['format_id'] = '%sp' % format_id + f = add_output_format_meta(f, format_id, '%sp' % format_id) formats.append(f) view_html = lecture.get('view_html') @@ -273,11 +277,10 @@ class UdemyIE(InfoExtractor): f['tbr'] = int(m.group('tbr')) formats.extend(m3u8_formats) else: - formats.append({ + formats.append(add_output_format_meta({ 'url': src, - 'format_id': '%dp' % height if height else None, 'height': height, - }) + }, res, '%dp' % height if height else None)) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) From f973e5d54e233c139d0b407b5772ff4966c8fa30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Mar 2016 23:55:07 +0600 Subject: [PATCH 088/128] [udemy] Drop outputs' formats Always results in 403 --- youtube_dl/extractor/udemy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5a6de9982..6a3dcb8d6 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -228,10 +228,6 @@ class UdemyIE(InfoExtractor): f['format_id'] = format_id return f - for format_id, output in outputs.items(): - if isinstance(output, dict) and output.get('url'): - formats.append(extract_output_format(output)) - download_urls = asset.get('download_urls') if isinstance(download_urls, dict): video = download_urls.get('Video') From af4116f4f04a3fc8150fdb4a220ef31a0a2dd044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 00:02:52 +0600 Subject: [PATCH 089/128] [udemy] Improve 
format_id --- youtube_dl/extractor/udemy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 6a3dcb8d6..5a5e9fa9e 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -218,15 +218,13 @@ class UdemyIE(InfoExtractor): if not isinstance(outputs, dict): outputs = {} - def add_output_format_meta(f, key, format_id): + def add_output_format_meta(f, key): output = outputs.get(key) if isinstance(output, dict): output_format = extract_output_format(output) output_format.update(f) return output_format - else: - f['format_id'] = format_id - return f + return f download_urls = asset.get('download_urls') if isinstance(download_urls, dict): @@ -239,12 +237,13 @@ class UdemyIE(InfoExtractor): format_id = format_.get('label') f = { 'url': format_['file'], + 'format_id': '%sp' % format_id, 'height': int_or_none(format_id), } if format_id: # Some videos contain additional metadata (e.g. 
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) - f = add_output_format_meta(f, format_id, '%sp' % format_id) + f = add_output_format_meta(f, format_id) formats.append(f) view_html = lecture.get('view_html') @@ -275,8 +274,9 @@ class UdemyIE(InfoExtractor): else: formats.append(add_output_format_meta({ 'url': src, + 'format_id': '%dp' % height if height else None, 'height': height, - }, res, '%dp' % height if height else None)) + }, res)) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) From b24ab3e341b9082774785332a1aa6405764f7202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 00:09:12 +0600 Subject: [PATCH 090/128] [udemy] Improve paid course detection --- youtube_dl/extractor/udemy.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5a5e9fa9e..71bea5363 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -55,21 +55,26 @@ class UdemyIE(InfoExtractor): }] def _enroll_course(self, base_url, webpage, course_id): + def combine_url(base_url, url): + return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url + checkout_url = unescapeHTML(self._search_regex( - r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1', + r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1', webpage, 'checkout url', group='url', default=None)) if checkout_url: raise ExtractorError( 'Course %s is not free. You have to pay for it before you can download. 
' - 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True) + 'Use this URL to confirm purchase: %s' + % (course_id, combine_url(base_url, checkout_url)), + expected=True) enroll_url = unescapeHTML(self._search_regex( r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1', webpage, 'enroll url', group='url', default=None)) if enroll_url: - if not enroll_url.startswith('http'): - enroll_url = compat_urlparse.urljoin(base_url, enroll_url) - webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') + webpage = self._download_webpage( + combine_url(base_url, enroll_url), + course_id, 'Enrolling in the course') if '>You have enrolled in' in webpage: self.to_screen('%s: Successfully enrolled in the course' % course_id) From 4cf3489c6e548aebe29534e496e7ccd638be6873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 01:11:11 +0600 Subject: [PATCH 091/128] [vevo] Update videoservice API URL (Closes #8900) --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 152fef42e..147480f64 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -152,7 +152,7 @@ class VevoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id response = self._download_json( json_url, video_id, 'Downloading video info', 'Unable to download info') video_info = response.get('video') or {} From 00322ad4fda31864f249bce410ac3ba520e865ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 02:00:36 +0600 Subject: [PATCH 092/128] [lynda] Extract chapter metadata (#8993) --- 
youtube_dl/extractor/lynda.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 71fd55ade..655627479 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -219,7 +219,7 @@ class LyndaCourseIE(LyndaBaseIE): 'Course %s does not exist' % course_id, expected=True) unaccessible_videos = 0 - videos = [] + entries = [] # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided # by single video API anymore @@ -229,20 +229,22 @@ class LyndaCourseIE(LyndaBaseIE): if video.get('HasAccess') is False: unaccessible_videos += 1 continue - if video.get('ID'): - videos.append(video['ID']) + video_id = video.get('ID') + if video_id: + entries.append({ + '_type': 'url_transparent', + 'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'ie_key': LyndaIE.ie_key(), + 'chapter': chapter.get('Title'), + 'chapter_number': int_or_none(chapter.get('ChapterIndex')), + 'chapter_id': compat_str(chapter.get('ID')), + }) if unaccessible_videos > 0: self._downloader.report_warning( '%s videos are only available for members (or paid members) and will not be downloaded. 
' % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) - entries = [ - self.url_result( - 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), - 'Lynda') - for video_id in videos] - course_title = course.get('Title') return self.playlist_result(entries, course_id, course_title) From 8018028d0fabb00c32b19b04984c482c6b54d2fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 02:10:52 +0600 Subject: [PATCH 093/128] [pluralsight] Extract chapter metadata (Closes #8993) --- youtube_dl/extractor/pluralsight.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index bc66f7a9d..df03dd419 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -279,13 +279,18 @@ class PluralsightCourseIE(PluralsightBaseIE): course_id, 'Downloading course data JSON') entries = [] - for module in course_data: + for num, module in enumerate(course_data, 1): for clip in module.get('clips', []): player_parameters = clip.get('playerParameters') if not player_parameters: continue - entries.append(self.url_result( - '%s/training/player?%s' % (self._API_BASE, player_parameters), - 'Pluralsight')) + entries.append({ + '_type': 'url_transparent', + 'url': '%s/training/player?%s' % (self._API_BASE, player_parameters), + 'ie_key': PluralsightIE.ie_key(), + 'chapter': module.get('title'), + 'chapter_number': num, + 'chapter_id': module.get('moduleRef'), + }) return self.playlist_result(entries, course_id, title, description) From 395fd4b08a4639f7e84754527e9facd83c8f782d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 27 Mar 2016 04:36:02 +0800 Subject: [PATCH 094/128] [twitter] Handle another form of embedded Vine Fixes #8996 --- youtube_dl/extractor/twitter.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/twitter.py 
b/youtube_dl/extractor/twitter.py index 602538e5c..1f32ea2eb 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -102,6 +102,9 @@ class TwitterCardIE(TwitterBaseIE): r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'), video_id) + if config.get('source_type') == 'vine': + return self.url_result(config['player_url'], 'Vine') + def _search_dimensions_in_video_url(a_format, video_url): m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) if m: @@ -245,6 +248,18 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', + 'md5': '89a15ed345d13b86e9a5a5e051fa308a', + 'info_dict': { + 'id': 'MIOxnrUteUd', + 'ext': 'mp4', + 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン', + 'uploader': 'TAKUMA', + 'uploader_id': '1004126642786242560', + 'upload_date': '20140615', + }, + 'add_ie': ['Vine'], }] def _real_extract(self, url): From 19dbaeece321c51fa336ef142507adf440e22e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 07:03:08 +0600 Subject: [PATCH 095/128] Remove _sort_formats from _extract_*_formats methods Now _sort_formats should be called explicitly. _sort_formats has been added to all the necessary places in code. 
Closes #8051 --- youtube_dl/extractor/abc7news.py | 1 + youtube_dl/extractor/azubu.py | 1 + youtube_dl/extractor/bet.py | 1 + youtube_dl/extractor/cbsnews.py | 1 + youtube_dl/extractor/chaturbate.py | 1 + youtube_dl/extractor/common.py | 6 ------ youtube_dl/extractor/cwtv.py | 1 + youtube_dl/extractor/dfb.py | 1 + youtube_dl/extractor/discovery.py | 29 +++++++++++++++++------------ youtube_dl/extractor/dplay.py | 2 ++ youtube_dl/extractor/dw.py | 2 +- youtube_dl/extractor/generic.py | 11 ++++++++++- youtube_dl/extractor/laola1tv.py | 1 + youtube_dl/extractor/lrt.py | 1 + youtube_dl/extractor/matchtv.py | 1 + youtube_dl/extractor/mitele.py | 1 + youtube_dl/extractor/nrk.py | 1 + youtube_dl/extractor/restudy.py | 1 + youtube_dl/extractor/rte.py | 1 + youtube_dl/extractor/rtve.py | 1 + youtube_dl/extractor/rtvnh.py | 1 + youtube_dl/extractor/shahid.py | 1 + youtube_dl/extractor/sportbox.py | 1 + youtube_dl/extractor/telecinco.py | 1 + youtube_dl/extractor/tubitv.py | 1 + youtube_dl/extractor/videomore.py | 1 + youtube_dl/extractor/vier.py | 1 + youtube_dl/extractor/viidea.py | 1 + youtube_dl/extractor/ynet.py | 4 +++- 29 files changed, 56 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py index 122dc9099..c04949c21 100644 --- a/youtube_dl/extractor/abc7news.py +++ b/youtube_dl/extractor/abc7news.py @@ -44,6 +44,7 @@ class Abc7NewsIE(InfoExtractor): 'contentURL', webpage, 'm3u8 url', fatal=True) formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') + self._sort_formats(formats) title = self._og_search_title(webpage).strip() description = self._og_search_description(webpage).strip() diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 1805b7312..efa624de1 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -120,6 +120,7 @@ class AzubuLiveIE(InfoExtractor): bc_info = self._download_json(req, user) m3u8_url = next(source['src'] for source in 
bc_info['sources'] if source['container'] == 'M2TS') formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4') + self._sort_formats(formats) return { 'id': info['id'], diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 03dad4636..986245bf0 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -94,6 +94,7 @@ class BetIE(InfoExtractor): xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') formats = self._extract_smil_formats(smil_url, display_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index f23bac9a1..e6b7f3584 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -122,6 +122,7 @@ class CBSNewsLiveVideoIE(InfoExtractor): for entry in f4m_formats: # URLs without the extra param induce an 404 error entry.update({'extra_param_to_segment_url': hdcore_sign}) + self._sort_formats(f4m_formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 242fba311..b2234549e 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -48,6 +48,7 @@ class ChaturbateIE(InfoExtractor): raise ExtractorError('Unable to find stream URL') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b412fd030..40ddf175c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1021,8 +1021,6 @@ class InfoExtractor(object): 'height': int_or_none(media_el.attrib.get('height')), 'preference': preference, }) - self._sort_formats(formats) - return formats def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, @@ -1143,7 +1141,6 @@ class InfoExtractor(object): last_media = None formats.append(f) last_info = {} - self._sort_formats(formats) return 
formats @staticmethod @@ -1317,8 +1314,6 @@ class InfoExtractor(object): }) continue - self._sort_formats(formats) - return formats def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): @@ -1536,7 +1531,6 @@ class InfoExtractor(object): existing_format.update(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) - self._sort_formats(formats) return formats def _live_title(self, name): diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 36af67013..f5cefd966 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -57,6 +57,7 @@ class CWTVIE(InfoExtractor): formats = self._extract_m3u8_formats( video_data['videos']['variantplaylist']['uri'], video_id, 'mp4') + self._sort_formats(formats) thumbnails = [{ 'url': image['uri'], diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py index 263532cc6..cdfeccacb 100644 --- a/youtube_dl/extractor/dfb.py +++ b/youtube_dl/extractor/dfb.py @@ -38,6 +38,7 @@ class DFBIE(InfoExtractor): token_el = f4m_info.find('token') manifest_url = token_el.attrib['url'] + '?' 
+ 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' formats = self._extract_f4m_formats(manifest_url, display_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index fdce1429a..5f1275b39 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -63,18 +63,23 @@ class DiscoveryIE(InfoExtractor): video_title = info.get('playlist_title') or info.get('video_title') - entries = [{ - 'id': compat_str(video_info['id']), - 'formats': self._extract_m3u8_formats( + entries = [] + + for idx, video_info in enumerate(info['playlist']): + formats = self._extract_m3u8_formats( video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls', - note='Download m3u8 information for video %d' % (idx + 1)), - 'title': video_info['title'], - 'description': video_info.get('description'), - 'duration': parse_duration(video_info.get('video_length')), - 'webpage_url': video_info.get('href') or video_info.get('url'), - 'thumbnail': video_info.get('thumbnailURL'), - 'alt_title': video_info.get('secondary_title'), - 'timestamp': parse_iso8601(video_info.get('publishedDate')), - } for idx, video_info in enumerate(info['playlist'])] + note='Download m3u8 information for video %d' % (idx + 1)) + self._sort_formats(formats) + entries.append({ + 'id': compat_str(video_info['id']), + 'formats': formats, + 'title': video_info['title'], + 'description': video_info.get('description'), + 'duration': parse_duration(video_info.get('video_length')), + 'webpage_url': video_info.get('href') or video_info.get('url'), + 'thumbnail': video_info.get('thumbnailURL'), + 'alt_title': video_info.get('secondary_title'), + 'timestamp': parse_iso8601(video_info.get('publishedDate')), + }) return self.playlist_result(entries, display_id, video_title) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 1e7dcada6..66bbfc6ca 100644 --- a/youtube_dl/extractor/dplay.py 
+++ b/youtube_dl/extractor/dplay.py @@ -118,6 +118,8 @@ class DPlayIE(InfoExtractor): if info.get(protocol): extract_formats(protocol, info[protocol]) + self._sort_formats(formats) + return { 'id': video_id, 'display_id': display_id, diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index b6c985547..ae7c571bd 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -39,13 +39,13 @@ class DWIE(InfoExtractor): hidden_inputs = self._hidden_inputs(webpage) title = hidden_inputs['media_title'] - formats = [] if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': formats = self._extract_smil_formats( 'http://www.dw.com/smil/v-%s' % media_id, media_id, transform_source=lambda s: s.replace( 'rtmp://tv-od.dw.de/flash/', 'http://tv-download.dw.de/dwtv_video/flv/')) + self._sort_formats(formats) else: formats = [{'url': hidden_inputs['file_name']}] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ea4009b41..f3de738f7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1310,6 +1310,7 @@ class GenericIE(InfoExtractor): 'vcodec': 'none' if m.group('type') == 'audio' else None }] info_dict['direct'] = True + self._sort_formats(formats) info_dict['formats'] = formats return info_dict @@ -1336,6 +1337,7 @@ class GenericIE(InfoExtractor): # Is it an M3U playlist? if first_bytes.startswith(b'#EXTM3U'): info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4') + self._sort_formats(info_dict['formats']) return info_dict # Maybe it's a direct link to a video? 
@@ -1360,15 +1362,19 @@ class GenericIE(InfoExtractor): if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): - return self._parse_smil(doc, url, video_id) + smil = self._parse_smil(doc, url, video_id) + self._sort_formats(smil['formats']) + return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, video_id, mpd_base_url=url.rpartition('/')[0]) + self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id) + self._sort_formats(info_dict['formats']) return info_dict except compat_xml_parse_error: pass @@ -2053,6 +2059,9 @@ class GenericIE(InfoExtractor): else: entry_info_dict['url'] = video_url + if entry_info_dict.get('formats'): + self._sort_formats(entry_info_dict['formats']) + entries.append(entry_info_dict) if len(entries) == 1: diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d9dc067d2..d4fbafece 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -130,6 +130,7 @@ class Laola1TvIE(InfoExtractor): formats = self._extract_f4m_formats( '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth), video_id, f4m_id='hds') + self._sort_formats(formats) categories_str = _v('meta_sports') categories = categories_str.split(',') if categories_str else [] diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 863efd896..1072405b3 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -37,6 +37,7 @@ class LRTIE(InfoExtractor): r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)', webpage, 'm3u8 url', group='url') formats = self._extract_m3u8_formats(m3u8_url, 
video_id, 'mp4') + self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index e33bfde3b..80a0d7013 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -47,6 +47,7 @@ class MatchTVIE(InfoExtractor): video_url = self._download_json(request, video_id)['data']['videoUrl'] f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title('Матч ТВ - Прямой эфир'), diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 76ced7928..7b4581dc5 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -67,6 +67,7 @@ class MiTeleIE(InfoExtractor): formats.extend(self._extract_f4m_formats( file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', display_id, f4m_id=loc)) + self._sort_formats(formats) title = self._search_regex( r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3b21fbd4d..9df200822 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -63,6 +63,7 @@ class NRKIE(InfoExtractor): if determine_ext(media_url) == 'f4m': formats = self._extract_f4m_formats( media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') + self._sort_formats(formats) else: formats = [{ 'url': media_url, diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py index b17c2bfc0..fd50065d4 100644 --- a/youtube_dl/extractor/restudy.py +++ b/youtube_dl/extractor/restudy.py @@ -31,6 +31,7 @@ class RestudyIE(InfoExtractor): formats = self._extract_smil_formats( 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, video_id) + 
self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 042bc8dab..9c89974e7 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -49,6 +49,7 @@ class RteIE(InfoExtractor): # f4m_url = server + relative_url f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url'] f4m_formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(f4m_formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 08cd1ae6c..79af47715 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -209,6 +209,7 @@ class RTVELiveIE(InfoExtractor): png = self._download_webpage(png_url, video_id, 'Downloading url information') m3u8_url = _decrypt_url(png) formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index 7c9d4b0cd..4896d09d6 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -38,6 +38,7 @@ class RTVNHIE(InfoExtractor): item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) elif item.get('type') == '': formats.append({'url': item['file']}) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b4433a689..d95ea06be 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -77,6 +77,7 @@ class ShahidIE(InfoExtractor): raise ExtractorError('This video is DRM protected.', expected=True) formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') + self._sort_formats(formats) video = self._download_json( '%s/%s/%s?%s' % ( diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 86d509ae5..4f0c66213 100644 --- 
a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -99,6 +99,7 @@ class SportBoxEmbedIE(InfoExtractor): webpage, 'hls file') formats = self._extract_m3u8_formats(hls, video_id, 'mp4') + self._sort_formats(formats) title = self._search_regex( r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index d6b2560f8..4b4b740b4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -82,6 +82,7 @@ class TelecincoIE(InfoExtractor): ) formats = self._extract_m3u8_formats( token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native') + self._sort_formats(formats) return { 'id': embed_data['videoId'], diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 306ee4e15..7af233cd6 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -69,6 +69,7 @@ class TubiTvIE(InfoExtractor): apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu') m3u8_url = codecs.decode(apu, 'rot_13')[::-1] formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 0bd1e1eec..04e95c66e 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -111,6 +111,7 @@ class VideomoreIE(InfoExtractor): video_url = xpath_text(video, './/video_url', 'video url', fatal=True) formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') + self._sort_formats(formats) data = self._download_json( 'http://videomore.ru/video/tracks/%s.json' % video_id, diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index c76c20614..6645c6186 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -50,6 +50,7 @@ class VierIE(InfoExtractor): playlist_url = 
'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + self._sort_formats(formats) title = self._og_search_title(webpage, default=display_id) description = self._og_search_description(webpage, default=None) diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 03b9f1353..a4f914d14 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -151,6 +151,7 @@ class ViideaIE(InfoExtractor): smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id) smil = self._download_smil(smil_url, lecture_id) info = self._parse_smil(smil, smil_url, lecture_id) + self._sort_formats(info['formats']) info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id) info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id) if multipart: diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 2522551dc..0d943c343 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -41,10 +41,12 @@ class YnetIE(InfoExtractor): m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title) if m: title = m.group('title') + formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'formats': self._extract_f4m_formats(f4m_url, video_id), + 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), } From f7df343b4a0223698f0a5320b850410d7a42be6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 07:41:19 +0600 Subject: [PATCH 096/128] [downloader/f4m] Extract routine for removing unsupported encrypted media --- youtube_dl/downloader/f4m.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index fc9642905..664d87543 
100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -223,6 +223,12 @@ def write_metadata_tag(stream, metadata): write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and + 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + def _add_ns(prop): return '{http://ns.adobe.com/f4m/1.0}%s' % prop @@ -244,9 +250,7 @@ class F4mFD(FragmentFD): # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: self.report_error('Missing ID in f4m DRM') - media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and - 'drmAdditionalHeaderSetId' not in e.attrib, - media)) + media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media From b22ca76204e1a05e1c4b07d24cb6a0dbbc09d18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 07:42:38 +0600 Subject: [PATCH 097/128] [extractor/common] Filter out unsupported encrypted media for f4m formats (Closes #8573) --- youtube_dl/extractor/common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 40ddf175c..9b7ab8924 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -24,6 +24,7 @@ from ..compat import ( compat_urllib_parse_urlencode, compat_urlparse, ) +from ..downloader.f4m import remove_encrypted_media from ..utils import ( NO_DEFAULT, age_restricted, @@ -989,6 +990,11 @@ class InfoExtractor(object): if not media_nodes: manifest_version = '2.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') + # Remove unsupported DRM protected media from final formats + # rendition (see https://github.com/rg3/youtube-dl/issues/8573). 
+ media_nodes = remove_encrypted_media(media_nodes) + if not media_nodes: + return formats base_url = xpath_text( manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], 'base URL', default=None) From a122e7080bbcc505d638eaef8ab4d1e4f5bd91ee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 27 Mar 2016 16:56:33 +0200 Subject: [PATCH 098/128] release 2016.03.27 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d4cf099f5..5daa7f4e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.26' +__version__ = '2016.03.27' From a1cf3e38a34caa333fd9703333ef55e0b3ac5a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 23:22:51 +0600 Subject: [PATCH 099/128] [bbc] Extend vpid regex (Closes #9003) --- youtube_dl/extractor/bbc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 2dfcee98d..dedf721bd 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -688,6 +688,10 @@ class BBCIE(BBCCoUkIE): # custom redirection to www.bbc.com 'url': 'http://www.bbc.co.uk/news/science-environment-33661876', 'only_matching': True, + }, { + # single video article embedded with data-media-vpid + 'url': 'http://www.bbc.co.uk/sport/rowing/35908187', + 'only_matching': True, }] @classmethod @@ -817,7 +821,7 @@ class BBCIE(BBCCoUkIE): # single video story (e.g. 
http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-video-player-vpid="(%s)"' % self._ID_REGEX, + [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], webpage, 'vpid', default=None) From 8f9a477e7f260d60836843fbe8f75629e3ae8892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 00:21:08 +0600 Subject: [PATCH 100/128] [pornhub:playlistbase] Use orderedSet --- youtube_dl/extractor/pornhub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 5a55c25e7..670e9294a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -12,6 +12,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + orderedSet, sanitized_Request, str_to_int, ) @@ -150,7 +151,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): def _extract_entries(self, webpage): return [ self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key()) - for video_url in set(re.findall( + for video_url in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) ] From 3a23bae9ccf11c9c114d2d27e4fbc09fb0bbeafe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 00:32:57 +0600 Subject: [PATCH 101/128] [pornhub:playlistbase] Do not include videos not from playlist --- youtube_dl/extractor/pornhub.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 670e9294a..b3bf81a13 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -150,9 +150,12 @@ class PornHubIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor): def 
_extract_entries(self, webpage): return [ - self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key()) - for video_url in orderedSet(re.findall( - r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) + self.url_result( + 'http://www.pornhub.com/%s' % video_url, + PornHubIE.ie_key(), video_title=title) + for video_url, title in orderedSet(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', + webpage)) ] def _real_extract(self, url): From 3454139576ad98b62162ba0a9bca4b342c5d07ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 00:50:46 +0600 Subject: [PATCH 102/128] [pornhub:uservideos] Add support for multipage videos (Closes #9006) --- youtube_dl/extractor/pornhub.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index b3bf81a13..ac298d0ce 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,10 +1,12 @@ from __future__ import unicode_literals +import itertools import os import re from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, @@ -189,16 +191,31 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubUserVideosIE(PornHubPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos' _TESTS = [{ - 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { - 'id': 'rushandlia', + 'id': 'zoe_ph', }, - 'playlist_mincount': 13, + 'playlist_mincount': 171, + }, { + 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'only_matching': True, }] def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id) + entries 
= [] + for page_num in itertools.count(1): + try: + webpage = self._download_webpage( + url, user_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + page_entries = self._extract_entries(webpage) + if not page_entries: + break + entries.extend(page_entries) - return self.playlist_result(self._extract_entries(webpage), user_id) + return self.playlist_result(entries, user_id) From 87d105ac6c90b4dad519de7d013623923d74d570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 01:13:47 +0600 Subject: [PATCH 103/128] [amp] Fix upload timestamp extraction (Closes #9007) --- youtube_dl/extractor/amp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 69e6baff7..138fa0808 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -69,12 +69,14 @@ class AMPIE(InfoExtractor): self._sort_formats(formats) + timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) + return { 'id': video_id, 'title': get_media_node('title'), 'description': get_media_node('description'), 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(item.get('pubDate'), ' '), + 'timestamp': timestamp, 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')), 'subtitles': subtitles, 'formats': formats, From c8b13fec025bcb7402656095df369ad3f3225ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 01:14:12 +0600 Subject: [PATCH 104/128] [foxnews] Restore upload time fields in test --- youtube_dl/extractor/foxnews.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 1dc50318c..b04da2415 100644 --- a/youtube_dl/extractor/foxnews.py 
+++ b/youtube_dl/extractor/foxnews.py @@ -18,8 +18,8 @@ class FoxNewsIE(AMPIE): 'title': 'Frozen in Time', 'description': '16-year-old girl is size of toddler', 'duration': 265, - # 'timestamp': 1304411491, - # 'upload_date': '20110503', + 'timestamp': 1304411491, + 'upload_date': '20110503', 'thumbnail': 're:^https?://.*\.jpg$', }, }, @@ -32,8 +32,8 @@ class FoxNewsIE(AMPIE): 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", 'description': "Congressman discusses president's plan", 'duration': 292, - # 'timestamp': 1417662047, - # 'upload_date': '20141204', + 'timestamp': 1417662047, + 'upload_date': '20141204', 'thumbnail': 're:^https?://.*\.jpg$', }, 'params': { From 03442072c0890f10043d1de25dc3c3fcaf10f4eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 01:21:44 +0600 Subject: [PATCH 105/128] [pornhub] Fix typo (Closes #9008) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ac298d0ce..407ea08d4 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -78,7 +78,7 @@ class PornHubIE(InfoExtractor): flashvars = self._parse_json( self._search_regex( - r'var\s+flashv1ars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), + r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: video_title = flashvars.get('video_title') From 33f3040a3e611f45ad920bd06030691910ddf815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 03:13:39 +0600 Subject: [PATCH 106/128] [YoutubeDL] Fix sanitizing subtitles' url --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 53a36c145..d7aa951ff 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ 
-1265,7 +1265,8 @@ class YoutubeDL(object): if subtitles: for _, subtitle in subtitles.items(): for subtitle_format in subtitle: - subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) if 'ext' not in subtitle_format: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() From 8d9dd3c34bd38b2545af95f8ef670b07ae1fb6ff Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Mon, 28 Mar 2016 03:08:34 +0500 Subject: [PATCH 107/128] [README.md] Add format_id to the list of string meta fields available for use in format selection --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 52b2a85a3..e972bf69f 100644 --- a/README.md +++ b/README.md @@ -600,6 +600,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin - `vcodec`: Name of the video codec in use - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native` + - `format_id`: A short description of the format Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster. From 7710bdf4e813879dbd8c5857e13a2c64e0ce8837 Mon Sep 17 00:00:00 2001 From: Sander van den Oever <sandervdo@gmail.com> Date: Sun, 6 Mar 2016 23:16:13 +0100 Subject: [PATCH 108/128] Add initial ISSUE_TEMPLATE Add auto-updating of youtube-dl version in ISSUE_TEMPLATE Move parts of template text and adopt makefile to new format Moved the 'kind-of-issue' section and rephrased a bit Rephrased and moved Example URL section upwards Moved ISSUE_TEMPLATE inside .github folder. 
Update makefile to match new folderstructure --- .github/ISSUE_TEMPLATE.md | 37 +++++++++++++++++++++++++++++++ Makefile | 5 ++++- devscripts/make_issue_template.py | 32 ++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 devscripts/make_issue_template.py diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 000000000..c34cbe743 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,37 @@ +Make sure you are using the **latest** version of youtube-dl. Latest youtube-dl version at this moment is **2016.03.06**. Read [our FAQ](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) if you have troubles updating. +- [ ] I've verified that I'm running youtube-dl version **2016.03.06** + +**What is the purpose of this issue? Put an `x` to the relevant box** +- [ ] Site support request (add support for a new website) +- [ ] Feature request (request new functionality) +- [ ] Bug report (encountered problems with youtube-dl) +- [ ] Other, namely ... + +**If the purpose of this issues is a site support request please provide _at least_ one example URL of a video or a playlist you are trying to download.** + +- http://some.example.url/to-video + +*If the purpose of this issue is a bug report or you are unsure about its relevance please include a log as described below.* + +**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it below wrapped in ``` for better formatting. 
It should look similar to this: +``` +$ youtube-dl -v <your command line> +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version 2015.12.06 +[debug] Git HEAD: 135392e +[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +``` + +**Brief description of the problem/request** + +*I am having a problem with ... I have tried to do ... and ... I expected that ... would happen, but instead ... happened. Example: I tried to download a file but the site was not supported. Please add support for site xyz. Another example: I encountered a bug when downloading a video from xyz. I have tried to do a and b.* + +**Suggested solution or other information** + +*In case you have suggestions for a solution or any other relevant information you can write it here* diff --git a/Makefile b/Makefile index 6689ec06f..bfbe5e6cb 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md issue_template README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe @@ -59,6 +59,9 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md +issue_template: .github/ISSUE_TEMPLATE.md youtube_dl/version.py + $(PYTHON) 
devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.md + supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py new file mode 100644 index 000000000..2fdd05035 --- /dev/null +++ b/devscripts/make_issue_template.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import io +import optparse +import re + + +def main(): + parser = optparse.OptionParser(usage='%prog FILE') + options, args = parser.parse_args() + if len(args) != 1: + parser.error('Expected an filename') + + with io.open(args[0], encoding='utf-8') as inf: + issue_template_text = inf.read() + + # Get the version from youtube_dl/version.py without importing the package + exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + + issue_template_text = re.sub( + r'(?<=\*\*)(?P<version>[0-9\.]+)(?=\*\*)', + __version__, + issue_template_text + ) + + with io.open(args[0], 'w', encoding='utf-8') as outf: + outf.write(issue_template_text) + +if __name__ == '__main__': + main() From 3842a3e6524c8704d4295f65e6c0bce578d69e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:15:26 +0600 Subject: [PATCH 109/128] Add ISSUE_TEMPLATE.tmpl as template for ISSUE_TEMPLATE.md --- .github/ISSUE_TEMPLATE.tmpl | 43 +++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.tmpl diff --git a/.github/ISSUE_TEMPLATE.tmpl b/.github/ISSUE_TEMPLATE.tmpl new file mode 100644 index 000000000..9bca5b531 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.tmpl @@ -0,0 +1,43 @@ +### Please follow the guide below, provide requested information and put an `x` into all the boxes [ ] relevant to your *issue*. 
+ +#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** + +#### Before submitting an *issue* make sure you have: +- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones + +#### What is the purpose of your *issue*? +- [ ] Bug report (encountered problems with youtube-dl) +- [ ] Site support request (request for adding support for a new site) +- [ ] Feature request (request for a new functionality) +- [ ] Question +- [ ] Other + +#### The following sections concretize particular purposed issues, you can erase any section not applicable to your *issue*. + +#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: + +Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. 
It should look similar to one below (replace it with **your** log inserted between triple ```): +``` +$ youtube-dl -v <your command line> +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version %(version)s +[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +<end of log> +``` + +#### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + +#### Description of your *issue*, suggested solution and other information + +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. 
From 3bf1df51fda4189eaa9164134b56393e2c4a7f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:16:38 +0600 Subject: [PATCH 110/128] [devscripts/make_issue_template] Rework to use ISSUE_TEMPLATE.tmpl (Closes #8785) --- devscripts/make_issue_template.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 2fdd05035..e5564bac1 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -3,30 +3,27 @@ from __future__ import unicode_literals import io import optparse -import re def main(): - parser = optparse.OptionParser(usage='%prog FILE') + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') options, args = parser.parse_args() - if len(args) != 1: - parser.error('Expected an filename') + if len(args) != 2: + parser.error('Expected an input and an output filename') - with io.open(args[0], encoding='utf-8') as inf: - issue_template_text = inf.read() + infile, outfile = args + + with io.open(infile, encoding='utf-8') as inf: + issue_template_tmpl = inf.read() # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), - 'youtube_dl/version.py', 'exec')) + 'youtube_dl/version.py', 'exec')) - issue_template_text = re.sub( - r'(?<=\*\*)(?P<version>[0-9\.]+)(?=\*\*)', - __version__, - issue_template_text - ) + out = issue_template_tmpl % {'version': __version__} - with io.open(args[0], 'w', encoding='utf-8') as outf: - outf.write(issue_template_text) + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(out) if __name__ == '__main__': main() From 8751da85a7d21702132091986bc6224d3a3af319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:17:57 +0600 Subject: [PATCH 111/128] [Makefile] Fix ISSUE_TEMPLATE.md target --- Makefile | 
8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index bfbe5e6cb..9e3ce78c8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -all: youtube-dl README.md CONTRIBUTING.md issue_template README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md ISSUE_TEMPLATE.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete @@ -59,8 +59,8 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -issue_template: .github/ISSUE_TEMPLATE.md youtube_dl/version.py - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.md +ISSUE_TEMPLATE.md: + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.tmpl .github/ISSUE_TEMPLATE.md supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md From 4a5a67ca2503165fc4b7c2ca9c881b79101245b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:18:52 +0600 Subject: [PATCH 112/128] [devscripts/release.sh] Make ISSUE_TEMPLATE.md and commit it --- devscripts/release.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 61806961c..6718ce39b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -45,9 +45,9 @@ fi /bin/echo -e "\n### Changing version in version.py..." sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py -/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..." -make README.md CONTRIBUTING.md supportedsites -git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py +/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." +make README.md CONTRIBUTING.md ISSUE_TEMPLATE.md supportedsites +git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py git commit -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." 
From a39c68f7e5a83d6bb3b1c4eb7f856b764a5488bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:19:24 +0600 Subject: [PATCH 113/128] Exclude make_issue_template.py from flake8 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 26857750c..5760112d4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ universal = True [flake8] -exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git ignore = E402,E501,E731 From 89924f8230987f9d6405d6487a7914197d42b12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:34:12 +0600 Subject: [PATCH 114/128] [devscripts/make_issue_template] Fix NameError under python3 --- devscripts/make_issue_template.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index e5564bac1..eed4b3a00 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -16,6 +16,8 @@ def main(): with io.open(infile, encoding='utf-8') as inf: issue_template_tmpl = inf.read() + __version__ = None + # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) From 3c0de33ad758c2f16f0abb1bb594c79f4cb40593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:43:48 +0600 Subject: [PATCH 115/128] Remove ISSUE_TEMPLATE.md --- .github/ISSUE_TEMPLATE.md | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index c34cbe743..000000000 --- 
a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,37 +0,0 @@ -Make sure you are using the **latest** version of youtube-dl. Latest youtube-dl version at this moment is **2016.03.06**. Read [our FAQ](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) if you have troubles updating. -- [ ] I've verified that I'm running youtube-dl version **2016.03.06** - -**What is the purpose of this issue? Put an `x` to the relevant box** -- [ ] Site support request (add support for a new website) -- [ ] Feature request (request new functionality) -- [ ] Bug report (encountered problems with youtube-dl) -- [ ] Other, namely ... - -**If the purpose of this issues is a site support request please provide _at least_ one example URL of a video or a playlist you are trying to download.** - -- http://some.example.url/to-video - -*If the purpose of this issue is a bug report or you are unsure about its relevance please include a log as described below.* - -**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it below wrapped in ``` for better formatting. It should look similar to this: -``` -$ youtube-dl -v <your command line> -[debug] System config: [] -[debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] -[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2015.12.06 -[debug] Git HEAD: 135392e -[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 -[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 -[debug] Proxy map: {} -... -``` - -**Brief description of the problem/request** - -*I am having a problem with ... I have tried to do ... and ... I expected that ... would happen, but instead ... happened. Example: I tried to download a file but the site was not supported. Please add support for site xyz. 
Another example: I encountered a bug when downloading a video from xyz. I have tried to do a and b.* - -**Suggested solution or other information** - -*In case you have suggestions for a solution or any other relevant information you can write it here* From 034947dd1eed9e7d61671c48844b3f77a4683e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:48:04 +0600 Subject: [PATCH 116/128] Rename ISSUE_TEMPLATE.tmpl in order not to be picked up by github --- .github/{ISSUE_TEMPLATE.tmpl => ISSUE_TEMPLATE_tmpl.md} | 0 Makefile | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{ISSUE_TEMPLATE.tmpl => ISSUE_TEMPLATE_tmpl.md} (100%) diff --git a/.github/ISSUE_TEMPLATE.tmpl b/.github/ISSUE_TEMPLATE_tmpl.md similarity index 100% rename from .github/ISSUE_TEMPLATE.tmpl rename to .github/ISSUE_TEMPLATE_tmpl.md diff --git a/Makefile b/Makefile index 9e3ce78c8..3a6c37944 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md ISSUE_TEMPLATE.md: - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.tmpl .github/ISSUE_TEMPLATE.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md From 7168a6c874c1c0e4cffb6c1e29eeda322051def7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 03:05:15 +0600 Subject: [PATCH 117/128] [devscripts/make_issue_template] Fix __version__ again --- devscripts/make_issue_template.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index eed4b3a00..b7ad23d83 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -16,13 +16,11 @@ def main(): with io.open(infile, 
encoding='utf-8') as inf: issue_template_tmpl = inf.read() - __version__ = None - # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) - out = issue_template_tmpl % {'version': __version__} + out = issue_template_tmpl % {'version': locals()['__version__']} with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) From 0b7bfc94221bbdb79fd4602643891c8c9c59292f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 22:02:42 +0600 Subject: [PATCH 118/128] Improve ISSUE_TEMPLATE_tmpl.md --- .github/ISSUE_TEMPLATE_tmpl.md | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 9bca5b531..a5e6a4233 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -1,22 +1,32 @@ -### Please follow the guide below, provide requested information and put an `x` into all the boxes [ ] relevant to your *issue*. +## Please follow the guide below -#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly +- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) +- Use *Preview* tab to see how your issue will actually look like + +--- + +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** -#### Before submitting an *issue* make sure you have: +### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones -#### What is the purpose of your *issue*? +### What is the purpose of your *issue*? - [ ] Bug report (encountered problems with youtube-dl) - [ ] Site support request (request for adding support for a new site) - [ ] Feature request (request for a new functionality) - [ ] Question - [ ] Other -#### The following sections concretize particular purposed issues, you can erase any section not applicable to your *issue*. +--- -#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: +### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue* + +--- + +### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. 
It should look similar to one below (replace it with **your** log inserted between triple ```): ``` @@ -33,11 +43,16 @@ $ youtube-dl -v <your command line> <end of log> ``` -#### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): +--- + +### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -#### Description of your *issue*, suggested solution and other information +--- + +### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +If work on your *issue* required an account credentials please provide them or explain how one can obtain them. 
From 607619bc90e202b09028053d5eab9c03dc7cee4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 22:04:29 +0600 Subject: [PATCH 119/128] Add manually generated ISSUE_TEMPLATE.md In order not to wait for the next release --- .github/ISSUE_TEMPLATE.md | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 000000000..5b1f573e7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,58 @@ +## Please follow the guide below + +- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly +- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) +- Use *Preview* tab to see how your issue will actually look like + +--- + +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.03.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.03.27** + +### Before submitting an *issue* make sure you have: +- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones + +### What is the purpose of your *issue*? 
+- [ ] Bug report (encountered problems with youtube-dl) +- [ ] Site support request (request for adding support for a new site) +- [ ] Feature request (request for a new functionality) +- [ ] Question +- [ ] Other + +--- + +### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue* + +--- + +### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: + +Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +``` +$ youtube-dl -v <your command line> +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version 2016.03.27 +[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +<end of log> +``` + +--- + +### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + +--- + +### Description of your *issue*, suggested solution and other information + +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. 
+If work on your *issue* required an account credentials please provide them or explain how one can obtain them. From 6e359a1534e6a20acb53e1268ec77b6b92765e22 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 30 Mar 2016 12:27:00 +0100 Subject: [PATCH 120/128] [comcarcoff] don not depend on crackle extractor(closes #8995) previously extraction has been delegated to crackle to extract more info and subtitles #6106 but some of the episodes can't be extracted using crackle #8995. --- youtube_dl/extractor/comcarcoff.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index e697d1410..747c245c8 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -41,7 +41,13 @@ class ComCarCoffIE(InfoExtractor): display_id = full_data['activeVideo']['video'] video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] + video_id = compat_str(video_data['mediaId']) + title = video_data['title'] + formats = self._extract_m3u8_formats( + video_data['mediaUrl'], video_id, 'mp4') + self._sort_formats(formats) + thumbnails = [{ 'url': video_data['images']['thumb'], }, { @@ -54,15 +60,14 @@ class ComCarCoffIE(InfoExtractor): video_data.get('duration')) return { - '_type': 'url_transparent', - 'url': 'crackle:%s' % video_id, 'id': video_id, 'display_id': display_id, - 'title': video_data['title'], + 'title': title, 'description': video_data.get('description'), 'timestamp': timestamp, 'duration': duration, 'thumbnails': thumbnails, + 'formats': formats, 'season_number': int_or_none(video_data.get('season')), 'episode_number': int_or_none(video_data.get('episode')), 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), From afca767d196dee68b254720706a2ba191455e99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Wed, 30 Mar 2016 22:26:43 +0600 Subject: [PATCH 121/128] [tumblr] Improve _VALID_URL (Closes #9027) --- youtube_dl/extractor/tumblr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index e5bcf7798..4d8b57111 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class TumblrIE(InfoExtractor): - _VALID_URL = r'https?://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' + _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', 'md5': '479bb068e5b16462f5176a6828829767', From 597d52fadbf32af4f2dcc7b9e236c318145f536b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Mar 2016 01:54:08 +0800 Subject: [PATCH 122/128] [kuwo:song] Correct song ID extraction (fixes #9033) Bug introduced in daef04a4e75ccd2ff5e2d2495baa0ac9bcf75724. 
--- youtube_dl/extractor/kuwo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index a586308b2..84c0f363d 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -55,7 +55,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)' + _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { From 3ae6f8fec1381df41dc05272bfe3ab03654ac4af Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Mar 2016 02:11:21 +0800 Subject: [PATCH 123/128] [kwuo] Remove _sort_formats() from KuwoBaseIE._get_formats() Following the idea proposed in 19dbaeece321c51fa336ef142507adf440e22e22 --- youtube_dl/extractor/kuwo.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 84c0f363d..f09436332 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -44,11 +44,6 @@ class KuwoBaseIE(InfoExtractor): 'abr': file_format.get('abr'), }) - # XXX _sort_formats fails if there are not formats, while it's not the - # desired behavior if 'IPDeny' is ignored - # This check can be removed if https://github.com/rg3/youtube-dl/pull/8051 is merged - if not tolerate_ip_deny: - self._sort_formats(formats) return formats @@ -103,6 +98,7 @@ class KuwoIE(KuwoBaseIE): lrc_content = None formats = self._get_formats(song_id) + self._sort_formats(formats) album_id = self._html_search_regex( r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"', From e621a344e6ec3518420f0b13577726615c2f4485 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Mar 2016 02:27:52 +0800 Subject: [PATCH 124/128] [kwuo] Port to new API and enable --cn-verification-proxy --- 
youtube_dl/extractor/kuwo.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index f09436332..86c17c931 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -26,10 +26,23 @@ class KuwoBaseIE(InfoExtractor): def _get_formats(self, song_id, tolerate_ip_deny=False): formats = [] for file_format in self._FORMATS: + headers = {} + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + headers['Ytdl-request-proxy'] = cn_verification_proxy + + query = { + 'format': file_format['ext'], + 'br': file_format.get('br', ''), + 'rid': 'MUSIC_%s' % song_id, + 'type': 'convert_url', + 'response': 'url' + } + song_url = self._download_webpage( - 'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' % - (file_format['ext'], file_format.get('br', ''), song_id), + 'http://antiserver.kuwo.cn/anti.s', song_id, note='Download %s url info' % file_format['format'], + query=query, headers=headers, ) if song_url == 'IPDeny' and not tolerate_ip_deny: From 6b820a2376a953657578f9a477ff7768d3633512 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 30 Mar 2016 21:18:07 +0100 Subject: [PATCH 125/128] [myspace] improve extraction --- youtube_dl/extractor/myspace.py | 80 ++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index 83414a232..0d5238d77 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..compat import ( - compat_str, +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, ) -from ..utils import ExtractorError class MySpaceIE(InfoExtractor): @@ -24,6 +24,8 @@ class 
MySpaceIE(InfoExtractor): 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', 'uploader': 'Five Minutes to the Stage', 'uploader_id': 'fiveminutestothestage', + 'timestamp': 1414108751, + 'upload_date': '20141023', }, 'params': { # rtmp download @@ -64,7 +66,7 @@ class MySpaceIE(InfoExtractor): 'ext': 'mp4', 'title': 'Starset - First Light', 'description': 'md5:2d5db6c9d11d527683bcda818d332414', - 'uploader': 'Jacob Soren', + 'uploader': 'Yumi K', 'uploader_id': 'SorenPromotions', 'upload_date': '20140725', } @@ -78,6 +80,19 @@ class MySpaceIE(InfoExtractor): player_url = self._search_regex( r'playerSwf":"([^"?]*)', webpage, 'player URL') + def rtmp_format_from_stream_url(stream_url, width=None, height=None): + rtmp_url, play_path = stream_url.split(';', 1) + return { + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': play_path, + 'player_url': player_url, + 'protocol': 'rtmp', + 'ext': 'flv', + 'width': width, + 'height': height, + } + if mobj.group('mediatype').startswith('music/song'): # songs don't store any useful info in the 'context' variable song_data = self._search_regex( @@ -93,8 +108,8 @@ class MySpaceIE(InfoExtractor): return self._search_regex( r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, song_data, name, default='', group='data') - streamUrl = search_data('stream-url') - if not streamUrl: + stream_url = search_data('stream-url') + if not stream_url: vevo_id = search_data('vevo-id') youtube_id = search_data('youtube-id') if vevo_id: @@ -106,36 +121,47 @@ class MySpaceIE(InfoExtractor): else: raise ExtractorError( 'Found song but don\'t know how to download it') - info = { + return { 'id': video_id, 'title': self._og_search_title(webpage), 'uploader': search_data('artist-name'), 'uploader_id': search_data('artist-username'), 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': int_or_none(search_data('duration')), + 'formats': 
[rtmp_format_from_stream_url(stream_url)] } else: - context = json.loads(self._search_regex( - r'context = ({.*?});', webpage, 'context')) - video = context['video'] - streamUrl = video['streamUrl'] - info = { - 'id': compat_str(video['mediaId']), + video = self._parse_json(self._search_regex( + r'context = ({.*?});', webpage, 'context'), + video_id)['video'] + formats = [] + hls_stream_url = video.get('hlsStreamUrl') + if hls_stream_url: + formats.append({ + 'format_id': 'hls', + 'url': hls_stream_url, + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + stream_url = video.get('streamUrl') + if stream_url: + formats.append(rtmp_format_from_stream_url( + stream_url, + int_or_none(video.get('width')), + int_or_none(video.get('height')))) + self._sort_formats(formats) + return { + 'id': video_id, 'title': video['title'], - 'description': video['description'], - 'thumbnail': video['imageUrl'], - 'uploader': video['artistName'], - 'uploader_id': video['artistUsername'], + 'description': video.get('description'), + 'thumbnail': video.get('imageUrl'), + 'uploader': video.get('artistName'), + 'uploader_id': video.get('artistUsername'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('dateAdded')), + 'formats': formats, } - rtmp_url, play_path = streamUrl.split(';', 1) - info.update({ - 'url': rtmp_url, - 'play_path': play_path, - 'player_url': player_url, - 'ext': 'flv', - }) - return info - class MySpaceAlbumIE(InfoExtractor): IE_NAME = 'MySpace:album' From c02ec7d4300d3e2607f48fe73011fd8caa38f90c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 30 Mar 2016 23:18:31 +0100 Subject: [PATCH 126/128] [cnbc] Add new extractor(closes #8012) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cnbc.py | 29 +++++++++++++++++++++++++++++ youtube_dl/extractor/theplatform.py | 8 ++++---- 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 youtube_dl/extractor/cnbc.py diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1e4b078a4..bd1f7d293 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -127,6 +127,7 @@ from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE +from .cnbc import CNBCIE from .cnet import CNETIE from .cnn import ( CNNIE, diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py new file mode 100644 index 000000000..593e459aa --- /dev/null +++ b/youtube_dl/extractor/cnbc.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class CNBCIE(InfoExtractor): + _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://video.cnbc.com/gallery/?video=3000503714', + 'md5': '', + 'info_dict': { + 'id': '3000503714', + 'ext': 'mp4', + 'title': 'Video title goes here', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, + {'force_smil_url': True}), + 'id': video_id, + } diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 863914299..236c99972 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -82,7 +82,7 @@ class ThePlatformBaseIE(OnceIE): class ThePlatformIE(ThePlatformBaseIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ - (?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? 
|theplatform:)(?P<id>[^/\?&]+)''' _TESTS = [{ @@ -170,10 +170,10 @@ class ThePlatformIE(ThePlatformBaseIE): if not provider_id: provider_id = 'dJ5BDC' - path = provider_id + path = provider_id + '/' if mobj.group('media'): - path += '/media' - path += '/' + video_id + path += mobj.group('media') + path += video_id qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query) if 'guid' in qs_dict: From ce548296fe8bde2756fd9915bd744c904231de8f Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 00:25:11 +0100 Subject: [PATCH 127/128] [cnbc] fix test --- youtube_dl/extractor/cnbc.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 593e459aa..25b308752 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -9,12 +9,16 @@ class CNBCIE(InfoExtractor): _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' _TEST = { 'url': 'http://video.cnbc.com/gallery/?video=3000503714', - 'md5': '', 'info_dict': { 'id': '3000503714', 'ext': 'mp4', - 'title': 'Video title goes here', - } + 'title': 'Fighting zombies is big business', + 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): From 9cf01f7f30b698aee6d22052d8898b14a59d94bb Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 00:26:42 +0100 Subject: [PATCH 128/128] [nbc] add new extractor for csnne.com(#5432) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bd1f7d293..7b0f2b21a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -442,6 +442,7 @@ from .nationalgeographic import NationalGeographicIE from .naver 
import NaverIE from .nba import NBAIE from .nbc import ( + CSNNEIE, NBCIE, NBCNewsIE, NBCSportsIE, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index a622f2212..43d75d3ca 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -134,6 +134,30 @@ class NBCSportsIE(InfoExtractor): NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') +class CSNNEIE(InfoExtractor): + _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)' + + _TEST = { + 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', + 'info_dict': { + 'id': 'yvBLLUgQ8WU0', + 'ext': 'mp4', + 'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.', + 'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': self._html_search_meta('twitter:player:stream', webpage), + 'display_id': display_id, + } + + class NBCNewsIE(ThePlatformIE): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)|