From 6310acf512136a1e37ef5905c4bdce8cf14ba5a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 12 Feb 2017 18:09:53 +0700 Subject: [PATCH 01/80] [youtube] Fix parsing codecs (closes #12091) --- youtube_dl/extractor/youtube.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 76710931a..dec02804b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -34,6 +34,7 @@ from ..utils import ( int_or_none, mimetype2ext, orderedSet, + parse_codecs, parse_duration, remove_quotes, remove_start, @@ -1696,15 +1697,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): codecs = mobj.group('val') break if codecs: - codecs = codecs.split(',') - if len(codecs) == 2: - acodec, vcodec = codecs[1], codecs[0] - else: - acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0]) - dct.update({ - 'acodec': acodec, - 'vcodec': vcodec, - }) + dct.update(parse_codecs(codecs)) formats.append(dct) elif video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] From 459818e2808e0dfccc5a3efb7c053afa847cb632 Mon Sep 17 00:00:00 2001 From: Aniruddh-J Date: Sun, 12 Feb 2017 17:48:11 +0530 Subject: [PATCH 02/80] [aenetworks] Add support for lifetimemovieclub.com --- youtube_dl/extractor/aenetworks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index c97317400..dd96a47ce 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -23,7 +23,7 @@ class AENetworksBaseIE(ThePlatformIE): class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' - _VALID_URL = r'https?://(?:www\.)?(?P(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P[^/]+(?:/[^/]+){0,2})|movies/(?P[^/]+)/full-movie)' + _VALID_URL = 
r'https?://(?:www\.)?(?P(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P[^/]+(?:/[^/]+){0,2})|movies/(?P[^/]+)(?:/full-movie)?)' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', @@ -62,11 +62,15 @@ class AENetworksIE(AENetworksBaseIE): }, { 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', 'only_matching': True + }, { + 'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us', + 'only_matching': True }] _DOMAIN_TO_REQUESTOR_ID = { 'history.com': 'HISTORY', 'aetv.com': 'AETV', 'mylifetime.com': 'LIFETIME', + 'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB', 'fyi.tv': 'FYI', } From f8514630db9ba72a9bddc000c393698f4c116c81 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 12 Feb 2017 20:53:55 +0800 Subject: [PATCH 03/80] [einthusan] Fix extraction (closes #11416) The old test URLs are no longer valid, so I replace them with the one from #11416 --- ChangeLog | 3 + youtube_dl/extractor/einthusan.py | 117 ++++++++++++++++++------------ 2 files changed, 75 insertions(+), 45 deletions(-) diff --git a/ChangeLog b/ChangeLog index cba47a296..089449dfb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) +Extractor +* [einthusan] Fix extraction (#11416) + version 2017.02.11 diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 6ca07a13d..8a2a17b63 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -1,67 +1,94 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 +import json + from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_str, +) from ..utils import ( - remove_start, - sanitized_Request, + extract_attributes, + ExtractorError, + get_elements_by_class, + urlencode_postdata, ) class 
EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.einthusan.com/movies/watch.php?id=2447', - 'md5': 'd71379996ff5b7f217eca034c34e3461', - 'info_dict': { - 'id': '2447', - 'ext': 'mp4', - 'title': 'Ek Villain', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:9d29fc91a7abadd4591fb862fa560d93', - } - }, - { - 'url': 'http://www.einthusan.com/movies/watch.php?id=1671', - 'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213', - 'info_dict': { - 'id': '1671', - 'ext': 'mp4', - 'title': 'Soodhu Kavvuum', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c', - } - }, - ] + _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P[0-9]+)' + _TEST = { + 'url': 'https://einthusan.tv/movie/watch/9097/', + 'md5': 'ff0f7f2065031b8a2cf13a933731c035', + 'info_dict': { + 'id': '9097', + 'ext': 'mp4', + 'title': 'Ae Dil Hai Mushkil', + 'description': 'md5:33ef934c82a671a94652a9b4e54d931b', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js + def _decrypt(self, encrypted_data, video_id): + return self._parse_json(base64.b64decode(( + encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1] + ).encode('ascii')).decode('utf-8'), video_id) def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request(url) - request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0') - webpage = self._download_webpage(request, video_id) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'

]+class=["\']movie-title["\'][^>]*>(.+?)

', - webpage, 'title') + title = self._html_search_regex(r'

([^<]+)

', webpage, 'title') - video_id = self._search_regex( - r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) + player_params = extract_attributes(self._search_regex( + r'(]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters')) - m3u8_url = self._download_webpage( - 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' - % video_id, video_id, headers={'Referer': url}) - formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native') + page_id = self._html_search_regex( + ']+data-pageid="([^"]+)"', webpage, 'page ID') + video_data = self._download_json( + 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id, + data=urlencode_postdata({ + 'xEvent': 'UIVideoPlayer.PingOutcome', + 'xJson': json.dumps({ + 'EJOutcomes': player_params['data-ejpingables'], + 'NativeHLS': False + }), + 'arcVersion': 3, + 'appVersion': 59, + 'gorilla.csrf.Token': page_id, + }))['Data'] - description = self._html_search_meta('description', webpage) + if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'): + raise ExtractorError( + 'Download rate reached. 
Please try again later.', expected=True) + + ej_links = self._decrypt(video_data['EJLinks'], video_id) + + formats = [] + + m3u8_url = ej_links.get('HLSLink') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + + mp4_url = ej_links.get('MP4Link') + if mp4_url: + formats.append({ + 'url': mp4_url, + }) + + self._sort_formats(formats) + + description = get_elements_by_class('synopsis', webpage)[0] thumbnail = self._html_search_regex( - r'''''', - webpage, "thumbnail url", fatal=False) + r''']+src=(["'])(?P(?!\1).+?/moviecovers/(?!\1).+?)\1''', + webpage, 'thumbnail url', fatal=False, group='url') if thumbnail is not None: - thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..')) + thumbnail = compat_urlparse.urljoin(url, thumbnail) return { 'id': video_id, From 0dac7cbb092c804f1548c4a60f15ac29a7db06b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 12 Feb 2017 17:24:45 +0100 Subject: [PATCH 04/80] [hotstar] improve extraction(closes #12096) - extract all qualities - detect drm protected videos - extract more metadata --- youtube_dl/extractor/hotstar.py | 46 +++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index f05d765d6..3a7a66a34 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor): 'only_matching': True, }] - _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s' - _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' - - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): - json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) + def _download_json(self, url_or_request, 
video_id, note='Downloading JSON metadata', fatal=True, query=None): + json_data = super(HotStarIE, self)._download_json( + url_or_request, video_id, note, fatal=fatal, query=query) if json_data['resultCode'] != 'OK': if fatal: raise ExtractorError(json_data['errorDescription']) @@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - self._GET_CONTENT_TEMPLATE % video_id, - video_id)['contentInfo'][0] + 'http://account.hotstar.com/AVS/besc', video_id, query={ + 'action': 'GetAggregatedContentDetails', + 'channel': 'PCTV', + 'contentId': video_id, + })['contentInfo'][0] + title = video_data['episodeTitle'] + + if video_data.get('encrypted') == 'Y': + raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - # PCTV for extracting f4m manifest - for f in ('TABLET',): + for f in ('JIO',): format_data = self._download_json( - self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'), - video_id, 'Downloading %s JSON metadata' % f, fatal=False) + 'http://getcdn.hotstar.com/AVS/besc', + video_id, 'Downloading %s JSON metadata' % f, + fatal=False, query={ + 'action': 'GetCDN', + 'asJson': 'Y', + 'channel': f, + 'id': video_id, + 'type': 'VOD', + }) if format_data: - format_url = format_data['src'] + format_url = format_data.get('src') + if not format_url: + continue ext = determine_ext(format_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + m3u8_id='hls', fatal=False)) elif ext == 'f4m': # produce broken files continue @@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor): return { 'id': video_id, - 'title': video_data['episodeTitle'], + 'title': title, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': int_or_none(video_data.get('broadcastDate')), 'formats': 
formats, + 'episode': title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('contentTitle'), } From 1e2c3f61fc952620a52a8a3a79bcd1a6f7d8ecae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Feb 2017 03:33:23 +0700 Subject: [PATCH 05/80] [travis] Separate builds for core and download --- .travis.yml | 7 ++++++- devscripts/run_tests.sh | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 4833c76e9..8ba93ec02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: - "3.5" - "3.6" sudo: false -script: nosetests test --verbose +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +before_script: + - chmod +x ./devscripts/run_tests.sh +script: ./devscripts/run_tests.sh notifications: email: - filippo.valsorda@gmail.com diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100644 index 000000000..7f4c1e083 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" + +test_set="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set From 9dad94185367cdfde0de21cd8e595094cbe31acc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 11:43:20 +0100 Subject: [PATCH 06/80] [disney] improve extraction - add support for more urls - detect expired videos - skip Adobe Flash Access protected videos closes #4975 closes #11000 closes #11882 closes #11936 --- youtube_dl/extractor/disney.py | 60 ++++++++++++++++++++++++++++----- youtube_dl/extractor/generic.py | 13 ------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py 
index 396873c6d..939d1338c 100644 --- a/youtube_dl/extractor/disney.py +++ b/youtube_dl/extractor/disney.py @@ -9,13 +9,15 @@ from ..utils import ( unified_strdate, compat_str, determine_ext, + ExtractorError, ) class DisneyIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})''' + https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})|(?:[^/]+/)?(?P[^/?#]+))''' _TESTS = [{ + # Disney.EmbedVideo 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', 'info_dict': { 'id': '545ed1857afee5a0ec239977', @@ -28,6 +30,20 @@ class DisneyIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + # Grill.burger + 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', + 'info_dict': { + 'id': '5454e9f4e9804a552e3524c8', + 'ext': 'mp4', + 'title': '"Intro" Featurette: Rogue One: A Star Wars Story', + 'upload_date': '20170104', + 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', 'only_matching': True, @@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor): }, { 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', 'only_matching': True, + }, { + 'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677', + 'only_matching': True, + }, { + 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1', + 'only_matching': True, + }, { + 'url': 
'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', + 'only_matching': True, }] def _real_extract(self, url): - domain, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'http://%s/embed/%s' % (domain, video_id), video_id) - video_data = self._parse_json(self._search_regex( - r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] + domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( + r'Grill\.burger\s*=\s*({.+})\s*:', + webpage, 'grill data')) + page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video') + video_data = page_data['data'][0] + else: + webpage = self._download_webpage( + 'http://%s/embed/%s' % (domain, video_id), video_id) + page_data = self._parse_json(self._search_regex( + r'Disney\.EmbedVideo\s*=\s*({.+});', + webpage, 'embed data'), video_id) + video_data = page_data['video'] for external in video_data.get('externals', []): if external.get('source') == 'vevo': return self.url_result('vevo:' + external['data_id'], 'Vevo') + video_id = video_data['id'] title = video_data['title'] formats = [] for flavor in video_data.get('flavors', []): flavor_format = flavor.get('format') flavor_url = flavor.get('url') - if not flavor_url or not re.match(r'https?://', flavor_url): + if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access': continue tbr = int_or_none(flavor.get('bitrate')) if tbr == 99999: formats.extend(self._extract_m3u8_formats( - flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) + flavor_url, video_id, 'mp4', + m3u8_id=flavor_format, fatal=False)) continue format_id = [] if flavor_format: @@ 
-88,6 +128,10 @@ class DisneyIE(InfoExtractor): 'ext': ext, 'vcodec': 'none' if (width == 0 and height == 0) else None, }) + if not formats and video_data.get('expired'): + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), + expected=True) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c233f038..494cc3c84 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -991,19 +991,6 @@ class GenericIE(InfoExtractor): 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', }, }, - # Kaltura embed protected with referrer - { - 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', - 'info_dict': { - 'id': '1_g4fbemnq', - 'ext': 'mp4', - 'title': 'Violetta - Achter De Schermen - Ruggero', - 'description': 'Achter de schermen met Ruggero', - 'timestamp': 1435133761, - 'upload_date': '20150624', - 'uploader_id': 'echojecka', - }, - }, # Kaltura embed with single quotes { 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', From 1de9f78e71214e130b5882662cdcd716b737e6ca Mon Sep 17 00:00:00 2001 From: Sergey M Date: Mon, 13 Feb 2017 18:56:05 +0800 Subject: [PATCH 07/80] [travis] Separate builds for core and download --- .travis.yml | 7 ++++++- devscripts/run_tests.sh | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 4833c76e9..8ba93ec02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: - "3.5" - "3.6" sudo: false -script: nosetests test --verbose +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +before_script: + - chmod +x ./devscripts/run_tests.sh +script: ./devscripts/run_tests.sh notifications: email: - filippo.valsorda@gmail.com diff --git a/devscripts/run_tests.sh 
b/devscripts/run_tests.sh new file mode 100644 index 000000000..7f4c1e083 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" + +test_set="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set From 454e5cdb17dd4e77f3d387045b083f3d3ed61ae0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 14:28:30 +0100 Subject: [PATCH 08/80] [limelight] add support referer protected videos --- youtube_dl/extractor/generic.py | 9 ++++++--- youtube_dl/extractor/limelight.py | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 494cc3c84..a2b0298ec 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2337,8 +2337,9 @@ class GenericIE(InfoExtractor): 'Channel': 'channel', 'ChannelList': 'channel_list', } - return self.url_result('limelight:%s:%s' % ( - lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + return self.url_result(smuggle_url('limelight:%s:%s' % ( + lm[mobj.group(1)], mobj.group(2)), {'source_url': url}), + 'Limelight%s' % mobj.group(1), mobj.group(2)) mobj = re.search( r'''(?sx) @@ -2348,7 +2349,9 @@ class GenericIE(InfoExtractor): value=(["\'])(?:(?!\3).)*mediaId=(?P[a-z0-9]{32}) ''', webpage) if mobj: - return self.url_result('limelight:media:%s' % mobj.group('id')) + return self.url_result(smuggle_url( + 'limelight:media:%s' % mobj.group('id'), + {'source_url': url}), 'LimelightMedia', mobj.group('id')) # Look for AdobeTVVideo embeds mobj = re.search( diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index e635f3c4d..a3712665b 100644 --- a/youtube_dl/extractor/limelight.py +++ 
b/youtube_dl/extractor/limelight.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + unsmuggle_url, ) @@ -15,20 +16,23 @@ class LimelightBaseIE(InfoExtractor): _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' - def _call_playlist_service(self, item_id, method, fatal=True): + def _call_playlist_service(self, item_id, method, fatal=True, referer=None): + headers = {} + if referer: + headers['Referer'] = referer return self._download_json( self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal) + item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) def _call_api(self, organization_id, item_id, method): return self._download_json( self._API_URL % (organization_id, self._API_PATH, item_id, method), item_id, 'Downloading API %s JSON' % method) - def _extract(self, item_id, pc_method, mobile_method, meta_method): - pc = self._call_playlist_service(item_id, pc_method) + def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None): + pc = self._call_playlist_service(item_id, pc_method, referer=referer) metadata = self._call_api(pc['orgId'], item_id, meta_method) - mobile = self._call_playlist_service(item_id, mobile_method, fatal=False) + mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer) return pc, mobile, metadata def _extract_info(self, streams, mobile_urls, properties): @@ -207,10 +211,13 @@ class LimelightMediaIE(LimelightBaseIE): _API_PATH = 'media' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) pc, mobile, metadata = self._extract( - video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties') + video_id, 'getPlaylistByMediaId', + 
'getMobilePlaylistByMediaId', 'properties', + smuggled_data.get('source_url')) return self._extract_info( pc['playlistItems'][0].get('streams', []), @@ -247,11 +254,13 @@ class LimelightChannelIE(LimelightBaseIE): _API_PATH = 'channels' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) channel_id = self._match_id(url) pc, mobile, medias = self._extract( channel_id, 'getPlaylistByChannelId', - 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media') + 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', + 'media', smuggled_data.get('source_url')) entries = [ self._extract_info( From 89c6691f9d130ec63552a6ece4743caa572fc962 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 15:08:48 +0100 Subject: [PATCH 09/80] [bellmedia] accept longer video id(closes #12114) --- youtube_dl/extractor/bellmedia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index 32326ed9e..1f5b6ed92 100644 --- a/youtube_dl/extractor/bellmedia.py +++ b/youtube_dl/extractor/bellmedia.py @@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor): space )\.ca| much\.com - )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6})''' + )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor): }, { 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', + 'only_matching': True, }] _DOMAINS = { 'thecomedynetwork': 'comedy', From 6e5956e6ba32c5e4d186e79fbaff0842818ae56b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Feb 2017 23:17:48 +0700 Subject: [PATCH 10/80] [lemonde] Fallback delegate extraction to generic extractor (closes #12115, closes 
#12116) --- youtube_dl/extractor/lemonde.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py index 42568f315..3306892e8 100644 --- a/youtube_dl/extractor/lemonde.py +++ b/youtube_dl/extractor/lemonde.py @@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P[^/]+)\.html' _TESTS = [{ 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html', - 'md5': '01fb3c92de4c12c573343d63e163d302', + 'md5': 'da120c8722d8632eec6ced937536cc98', 'info_dict': { 'id': 'lqm3kl', 'ext': 'mp4', 'title': "Comprendre l'affaire Bygmalion en 5 minutes", 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 320, + 'duration': 309, 'upload_date': '20160119', 'timestamp': 1453194778, 'uploader_id': '3pmkp', }, + }, { + # standard iframe embed + 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html', + 'info_dict': { + 'id': 'uzsxms', + 'ext': 'mp4', + 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?", + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 325, + 'upload_date': '20161021', + 'timestamp': 1477044540, + 'uploader_id': '3pmkp', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html', 'only_matching': True, + }, { + # YouTube embeds + 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor): digiteka_url = self._proto_relative_url(self._search_regex( 
r'url\s*:\s*(["\'])(?P(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1', - webpage, 'digiteka url', group='url')) - return self.url_result(digiteka_url, 'Digiteka') + webpage, 'digiteka url', group='url', default=None)) + + if digiteka_url: + return self.url_result(digiteka_url, 'Digiteka') + + return self.url_result(url, 'Generic') From f6d6ca1db3020e7c7771880d0c4b58fdf732a8d5 Mon Sep 17 00:00:00 2001 From: Vobe Date: Sat, 11 Feb 2017 21:11:55 +0100 Subject: [PATCH 11/80] [xtube] Improve title extraction --- youtube_dl/extractor/xtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 11717fe98..ed3a37649 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -53,7 +53,7 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/video-watch/-%s' % video_id + url = 'http://www.xtube.com/watch.php?v=%s' % video_id req = sanitized_Request(url) req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') @@ -73,7 +73,7 @@ class XTubeIE(InfoExtractor): self._sort_formats(formats) title = self._search_regex( - (r'

(?P[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), + (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), webpage, 'title', group='title') description = self._search_regex( r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) From 085f169ffebc17ec8b2bfc63aec8f5df57c7bdcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Feb 2017 23:44:43 +0700 Subject: [PATCH 12/80] [xtube] Fix extraction for both kinds of video id (closes #12088) --- youtube_dl/extractor/xtube.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index ed3a37649..5584674a0 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -44,6 +44,9 @@ class XTubeIE(InfoExtractor): }, { 'url': 'xtube:625837', 'only_matching': True, + }, { + 'url': 'xtube:kVTUy_G222_', + 'only_matching': True, }] def _real_extract(self, url): @@ -53,11 +56,16 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/watch.php?v=%s' % video_id - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') - webpage = self._download_webpage(req, display_id) + if video_id.isdigit() and len(video_id) < 11: + url_pattern = 'http://www.xtube.com/video-watch/-%s' + else: + url_pattern = 'http://www.xtube.com/watch.php?v=%s' + + webpage = self._download_webpage( + url_pattern % video_id, display_id, headers={ + 'Cookie': 'age_verified=1; cookiesAccepted=1', + }) sources = self._parse_json(self._search_regex( r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),', From 50de3dbad39d0b8cc1529113894f146f6f3f24b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:00:06 +0700 Subject: [PATCH 13/80] [zdf] Fix extraction (closes #12117) --- youtube_dl/extractor/zdf.py | 14 ++++++++------ 1 file changed, 
8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index a365923fb..523bb5c95 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -20,9 +20,9 @@ from ..utils import ( class ZDFBaseIE(InfoExtractor): - def _call_api(self, url, player, referrer, video_id): + def _call_api(self, url, player, referrer, video_id, item): return self._download_json( - url, video_id, 'Downloading JSON content', + url, video_id, 'Downloading JSON %s' % item, headers={ 'Referer': referrer, 'Api-Auth': 'Bearer %s' % player['apiToken'], @@ -104,7 +104,7 @@ class ZDFIE(ZDFBaseIE): }) formats.append(f) - def _extract_entry(self, url, content, video_id): + def _extract_entry(self, url, player, content, video_id): title = content.get('title') or content['teaserHeadline'] t = content['mainVideoContent']['http://zdf.de/rels/target'] @@ -116,7 +116,8 @@ class ZDFIE(ZDFBaseIE): 'http://zdf.de/rels/streams/ptmd-template'].replace( '{playerId}', 'portal') - ptmd = self._download_json(urljoin(url, ptmd_path), video_id) + ptmd = self._call_api( + urljoin(url, ptmd_path), player, url, video_id, 'metadata') formats = [] track_uris = set() @@ -174,8 +175,9 @@ class ZDFIE(ZDFBaseIE): } def _extract_regular(self, url, player, video_id): - content = self._call_api(player['content'], player, url, video_id) - return self._extract_entry(player['content'], content, video_id) + content = self._call_api( + player['content'], player, url, video_id, 'content') + return self._extract_entry(player['content'], player, content, video_id) def _extract_mobile(self, video_id): document = self._download_json( From cedf08ff54d192a0e32ecb3b943f50299cda7ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:07:35 +0700 Subject: [PATCH 14/80] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 089449dfb..d651f8880 100644 --- 
a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,17 @@ Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) Extractor +* [zdf] Fix extraction (#12117) +* [xtube] Fix extraction for both kinds of video id (#12088) +* [xtube] Improve title extraction (#12088) ++ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116) +* [bellmedia] Allow video id longer than 6 characters (#12114) ++ [limelight] Add support for referer protected videos +* [disney] Improve extraction (#4975, #11000, #11882, #11936) +* [hotstar] Improve extraction (#12096) * [einthusan] Fix extraction (#11416) ++ [aenetworks] Add support for lifetimemovieclub.com (#12097) +* [youtube] Fix parsing codecs (#12091) version 2017.02.11 From 58a65ba852443075fe38a3ef74798de05dd57bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:09:18 +0700 Subject: [PATCH 15/80] release 2017.02.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7bd301cc8..32aa55d83 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.11 +[debug] youtube-dl version 2017.02.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d651f8880..9242b3eee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.14 Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1f84acfea..3e7e7c0bf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.11' +__version__ = '2017.02.14' From fcca0d53a8fa47614a39a433a3da7d1ab1d88ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Caletka?= <ondrej@caletka.cz> Date: Tue, 14 Feb 2017 15:57:17 +0100 Subject: [PATCH 16/80] [ceskatelevize] Quick fix to revert to using old HLS-based playlist This fixes recent changes in iVysilani. Proper patch should migrate to MPEG-DASH version, which is now the default. 
--- youtube_dl/extractor/ceskatelevize.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 4f88c31ad..0f1453b99 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -21,10 +21,10 @@ class CeskaTelevizeIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { - 'id': '61924494876951776', + 'id': '61924494877246241', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace', - 'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', + 'title': 'Hyde Park Civilizace: Život v Grónsku', + 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3350, }, @@ -121,6 +121,7 @@ class CeskaTelevizeIE(InfoExtractor): req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') req.add_header('X-Requested-With', 'XMLHttpRequest') + req.add_header('User-agent', 'Mozilla/5.0') req.add_header('Referer', url) playlistpage = self._download_json(req, playlist_id) From 5cb2d36c82abf3b753910afe3013b274e31a247a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 22:56:39 +0700 Subject: [PATCH 17/80] [ceskatelevize] Extract DASH formats (closes #12119, closes #12133) --- youtube_dl/extractor/ceskatelevize.py | 142 +++++++++++++++----------- 1 file changed, 83 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 0f1453b99..e08bf264c 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -13,6 +13,7 @@ from ..utils import ( float_or_none, sanitized_Request, urlencode_postdata, + USER_AGENTS, ) @@ -114,71 +115,94 @@ class CeskaTelevizeIE(InfoExtractor): 'requestSource': 'iVysilani', } - req = 
sanitized_Request( - 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - data=urlencode_postdata(data)) - - req.add_header('Content-type', 'application/x-www-form-urlencoded') - req.add_header('x-addr', '127.0.0.1') - req.add_header('X-Requested-With', 'XMLHttpRequest') - req.add_header('User-agent', 'Mozilla/5.0') - req.add_header('Referer', url) - - playlistpage = self._download_json(req, playlist_id) - - playlist_url = playlistpage['url'] - if playlist_url == 'error_region': - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - - req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) - req.add_header('Referer', url) - - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - - playlist = self._download_json(req, playlist_id)['playlist'] - playlist_len = len(playlist) - entries = [] - for item in playlist: - is_live = item.get('type') == 'LIVE' - formats = [] - for format_id, stream_url in item['streamUrls'].items(): - formats.extend(self._extract_m3u8_formats( - stream_url, playlist_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', - fatal=False)) - self._sort_formats(formats) - item_id = item.get('id') or item['assetId'] - title = item['title'] + for user_agent in (None, USER_AGENTS['Safari']): + req = sanitized_Request( + 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + data=urlencode_postdata(data)) - duration = float_or_none(item.get('duration')) - thumbnail = item.get('previewImageUrl') + req.add_header('Content-type', 'application/x-www-form-urlencoded') + req.add_header('x-addr', '127.0.0.1') + req.add_header('X-Requested-With', 'XMLHttpRequest') + if user_agent: + req.add_header('User-Agent', user_agent) + req.add_header('Referer', url) - subtitles = {} - if item.get('type') == 'VOD': - subs = item.get('subtitles') - if subs: - subtitles = self.extract_subtitles(episode_id, subs) + playlistpage = 
self._download_json(req, playlist_id, fatal=False) - if playlist_len == 1: - final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) - else: - final_title = '%s (%s)' % (playlist_title, title) + if not playlistpage: + continue - entries.append({ - 'id': item_id, - 'title': final_title, - 'description': playlist_description if playlist_len == 1 else None, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - }) + playlist_url = playlistpage['url'] + if playlist_url == 'error_region': + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + + req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) + req.add_header('Referer', url) + + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) + + playlist = self._download_json(req, playlist_id, fatal=False) + if not playlist: + continue + + playlist = playlist.get('playlist') + if not isinstance(playlist, list): + continue + + playlist_len = len(playlist) + + for num, item in enumerate(playlist): + is_live = item.get('type') == 'LIVE' + formats = [] + for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'playerType=flash' in stream_url: + formats.extend(self._extract_m3u8_formats( + stream_url, playlist_id, 'mp4', + entry_protocol='m3u8' if is_live else 'm3u8_native', + fatal=False)) + else: + formats.extend(self._extract_mpd_formats( + stream_url, playlist_id, fatal=False)) + + if user_agent and len(entries) == playlist_len: + entries[num]['formats'].extend(formats) + continue + + item_id = item.get('id') or item['assetId'] + title = item['title'] + + duration = float_or_none(item.get('duration')) + thumbnail = item.get('previewImageUrl') + + subtitles = {} + if item.get('type') == 'VOD': + subs = item.get('subtitles') + if subs: + subtitles = self.extract_subtitles(episode_id, subs) + + if 
playlist_len == 1: + final_title = playlist_title or title + if is_live: + final_title = self._live_title(final_title) + else: + final_title = '%s (%s)' % (playlist_title, title) + + entries.append({ + 'id': item_id, + 'title': final_title, + 'description': playlist_description if playlist_len == 1 else None, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + }) + + for e in entries: + self._sort_formats(e['formats']) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From 9a372f14b422de15acf91e25a90375688b2ba3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 23:52:41 +0700 Subject: [PATCH 18/80] [pornhub] Extract video URL from tv platform site (#12007, #12129) --- youtube_dl/extractor/pornhub.py | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 818d99c1f..7a2737032 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -2,27 +2,27 @@ from __future__ import unicode_literals import itertools -import os +# import os import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, - compat_urllib_parse_urlparse, + # compat_urllib_parse_unquote, + # compat_urllib_parse_unquote_plus, + # compat_urllib_parse_urlparse, ) from ..utils import ( ExtractorError, int_or_none, js_to_json, orderedSet, - sanitized_Request, + # sanitized_Request, str_to_int, ) -from ..aes import ( - aes_decrypt_text -) +# from ..aes import ( +# aes_decrypt_text +# ) class PornHubIE(InfoExtractor): @@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - req = sanitized_Request( - 'http://www.pornhub.com/view_video.php?viewkey=%s' % 
video_id) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + def dl_webpage(platform): + return self._download_webpage( + 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id, + video_id, headers={ + 'Cookie': 'age_verified=1; platform=%s' % platform, + }) + + webpage = dl_webpage('pc') error_msg = self._html_search_regex( r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>', @@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + tv_webpage = dl_webpage('tv') + + video_url = self._search_regex( + r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage, + 'video url', group='url') + + title = self._search_regex( + r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) + # video_title from flashvars contains whitespace instead of non-ASCII (see # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. 
- title = self._html_search_meta( + title = title or self._html_search_meta( 'twitter:title', webpage, default=None) or self._search_regex( (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', @@ -156,6 +169,7 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') + """ video_variables = {} for video_variablename, quote, video_variable in re.findall( r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): @@ -197,6 +211,7 @@ class PornHubIE(InfoExtractor): 'height': height, }) self._sort_formats(formats) + """ page_params = self._parse_json(self._search_regex( r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', @@ -209,6 +224,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, + 'url': video_url, 'uploader': video_uploader, 'title': title, 'thumbnail': thumbnail, @@ -217,7 +233,7 @@ class PornHubIE(InfoExtractor): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - 'formats': formats, + # 'formats': formats, 'age_limit': 18, 'tags': tags, 'categories': categories, From 22ce9ad2bdad2bf79b22f82cfff7f58156c9d349 Mon Sep 17 00:00:00 2001 From: Marek Rusinowski <marekrusinowski@gmail.com> Date: Mon, 13 Feb 2017 21:42:26 +0100 Subject: [PATCH 19/80] [vod.pl] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vodpl.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/vodpl.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 76ad7c40b..657e45e6f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1147,6 +1147,7 @@ from .vlive import ( VLiveChannelIE ) from .vodlocker import VodlockerIE +from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import 
VoiceRepublicIE from .voxmedia import VoxMediaIE diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py new file mode 100644 index 000000000..f612347ce --- /dev/null +++ b/youtube_dl/extractor/vodpl.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .onet import OnetBaseIE +from ..utils import clean_html + + +class VODPlIE(OnetBaseIE): + _VALID_URL = r'https?://vod\.pl/(?:.*/)?(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', + 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', + 'info_dict': { + 'id': '3ep3jns', + 'ext': 'mp4', + 'title': 'Chłopaki nie płaczą', + 'description': 'Kuba Brenner aby pomóc swojemu nieśmiałemu przyjacielowi Oskarowi wynajmuje w agencji towarzyskiej dwie panie. Po upojnej nocy okazuje się, że chłopcy nie byli przygotowani finansowo. "Opiekun artystyczny" dziewczyn zabiera w ramach rekompensaty drogocenną rzeźbę należącą do wujka Oskara. Kłopoty chłopców zaczynają się, gdy Kuba udaje się do agencji aby wykupić figurkę i trafia w sam środek mafijnej transakcji... Idiotyczny przypadek sprawia, że w klubie dochodzi do strzelaniny podczas której Grucha i Bolec zostają ranni, ginie również walizka z pieniędzmi... 
Podejrzenie pada na Kubę.', + 'timestamp': 1463415154, + 'duration': 5765, + 'upload_date': '20160516', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mvp_id = self._search_mvp_id(webpage) + + info_dict = self._extract_from_id(mvp_id, webpage) + info_dict.update({ + 'id': video_id, + 'description': clean_html(info_dict['description']).strip().replace('\r', '\n') + }) + + return info_dict From 6092ccd05844976ea946ba5277f2b00ccb5c7920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 00:52:31 +0700 Subject: [PATCH 20/80] [vodpl] Make more robust and add another test (closes #12122) --- youtube_dl/extractor/vodpl.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py index f612347ce..9e919708e 100644 --- a/youtube_dl/extractor/vodpl.py +++ b/youtube_dl/extractor/vodpl.py @@ -2,35 +2,31 @@ from __future__ import unicode_literals from .onet import OnetBaseIE -from ..utils import clean_html class VODPlIE(OnetBaseIE): - _VALID_URL = r'https?://vod\.pl/(?:.*/)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', 'info_dict': { 'id': '3ep3jns', 'ext': 'mp4', 'title': 'Chłopaki nie płaczą', - 'description': 'Kuba Brenner aby pomóc swojemu nieśmiałemu przyjacielowi Oskarowi wynajmuje w agencji towarzyskiej dwie panie. Po upojnej nocy okazuje się, że chłopcy nie byli przygotowani finansowo. "Opiekun artystyczny" dziewczyn zabiera w ramach rekompensaty drogocenną rzeźbę należącą do wujka Oskara. Kłopoty chłopców zaczynają się, gdy Kuba udaje się do agencji aby wykupić figurkę i trafia w sam środek mafijnej transakcji... 
Idiotyczny przypadek sprawia, że w klubie dochodzi do strzelaniny podczas której Grucha i Bolec zostają ranni, ginie również walizka z pieniędzmi... Podejrzenie pada na Kubę.', + 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224', 'timestamp': 1463415154, 'duration': 5765, 'upload_date': '20160516', }, - } + }, { + 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - mvp_id = self._search_mvp_id(webpage) - - info_dict = self._extract_from_id(mvp_id, webpage) - info_dict.update({ - 'id': video_id, - 'description': clean_html(info_dict['description']).strip().replace('\r', '\n') - }) - + info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage) + info_dict['id'] = video_id return info_dict From d31aa74fdb3f69071ba869feba03525f67e974f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 00:58:18 +0700 Subject: [PATCH 21/80] [onetmvp] Add shortcut extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/onet.py | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 657e45e6f..b2ee0c1b0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -694,6 +694,7 @@ from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, OnetChannelIE, + OnetMVPIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 0a501b3e5..46bad492a 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor): return self._search_regex( r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id') - def _extract_from_id(self, video_id, 
webpage): + def _extract_from_id(self, video_id, webpage=None): response = self._download_json( 'http://qi.ckm.onetapi.pl/', video_id, query={ @@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor): meta = video.get('meta', {}) - title = self._og_search_title(webpage, default=None) or meta['title'] - description = self._og_search_description(webpage, default=None) or meta.get('description') + title = (self._og_search_title( + webpage, default=None) if webpage else None) or meta['title'] + description = (self._og_search_description( + webpage, default=None) if webpage else None) or meta.get('description') duration = meta.get('length') or meta.get('lenght') timestamp = parse_iso8601(meta.get('addDate'), ' ') @@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor): } +class OnetMVPIE(OnetBaseIE): + _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)' + + _TEST = { + 'url': 'onetmvp:381027.1509591944', + 'only_matching': True, + } + + def _real_extract(self, url): + return self._extract_from_id(self._match_id(url)) + + class OnetIE(OnetBaseIE): _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)' IE_NAME = 'onet.tv' From 43a3d9edfcdad8eb33758c4a7f4f912322001b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 01:14:06 +0700 Subject: [PATCH 22/80] [onetpl] Add support for onet.pl (closes #10507) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/onet.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b2ee0c1b0..be3688d5a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -695,6 +695,7 @@ from .onet import ( OnetIE, OnetChannelIE, OnetMVPIE, + OnetPlIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 46bad492a..801aadbff 100644 
--- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -181,3 +181,35 @@ class OnetChannelIE(OnetBaseIE): channel_title = strip_or_none(get_element_by_class('o_channelName', webpage)) channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage)) return self.playlist_result(entries, channel_id, channel_title, channel_description) + + +class OnetPlIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' + IE_NAME = 'onet.pl' + + _TESTS = [{ + 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly', + 'md5': 'b94021eb56214c3969380388b6e73cb0', + 'info_dict': { + 'id': '1561707.1685479', + 'ext': 'mp4', + 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu', + 'description': 'md5:61fb0740084d2d702ea96512a03585b4', + 'upload_date': '20170214', + 'timestamp': 1487078046, + }, + }, { + 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mvp_id = self._search_regex( + r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id') + + return self.url_result( + 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id) From 04a741232f8e03cc91a3539066c66aed802076b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 01:23:55 +0700 Subject: [PATCH 23/80] [onetpl] Add support for businessinsider.com.pl and plejada.pl --- youtube_dl/extractor/onet.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 801aadbff..94f57990b 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -184,7 +184,7 @@ class OnetChannelIE(OnetBaseIE): class OnetPlIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' IE_NAME = 'onet.pl' _TESTS = [{ @@ -201,6 +201,15 @@ class OnetPlIE(InfoExtractor): }, { 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', 'only_matching': True, + }, { + 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e', + 'only_matching': True, + }, { + 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk', + 'only_matching': True, + }, { + 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89', + 'only_matching': True, }] def _real_extract(self, url): From 3021cf83b7cd45283fd1a72859e46f44e67ce7bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 02:08:32 +0700 Subject: [PATCH 24/80] [pinkbike] Fix uploader extraction (closes #12054) --- youtube_dl/extractor/pinkbike.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py index 6a4580d54..9f3501f77 100644 --- a/youtube_dl/extractor/pinkbike.py +++ b/youtube_dl/extractor/pinkbike.py @@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor): 'video:duration', webpage, 'duration')) uploader = self._search_regex( - r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False) + r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, + 'uploader', fatal=False) upload_date = unified_strdate(self._search_regex( r'class="fullTime"[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)) From 1bd05345ea4b91598ec04b8e0d33fd14f9e2eddc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 15 Feb 2017 14:18:50 +0100 Subject: [PATCH 25/80] [amcnetworks] fix extraction(closes #12127) --- youtube_dl/extractor/amcnetworks.py | 30 ++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 
deletions(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 87c803e94..b71d1a093 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE): 'mbr': 'true', 'manifest': 'm3u', } - media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url') + media_url = self._search_regex( + r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', + webpage, 'media url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id) + r'link\.theplatform\.com/s/([^?]+)', + media_url, 'theplatform_path'), display_id) info = self._parse_theplatform_metadata(theplatform_metadata) video_id = theplatform_metadata['pid'] title = theplatform_metadata['title'] rating = theplatform_metadata['ratings'][0]['rating'] - auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') + auth_required = self._search_regex( + r'window\.authRequired\s*=\s*(true|false);', + webpage, 'auth required') if auth_required == 'true': - requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id') - resource = self._get_mvpd_resource(requestor_id, title, video_id, rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource) + requestor_id = self._search_regex( + r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', + webpage, 'requestor id') + resource = self._get_mvpd_resource( + requestor_id, title, video_id, rating) + query['auth'] = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) media_url = update_url_query(media_url, query) - formats, subtitles = self._extract_theplatform_smil(media_url, video_id) + formats, subtitles = self._extract_theplatform_smil( + media_url, video_id) self._sort_formats(formats) info.update({ 
'id': video_id, @@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE): if ns_keys: ns = list(ns_keys)[0] series = theplatform_metadata.get(ns + '$show') - season_number = int_or_none(theplatform_metadata.get(ns + '$season')) + season_number = int_or_none( + theplatform_metadata.get(ns + '$season')) episode = theplatform_metadata.get(ns + '$episodeTitle') - episode_number = int_or_none(theplatform_metadata.get(ns + '$episode')) + episode_number = int_or_none( + theplatform_metadata.get(ns + '$episode')) if season_number: title = 'Season %d - %s' % (season_number, title) if series: From db13c16ef8968613680e2bbc85f373c3e74faf98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:12:10 +0700 Subject: [PATCH 26/80] [utils] Add support for quoted string literals in --match-filter (closes #8050, closes #12142, closes #12144) --- test/test_YoutubeDL.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 9 +++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8bf00bea9..d07c35be8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 from __future__ import unicode_literals @@ -606,6 +607,8 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', + 'uploader': "變態妍字幕版 太妍 тест", + 'creator': "тест ' 123 ' тест--", } second = { 'id': '2', @@ -616,6 +619,7 @@ class TestYoutubeDL(unittest.TestCase): 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', + 'uploader': "тест 123", } videos = [first, second] @@ -656,6 +660,26 @@ class TestYoutubeDL(unittest.TestCase): res = get_videos(f) self.assertEqual(res, ['1']) + f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f 
= match_filter_func('creator = "тест \' 123 \' тест--"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") + res = get_videos(f) + self.assertEqual(res, []) + def test_playlist_items_selection(self): entries = [{ 'id': compat_str(i), diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1279a9042..07c07be6f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2383,6 +2383,7 @@ def _match_one(filter_part, dct): \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| + (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| (?P<strval>(?![0-9.])[a-z0-9A-Z]*) ) \s*$ @@ -2391,7 +2392,8 @@ def _match_one(filter_part, dct): if m: op = COMPARISON_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) - if (m.group('strval') is not None or + if (m.group('quotedstrval') is not None or + m.group('strval') is not None or # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see @@ -2401,7 +2403,10 @@ def _match_one(filter_part, dct): if m.group('op') not in ('=', '!='): raise ValueError( 'Operator %s does not support string values!' 
% m.group('op')) - comparison_value = m.group('strval') or m.group('intval') + comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') + quote = m.group('quote') + if quote is not None: + comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: try: comparison_value = int(m.group('intval')) From 398dea321001b99ac4ad28d3d60a5317c4a439d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:20:46 +0700 Subject: [PATCH 27/80] [test_YoutubeDL] Fix invalid escape sequences --- test/test_YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index d07c35be8..2cfcf743a 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -541,10 +541,10 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(ydl._format_note({}), '') assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, - }), '^\s*10k$') + }), r'^\s*10k$') assertRegexpMatches(self, ydl._format_note({ 'fps': 30, - }), '^30fps$') + }), r'^30fps$') def test_postprocessors(self): filename = 'post-processor-testfile.mp4' From 099cfdb770f458de7cfdf3e814fbb9f43db217ea Mon Sep 17 00:00:00 2001 From: Anisse Astier <anisse@astier.eu> Date: Wed, 15 Feb 2017 17:28:31 +0100 Subject: [PATCH 28/80] [devscripts/run_tests.sh] Change permission for script to 755 --- .travis.yml | 2 -- devscripts/run_tests.sh | 0 2 files changed, 2 deletions(-) mode change 100644 => 100755 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 8ba93ec02..f41e11137 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,8 +11,6 @@ sudo: false env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download -before_script: - - chmod +x ./devscripts/run_tests.sh script: ./devscripts/run_tests.sh notifications: email: diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh old mode 100644 new mode 100755 From de4d378c0cd9035d4ab93dc6826a17c76f388641 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:38:00 +0700 Subject: [PATCH 29/80] [ceskatelevize] Prefix format ids --- youtube_dl/extractor/ceskatelevize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e08bf264c..1b16e5aaa 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -162,10 +162,10 @@ class CeskaTelevizeIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', entry_protocol='m3u8' if is_live else 'm3u8_native', - fatal=False)) + m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_mpd_formats( - stream_url, playlist_id, fatal=False)) + stream_url, playlist_id, mpd_id='dash', fatal=False)) if user_agent and len(entries) == playlist_len: entries[num]['formats'].extend(formats) From eafaeb226a277008fb8df72bf0326f2b369ff6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:04:15 +0700 Subject: [PATCH 30/80] [ceskatelevize] Lower priority for audio description sources (#12119) --- youtube_dl/extractor/ceskatelevize.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 1b16e5aaa..b1dfacf80 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -159,13 +159,19 @@ class CeskaTelevizeIE(InfoExtractor): formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): if 'playerType=flash' in stream_url: - formats.extend(self._extract_m3u8_formats( + stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', entry_protocol='m3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls-%s' % format_id, fatal=False) else: - 
formats.extend(self._extract_mpd_formats( - stream_url, playlist_id, mpd_id='dash', fatal=False)) + stream_formats = self._extract_mpd_formats( + stream_url, playlist_id, + mpd_id='dash-%s' % format_id, fatal=False) + # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031 + if format_id == 'audioDescription': + for f in stream_formats: + f['source_preference'] = -10 + formats.extend(stream_formats) if user_agent and len(entries) == playlist_len: entries[num]['formats'].extend(formats) From 3aa25395aa02b7a33e0fbf6d38e39fffee268255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:08:56 +0700 Subject: [PATCH 31/80] [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9242b3eee..912e1bbdc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core ++ [utils] Add support for quoted string literals in --match-filter (#8050, + #12142, #12144) + +Extractors +* [ceskatelevize] Lower priority for audio description sources (#12119) +* [amcnetworks] Fix extraction (#12127) +* [pinkbike] Fix uploader extraction (#12054) ++ [onetpl] Add support for businessinsider.com.pl and plejada.pl ++ [onetpl] Add support for onet.pl (#10507) ++ [onetmvp] Add shortcut extractor ++ [vodpl] Add support for vod.pl (#12122) ++ [pornhub] Extract video URL from tv platform site (#12007, #12129) ++ [ceskatelevize] Extract DASH formats (#12119, #12133) + + version 2017.02.14 Core From 2480b056c137e514662b70053ec2df1391b6c2ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:10:04 +0700 Subject: [PATCH 32/80] release 2017.02.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md 
index 32aa55d83..06711f73b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.14 +[debug] youtube-dl version 2017.02.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 912e1bbdc..8ef8a8307 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.16 Core + [utils] Add support for quoted string literals in --match-filter (#8050, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3e84f1237..5a436e8f7 100644 --- a/docs/supportedsites.md +++ 
b/docs/supportedsites.md @@ -546,8 +546,10 @@ - **OktoberfestTV** - **on.aol.com** - **OnDemandKorea** + - **onet.pl** - **onet.tv** - **onet.tv:channel** + - **OnetMVP** - **OnionStudios** - **Ooyala** - **OoyalaExternal** @@ -900,6 +902,7 @@ - **vlive** - **vlive:channel** - **Vodlocker** + - **VODPl** - **VODPlatform** - **VoiceRepublic** - **VoxMedia** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3e7e7c0bf..323e80954 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.14' +__version__ = '2017.02.16' From b898f0a173fa040ddf95dbd97650cec07a8f19f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 04:57:42 +0700 Subject: [PATCH 33/80] [elpais] Fix typo and improve extraction (closes #12139) --- youtube_dl/extractor/elpais.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py index 99e00cf3c..b89f6db62 100644 --- a/youtube_dl/extractor/elpais.py +++ b/youtube_dl/extractor/elpais.py @@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor): 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas', 'upload_date': '20170127', }, + }, { + 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html', + 'info_dict': { + 'id': '1487062137_075943', + 'ext': 'mp4', + 'title': 'Disyuntivas', + 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218', + 'upload_date': '20170214', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor): video_url = prefix + video_suffix thumbnail_suffix = self._search_regex( r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", - webpage, 'thumbnail URL', fatal=False) + webpage, 'thumbnail URL', 
default=None) thumbnail = ( None if thumbnail_suffix is None - else prefix + thumbnail_suffix) + else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage) title = self._html_search_regex( - (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title', - r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'), - webpage, 'title') + (r"tituloVideo\s*=\s*'([^']+)'", + r'<h2 class="entry-header entry-title.*?>(.*?)</h2>', + r'<h1[^>]+class="titulo"[^>]*>([^<]+)'), + webpage, 'title', default=None) or self._og_search_title(webpage) upload_date = unified_strdate(self._search_regex( r'<p class="date-header date-int updated"\s+title="([^"]+)">', webpage, 'upload date', default=None) or self._html_search_meta( From a4a554a79354981fcab55de8eaab7b95a40bbb48 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Feb 2017 23:42:36 +0800 Subject: [PATCH 34/80] [generic] Try parsing JWPlayer embedded videos (closes #12030) --- ChangeLog | 6 ++ youtube_dl/extractor/archiveorg.py | 4 +- youtube_dl/extractor/common.py | 118 ++++++++++++++++++++ youtube_dl/extractor/generic.py | 20 ++++ youtube_dl/extractor/jwplatform.py | 132 +---------------------- youtube_dl/extractor/ondemandkorea.py | 4 +- youtube_dl/extractor/pornhub.py | 44 -------- youtube_dl/extractor/pornoxo.py | 4 +- youtube_dl/extractor/rentv.py | 3 +- youtube_dl/extractor/rudo.py | 4 +- youtube_dl/extractor/screencastomatic.py | 4 +- youtube_dl/extractor/sendtonews.py | 4 +- youtube_dl/extractor/thisav.py | 4 +- youtube_dl/extractor/tvnoe.py | 4 +- youtube_dl/extractor/vidzi.py | 4 +- youtube_dl/extractor/wimp.py | 4 +- 16 files changed, 166 insertions(+), 197 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8ef8a8307..4e69b03d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [generic] Support complex JWPlayer embedded videos (#12030) + + version 2017.02.16 Core diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py 
index 486dff82d..e21045bed 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( unified_strdate, clean_html, ) -class ArchiveOrgIE(JWPlatformBaseIE): +class ArchiveOrgIE(InfoExtractor): IE_NAME = 'archive.org' IE_DESC = 'archive.org videos' _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9681453ca..f6ff56eda 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -40,6 +40,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, int_or_none, + js_to_json, parse_iso8601, RegexNotFoundError, sanitize_filename, @@ -2073,6 +2074,123 @@ class InfoExtractor(object): }) return formats + @staticmethod + def _find_jwplayer_data(webpage): + mobj = re.search( + r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', + webpage) + if mobj: + return mobj.group('options') + + def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + jwplayer_data = self._parse_json( + self._find_jwplayer_data(webpage), video_id, + transform_source=js_to_json) + return self._parse_jwplayer_data( + jwplayer_data, video_id, *args, **kwargs) + + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + # JWPlayer backward compatibility: flattened playlists + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if 'playlist' not in jwplayer_data: + jwplayer_data = {'playlist': [jwplayer_data]} + + entries = [] + + # JWPlayer backward compatibility: single playlist item + # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], 
list): + jwplayer_data['playlist'] = [jwplayer_data['playlist']] + + for video_data in jwplayer_data['playlist']: + # JWPlayer backward compatibility: flattened sources + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 + if 'sources' not in video_data: + video_data['sources'] = [video_data] + + this_video_id = video_id or video_data['mediaid'] + + formats = [] + for source in video_data['sources']: + source_url = self._proto_relative_url(source['file']) + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + source_type = source.get('type') or '' + ext = mimetype2ext(source_type) or determine_ext(source_url) + if source_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, this_video_id, mpd_id=mpd_id, fatal=False)) + # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 + elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + formats.append({ + 'url': source_url, + 'vcodec': 'none', + 'ext': ext, + }) + else: + height = int_or_none(source.get('height')) + if height is None: + # Often no height is provided but there is a label in + # format like 1080p. 
+ height = int_or_none(self._search_regex( + r'^(\d{3,})[pP]$', source.get('label') or '', + 'height', default=None)) + a_format = { + 'url': source_url, + 'width': int_or_none(source.get('width')), + 'height': height, + 'ext': ext, + } + if source_url.startswith('rtmp'): + a_format['ext'] = 'flv' + + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url_parts = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) + self._sort_formats(formats) + + subtitles = {} + tracks = video_data.get('tracks') + if tracks and isinstance(tracks, list): + for track in tracks: + if track.get('kind') != 'captions': + continue + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) + + entries.append({ + 'id': this_video_id, + 'title': video_data['title'] if require_title else video_data.get('title'), + 'description': video_data.get('description'), + 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'timestamp': int_or_none(video_data.get('pubdate')), + 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), + 'subtitles': subtitles, + 'formats': formats, + }) + if len(entries) == 1: + return entries[0] + else: + return self.playlist_result(entries) + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a2b0298ec..3db31debe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -20,6 +20,7 @@ from ..utils import ( float_or_none, HEADRequest, is_html, + js_to_json, 
orderedSet, sanitized_Request, smuggle_url, @@ -961,6 +962,16 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # Complex jwplayer + { + 'url': 'http://www.indiedb.com/games/king-machine/videos', + 'info_dict': { + 'id': 'videos', + 'ext': 'mp4', + 'title': 'king machine trailer 1', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -2488,6 +2499,15 @@ class GenericIE(InfoExtractor): self._sort_formats(entry['formats']) return self.playlist_result(entries) + jwplayer_data_str = self._find_jwplayer_data(webpage) + if jwplayer_data_str: + try: + jwplayer_data = self._parse_json( + jwplayer_data_str, video_id, transform_source=js_to_json) + return self._parse_jwplayer_data(jwplayer_data, video_id) + except ExtractorError: + pass + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index aff7ab49a..33d55f770 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,139 +4,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - js_to_json, - mimetype2ext, - urljoin, -) -class JWPlatformBaseIE(InfoExtractor): - @staticmethod - def _find_jwplayer_data(webpage): - # TODO: Merge this with JWPlayer-related codes in generic.py - - mobj = re.search( - r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', - webpage) - if mobj: - return mobj.group('options') - - def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): - jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id, - transform_source=js_to_json) - return self._parse_jwplayer_data( - jwplayer_data, video_id, *args, **kwargs) - - def _parse_jwplayer_data(self, jwplayer_data, 
video_id=None, require_title=True, - m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - - entries = [] - - # JWPlayer backward compatibility: single playlist item - # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] - - for video_data in jwplayer_data['playlist']: - # JWPlayer backward compatibility: flattened sources - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 - if 'sources' not in video_data: - video_data['sources'] = [video_data] - - this_video_id = video_id or video_data['mediaid'] - - formats = [] - for source in video_data['sources']: - source_url = self._proto_relative_url(source['file']) - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - source_url, this_video_id, mpd_id=mpd_id, fatal=False)) - # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): - formats.append({ - 'url': source_url, - 'vcodec': 'none', - 'ext': ext, - }) - else: - height = int_or_none(source.get('height')) - if height is None: - # Often no height is provided but there is a label in - # format like 1080p. 
- height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', - 'height', default=None)) - a_format = { - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': height, - 'ext': ext, - } - if source_url.startswith('rtmp'): - a_format['ext'] = 'flv' - - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as - # of jwplayer.flash.swf - rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) - if len(rtmp_url_parts) == 3: - rtmp_url, prefix, play_path = rtmp_url_parts - a_format.update({ - 'url': rtmp_url, - 'play_path': prefix + play_path, - }) - if rtmp_params: - a_format.update(rtmp_params) - formats.append(a_format) - self._sort_formats(formats) - - subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if track.get('kind') != 'captions': - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) - - entries.append({ - 'id': this_video_id, - 'title': video_data['title'] if require_title else video_data.get('title'), - 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), - 'timestamp': int_or_none(video_data.get('pubdate')), - 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), - 'subtitles': subtitles, - 'formats': formats, - }) - if len(entries) == 1: - return entries[0] - else: - return self.playlist_result(entries) - - -class JWPlatformIE(JWPlatformBaseIE): +class JWPlatformIE(InfoExtractor): _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _TEST = { 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index de1d6b08a..dcd157777 
100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, js_to_json, ) -class OnDemandKoreaIE(JWPlatformBaseIE): +class OnDemandKoreaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7a2737032..9b413590a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -169,50 +169,6 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - """ - video_variables = {} - for video_variablename, quote, video_variable in re.findall( - r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): - video_variables[video_variablename] = video_variable - - video_urls = [] - for encoded_video_url in re.findall( - r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage): - for varname, varval in video_variables.items(): - encoded_video_url = encoded_video_url.replace(varname, varval) - video_urls.append(re.sub(r'[\s+]', '', encoded_video_url)) - - if webpage.find('"encrypted":true') != -1: - password = compat_urllib_parse_unquote_plus( - self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) - video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) - - formats = [] - for video_url in video_urls: - path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] - format = path.split('/')[5].split('_')[:2] - format = '-'.join(format) - - m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format) - if m is None: - height = None - tbr = None - else: - height = 
int(m.group('height')) - tbr = int(m.group('tbr')) - - formats.append({ - 'url': video_url, - 'ext': extension, - 'format': format, - 'format_id': format, - 'tbr': tbr, - 'height': height, - }) - self._sort_formats(formats) - """ - page_params = self._parse_json(self._search_regex( r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', webpage, 'page parameters', group='data', default='{}'), diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 1a0cce7e0..2831368b6 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( str_to_int, ) -class PornoXOIE(JWPlatformBaseIE): +class PornoXOIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index 422c02cff..d338b3a93 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -2,11 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .jwplatform import JWPlatformBaseIE from ..compat import compat_str -class RENTVIE(JWPlatformBaseIE): +class RENTVIE(InfoExtractor): _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://ren.tv/video/epizod/118577', diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 3bfe934d8..51644011e 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( js_to_json, get_element_by_class, @@ -11,7 +11,7 @@ from 
..utils import ( ) -class RudoIE(JWPlatformBaseIE): +class RudoIE(InfoExtractor): _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' _TEST = { diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py index 94a2a37d2..b5e76c9af 100644 --- a/youtube_dl/extractor/screencastomatic.py +++ b/youtube_dl/extractor/screencastomatic.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import js_to_json -class ScreencastOMaticIE(JWPlatformBaseIE): +class ScreencastOMaticIE(InfoExtractor): _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)' _TEST = { 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py index 9880a5a78..9d9652949 100644 --- a/youtube_dl/extractor/sendtonews.py +++ b/youtube_dl/extractor/sendtonews.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( float_or_none, parse_iso8601, @@ -14,7 +14,7 @@ from ..utils import ( ) -class SendtoNewsIE(JWPlatformBaseIE): +class SendtoNewsIE(InfoExtractor): _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)' _TEST = { diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 4473a3c77..b7b3568cb 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import remove_end -class ThisAVIE(JWPlatformBaseIE): +class ThisAVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _TESTS = [{ 'url': 
'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index 6d5c74826..1a5b76bf2 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( clean_html, get_element_by_class, @@ -9,7 +9,7 @@ from ..utils import ( ) -class TVNoeIE(JWPlatformBaseIE): +class TVNoeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.tvnoe.cz/video/10362', diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 9950c62ad..1f1828fce 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( decode_packed_codes, js_to_json, @@ -12,7 +12,7 @@ from ..utils import ( ) -class VidziIE(JWPlatformBaseIE): +class VidziIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://vidzi.tv/cghql9yq6emu.html', diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 54eb51427..c022fb33e 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals +from .common import InfoExtractor from .youtube import YoutubeIE -from .jwplatform import JWPlatformBaseIE -class WimpIE(JWPlatformBaseIE): +class WimpIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.wimp.com/maru-is-exhausted/', From 4cead6a614b5a293e78dce5cd5eda7476f83985d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 22:02:01 +0700 
Subject: [PATCH 35/80] [einthusan] Relax _VALID_URL (closes #12141, closes #12159) --- youtube_dl/extractor/einthusan.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 8a2a17b63..3f6268637 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -18,8 +18,8 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', 'info_dict': { @@ -29,7 +29,10 @@ class EinthusanIE(InfoExtractor): 'description': 'md5:33ef934c82a671a94652a9b4e54d931b', 'thumbnail': r're:^https?://.*\.jpg$', } - } + }, { + 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', + 'only_matching': True, + }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js def _decrypt(self, encrypted_data, video_id): From fef51645d6c224f898ff6f44d041a458d21e8547 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 17 Feb 2017 23:13:51 +0800 Subject: [PATCH 36/80] [theplatform] Recognize URLs with whitespaces (closes #12044) --- ChangeLog | 1 + youtube_dl/extractor/generic.py | 7 ++++++- youtube_dl/extractor/theplatform.py | 6 ++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4e69b03d0..d5fe3dd5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [theplatform] Recognize URLs with whitespaces (#12044) + [generic] Support complex JWPlayer embedded videos (#12030) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3db31debe..9868ca6d0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1501,7 +1501,12 @@ class 
GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [VideoPressIE.ie_key()], - } + }, + { + # ThePlatform embedded with whitespaces in URLs + 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', + 'only_matching': True, + }, # { # # TODO: find another test # # http://schema.org/VideoObject diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 5c5987c6a..9a424b1c6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): if m: return [m.group('url')] + # Are whitesapces ignored in URLs? + # https://github.com/rg3/youtube-dl/issues/12044 matches = re.findall( - r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) if matches: - return list(zip(*matches))[1] + return [re.sub(r'\s', '', list(zip(*matches))[1][0])] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): From d94badc755228ee3159b9b499aa718d27fa472ed Mon Sep 17 00:00:00 2001 From: Vijay Singh <sudovijay@users.noreply.github.com> Date: Tue, 7 Feb 2017 10:32:45 +0530 Subject: [PATCH 37/80] [openload] Semifix extraction (closes #10408) just updated the code. i don't do much python still i tried to convert my code. 
lemme know if there is any prob with it --- youtube_dl/extractor/openload.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32289d897..bd1120fd8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,17 +75,20 @@ class OpenloadIE(InfoExtractor): '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>', webpage, 'openload ID') - first_three_chars = int(float(ol_id[0:][:3])) - fifth_char = int(float(ol_id[3:5])) - urlcode = '' - num = 5 + first_two_chars = int(float(ol_id[0:][:2])) + urlcode = {} + num = 2 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) + - first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) + key = int(float(ol_id[num + 3:][:2])) + urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) num += 5 + + sorted(urlcode, key=lambda key: urlcode[key]) - video_url = 'https://openload.co/stream/' + urlcode + urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) + + video_url = 'https://openload.co/stream/' + urllink title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From 90fad0e74cd8079246c5f3d8150650b5f65f998b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 22:31:16 +0700 Subject: [PATCH 38/80] [openload] Fix extraction (closes #12002) --- youtube_dl/extractor/openload.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index bd1120fd8..10896c442 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -76,19 +76,16 @@ class OpenloadIE(InfoExtractor): webpage, 'openload ID') first_two_chars = int(float(ol_id[0:][:2])) - urlcode = {} + urlcode = [] num = 2 while num < len(ol_id): key = 
int(float(ol_id[num + 3:][:2])) - urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) + urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) num += 5 - - sorted(urlcode, key=lambda key: urlcode[key]) - urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) - - video_url = 'https://openload.co/stream/' + urllink + video_url = 'https://openload.co/stream/' + ''.join( + [value for _, value in sorted(urlcode, key=lambda x: x[0])]) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From c2bde5d08163ce46548ea60333750a0a74a8fe44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <TRox1972@noreply.github.com> Date: Mon, 9 Jan 2017 18:22:53 +0100 Subject: [PATCH 39/80] [ellentv] Improve --- youtube_dl/extractor/ellentv.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 74bbc5c51..e0a13dd76 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -1,13 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import json - from .common import InfoExtractor -from ..utils import ( - ExtractorError, - NO_DEFAULT, -) +from .kaltura import KalturaIE +from ..utils import NO_DEFAULT class EllenTVIE(InfoExtractor): @@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor): if partner_id and kaltura_id: break - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) class EllenTVClipsIE(InfoExtractor): @@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor): 'id': 'meryl-streep-vanessa-hudgens', 'title': 'Meryl Streep, Vanessa Hudgens', }, - 'playlist_mincount': 7, + 'playlist_mincount': 5, } def _real_extract(self, url): playlist_id = self._match_id(url) webpage = 
self._download_webpage(url, playlist_id) - playlist = self._extract_playlist(webpage) + playlist = self._extract_playlist(webpage, playlist_id) return { '_type': 'playlist', @@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor): 'entries': self._extract_entries(playlist) } - def _extract_playlist(self, webpage): + def _extract_playlist(self, webpage, playlist_id): json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json') - try: - return json.loads('[{' + json_string + '}]') - except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + return self._parse_json('[{' + json_string + '}]', playlist_id) def _extract_entries(self, playlist): return [ self.url_result( 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']), - 'Kaltura') + KalturaIE.ie_key(), video_id=item['kaltura_entry_id']) for item in playlist] From db76c30c6ecb5d198a72f1807163c9b69771bba1 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher <tobias-git@23.gs> Date: Tue, 7 Jun 2016 23:42:56 +0200 Subject: [PATCH 40/80] [heise] Support videos embedded in any article. 
--- youtube_dl/extractor/heise.py | 119 +++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 1629cdb8d..a5ec0fae9 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -11,54 +11,115 @@ from ..utils import ( class HeiseIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?:www\.)?heise\.de/video/artikel/ - .+?(?P<id>[0-9]+)\.html(?:$|[?#]) + https?://(?:www\.)?heise\.de/.+?(?P<id>[0-9]+)\.html(?:$|[?#]) ''' - _TEST = { - 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404147', - 'ext': 'mp4', - 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" + _TESTS = [ + { + 'url': ( + 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' ), - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': r're:^https?://.*\.jpe?g$', - } - } + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': ( + "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411812600, + 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 
'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2403911', + 'ext': 'mp4', + 'title': ( + "c't uplink 3.3: Owncloud, Tastaturen, Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411803000, + 'upload_date': '20140927', + 'description': "In c't uplink erklären wir in dieser Woche, wie man mit Owncloud die Kontrolle über die eigenen Daten behält. Darüber hinaus erklären wir, dass zur Wahl der richtigen Tastatur mehr gehört, als man denkt und wie Smartphones uns weiter verraten.", + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404251', + 'ext': 'mp4', + 'title': ( + "c't uplink: Owncloud, Tastaturen, Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411811400, + 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 sprechen wir über Owncloud und wie man sich damit von Cloudanbietern emanzipieren kann. Außerdem erklären wir, woran man alles beim Kauf einer Tastatur denken sollte und was Smartphones nun über uns verraten.', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html' + ), + 'md5': '0616c9297d9c989f9b2a23b483b408c3', + 'info_dict': { + 'id': '3214137', + 'ext': 'mp4', + 'title': ( + "c\u2019t zockt \u201eGlitchspace\u201c, \u201eThe Mind's Eclipse\u201c und \u201eWindowframe\u201c." 
+ ), + 'format_id': 'mp4_720p', + 'timestamp': 1464011220, + 'upload_date': '20160523', + 'description': "Unsere Spiele-Tipps der Woche: Das Puzzle-Adventure Glitchspace, das Jump&Run-Spiel Windowframe und The Mind's Eclipse", + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + + ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) container_id = self._search_regex( - r'<div class="videoplayerjw".*?data-container="([0-9]+)"', + r'<div class="videoplayerjw"[^>]*data-container="([0-9]+)"', webpage, 'container ID') sequenz_id = self._search_regex( - r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"', + r'<div class="videoplayerjw"[^>]*data-sequenz="([0-9]+)"', webpage, 'sequenz ID') data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) doc = self._download_xml(data_url, video_id) info = { 'id': video_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': doc.find('.//{http://rss.jwpcdn.com/}image').text, 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)), - 'description': self._og_search_description(webpage), + self._html_search_meta('date', webpage)) } - title = self._html_search_meta('fulltitle', webpage) - if title: - info['title'] = title - else: - info['title'] = self._og_search_title(webpage) + title = self._html_search_meta('fulltitle', webpage, default=None) + if not title or title == "c't": + title = self._search_regex( + r'<div class="videoplayerjw"[^>]*data-title="([^"]+)"', + webpage, 'video title') + info['title'] = title + + desc = self._og_search_description(webpage, default=None) + if not desc: + desc = self._html_search_meta('description', webpage) + info['description'] = desc formats = [] for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): From bad4ccdb5db7c00865d433558ddfcdfdbd499343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 
23:09:40 +0700 Subject: [PATCH 41/80] [heise] Improve (closes #9725) --- youtube_dl/extractor/heise.py | 146 +++++++++++----------------------- 1 file changed, 48 insertions(+), 98 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index a5ec0fae9..382f32771 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -6,120 +6,58 @@ from ..utils import ( determine_ext, int_or_none, parse_iso8601, + xpath_text, ) class HeiseIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?heise\.de/.+?(?P<id>[0-9]+)\.html(?:$|[?#]) - ''' - _TESTS = [ - { - 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404147', - 'ext': 'mp4', - 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2403911', - 'ext': 'mp4', - 'title': ( - "c't uplink 3.3: Owncloud, Tastaturen, Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411803000, - 'upload_date': '20140927', - 'description': "In c't uplink erklären wir in dieser Woche, wie man mit Owncloud die Kontrolle über die eigenen Daten behält. 
Darüber hinaus erklären wir, dass zur Wahl der richtigen Tastatur mehr gehört, als man denkt und wie Smartphones uns weiter verraten.", - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404251', - 'ext': 'mp4', - 'title': ( - "c't uplink: Owncloud, Tastaturen, Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411811400, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 sprechen wir über Owncloud und wie man sich damit von Cloudanbietern emanzipieren kann. Außerdem erklären wir, woran man alles beim Kauf einer Tastatur denken sollte und was Smartphones nun über uns verraten.', - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html' - ), - 'md5': '0616c9297d9c989f9b2a23b483b408c3', - 'info_dict': { - 'id': '3214137', - 'ext': 'mp4', - 'title': ( - "c\u2019t zockt \u201eGlitchspace\u201c, \u201eThe Mind's Eclipse\u201c und \u201eWindowframe\u201c." 
- ), - 'format_id': 'mp4_720p', - 'timestamp': 1464011220, - 'upload_date': '20160523', - 'description': "Unsere Spiele-Tipps der Woche: Das Puzzle-Adventure Glitchspace, das Jump&Run-Spiel Windowframe und The Mind's Eclipse", - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - - ] + _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' + _TESTS = [{ + 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone", + 'format_id': 'mp4_720p', + 'timestamp': 1411812600, + 'upload_date': '20140927', + 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, { + 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', + 'only_matching': True, + }, { + 'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom', + 'only_matching': True, + }, { + 'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) container_id = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-container="([0-9]+)"', + r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') sequenz_id = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-sequenz="([0-9]+)"', + r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) - doc = self._download_xml(data_url, video_id) - - info = { - 'id': video_id, - 'thumbnail': 
doc.find('.//{http://rss.jwpcdn.com/}image').text, - 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)) - } title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": title = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-title="([^"]+)"', - webpage, 'video title') - info['title'] = title + r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') - desc = self._og_search_description(webpage, default=None) - if not desc: - desc = self._html_search_meta('description', webpage) - info['description'] = desc + doc = self._download_xml( + 'http://www.heise.de/videout/feed', video_id, query={ + 'container': container_id, + 'sequenz': sequenz_id, + }) formats = [] for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): @@ -135,6 +73,18 @@ class HeiseIE(InfoExtractor): 'height': height, }) self._sort_formats(formats) - info['formats'] = formats - return info + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or + self._og_search_thumbnail(webpage)), + 'timestamp': parse_iso8601( + self._html_search_meta('date', webpage)), + 'formats': formats, + } From 2c1f442c2bb4de65479f2e6c2f81c5741445184e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:18:26 +0700 Subject: [PATCH 42/80] [options] Add missing spaces --- youtube_dl/options.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 349f44778..2fea99ff2 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -298,14 +298,14 @@ def parseOpts(overrideArguments=None): metavar='FILTER', dest='match_filter', default=None, help=( 'Generic video filter. 
' - 'Specify any key (see help for -o for a list of available keys) to' - ' match if the key is present, ' - '!key to check if the key is not present,' + 'Specify any key (see help for -o for a list of available keys) to ' + 'match if the key is present, ' + '!key to check if the key is not present, ' 'key > NUMBER (like "comment_count > 12", also works with ' '>=, <, <=, !=, =) to compare against a number, and ' '& to require multiple matches. ' - 'Values which are not known are excluded unless you' - ' put a question mark (?) after the operator.' + 'Values which are not known are excluded unless you ' + 'put a question mark (?) after the operator. ' 'For example, to only match videos that have been liked more than ' '100 times and disliked less than 50 times (or the dislike ' 'functionality is not available at the given service), but who ' From cf3704c132800809caacc6ce89afa87f0dfae487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:47:54 +0700 Subject: [PATCH 43/80] [ChangeLog] Actualize --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index d5fe3dd5b..00ee0a5a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,13 @@ version <unreleased> Extractors +* [heise] Improve extraction (#9725) +* [ellentv] Improve (#11653) +* [openload] Fix extraction (#10408, #12002) + [theplatform] Recognize URLs with whitespaces (#12044) +* [einthusan] Relax URL regular expression (#12141, #12159) + [generic] Support complex JWPlayer embedded videos (#12030) +* [elpais] Improve extraction (#12139) version 2017.02.16 From 28e35f50702a8841b4caf072a546ff06ca63db96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:59:56 +0700 Subject: [PATCH 44/80] release 2017.02.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git 
a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 06711f73b..6f1361b32 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.16 +[debug] youtube-dl version 2017.02.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 00ee0a5a9..2c90f791d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.17 Extractors * [heise] Improve extraction (#9725) diff --git a/README.md b/README.md index 89876bd7a..c2a1a6b02 100644 --- a/README.md +++ b/README.md @@ 
-137,13 +137,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --match-filter FILTER Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to - check if the key is not present,key > + check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) - after the operator.For example, to only + after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 323e80954..530e1856b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.16' +__version__ = '2017.02.17' From 70bcc444a990ee9ca3daab6f3dc2d5d58a948ba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 18 Feb 2017 09:52:43 +0100 Subject: [PATCH 45/80] [viceland] improve info extraction and update test --- youtube_dl/extractor/vice.py | 6 +++--- youtube_dl/extractor/viceland.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 8a00c8fee..f0a7fd739 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -70,10 +70,10 @@ class ViceBaseIE(AdobePassIE): 'url': uplynk_preplay_url, 'id': video_id, 'title': title, - 'description': base.get('body'), + 'description': base.get('body') or base.get('display_body'), 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), - 'timestamp': 
int_or_none(video_data.get('created_at')), + 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating')), 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 0eff055a6..87f9216b5 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -7,16 +7,16 @@ from .vice import ViceBaseIE class VicelandIE(ViceBaseIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)' _TEST = { - 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', + 'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316', 'info_dict': { - 'id': '57608447973ee7705f6fbd4e', + 'id': '588a70d0dba8a16007de7316', 'ext': 'mp4', - 'title': 'CYBERWAR (Trailer)', - 'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.', + 'title': 'TRAPPED (Series Trailer)', + 'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf', 'age_limit': 14, - 'timestamp': 1466008539, - 'upload_date': '20160615', - 'uploader_id': '11', + 'timestamp': 1485474122, + 'upload_date': '20170126', + 'uploader_id': '57a204098cb727dec794c6a3', 'uploader': 'Viceland', }, 'params': { From bdabbc220c60ea6be50c9b1058405b636f70fb71 Mon Sep 17 00:00:00 2001 From: Alex Monk <krenair@gmail.com> Date: Wed, 17 Aug 2016 21:13:28 +0100 Subject: [PATCH 46/80] [metacafe] Bypass family filter If you don't send this user=ffilter: false cookie, it will 301 redirect you to a page asking about it, and then the title 
check will fail. --- youtube_dl/extractor/metacafe.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 9880924e6..adbd44fd1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -50,6 +50,18 @@ class MetacafeIE(InfoExtractor): }, 'skip': 'Page is temporarily unavailable.', }, + # metacafe video with family filter + { + 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/', + 'md5': 'b06082c5079bbdcde677a6291fbdf376', + 'info_dict': { + 'id': '2155630', + 'ext': 'mp4', + 'title': 'Adult Art By David Hart #156', + 'uploader': 'hartistry', + 'description': 'Adult Art By David Hart. All the Art Works presented here are not in the possession of the American Artist, David John Hart. The paintings are in collections worldwide of individuals, countries, art museums, foundations and charities.', + } + }, # AnyClip video { 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/', @@ -148,8 +160,9 @@ class MetacafeIE(InfoExtractor): # AnyClip videos require the flashversion cookie so that we get the link # to the mp4 file headers = {} + headers['Cookie'] = 'user=%7B%22ffilter%22%3Afalse%7D;'; if video_id.startswith('an-'): - headers['Cookie'] = 'flashVersion=0;' + headers['Cookie'] += ' flashVersion=0;' # Retrieve video webpage to extract further information webpage = self._download_webpage(url, video_id, headers=headers) From f75caf059eb7a1a156921124cbf4b720fea526e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 19:58:25 +0700 Subject: [PATCH 47/80] [metacafe] Improve (closes #10371) --- youtube_dl/extractor/metacafe.py | 38 +++++++++++--------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index adbd44fd1..28f59f63c 
100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -6,12 +6,12 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, ExtractorError, int_or_none, - urlencode_postdata, get_element_by_attribute, mimetype2ext, ) @@ -57,10 +57,13 @@ class MetacafeIE(InfoExtractor): 'info_dict': { 'id': '2155630', 'ext': 'mp4', - 'title': 'Adult Art By David Hart #156', - 'uploader': 'hartistry', - 'description': 'Adult Art By David Hart. All the Art Works presented here are not in the possession of the American Artist, David John Hart. The paintings are in collections worldwide of individuals, countries, art museums, foundations and charities.', - } + 'title': 'Adult Art By David Hart 156', + 'uploader': '63346', + 'description': 'md5:9afac8fc885252201ad14563694040fc', + }, + 'params': { + 'skip_download': True, + }, }, # AnyClip video { @@ -124,22 +127,6 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): self.to_screen('Retrieving disclaimer') - def _confirm_age(self): - # Retrieve disclaimer - self.report_disclaimer() - self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer') - - # Confirm age - self.report_age_confirmation() - self._download_webpage( - self._FILTER_POST, None, False, 'Unable to confirm age', - data=urlencode_postdata({ - 'filters': '0', - 'submit': "Continue - I'm over 18", - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - def _real_extract(self, url): # Extract id and simplified title from URL video_id, display_id = re.match(self._VALID_URL, url).groups() @@ -155,14 +142,15 @@ class MetacafeIE(InfoExtractor): if prefix == 'cb': return self.url_result('theplatform:%s' % ext_id, 'ThePlatform') - # self._confirm_age() + headers = { + # Disable family filter + 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False}) + } # 
AnyClip videos require the flashversion cookie so that we get the link # to the mp4 file - headers = {} - headers['Cookie'] = 'user=%7B%22ffilter%22%3Afalse%7D;'; if video_id.startswith('an-'): - headers['Cookie'] += ' flashVersion=0;' + headers['Cookie'] += 'flashVersion=0; ' # Retrieve video webpage to extract further information webpage = self._download_webpage(url, video_id, headers=headers) From a2e3286676606103601f9499154ad465037314d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 20:21:37 +0700 Subject: [PATCH 48/80] [thisav] Add support for html5 media (closes #11771) --- youtube_dl/extractor/thisav.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index b7b3568cb..33683b139 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -10,6 +10,7 @@ from ..utils import remove_end class ThisAVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _TESTS = [{ + # jwplayer 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', 'md5': '0480f1ef3932d901f0e0e719f188f19b', 'info_dict': { @@ -20,6 +21,7 @@ class ThisAVIE(InfoExtractor): 'uploader_id': 'dj7970' } }, { + # html5 media 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html', 'md5': 'ba90c076bd0f80203679e5b60bf523ee', 'info_dict': { @@ -48,8 +50,12 @@ class ThisAVIE(InfoExtractor): }], } else: - info_dict = self._extract_jwplayer_data( - webpage, video_id, require_title=False) + entries = self._parse_html5_media_entries(url, webpage, video_id) + if entries: + info_dict = entries[0] + else: + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) uploader = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', webpage, 'uploader name', fatal=False) From 
02d9b82a233abcb778f3f8601b229f996fd7df94 Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Wed, 11 Jan 2017 18:49:40 +0100 Subject: [PATCH 49/80] [tvn24] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvn24.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/tvn24.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index be3688d5a..55b4782d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1009,6 +1009,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py new file mode 100644 index 000000000..225ee4a6a --- /dev/null +++ b/youtube_dl/extractor/tvn24.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TVN24IE(InfoExtractor): + _VALID_URL = r'http://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html' + _TEST = { + 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', + 'md5': 'fbdec753d7bc29d96036808275f2130c', + 'info_dict': { + 'id': '1584444', + 'ext': 'mp4', + 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'thumbnail': 're:http://.*[.]jpeg', + } + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._html_search_regex(r'\bdata-poster="(.+?)"', webpage, 'data-poster') + share_params = self._html_search_regex(r'\bdata-share-params="(.+?)"', webpage, 'data-share-params') + 
share_params = self._parse_json(share_params, page_id) + video_id = share_params['id'] + quality_data = self._html_search_regex(r'\bdata-quality="(.+?)"', webpage, 'data-quality') + quality_data = self._parse_json(quality_data, page_id) + formats = [] + for format_id, url in quality_data.items(): + formats.append({ + 'format_id': format_id, + 'height': int(format_id.rstrip('p')), + 'url': url, + 'ext': 'mp4', + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } From e84888b4322abd2e2a74e8a89b7942a68dd0b6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 23:34:09 +0700 Subject: [PATCH 50/80] [tvn24] Improve extraction (closes #11679) --- youtube_dl/extractor/tvn24.py | 59 ++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 225ee4a6a..12ed6039c 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + unescapeHTML, +) class TVN24IE(InfoExtractor): - _VALID_URL = r'http://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html' - _TEST = { + _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html' + _TESTS = [{ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'md5': 'fbdec753d7bc29d96036808275f2130c', 'info_dict': { @@ -16,28 +20,53 @@ class TVN24IE(InfoExtractor): 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', 'thumbnail': 're:http://.*[.]jpeg', } - } + }, { + 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', + 'only_matching': True, + }, { + 'url': 
'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html', + 'only_matching': True, + }, { + 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html', + 'only_matching': True, + }] def _real_extract(self, url): - page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._html_search_regex(r'\bdata-poster="(.+?)"', webpage, 'data-poster') - share_params = self._html_search_regex(r'\bdata-share-params="(.+?)"', webpage, 'data-share-params') - share_params = self._parse_json(share_params, page_id) - video_id = share_params['id'] - quality_data = self._html_search_regex(r'\bdata-quality="(.+?)"', webpage, 'data-quality') - quality_data = self._parse_json(quality_data, page_id) + + def extract_json(attr, name, fatal=True): + return self._parse_json( + self._search_regex( + r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage, + name, group='json', fatal=fatal) or '{}', + video_id, transform_source=unescapeHTML, fatal=fatal) + + quality_data = extract_json('data-quality', 'formats') + formats = [] for format_id, url in quality_data.items(): formats.append({ - 'format_id': format_id, - 'height': int(format_id.rstrip('p')), 'url': url, - 'ext': 'mp4', + 'format_id': format_id, + 'height': int_or_none(format_id.rstrip('p')), }) self._sort_formats(formats) + + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_regex( + r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage, + 'thumbnail', group='url') + + share_params = extract_json( + 'data-share-params', 'share params', fatal=False) + if isinstance(share_params, dict): + video_id = share_params.get('id') or video_id 
+ return { 'id': video_id, 'title': title, From ac33accd96279ee541952aaa4f0bb72b4f76b9ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 23:59:26 +0700 Subject: [PATCH 51/80] [options] Mention quoted string literals for --match-filter --- youtube_dl/options.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2fea99ff2..deff54324 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -302,8 +302,10 @@ def parseOpts(overrideArguments=None): 'match if the key is present, ' '!key to check if the key is not present, ' 'key > NUMBER (like "comment_count > 12", also works with ' - '>=, <, <=, !=, =) to compare against a number, and ' - '& to require multiple matches. ' + '>=, <, <=, !=, =) to compare against a number, ' + 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) ' + 'to match against a string literal ' + 'and & to require multiple matches. ' 'Values which are not known are excluded unless you ' 'put a question mark (?) after the operator. 
' 'For example, to only match videos that have been liked more than ' From 049a0f4d6da55f4062658da7593363147c92f4a8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 18 Feb 2017 21:07:09 +0100 Subject: [PATCH 52/80] [brightcove:legacy] restrict videoPlayer value(closes #12040) --- youtube_dl/extractor/brightcove.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5c6e99da1..27685eed0 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -191,6 +191,10 @@ class BrightcoveLegacyIE(InfoExtractor): # These fields hold the id of the video videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') if videoPlayer is not None: + if isinstance(videoPlayer, list): + videoPlayer = videoPlayer[0] + if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')): + return None params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') if linkBase is not None: From bf5b9d859a1f2a68fda0dc57eb839448c7571dfa Mon Sep 17 00:00:00 2001 From: Pierre Mdawar <p.mdawar@gmail.com> Date: Mon, 17 Oct 2016 14:38:37 +0300 Subject: [PATCH 53/80] [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions --- youtube_dl/utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 07c07be6f..3f9e592e3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -701,7 +701,12 @@ def bug_reports_message(): return msg -class ExtractorError(Exception): +class YoutubeDLError(Exception): + """Base exception for YoutubeDL errors.""" + pass + + +class ExtractorError(YoutubeDLError): """Error during info extraction.""" def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): @@ -742,7 +747,7 @@ class RegexNotFoundError(ExtractorError): pass -class 
DownloadError(Exception): +class DownloadError(YoutubeDLError): """Download Error exception. This exception may be thrown by FileDownloader objects if they are not @@ -756,7 +761,7 @@ class DownloadError(Exception): self.exc_info = exc_info -class SameFileError(Exception): +class SameFileError(YoutubeDLError): """Same File exception. This exception will be thrown by FileDownloader objects if they detect @@ -765,7 +770,7 @@ class SameFileError(Exception): pass -class PostProcessingError(Exception): +class PostProcessingError(YoutubeDLError): """Post Processing exception. This exception may be raised by PostProcessor's .run() method to @@ -773,15 +778,16 @@ class PostProcessingError(Exception): """ def __init__(self, msg): + super(PostProcessingError, self).__init__(msg) self.msg = msg -class MaxDownloadsReached(Exception): +class MaxDownloadsReached(YoutubeDLError): """ --max-downloads limit has been reached. """ pass -class UnavailableVideoError(Exception): +class UnavailableVideoError(YoutubeDLError): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -790,7 +796,7 @@ class UnavailableVideoError(Exception): pass -class ContentTooShortError(Exception): +class ContentTooShortError(YoutubeDLError): """Content Too Short exception. 
This exception may be raised by FileDownloader objects when a file they @@ -799,12 +805,15 @@ class ContentTooShortError(Exception): """ def __init__(self, downloaded, expected): + super(ContentTooShortError, self).__init__( + 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected) + ) # Both in bytes self.downloaded = downloaded self.expected = expected -class XAttrMetadataError(Exception): +class XAttrMetadataError(YoutubeDLError): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code @@ -820,7 +829,7 @@ class XAttrMetadataError(Exception): self.reason = 'NOT_SUPPORTED' -class XAttrUnavailableError(Exception): +class XAttrUnavailableError(YoutubeDLError): pass From 773f291dcbce486fefe24e1abd29735d374d0a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:49:58 +0700 Subject: [PATCH 54/80] Add experimental geo restriction bypass mechanism Based on faking X-Forwarded-For HTTP header --- youtube_dl/YoutubeDL.py | 17 +++ youtube_dl/__init__.py | 2 + youtube_dl/extractor/common.py | 48 +++++- youtube_dl/options.py | 12 ++ youtube_dl/utils.py | 267 +++++++++++++++++++++++++++++++++ 5 files changed, 340 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a7bf5a1b0..ebace6b57 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -56,6 +56,8 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, + GeoRestrictedError, + ISO3166Utils, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -272,6 +274,13 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. 
+ bypass_geo_restriction: + Bypass geographic restriction via faking X-Forwarded-For + HTTP header (experimental) + bypass_geo_restriction_as_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing via faking + X-Forwarded-For HTTP header (experimental) The following options determine which downloader is picked: external_downloader: Executable of the external downloader to call. @@ -707,6 +716,14 @@ class YoutubeDL(object): return self.process_ie_result(ie_result, download, extra_info) else: return ie_result + except GeoRestrictedError as e: + msg = e.msg + if e.countries: + msg += '\nThis video is available in %s.' % ', '.join( + map(ISO3166Utils.short2full, e.countries)) + msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' + self.report_error(msg) + break except ExtractorError as e: # An error we somewhat expected self.report_error(compat_str(e), e.format_traceback()) break diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5c5b8094b..94f461a78 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -414,6 +414,8 @@ def _real_main(argv=None): 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'config_location': opts.config_location, + 'bypass_geo_restriction': opts.bypass_geo_restriction, + 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f6ff56eda..96815099d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -6,6 +6,7 @@ import hashlib import json import netrc import os +import random import re import socket import sys @@ -39,6 +40,8 @@ from ..utils import ( ExtractorError, fix_xml_ampersands, float_or_none, + GeoRestrictedError, + GeoUtils, int_or_none, js_to_json, parse_iso8601, @@ -320,17 +323,25 @@ class 
InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + _BYPASS_GEO attribute may be set to False in order to disable + geo restriction bypass mechanisms for a particular extractor. + Though it won't disable explicit geo restriction bypass based on + country code provided with bypass_geo_restriction_as_country. + Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ _ready = False _downloader = None + _x_forwarded_for_ip = None + _BYPASS_GEO = True _WORKING = True def __init__(self, downloader=None): """Constructor. Receives an optional downloader.""" self._ready = False + self._x_forwarded_for_ip = None self.set_downloader(downloader) @classmethod @@ -359,6 +370,10 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" + if not self._x_forwarded_for_ip: + country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) + if country_code: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if not self._ready: self._real_initialize() self._ready = True @@ -366,8 +381,22 @@ class InfoExtractor(object): def extract(self, url): """Extracts URL information and returns it in list of dicts.""" try: - self.initialize() - return self._real_extract(url) + for _ in range(2): + try: + self.initialize() + return self._real_extract(url) + except GeoRestrictedError as e: + if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and + self._BYPASS_GEO and + self._downloader.params.get('bypass_geo_restriction', True) and + not self._x_forwarded_for_ip and + e.countries): + self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' 
% self._x_forwarded_for_ip) + continue + raise except ExtractorError: raise except compat_http_client.IncompleteRead as e: @@ -434,6 +463,15 @@ class InfoExtractor(object): if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] + # Some sites check X-Forwarded-For HTTP header in order to figure out + # the origin of the client behind proxy. This allows bypassing geo + # restriction by faking this header's value to IP that belongs to some + # geo unrestricted country. We will do so once we encounter any + # geo restriction error. + if self._x_forwarded_for_ip: + if 'X-Forwarded-For' not in headers: + headers['X-Forwarded-For'] = self._x_forwarded_for_ip + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) if urlh is False: assert not fatal @@ -609,10 +647,8 @@ class InfoExtractor(object): expected=True) @staticmethod - def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): - raise ExtractorError( - '%s. You might want to use --proxy to workaround.' % msg, - expected=True) + def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None): + raise GeoRestrictedError(msg, countries=countries) # Methods for following #608 @staticmethod diff --git a/youtube_dl/options.py b/youtube_dl/options.py index deff54324..2e194f6dc 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -549,6 +549,18 @@ def parseOpts(overrideArguments=None): 'Upper bound of a range for randomized sleep before each download ' '(maximum possible number of seconds to sleep). 
Must only be used ' 'along with --min-sleep-interval.')) + workarounds.add_option( + '--bypass-geo', + action='store_true', dest='bypass_geo_restriction', default=True, + help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + workarounds.add_option( + '--no-bypass-geo', + action='store_false', dest='bypass_geo_restriction', default=True, + help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + workarounds.add_option( + '--bypass-geo-as-country', metavar='CODE', + dest='bypass_geo_restriction_as_country', default=None, + help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3f9e592e3..4e76b6b7b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -23,6 +23,7 @@ import operator import os import pipes import platform +import random import re import socket import ssl @@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError): pass +class GeoRestrictedError(ExtractorError): + """Geographic restriction Error exception. + + This exception may be thrown when a video is not available from your + geographic location due to geographic restrictions imposed by a website. + """ + def __init__(self, msg, countries=None): + super(GeoRestrictedError, self).__init__(msg, expected=True) + self.msg = msg + self.countries = countries + + class DownloadError(YoutubeDLError): """Download Error exception. 
@@ -3027,6 +3040,260 @@ class ISO3166Utils(object): return cls._country_map.get(code.upper()) +class GeoUtils(object): + # Major IPv4 address blocks per country + _country_ip_map = { + 'AD': '85.94.160.0/19', + 'AE': '94.200.0.0/13', + 'AF': '149.54.0.0/17', + 'AG': '209.59.64.0/18', + 'AI': '204.14.248.0/21', + 'AL': '46.99.0.0/16', + 'AM': '46.70.0.0/15', + 'AO': '105.168.0.0/13', + 'AP': '159.117.192.0/21', + 'AR': '181.0.0.0/12', + 'AS': '202.70.112.0/20', + 'AT': '84.112.0.0/13', + 'AU': '1.128.0.0/11', + 'AW': '181.41.0.0/18', + 'AZ': '5.191.0.0/16', + 'BA': '31.176.128.0/17', + 'BB': '65.48.128.0/17', + 'BD': '114.130.0.0/16', + 'BE': '57.0.0.0/8', + 'BF': '129.45.128.0/17', + 'BG': '95.42.0.0/15', + 'BH': '37.131.0.0/17', + 'BI': '154.117.192.0/18', + 'BJ': '137.255.0.0/16', + 'BL': '192.131.134.0/24', + 'BM': '196.12.64.0/18', + 'BN': '156.31.0.0/16', + 'BO': '161.56.0.0/16', + 'BQ': '161.0.80.0/20', + 'BR': '152.240.0.0/12', + 'BS': '24.51.64.0/18', + 'BT': '119.2.96.0/19', + 'BW': '168.167.0.0/16', + 'BY': '178.120.0.0/13', + 'BZ': '179.42.192.0/18', + 'CA': '99.224.0.0/11', + 'CD': '41.243.0.0/16', + 'CF': '196.32.200.0/21', + 'CG': '197.214.128.0/17', + 'CH': '85.0.0.0/13', + 'CI': '154.232.0.0/14', + 'CK': '202.65.32.0/19', + 'CL': '152.172.0.0/14', + 'CM': '165.210.0.0/15', + 'CN': '36.128.0.0/10', + 'CO': '181.240.0.0/12', + 'CR': '201.192.0.0/12', + 'CU': '152.206.0.0/15', + 'CV': '165.90.96.0/19', + 'CW': '190.88.128.0/17', + 'CY': '46.198.0.0/15', + 'CZ': '88.100.0.0/14', + 'DE': '53.0.0.0/8', + 'DJ': '197.241.0.0/17', + 'DK': '87.48.0.0/12', + 'DM': '192.243.48.0/20', + 'DO': '152.166.0.0/15', + 'DZ': '41.96.0.0/12', + 'EC': '186.68.0.0/15', + 'EE': '90.190.0.0/15', + 'EG': '156.160.0.0/11', + 'ER': '196.200.96.0/20', + 'ES': '88.0.0.0/11', + 'ET': '196.188.0.0/14', + 'EU': '2.16.0.0/13', + 'FI': '91.152.0.0/13', + 'FJ': '144.120.0.0/16', + 'FM': '119.252.112.0/20', + 'FO': '88.85.32.0/19', + 'FR': '90.0.0.0/9', + 'GA': '41.158.0.0/15', + 'GB': 
'25.0.0.0/8', + 'GD': '74.122.88.0/21', + 'GE': '31.146.0.0/16', + 'GF': '161.22.64.0/18', + 'GG': '62.68.160.0/19', + 'GH': '45.208.0.0/14', + 'GI': '85.115.128.0/19', + 'GL': '88.83.0.0/19', + 'GM': '160.182.0.0/15', + 'GN': '197.149.192.0/18', + 'GP': '104.250.0.0/19', + 'GQ': '105.235.224.0/20', + 'GR': '94.64.0.0/13', + 'GT': '168.234.0.0/16', + 'GU': '168.123.0.0/16', + 'GW': '197.214.80.0/20', + 'GY': '181.41.64.0/18', + 'HK': '113.252.0.0/14', + 'HN': '181.210.0.0/16', + 'HR': '93.136.0.0/13', + 'HT': '148.102.128.0/17', + 'HU': '84.0.0.0/14', + 'ID': '39.192.0.0/10', + 'IE': '87.32.0.0/12', + 'IL': '79.176.0.0/13', + 'IM': '5.62.80.0/20', + 'IN': '117.192.0.0/10', + 'IO': '203.83.48.0/21', + 'IQ': '37.236.0.0/14', + 'IR': '2.176.0.0/12', + 'IS': '82.221.0.0/16', + 'IT': '79.0.0.0/10', + 'JE': '87.244.64.0/18', + 'JM': '72.27.0.0/17', + 'JO': '176.29.0.0/16', + 'JP': '126.0.0.0/8', + 'KE': '105.48.0.0/12', + 'KG': '158.181.128.0/17', + 'KH': '36.37.128.0/17', + 'KI': '103.25.140.0/22', + 'KM': '197.255.224.0/20', + 'KN': '198.32.32.0/19', + 'KP': '175.45.176.0/22', + 'KR': '175.192.0.0/10', + 'KW': '37.36.0.0/14', + 'KY': '64.96.0.0/15', + 'KZ': '2.72.0.0/13', + 'LA': '115.84.64.0/18', + 'LB': '178.135.0.0/16', + 'LC': '192.147.231.0/24', + 'LI': '82.117.0.0/19', + 'LK': '112.134.0.0/15', + 'LR': '41.86.0.0/19', + 'LS': '129.232.0.0/17', + 'LT': '78.56.0.0/13', + 'LU': '188.42.0.0/16', + 'LV': '46.109.0.0/16', + 'LY': '41.252.0.0/14', + 'MA': '105.128.0.0/11', + 'MC': '88.209.64.0/18', + 'MD': '37.246.0.0/16', + 'ME': '178.175.0.0/17', + 'MF': '74.112.232.0/21', + 'MG': '154.126.0.0/17', + 'MH': '117.103.88.0/21', + 'MK': '77.28.0.0/15', + 'ML': '154.118.128.0/18', + 'MM': '37.111.0.0/17', + 'MN': '49.0.128.0/17', + 'MO': '60.246.0.0/16', + 'MP': '202.88.64.0/20', + 'MQ': '109.203.224.0/19', + 'MR': '41.188.64.0/18', + 'MS': '208.90.112.0/22', + 'MT': '46.11.0.0/16', + 'MU': '105.16.0.0/12', + 'MV': '27.114.128.0/18', + 'MW': '105.234.0.0/16', + 'MX': 
'187.192.0.0/11', + 'MY': '175.136.0.0/13', + 'MZ': '197.218.0.0/15', + 'NA': '41.182.0.0/16', + 'NC': '101.101.0.0/18', + 'NE': '197.214.0.0/18', + 'NF': '203.17.240.0/22', + 'NG': '105.112.0.0/12', + 'NI': '186.76.0.0/15', + 'NL': '145.96.0.0/11', + 'NO': '84.208.0.0/13', + 'NP': '36.252.0.0/15', + 'NR': '203.98.224.0/19', + 'NU': '49.156.48.0/22', + 'NZ': '49.224.0.0/14', + 'OM': '5.36.0.0/15', + 'PA': '186.72.0.0/15', + 'PE': '186.160.0.0/14', + 'PF': '123.50.64.0/18', + 'PG': '124.240.192.0/19', + 'PH': '49.144.0.0/13', + 'PK': '39.32.0.0/11', + 'PL': '83.0.0.0/11', + 'PM': '70.36.0.0/20', + 'PR': '66.50.0.0/16', + 'PS': '188.161.0.0/16', + 'PT': '85.240.0.0/13', + 'PW': '202.124.224.0/20', + 'PY': '181.120.0.0/14', + 'QA': '37.210.0.0/15', + 'RE': '139.26.0.0/16', + 'RO': '79.112.0.0/13', + 'RS': '178.220.0.0/14', + 'RU': '5.136.0.0/13', + 'RW': '105.178.0.0/15', + 'SA': '188.48.0.0/13', + 'SB': '202.1.160.0/19', + 'SC': '154.192.0.0/11', + 'SD': '154.96.0.0/13', + 'SE': '78.64.0.0/12', + 'SG': '152.56.0.0/14', + 'SI': '188.196.0.0/14', + 'SK': '78.98.0.0/15', + 'SL': '197.215.0.0/17', + 'SM': '89.186.32.0/19', + 'SN': '41.82.0.0/15', + 'SO': '197.220.64.0/19', + 'SR': '186.179.128.0/17', + 'SS': '105.235.208.0/21', + 'ST': '197.159.160.0/19', + 'SV': '168.243.0.0/16', + 'SX': '190.102.0.0/20', + 'SY': '5.0.0.0/16', + 'SZ': '41.84.224.0/19', + 'TC': '65.255.48.0/20', + 'TD': '154.68.128.0/19', + 'TG': '196.168.0.0/14', + 'TH': '171.96.0.0/13', + 'TJ': '85.9.128.0/18', + 'TK': '27.96.24.0/21', + 'TL': '180.189.160.0/20', + 'TM': '95.85.96.0/19', + 'TN': '197.0.0.0/11', + 'TO': '175.176.144.0/21', + 'TR': '78.160.0.0/11', + 'TT': '186.44.0.0/15', + 'TV': '202.2.96.0/19', + 'TW': '120.96.0.0/11', + 'TZ': '156.156.0.0/14', + 'UA': '93.72.0.0/13', + 'UG': '154.224.0.0/13', + 'US': '3.0.0.0/8', + 'UY': '167.56.0.0/13', + 'UZ': '82.215.64.0/18', + 'VA': '212.77.0.0/19', + 'VC': '24.92.144.0/20', + 'VE': '186.88.0.0/13', + 'VG': '172.103.64.0/18', + 'VI': 
'146.226.0.0/16', + 'VN': '14.160.0.0/11', + 'VU': '202.80.32.0/20', + 'WF': '117.20.32.0/21', + 'WS': '202.4.32.0/19', + 'YE': '134.35.0.0/16', + 'YT': '41.242.116.0/22', + 'ZA': '41.0.0.0/11', + 'ZM': '165.56.0.0/13', + 'ZW': '41.85.192.0/19', + } + + @classmethod + def random_ipv4(cls, code): + block = cls._country_ip_map.get(code.upper()) + if not block: + return None + addr, preflen = block.split('/') + addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] + addr_max = addr_min | (0xffffffff >> int(preflen)) + return socket.inet_ntoa( + compat_struct_pack('!I', random.randint(addr_min, addr_max))) + + class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): def __init__(self, proxies=None): # Set default handlers From d392005a795a6cf85fda3c0f982254f8a2731e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:16 +0700 Subject: [PATCH 55/80] [dramafever] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/dramafever.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index bcd9fe2a0..755db806a 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -116,8 +116,9 @@ class DramaFeverIE(DramaFeverBaseIE): 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): - raise ExtractorError( - 'Currently unavailable in your country.', expected=True) + self.raise_geo_restricted( + msg='Currently unavailable in your country', + countries=['US', 'CA']) raise series_id, episode_number = video_id.split('.') From e633f21a96f37a96e8ef0fd4d6c1e4d3c0b41fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:33 +0700 Subject: [PATCH 56/80] [go] Improve geo restriction detection and use geo bypass 
mechanism --- youtube_dl/extractor/go.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index f28e6fbf5..ec902c670 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -101,6 +101,10 @@ class GoIE(AdobePassIE): video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) errors = entitlement.get('errors', {}).get('errors', []) if errors: + for error in errors: + if error.get('code') == 1002: + self.raise_geo_restricted( + error['message'], countries=['US']) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] From 28200e654b8051cadca12e51bd57f77e1ff0a4ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:51 +0700 Subject: [PATCH 57/80] [itv] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/itv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index b0d860452..aabde15f3 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -98,7 +98,10 @@ class ITVIE(InfoExtractor): headers=headers, data=etree.tostring(req_env)) playlist = xpath_element(resp_env, './/Playlist') if playlist is None: + fault_code = xpath_text(resp_env, './/faultcode') fault_string = xpath_text(resp_env, './/faultstring') + if fault_code == 'InvalidGeoRegion': + self.raise_geo_restricted(msg=fault_string, countries=['GB']) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) From ff4007891fde74212eb0898bb04c14b2de92ed03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 
2017 18:51:59 +0700 Subject: [PATCH 58/80] [nrk] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/nrk.py | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index fc3c0cd3c..78ece33e1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import random import re from .common import InfoExtractor @@ -15,25 +14,6 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): - _faked_ip = None - - def _download_webpage_handle(self, *args, **kwargs): - # NRK checks X-Forwarded-For HTTP header in order to figure out the - # origin of the client behind proxy. This allows to bypass geo - # restriction by faking this header's value to some Norway IP. - # We will do so once we encounter any geo restriction error. - if self._faked_ip: - # NB: str is intentional - kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip - return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs) - - def _fake_ip(self): - # Use fake IP from 37.191.128.0/17 in order to workaround geo - # restriction - def octet(lb=0, ub=255): - return random.randint(lb, ub) - self._faked_ip = '37.191.%d.%d' % (octet(128), octet()) - def _real_extract(self, url): video_id = self._match_id(url) @@ -44,8 +24,6 @@ class NRKBaseIE(InfoExtractor): title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id - http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {} - entries = [] conviva = data.get('convivaStatistics') or {} @@ -90,7 +68,6 @@ class NRKBaseIE(InfoExtractor): 'duration': duration, 'subtitles': subtitles, 'formats': formats, - 'http_headers': http_headers, }) if not entries: @@ -107,19 +84,16 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - message_type = 
data.get('messageType', '') - # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* - if 'IsGeoBlocked' in message_type and not self._faked_ip: - self.report_warning( - 'Video is geo restricted, trying to fake IP') - self._fake_ip() - return self._real_extract(url) - MESSAGES = { 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', 'ProgramRightsHasExpired': 'Programmet har gått ut', 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', } + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type: + self.raise_geo_restricted( + msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) raise ExtractorError( '%s said: %s' % (self.IE_NAME, MESSAGES.get( message_type, message_type)), From 01b1aa9ff408ce15b8bbea08dbc190f3282141a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:11 +0700 Subject: [PATCH 59/80] [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/ondemandkorea.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index dcd157777..0c85d549e 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -35,7 +35,8 @@ class OnDemandKoreaIE(InfoExtractor): if 'msg_block_01.png' in webpage: self.raise_geo_restricted( - 'This content is not available in your region') + msg='This content is not available in your region', + countries=['US', 'CA']) if 'This video is only available to ODK PLUS members.' 
in webpage: raise ExtractorError( From 8ab8066cf08352ad336c3ff594d0ac27f6c809c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:20 +0700 Subject: [PATCH 60/80] [pbs] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/pbs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 6baed773f..64f47bae3 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -489,11 +489,12 @@ class PBSIE(InfoExtractor): headers=self.geo_verification_headers()) if redirect_info['status'] == 'error': + message = self._ERRORS.get( + redirect_info['http_code'], redirect_info['message']) + if redirect_info['http_code'] == 403: + self.raise_geo_restricted(msg=message, countries=['US']) raise ExtractorError( - '%s said: %s' % ( - self.IE_NAME, - self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])), - expected=True) + '%s said: %s' % (self.IE_NAME, message), expected=True) format_url = redirect_info.get('url') if not format_url: From 04d906eae3071e37049cfcd2a02e9079b72a265c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:33 +0700 Subject: [PATCH 61/80] [svt] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/svt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 10cf80885..f2a2200bf 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -38,7 +38,8 @@ class SVTBaseIE(InfoExtractor): 'url': vurl, }) if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): - self.raise_geo_restricted('This video is only available in Sweden') + self.raise_geo_restricted( + 'This video is only available in Sweden', countries=['SE']) self._sort_formats(formats) subtitles = {} 
From 89cc7fe7705b6534f434b514265a0507b70ef40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:42 +0700 Subject: [PATCH 62/80] [vbox7] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/vbox7.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index bef639462..f86d804c1 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -78,7 +78,7 @@ class Vbox7IE(InfoExtractor): video_url = video['src'] if '/na.mp4' in video_url: - self.raise_geo_restricted() + self.raise_geo_restricted(countries=['BG']) uploader = video.get('uploader') From 71631862f4de5a10223642ebdbd5e10db374d270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:23 +0700 Subject: [PATCH 63/80] [srgssr] Improve geo restriction detection --- youtube_dl/extractor/srgssr.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 319a48a7a..a35a0a538 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -14,6 +14,7 @@ from ..utils import ( class SRGSSRIE(InfoExtractor): _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' + _BYPASS_GEO = False _ERRORS = { 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. 
and 6 a.m.', @@ -40,8 +41,11 @@ class SRGSSRIE(InfoExtractor): media_id)[media_type.capitalize()] if media_data.get('block') and media_data['block'] in self._ERRORS: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, self._ERRORS[media_data['block']]), expected=True) + message = self._ERRORS[media_data['block']] + if media_data['block'] == 'GEOBLOCK': + self.raise_geo_restricted(msg=message, countries=['CH']) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) return media_data From 80b59020e02e9c61f74f8f8f8891f9745667edb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:31 +0700 Subject: [PATCH 64/80] [vgtv] Improve geo restriction detection --- youtube_dl/extractor/vgtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 8a574bc26..1709fd6bb 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -14,6 +14,7 @@ from ..utils import ( class VGTVIE(XstreamIE): IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' + _BYPASS_GEO = False _HOST_TO_APPNAME = { 'vgtv.no': 'vgtv', @@ -217,7 +218,7 @@ class VGTVIE(XstreamIE): properties = try_get( data, lambda x: x['streamConfiguration']['properties'], list) if properties and 'geoblocked' in properties: - raise self.raise_geo_restricted() + raise self.raise_geo_restricted(countries=['NO']) self._sort_formats(info['formats']) From 5d3fbf77d96ade64c645b6942979c0b99aa4d775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:39 +0700 Subject: [PATCH 65/80] [viki] Improve geo restriction detection --- youtube_dl/extractor/viki.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 9c48701c1..68a74e246 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ 
-27,6 +27,7 @@ class VikiBaseIE(InfoExtractor): _APP_VERSION = '2.2.5.1428709186' _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' + _BYPASS_GEO = False _NETRC_MACHINE = 'viki' _token = None @@ -77,8 +78,11 @@ class VikiBaseIE(InfoExtractor): def _check_errors(self, data): for reason, status in data.get('blocking', {}).items(): if status and reason in self._ERRORS: + message = self._ERRORS[reason] + if reason == 'geo': + self.raise_geo_restricted(msg=message) raise ExtractorError('%s said: %s' % ( - self.IE_NAME, self._ERRORS[reason]), expected=True) + self.IE_NAME, message), expected=True) def _real_initialize(self): self._login() From 18a0defab063523cd76a30be2dd5a80e9f9172d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 20:26:43 +0700 Subject: [PATCH 66/80] [utils] Make random_ipv4 return unicode string --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4e76b6b7b..cbf7639c5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3290,8 +3290,8 @@ class GeoUtils(object): addr, preflen = block.split('/') addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] addr_max = addr_min | (0xffffffff >> int(preflen)) - return socket.inet_ntoa( - compat_struct_pack('!I', random.randint(addr_min, addr_max))) + return compat_str(socket.inet_ntoa( + compat_struct_pack('!I', random.randint(addr_min, addr_max)))) class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): From 0016b84e16965a07c52946c4672363153e8b18a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 21:06:07 +0700 Subject: [PATCH 67/80] Add faked X-Forwarded-For to formats' HTTP headers --- youtube_dl/YoutubeDL.py | 14 ++++++++++++++ youtube_dl/extractor/common.py | 5 ++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index ebace6b57..1c04e46c1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -864,8 +864,14 @@ class YoutubeDL(object): if self.params.get('playlistrandom', False): random.shuffle(entries) + x_forwarded_for = ie_result.get('__x_forwarded_for_ip') + for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) + # This __x_forwarded_for_ip thing is a bit ugly but requires + # minimal changes + if x_forwarded_for: + entry['__x_forwarded_for_ip'] = x_forwarded_for extra = { 'n_entries': n_entries, 'playlist': playlist, @@ -1250,6 +1256,11 @@ class YoutubeDL(object): if cookies: res['Cookie'] = cookies + if 'X-Forwarded-For' not in res: + x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') + if x_forwarded_for_ip: + res['X-Forwarded-For'] = x_forwarded_for_ip + return res def _calc_cookies(self, info_dict): @@ -1392,6 +1403,9 @@ class YoutubeDL(object): full_format_info = info_dict.copy() full_format_info.update(format) format['http_headers'] = self._calc_headers(full_format_info) + # Remove private housekeeping stuff + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] # TODO Central sorting goes here diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 96815099d..c1f7f28a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -384,7 +384,10 @@ class InfoExtractor(object): for _ in range(2): try: self.initialize() - return self._real_extract(url) + ie_result = self._real_extract(url) + if self._x_forwarded_for_ip: + ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip + return ie_result except GeoRestrictedError as e: if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and self._BYPASS_GEO and From 0a840f584c3f1fedb6957c05587dec697143f2d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 
01:53:41 +0700 Subject: [PATCH 68/80] Rename bypass geo restriction options --- youtube_dl/YoutubeDL.py | 5 ++--- youtube_dl/__init__.py | 4 ++-- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/options.py | 12 ++++++------ 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1c04e46c1..68000dea2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -274,10 +274,9 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. - bypass_geo_restriction: - Bypass geographic restriction via faking X-Forwarded-For + geo_bypass: Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental) - bypass_geo_restriction_as_country: + geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for explicit geographic restriction bypassing via faking X-Forwarded-For HTTP header (experimental) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 94f461a78..f91d29a7b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -414,8 +414,8 @@ def _real_main(argv=None): 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'config_location': opts.config_location, - 'bypass_geo_restriction': opts.bypass_geo_restriction, - 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, + 'geo_bypass': opts.geo_bypass, + 'geo_bypass_country': opts.geo_bypass_country, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c1f7f28a0..6eb6a25b8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -326,7 +326,7 @@ class InfoExtractor(object): _BYPASS_GEO attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. 
Though it won't disable explicit geo restriction bypass based on - country code provided with bypass_geo_restriction_as_country. + country code provided with geo_bypass_country. Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. @@ -371,7 +371,7 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" if not self._x_forwarded_for_ip: - country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) + country_code = self._downloader.params.get('geo_bypass_country', None) if country_code: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if not self._ready: @@ -389,9 +389,9 @@ class InfoExtractor(object): ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip return ie_result except GeoRestrictedError as e: - if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and + if (not self._downloader.params.get('geo_bypass_country', None) and self._BYPASS_GEO and - self._downloader.params.get('bypass_geo_restriction', True) and + self._downloader.params.get('geo_bypass', True) and not self._x_forwarded_for_ip and e.countries): self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2e194f6dc..ae3f50754 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -550,16 +550,16 @@ def parseOpts(overrideArguments=None): '(maximum possible number of seconds to sleep). 
Must only be used ' 'along with --min-sleep-interval.')) workarounds.add_option( - '--bypass-geo', - action='store_true', dest='bypass_geo_restriction', default=True, + '--geo-bypass', + action='store_true', dest='geo_bypass', default=True, help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') workarounds.add_option( - '--no-bypass-geo', - action='store_false', dest='bypass_geo_restriction', default=True, + '--no-geo-bypass', + action='store_false', dest='geo_bypass', default=True, help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') workarounds.add_option( - '--bypass-geo-as-country', metavar='CODE', - dest='bypass_geo_restriction_as_country', default=None, + '--geo-bypass-country', metavar='CODE', + dest='geo_bypass_country', default=None, help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From 4248dad92bd87650c791194276296b148f668e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 03:53:23 +0700 Subject: [PATCH 69/80] Improve geo bypass mechanism * Rename options so their prefix matches --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors * Implement faking IP right away for sites with known geo restriction --- youtube_dl/extractor/common.py | 59 +++++++++++++++++++-------- youtube_dl/extractor/dramafever.py | 3 +- youtube_dl/extractor/go.py | 3 +- youtube_dl/extractor/itv.py | 4 +- youtube_dl/extractor/nrk.py | 4 +- youtube_dl/extractor/ondemandkorea.py | 3 +- youtube_dl/extractor/pbs.py | 5 ++- youtube_dl/extractor/srgssr.py | 6 ++- youtube_dl/extractor/svt.py | 4 +- youtube_dl/extractor/vbox7.py | 3 +- youtube_dl/extractor/vgtv.py | 5 ++- youtube_dl/extractor/viki.py | 2 +- youtube_dl/utils.py | 2 +- 13 files changed, 72 insertions(+), 31 deletions(-) diff --git
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6eb6a25b8..272da74b6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -323,10 +323,15 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. - _BYPASS_GEO attribute may be set to False in order to disable + _GEO_BYPASS attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on - country code provided with geo_bypass_country. + country code provided with geo_bypass_country. (experimental) + + _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted + countries for this extractor. One of these countries will be used by + geo restriction bypass mechanism right away in order to bypass + geo restriction, of course, if the mechanism is not disabled. (experimental) Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. 
@@ -335,7 +340,8 @@ class InfoExtractor(object): _ready = False _downloader = None _x_forwarded_for_ip = None - _BYPASS_GEO = True + _GEO_BYPASS = True + _GEO_COUNTRIES = None _WORKING = True def __init__(self, downloader=None): @@ -370,14 +376,28 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" - if not self._x_forwarded_for_ip: - country_code = self._downloader.params.get('geo_bypass_country', None) - if country_code: - self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) + self.__initialize_geo_bypass() if not self._ready: self._real_initialize() self._ready = True + def __initialize_geo_bypass(self): + if not self._x_forwarded_for_ip: + country_code = self._downloader.params.get('geo_bypass_country', None) + # If there is no explicit country for geo bypass specified and + # the extractor is known to be geo restricted let's fake IP + # as X-Forwarded-For right away. + if (not country_code and + self._GEO_BYPASS and + self._downloader.params.get('geo_bypass', True) and + self._GEO_COUNTRIES): + country_code = random.choice(self._GEO_COUNTRIES) + if country_code: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) + if self._downloader.params.get('verbose', False): + self._downloader.to_stdout( + '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + def extract(self, url): """Extracts URL information and returns it in list of dicts.""" try: @@ -389,16 +409,8 @@ class InfoExtractor(object): ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip return ie_result except GeoRestrictedError as e: - if (not self._downloader.params.get('geo_bypass_country', None) and - self._BYPASS_GEO and - self._downloader.params.get('geo_bypass', True) and - not self._x_forwarded_for_ip and - e.countries): - self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) - if self._x_forwarded_for_ip: - self.report_warning( - 'Video is geo restricted. 
Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) - continue + if self.__maybe_fake_ip_and_retry(e.countries): + continue raise except ExtractorError: raise @@ -407,6 +419,19 @@ class InfoExtractor(object): except (KeyError, StopIteration) as e: raise ExtractorError('An extractor error has occurred.', cause=e) + def __maybe_fake_ip_and_retry(self, countries): + if (not self._downloader.params.get('geo_bypass_country', None) and + self._GEO_BYPASS and + self._downloader.params.get('geo_bypass', True) and + not self._x_forwarded_for_ip and + countries): + self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries)) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + return True + return False + def set_downloader(self, downloader): """Sets the downloader for this IE.""" self._downloader = downloader diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 755db806a..e7abc8889 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -20,6 +20,7 @@ from ..utils import ( class DramaFeverBaseIE(AMPIE): _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' + _GEO_COUNTRIES = ['US', 'CA'] _CONSUMER_SECRET = 'DA59dtVXYLxajktV' @@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE): if isinstance(e.cause, compat_HTTPError): self.raise_geo_restricted( msg='Currently unavailable in your country', - countries=['US', 'CA']) + countries=self._GEO_COUNTRIES) raise series_id, episode_number = video_id.split('.') diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index ec902c670..b205bfc7c 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -37,6 +37,7 @@ class GoIE(AdobePassIE): } } _VALID_URL = 
r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) + _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -104,7 +105,7 @@ class GoIE(AdobePassIE): for error in errors: if error.get('code') == 1002: self.raise_geo_restricted( - error['message'], countries=['US']) + error['message'], countries=self._GEO_COUNTRIES) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index aabde15f3..021c6b278 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -24,6 +24,7 @@ from ..utils import ( class ITVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' + _GEO_COUNTRIES = ['GB'] _TEST = { 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { @@ -101,7 +102,8 @@ class ITVIE(InfoExtractor): fault_code = xpath_text(resp_env, './/faultcode') fault_string = xpath_text(resp_env, './/faultstring') if fault_code == 'InvalidGeoRegion': - self.raise_geo_restricted(msg=fault_string, countries=['GB']) + self.raise_geo_restricted( + msg=fault_string, countries=self._GEO_COUNTRIES) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 78ece33e1..13af9ed1f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -14,6 +14,7 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['NO'] def _real_extract(self, url): video_id = self._match_id(url) @@ -93,7 
+94,8 @@ class NRKBaseIE(InfoExtractor): # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* if 'IsGeoBlocked' in message_type: self.raise_geo_restricted( - msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) + msg=MESSAGES.get('ProgramIsGeoBlocked'), + countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, MESSAGES.get( message_type, message_type)), diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index 0c85d549e..df1ce3c1d 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -10,6 +10,7 @@ from ..utils import ( class OnDemandKoreaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' + _GEO_COUNTRIES = ['US', 'CA'] _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', 'info_dict': { @@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor): if 'msg_block_01.png' in webpage: self.raise_geo_restricted( msg='This content is not available in your region', - countries=['US', 'CA']) + countries=self._GEO_COUNTRIES) if 'This video is only available to ODK PLUS members.' 
in webpage: raise ExtractorError( diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 64f47bae3..3e51b4dd7 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -193,6 +193,8 @@ class PBSIE(InfoExtractor): ) ''' % '|'.join(list(zip(*_STATIONS))[0]) + _GEO_COUNTRIES = ['US'] + _TESTS = [ { 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', @@ -492,7 +494,8 @@ class PBSIE(InfoExtractor): message = self._ERRORS.get( redirect_info['http_code'], redirect_info['message']) if redirect_info['http_code'] == 403: - self.raise_geo_restricted(msg=message, countries=['US']) + self.raise_geo_restricted( + msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, message), expected=True) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index a35a0a538..bb73eb1d5 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -14,7 +14,8 @@ from ..utils import ( class SRGSSRIE(InfoExtractor): _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' - _BYPASS_GEO = False + _GEO_BYPASS = False + _GEO_COUNTRIES = ['CH'] _ERRORS = { 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. 
and 6 a.m.', @@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor): if media_data.get('block') and media_data['block'] in self._ERRORS: message = self._ERRORS[media_data['block']] if media_data['block'] == 'GEOBLOCK': - self.raise_geo_restricted(msg=message, countries=['CH']) + self.raise_geo_restricted( + msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, message), expected=True) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index f2a2200bf..9e2c9fcc6 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -13,6 +13,7 @@ from ..utils import ( class SVTBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['SE'] def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: @@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor): }) if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): self.raise_geo_restricted( - 'This video is only available in Sweden', countries=['SE']) + 'This video is only available in Sweden', + countries=self._GEO_COUNTRIES) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index f86d804c1..8152acefd 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor): ) (?P<id>[\da-fA-F]+) ''' + _GEO_COUNTRIES = ['BG'] _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor): video_url = video['src'] if '/na.mp4' in video_url: - self.raise_geo_restricted(countries=['BG']) + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) uploader = video.get('uploader') diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 1709fd6bb..0f8c156a7 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -14,7 +14,7 @@ from ..utils import ( class VGTVIE(XstreamIE): IE_DESC 
= 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' - _BYPASS_GEO = False + _GEO_BYPASS = False _HOST_TO_APPNAME = { 'vgtv.no': 'vgtv', @@ -218,7 +218,8 @@ class VGTVIE(XstreamIE): properties = try_get( data, lambda x: x['streamConfiguration']['properties'], list) if properties and 'geoblocked' in properties: - raise self.raise_geo_restricted(countries=['NO']) + raise self.raise_geo_restricted( + countries=[host.rpartition('.')[-1].partition('/')[0].upper()]) self._sort_formats(info['formats']) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 68a74e246..e9c8bf824 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor): _APP_VERSION = '2.2.5.1428709186' _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' - _BYPASS_GEO = False + _GEO_BYPASS = False _NETRC_MACHINE = 'viki' _token = None diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cbf7639c5..17b83794a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3291,7 +3291,7 @@ class GeoUtils(object): addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] addr_max = addr_min | (0xffffffff >> int(preflen)) return compat_str(socket.inet_ntoa( - compat_struct_pack('!I', random.randint(addr_min, addr_max)))) + compat_struct_pack('!L', random.randint(addr_min, addr_max)))) class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): From 0aa10994f452b4ca978baf124df0cb2239d49305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 03:58:17 +0700 Subject: [PATCH 70/80] [options] Move geo restriction related options to separate section --- youtube_dl/options.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ae3f50754..2c880d06a 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -228,17 +228,29 
@@ def parseOpts(overrideArguments=None): action='store_const', const='::', dest='source_address', help='Make all connections via IPv6', ) - network.add_option( + + geo = optparse.OptionGroup(parser, 'Geo Restriction') + geo.add_option( '--geo-verification-proxy', dest='geo_verification_proxy', default=None, metavar='URL', help='Use this proxy to verify the IP address for some geo-restricted sites. ' - 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.' - ) - network.add_option( + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.') + geo.add_option( '--cn-verification-proxy', dest='cn_verification_proxy', default=None, metavar='URL', - help=optparse.SUPPRESS_HELP, - ) + help=optparse.SUPPRESS_HELP) + geo.add_option( + '--geo-bypass', + action='store_true', dest='geo_bypass', default=True, + help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + geo.add_option( + '--no-geo-bypass', + action='store_false', dest='geo_bypass', default=True, + help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + geo.add_option( + '--geo-bypass-country', metavar='CODE', + dest='geo_bypass_country', default=None, + help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') selection = optparse.OptionGroup(parser, 'Video Selection') selection.add_option( @@ -549,18 +561,6 @@ def parseOpts(overrideArguments=None): 'Upper bound of a range for randomized sleep before each download ' '(maximum possible number of seconds to sleep). 
Must only be used ' 'along with --min-sleep-interval.')) - workarounds.add_option( - '--geo-bypass', - action='store_true', dest='geo_bypass', default=True, - help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') - workarounds.add_option( - '--no-geo-bypass', - action='store_false', dest='geo_bypass', default=True, - help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') - workarounds.add_option( - '--geo-bypass-country', metavar='CODE', - dest='geo_bypass_country', default=None, - help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( @@ -848,6 +848,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(general) parser.add_option_group(network) + parser.add_option_group(geo) parser.add_option_group(selection) parser.add_option_group(downloader) parser.add_option_group(filesystem) From 553f6dbac7afac84994eae18f551799f807d1503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 04:18:22 +0700 Subject: [PATCH 71/80] [downloader/dash] Honor HTTP headers when downloading fragments For example, https://www.oppetarkiv.se/video/1196142/natten-ar-dagens-mor --- youtube_dl/downloader/dash.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8437dde30..e2ddc369e 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -43,7 +43,10 @@ class DashSegmentsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': segment_url}) + success = ctx['dl'].download(target_filename, { + 'url': segment_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, target_sanitized = 
sanitize_open(target_filename, 'rb') From de64e23c5663ceb4f62264077a7993d13ace0d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 04:18:36 +0700 Subject: [PATCH 72/80] [downloader/ism] Honor HTTP headers when downloading fragments --- youtube_dl/downloader/ism.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 93cac5e98..63a636cb7 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -238,7 +238,10 @@ class IsmFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': segment_url}) + success = ctx['dl'].download(target_filename, { + 'url': segment_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') From f1a78ee4ef3bfd8e7ff06a3014d96c3cf11b4d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 06:16:00 +0700 Subject: [PATCH 73/80] [tv4] Switch to hls3 protocol (closes #12177) --- youtube_dl/extractor/tv4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index ad79db92b..7918e3d86 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -80,7 +80,7 @@ class TV4IE(InfoExtractor): subtitles = {} formats = [] # http formats are linked with unresolvable host - for kind in ('hls', ''): + for kind in ('hls3', ''): data = self._download_json( 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, video_id, 'Downloading sources JSON', query={ From c58b7ffef43f60fa6a183c849cfdca42e36eae0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 06:24:38 +0700 Subject: [PATCH 74/80] [tv4] Bypass geo restriction and improve detection --- 
youtube_dl/extractor/tv4.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 7918e3d86..7aeb2c620 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -24,6 +24,7 @@ class TV4IE(InfoExtractor): sport/| ) )(?P<id>[0-9]+)''' + _GEO_COUNTRIES = ['SE'] _TESTS = [ { 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', @@ -71,10 +72,6 @@ class TV4IE(InfoExtractor): 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') - # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info.get('is_geo_restricted'): - self.report_warning('This content might not be available in your country due to licensing restrictions.') - title = info['title'] subtitles = {} @@ -113,6 +110,10 @@ class TV4IE(InfoExtractor): 'url': manifest_url, 'ext': 'vtt', }]}) + + if not formats and info.get('is_geo_restricted'): + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + self._sort_formats(formats) return { From 8936f68a0ba3284c88ec619fb4cc22eb0499e7f3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 21 Oct 2015 00:37:28 +0800 Subject: [PATCH 75/80] [travis] Run tests in parallel [test_download] Print test names in case of network errors [test_download] Add comments for nose parameters [test_download] Modify outtmpl to prevent info JSON filename conflicts Thanks @jaimeMF for the idea. 
[travis] Only download tests should be run in parallel --- devscripts/run_tests.sh | 4 +++- test/test_download.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index 7f4c1e083..c60807215 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -3,6 +3,7 @@ DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" test_set="" +multiprocess_args="" case "$YTDL_TEST_SET" in core) @@ -10,10 +11,11 @@ case "$YTDL_TEST_SET" in ;; download) test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + multiprocess_args="--processes=4 --process-timeout=540" ;; *) break ;; esac -nosetests test --verbose $test_set +nosetests test --verbose $test_set $multiprocess_args diff --git a/test/test_download.py b/test/test_download.py index 463952989..30034f978 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -65,6 +65,10 @@ defs = gettestcases() class TestDownload(unittest.TestCase): + # Parallel testing in nosetests. See + # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html + _multiprocess_shared_ = True + maxDiff = None def setUp(self): @@ -73,7 +77,7 @@ class TestDownload(unittest.TestCase): # Dynamically generate tests -def generator(test_case): +def generator(test_case, tname): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) @@ -102,6 +106,7 @@ def generator(test_case): return params = get_params(test_case.get('params', {})) + params['outtmpl'] = tname + '_' + params['outtmpl'] if is_playlist and 'playlist' not in test_case: params.setdefault('extract_flat', 'in_playlist') params.setdefault('skip_download', True) @@ -146,7 +151,7 @@ def generator(test_case): raise if try_num == RETRIES: - report_warning('Failed due to network errors, skipping...') + report_warning('%s failed due to network errors, skipping...' 
% tname) return print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) @@ -221,12 +226,12 @@ def generator(test_case): # And add them to TestDownload for n, test_case in enumerate(defs): - test_method = generator(test_case) tname = 'test_' + str(test_case['name']) i = 1 while hasattr(TestDownload, tname): tname = 'test_%s_%d' % (test_case['name'], i) i += 1 + test_method = generator(test_case, tname) test_method.__name__ = str(tname) setattr(TestDownload, test_method.__name__, test_method) del test_method From 983e9b774643fc588fbfb51d314381025ffac248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 00:59:31 +0700 Subject: [PATCH 76/80] [nrk] Update _API_HOST and relax _VALID_URL --- youtube_dl/extractor/nrk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 13af9ed1f..7b98626f2 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -164,12 +164,12 @@ class NRKIE(NRKBaseIE): https?:// (?: (?:www\.)?nrk\.no/video/PS\*| - v8-psapi\.nrk\.no/mediaelement/ + v8[-.]psapi\.nrk\.no/mediaelement/ ) ) - (?P<id>[^/?#&]+) + (?P<id>[^?#&]+) ''' - _API_HOST = 'v8.psapi.nrk.no' + _API_HOST = 'v8-psapi.nrk.no' _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', @@ -195,6 +195,9 @@ class NRKIE(NRKBaseIE): }, { 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, + }, { + 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70', + 'only_matching': True, }, { 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, From 8ffb8e63fe2853f9e51420ba224db428f1241c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 01:00:53 +0700 Subject: [PATCH 77/80] [prosiebensat1] Throw ExtractionError on unsupported page type (closes #12180) --- youtube_dl/extractor/prosiebensat1.py | 3 
+++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 5091d8456..1245309a7 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -424,3 +424,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): return self._extract_clip(url, webpage) elif page_type == 'playlist': return self._extract_playlist(url, webpage) + else: + raise ExtractorError( + 'Unsupported page type %s' % page_type, expected=True) From c78dd3549155d4cb8f70707c1b4085f9f974db2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 02:25:39 +0700 Subject: [PATCH 78/80] [nrk] PEP 8 --- youtube_dl/extractor/nrk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7b98626f2..7fe79cb53 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -15,6 +15,7 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): _GEO_COUNTRIES = ['NO'] + def _real_extract(self, url): video_id = self._match_id(url) From 6d4c259765de86bdb8a10e71bfbc7b6e196f6967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 02:25:55 +0700 Subject: [PATCH 79/80] [svt] PEP 8 --- youtube_dl/extractor/svt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 9e2c9fcc6..1b5afb73e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -14,6 +14,7 @@ from ..utils import ( class SVTBaseIE(InfoExtractor): _GEO_COUNTRIES = ['SE'] + def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: From 2cc7fcd338e8690a5c211b95fb9e0dcdc5d98ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 03:06:52 +0700 Subject: [PATCH 80/80] [commonmistakes] Disable UnicodeBOM extractor 
test for python 3.2 --- youtube_dl/extractor/commonmistakes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py index d3ed4a9a4..79f7a9cd1 100644 --- a/youtube_dl/extractor/commonmistakes.py +++ b/youtube_dl/extractor/commonmistakes.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import sys + from .common import InfoExtractor from ..utils import ExtractorError @@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor): IE_DESC = False _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' - _TESTS = [{ + # Disable test for python 3.2 since BOM is broken in re in this version + # (see https://github.com/rg3/youtube-dl/issues/9751) + _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 'only_matching': True, }]