From a0758dfa1afd5b04773ba3b3b17ac71d22054821 Mon Sep 17 00:00:00 2001 From: felix Date: Wed, 5 Aug 2015 22:40:46 +0200 Subject: [PATCH 001/586] [filmon] new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/filmon.py | 144 +++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 youtube_dl/extractor/filmon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..c9b9ebd23 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,6 +287,7 @@ from .fc2 import ( FC2EmbedIE, ) from .fczenit import FczenitIE +from .filmon import FilmOnIE, FilmOnVODIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py new file mode 100644 index 000000000..987792fec --- /dev/null +++ b/youtube_dl/extractor/filmon.py @@ -0,0 +1,144 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import qualities +from ..compat import compat_urllib_request + + +_QUALITY = qualities(('low', 'high')) + + +class FilmOnIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P[a-z0-9-]+)' + _TESTS = [{ + 'url': 'https://www.filmon.com/channel/filmon-sports', + 'only_matching': True, + }, { + 'url': 'https://www.filmon.com/tv/2894', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel_id = self._match_id(url) + + request = compat_urllib_request.Request('https://www.filmon.com/channel/%s' % (channel_id)) + request.add_header('X-Requested-With', 'XMLHttpRequest') + channel_info = self._download_json(request, channel_id) + now_playing = channel_info['now_playing'] + + thumbnails = [] + for thumb in now_playing.get('images', ()): + if thumb['type'] != '2': + continue + thumbnails.append({ + 'url': thumb['url'], + 'width': int(thumb['width']), + 'height': int(thumb['height']), + }) + + formats = [] + + for stream in channel_info['streams']: + formats.append({ + 'format_id': str(stream['id']), + # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats + # because 0) it doesn't have bitrate variants anyway, and 1) the ids generated + # by that method are highly unstable (because the bitrate is variable) + 'url': stream['url'], + 'resolution': stream['name'], + 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'ext': 'mp4', + 'quality': _QUALITY(stream['quality']), + 'preference': int(stream['watch-timeout']), + }) + self._sort_formats(formats) + + return { + 'id': str(channel_info['id']), + 'display_id': channel_info['alias'], + 'formats': formats, + # XXX: use the channel description (channel_info['description'])? + 'uploader_id': channel_info['alias'], + 'uploader': channel_info['title'], # XXX: kinda stretching it... + 'title': now_playing.get('programme_name') or channel_info['title'], + 'description': now_playing.get('programme_description'), + 'thumbnails': thumbnails, + 'is_live': True, + } + + +class FilmOnVODIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmon\.com/vod/view/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', + 'info_dict': { + 'id': '24869', + 'ext': 'mp4', + 'title': 'Plan 9 From Outer Space', + 'description': 'Dead human, zombies and vampires', + }, + }, { + 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1', + 'info_dict': { + 'id': '2825', + 'title': 'Popeye Series 1', + }, + 'playlist_count': 8, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + result = self._download_json('https://www.filmon.com/api/vod/movie?id=%s' % (video_id), video_id) + if result['code'] != 200: + raise ExtractorError('FilmOn said: %s' % (result['reason']), expected=True) + + response = result['response'] + + if response.get('episodes'): + return { + '_type': 'playlist', + 'id': video_id, + 'title': response['title'], + 'entries': [{ + '_type': 'url', + 'url': 'https://www.filmon.com/vod/view/%s' % (ep), + } for ep in response['episodes']] + } + + formats = [] + for (id, stream) in response['streams'].items(): + formats.append({ + 'format_id': id, + 'url': stream['url'], + 'resolution': stream['name'], + 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'ext': 'mp4', + 'quality': _QUALITY(stream['quality']), + 'preference': int(stream['watch-timeout']), + }) + self._sort_formats(formats) + + poster = response['poster'] + thumbnails = [{ + 'id': 'poster', + 'url': poster['url'], + 'width': poster['width'], + 'height': poster['height'], + }] + for (id, thumb) in poster['thumbs'].items(): + thumbnails.append({ + 'id': id, + 'url': thumb['url'], + 'width': thumb['width'], + 'height': thumb['height'], + }) + + return { + 'id': video_id, + 'title': response['title'], + 'formats': formats, + 'description': response['description'], + 'thumbnails': thumbnails, + } From 594601f54570b8e79606002b6342dd5fcdc1f133 Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 27 Sep 2016 13:29:21 +0800 Subject: [PATCH 002/586] [ondemandkorea] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ondemandkorea.py | 58 +++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/ondemandkorea.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bcf9f1906..519908857 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -662,6 +662,7 @@ from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE +from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, OnetChannelIE, diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py new file mode 100644 index 000000000..125c310c8 --- /dev/null +++ b/youtube_dl/extractor/ondemandkorea.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class OnDemandKoreaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P[^/]+)\.html' + _TEST = { + 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', + 'info_dict': { + 'id': 'ask-us-anything-e43', + 'ext': 'mp4', + 'title': 'Ask Us Anything : E43', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': 'm3u8 download' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id, fatal=False) + + if not webpage: + # Page sometimes returns captcha page with HTTP 403 + raise ExtractorError('Unable to access page. You may have been blocked.', expected=True) + + if 'msg_block_01.png' in webpage: + raise ExtractorError('This content is not available in your region.', expected=True) + + if 'This video is only available to ODK PLUS members.' in webpage: + raise ExtractorError('This video is only available to ODK PLUS members.', expected=True) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + manifest_url = self._search_regex(r'file:\s"(https?://[\S].+?/manifest\.m3u8)', webpage, 'manifest') + formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + subs = re.findall(r'file:\s\'(?P[^\']+\.vtt)\',\s+label:\s+\'(?P[^\']+)\'', webpage) + subtitles = {} + for sub in subs: + subtitles[sub[1]] = [{'url': 'http://www.ondemandkorea.com' + sub[0], 'ext': sub[0][-3:]}] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + 'subtitles': subtitles, + } From b0c65c677f5298df8653df1e382b406bea420ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:44:53 +0700 Subject: [PATCH 003/586] [utils] Improve urljoin --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 3f45b0bd1..1cdac82fc 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -448,11 +448,14 @@ class TestUtil(unittest.TestCase): def test_urljoin(self): self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 694e9a600..528d87bb9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1703,9 +1703,9 @@ def base_url(url): def urljoin(base, path): if not isinstance(path, compat_str) or not path: return None - if re.match(r'https?://', path): + if re.match(r'^(?:https?:)?//', path): return path - if not isinstance(base, compat_str) or not re.match(r'https?://', base): + if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base): return None return compat_urlparse.urljoin(base, path) From a495840d3bb05619f9d38168b47f55f7aeb1ca87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:45:44 +0700 Subject: [PATCH 004/586] [jwplatform] Improve subtitles extraction --- youtube_dl/extractor/jwplatform.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 5d56e0a28..7037763cb 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + urljoin, ) @@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor): tracks = video_data.get('tracks') if tracks and isinstance(tracks, list): for track in tracks: - if track.get('file') and track.get('kind') == 'captions': - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track['file']) - }) + if track.get('kind') != 'captions': + continue + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) entries.append({ 'id': this_video_id, From 732d116aa7bed0940bec29af67dd271e47932818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:45:53 +0700 Subject: [PATCH 005/586] [jwplatform] Improve duration extraction --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 7037763cb..aff7ab49a 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -126,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor): 'description': video_data.get('description'), 'thumbnail': self._proto_relative_url(video_data.get('image')), 'timestamp': int_or_none(video_data.get('pubdate')), - 'duration': float_or_none(jwplayer_data.get('duration')), + 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, 'formats': formats, }) From 47c914f9954f6a88a9cd56f487f493e08eb78765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:47:49 +0700 Subject: [PATCH 006/586] [ondemandkorea] Fix extraction (closes #10772) --- youtube_dl/extractor/ondemandkorea.py | 52 ++++++++++++++------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index 125c310c8..c3e830c23 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - -from .common import InfoExtractor -from ..utils import ExtractorError +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + ExtractorError, + js_to_json, +) -class OnDemandKoreaIE(InfoExtractor): +class OnDemandKoreaIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', @@ -29,30 +29,32 @@ class OnDemandKoreaIE(InfoExtractor): if not webpage: # Page sometimes returns captcha page with HTTP 403 - raise ExtractorError('Unable to access page. You may have been blocked.', expected=True) + raise ExtractorError( + 'Unable to access page. You may have been blocked.', + expected=True) if 'msg_block_01.png' in webpage: - raise ExtractorError('This content is not available in your region.', expected=True) - + self.raise_geo_restricted( + 'This content is not available in your region') + if 'This video is only available to ODK PLUS members.' in webpage: - raise ExtractorError('This video is only available to ODK PLUS members.', expected=True) + raise ExtractorError( + 'This video is only available to ODK PLUS members.', + expected=True) title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) - manifest_url = self._search_regex(r'file:\s"(https?://[\S].+?/manifest\.m3u8)', webpage, 'manifest') - formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id='hls') - self._sort_formats(formats) + jw_config = self._parse_json( + self._search_regex( + r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P.+?)\);', + webpage, 'jw config', group='options'), + video_id, transform_source=js_to_json) + info = self._parse_jwplayer_data( + jw_config, video_id, require_title=False, m3u8_id='hls', + base_url=url) - subs = re.findall(r'file:\s\'(?P[^\']+\.vtt)\',\s+label:\s+\'(?P[^\']+)\'', webpage) - subtitles = {} - for sub in subs: - subtitles[sub[1]] = [{'url': 'http://www.ondemandkorea.com' + sub[0], 'ext': sub[0][-3:]}] - - return { - 'id': video_id, + info.update({ 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - 'subtitles': subtitles, - } + 'thumbnail': self._og_search_thumbnail(webpage), + }) + return info From 9b785768acdb384dba1f81af6f144b4b99c4d7a3 Mon Sep 17 00:00:00 2001 From: Philip Xu Date: Tue, 20 Sep 2016 20:14:24 -0400 Subject: [PATCH 007/586] [meipai] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/meipai.py | 99 ++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 youtube_dl/extractor/meipai.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 519908857..af9b7003b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -498,6 +498,7 @@ from .mangomolo import ( ) from .matchtv import MatchTVIE from .mdr import MDRIE +from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import METAIE from .metacafe import MetacafeIE diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py new file mode 100644 index 000000000..2ea592055 --- /dev/null +++ b/youtube_dl/extractor/meipai.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import parse_iso8601 +from .common import InfoExtractor + + +class MeipaiIE(InfoExtractor): + IE_DESC = '美拍' + _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.meipai.com/media/531697625', + 'md5': 'e3e9600f9e55a302daecc90825854b4f', + 'info_dict': { + 'id': '531697625', + 'ext': 'mp4', + 'title': '#葉子##阿桑##余姿昀##超級女聲#', + 'description': '#葉子##阿桑##余姿昀##超級女聲#', + 'thumbnail': 're:^https?://.*\.jpg$', + 'creator': '她她-TATA', + 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], + 'release_date': 1465492420, + } + }, + { + 'url': 'http://www.meipai.com/media/576409659', + 'md5': '2e807c16ebe67b8b6b3c8dcacbc32f48', + 'info_dict': { + 'id': '576409659', + 'ext': 'mp4', + 'title': '#失語者##蔡健雅##吉他彈唱#', + 'description': '#失語者##蔡健雅##吉他彈唱#', + 'thumbnail': 're:^https?://.*\.jpg$', + 'creator': '她她-TATA', + 'tags': ['失語者', '蔡健雅', '吉他彈唱'], + 'release_date': 1472534847, + } + }, + # record of live streaming + { + 'url': 'http://www.meipai.com/media/585526361', + 'md5': 'ff7d6afdbc6143342408223d4f5fb99a', + 'info_dict': { + 'id': '585526361', + 'ext': 'mp4', + 'title': '姿昀和善願 練歌練琴啦😁😁😁', + 'description': '姿昀和善願 練歌練琴啦😁😁😁', + 'thumbnail': 're:^https?://.*\.jpg$', + 'creator': '她她-TATA', + 'release_date': 1474311799, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage, default=None) + if title is None: + # fall back to text used in title + title = self._html_search_regex( + r']*>(.+)', webpage, 'title') + + release_date = self._og_search_property( + 'video:release_date', webpage, 'release date', fatal=False) + release_date = parse_iso8601(release_date) + + tags = self._og_search_property( + 'video:tag', webpage, 'tags', default='').split(',') + + info = { + 'id': video_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'release_date': release_date, + 'creator': self._og_search_property( + 'video:director', webpage, 'creator', fatal=False), + 'tags': tags, + } + + keywords = self._html_search_meta( + 'keywords', webpage, 'keywords', default=[]) + + if '直播回放' in keywords: + # recorded playback of live streaming + m3u8_url = self._html_search_regex( + r'file:\s*encodeURIComponent\(["\'](.+)["\']\)', + webpage, + 'm3u8_url') + info['formats'] = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native') + else: + # regular uploaded video + info['url'] = self._og_search_video_url(webpage) + + return info From 2786818c3360bcadc21109a9f740fba8f698c8a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 19:42:34 +0700 Subject: [PATCH 008/586] [meipai] Fix regular videos extraction and improve (closes #10718) --- youtube_dl/extractor/meipai.py | 153 +++++++++++++++++---------------- 1 file changed, 79 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index 2ea592055..35914fd4b 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -1,99 +1,104 @@ # coding: utf-8 from __future__ import unicode_literals -from ..utils import parse_iso8601 from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + unified_timestamp, +) class MeipaiIE(InfoExtractor): IE_DESC = '美拍' _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.meipai.com/media/531697625', - 'md5': 'e3e9600f9e55a302daecc90825854b4f', - 'info_dict': { - 'id': '531697625', - 'ext': 'mp4', - 'title': '#葉子##阿桑##余姿昀##超級女聲#', - 'description': '#葉子##阿桑##余姿昀##超級女聲#', - 'thumbnail': 're:^https?://.*\.jpg$', - 'creator': '她她-TATA', - 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], - 'release_date': 1465492420, - } - }, - { - 'url': 'http://www.meipai.com/media/576409659', - 'md5': '2e807c16ebe67b8b6b3c8dcacbc32f48', - 'info_dict': { - 'id': '576409659', - 'ext': 'mp4', - 'title': '#失語者##蔡健雅##吉他彈唱#', - 'description': '#失語者##蔡健雅##吉他彈唱#', - 'thumbnail': 're:^https?://.*\.jpg$', - 'creator': '她她-TATA', - 'tags': ['失語者', '蔡健雅', '吉他彈唱'], - 'release_date': 1472534847, - } - }, + _TESTS = [{ + # regular uploaded video + 'url': 'http://www.meipai.com/media/531697625', + 'md5': 'e3e9600f9e55a302daecc90825854b4f', + 'info_dict': { + 'id': '531697625', + 'ext': 'mp4', + 'title': '#葉子##阿桑##余姿昀##超級女聲#', + 'description': '#葉子##阿桑##余姿昀##超級女聲#', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 152, + 'timestamp': 1465492420, + 'upload_date': '20160609', + 'view_count': 35511, + 'creator': '她她-TATA', + 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], + } + }, { # record of live streaming - { - 'url': 'http://www.meipai.com/media/585526361', - 'md5': 'ff7d6afdbc6143342408223d4f5fb99a', - 'info_dict': { - 'id': '585526361', - 'ext': 'mp4', - 'title': '姿昀和善願 練歌練琴啦😁😁😁', - 'description': '姿昀和善願 練歌練琴啦😁😁😁', - 'thumbnail': 're:^https?://.*\.jpg$', - 'creator': '她她-TATA', - 'release_date': 1474311799, - } - }, - ] + 'url': 'http://www.meipai.com/media/585526361', + 'md5': 'ff7d6afdbc6143342408223d4f5fb99a', + 'info_dict': { + 'id': '585526361', + 'ext': 'mp4', + 'title': '姿昀和善願 練歌練琴啦😁😁😁', + 'description': '姿昀和善願 練歌練琴啦😁😁😁', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 5975, + 'timestamp': 1474311799, + 'upload_date': '20160919', + 'view_count': 1215, + 'creator': '她她-TATA', + } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage, default=None) - if title is None: - # fall back to text used in title - title = self._html_search_regex( - r']*>(.+)', webpage, 'title') + title = self._og_search_title( + webpage, default=None) or self._html_search_regex( + r']*>([^<]+)', webpage, 'title') - release_date = self._og_search_property( - 'video:release_date', webpage, 'release date', fatal=False) - release_date = parse_iso8601(release_date) + formats = [] + + # recorded playback of live streaming + m3u8_url = self._html_search_regex( + r'file:\s*encodeURIComponent\((["\'])(?P(?:(?!\1).)+)\1\)', + webpage, 'm3u8 url', group='url', default=None) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + if not formats: + # regular uploaded video + video_url = self._search_regex( + r'data-video=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video url', + group='url', default=None) + if video_url: + formats.append({ + 'url': video_url, + 'format_id': 'http', + }) + + timestamp = unified_timestamp(self._og_search_property( + 'video:release_date', webpage, 'release date', fatal=False)) tags = self._og_search_property( 'video:tag', webpage, 'tags', default='').split(',') - info = { + view_count = int_or_none(self._html_search_meta( + 'interactionCount', webpage, 'view count')) + duration = parse_duration(self._html_search_meta( + 'duration', webpage, 'duration')) + creator = self._og_search_property( + 'video:director', webpage, 'creator', fatal=False) + + return { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), - 'release_date': release_date, - 'creator': self._og_search_property( - 'video:director', webpage, 'creator', fatal=False), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'creator': creator, 'tags': tags, + 'formats': formats, } - - keywords = self._html_search_meta( - 'keywords', webpage, 'keywords', default=[]) - - if '直播回放' in keywords: - # recorded playback of live streaming - m3u8_url = self._html_search_regex( - r'file:\s*encodeURIComponent\(["\'](.+)["\']\)', - webpage, - 'm3u8_url') - info['formats'] = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native') - else: - # regular uploaded video - info['url'] = self._og_search_video_url(webpage) - - return info From 93753aad20991c3fc23566b9fb7db8299dbc9ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 20:20:23 +0700 Subject: [PATCH 009/586] [twitch] Adapt to new videos pages schema (closes #11469) --- youtube_dl/extractor/twitch.py | 69 ++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 77414a242..8de8ec65b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -300,7 +300,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): response = self._call_api( self._PLAYLIST_PATH % (channel_id, offset, limit), channel_id, - 'Downloading %s videos JSON page %s' + 'Downloading %s JSON page %s' % (self._PLAYLIST_TYPE, counter_override or counter)) page_entries = self._extract_playlist_page(response) if not page_entries: @@ -350,19 +350,72 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): } -class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): - IE_NAME = 'twitch:past_broadcasts' - _VALID_URL = r'%s/(?P[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true' - _PLAYLIST_TYPE = 'past broadcasts' +class TwitchVideosBaseIE(TwitchPlaylistBaseIE): + _VALID_URL_VIDEOS_BASE = r'%s/(?P[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE + _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type=' + + +class TwitchAllVideosIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:all' + _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight' + _PLAYLIST_TYPE = 'all videos' _TEST = { - 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts', + 'url': 'https://www.twitch.tv/spamfish/videos/all', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, - 'playlist_mincount': 54, + 'playlist_mincount': 869, + } + + +class TwitchUploadsIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:uploads' + _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' + _PLAYLIST_TYPE = 'uploads' + + _TEST = { + 'url': 'https://www.twitch.tv/spamfish/videos/uploads', + 'info_dict': { + 'id': 'spamfish', + 'title': 'Spamfish', + }, + 'playlist_mincount': 0, + } + + +class TwitchPastBroadcastsIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:past-broadcasts' + _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' + _PLAYLIST_TYPE = 'past broadcasts' + + _TEST = { + 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', + 'info_dict': { + 'id': 'spamfish', + 'title': 'Spamfish', + }, + 'playlist_mincount': 0, + } + + +class TwitchHighlightsIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:highlights' + _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight' + _PLAYLIST_TYPE = 'highlights' + + _TEST = { + 'url': 'https://www.twitch.tv/spamfish/videos/highlights', + 'info_dict': { + 'id': 'spamfish', + 'title': 'Spamfish', + }, + 'playlist_mincount': 805, } From 87a449c1edbbb3761fbb6fc3a100152aa961f95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 23:03:13 +0700 Subject: [PATCH 010/586] [extractor/common] Recognize DASH formats in html5 media entries --- youtube_dl/extractor/common.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6ae946569..40f3e2323 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1888,7 +1888,7 @@ class InfoExtractor(object): }) return formats - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -1905,11 +1905,16 @@ class InfoExtractor(object): def _media_formats(src, cur_media_type): full_url = absolute_url(src) - if determine_ext(full_url) == 'm3u8': + ext = determine_ext(full_url) + if ext == 'm3u8': is_plain_url = False formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) + elif ext == 'mpd': + is_plain_url = False + formats = self._extract_mpd_formats( + full_url, video_id, mpd_id=mpd_id) else: is_plain_url = True formats = [{ From 04bf59ff64e49f06caffaa121ca5adbd0da66d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 23:03:50 +0700 Subject: [PATCH 011/586] [extractors] Add missing twitch imports --- youtube_dl/extractor/extractors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index af9b7003b..202b971e0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1001,7 +1001,10 @@ from .twitch import ( TwitchChapterIE, TwitchVodIE, TwitchProfileIE, + TwitchAllVideosIE, + TwitchUploadsIE, TwitchPastBroadcastsIE, + TwitchHighlightsIE, TwitchStreamIE, TwitchClipsIE, ) From 6e416b210cfa565d2fc692722612a2ad2fde09fa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 17 Dec 2016 18:11:13 +0100 Subject: [PATCH 012/586] [nbc] fix extraction for msnbc videos(fixes #11466) --- youtube_dl/extractor/nbc.py | 43 ++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 7f1bd9229..4e96e78c3 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,6 +9,7 @@ from ..utils import ( lowercase_escape, smuggle_url, unescapeHTML, + update_url_query, ) @@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'md5': 'af1adfa51312291a017720403826bb64', 'info_dict': { - 'id': '269389891880', + 'id': 'p_tweet_snow_140529', 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', @@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', 'md5': '73135a2e0ef819107bbb55a5a9b2a802', 'info_dict': { - 'id': '394064451844', + 'id': 'nn_netcast_150204', 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', @@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'md5': 'a49e173825e5fcd15c13fc297fced39d', 'info_dict': { - 'id': '529953347624', + 'id': 'x_lon_vwhorn_150922', 'ext': 'mp4', 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', 'description': 'md5:c8be487b2d80ff0594c005add88d8351', @@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', 'md5': '118d7ca3f0bea6534f119c68ef539f71', 'info_dict': { - 'id': '669831235788', + 'id': 'tdy_al_space_160420', 'ext': 'mp4', 'title': 'See the aurora borealis from space in stunning new NASA video', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', @@ -271,7 +272,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', 'info_dict': { - 'id': '314487875924', + 'id': 'n_hayes_Aimm_140801_272214', 'ext': 'mp4', 'title': 'The chaotic GOP immigration vote', 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', @@ -279,7 +280,6 @@ class NBCNewsIE(ThePlatformIE): 'timestamp': 1406937606, 'upload_date': '20140802', 'uploader': 'NBCU-NEWS', - 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], }, }, { @@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE): else: # "feature" and "nightly-news" pages use theplatform.com video_id = mobj.group('mpx_id') - if not video_id.isdigit(): - webpage = self._download_webpage(url, video_id) - info = None - bootstrap_json = self._search_regex( - [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', - r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], - webpage, 'bootstrap json', default=None) + webpage = self._download_webpage(url, video_id) + + filter_param = 'byId' + bootstrap_json = self._search_regex( + [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', + r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"', + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'], + webpage, 'bootstrap json', default=None) + if bootstrap_json: bootstrap = self._parse_json( bootstrap_json, video_id, transform_source=unescapeHTML) + + info = None if 'results' in bootstrap: info = bootstrap['results'][0]['video'] elif 'video' in bootstrap: info = bootstrap['video'] + elif 'msnbcVideoInfo' in bootstrap: + info = bootstrap['msnbcVideoInfo']['meta'] + elif 'msnbcThePlatform' in bootstrap: + info = bootstrap['msnbcThePlatform']['videoPlayer']['video'] else: info = bootstrap - video_id = info['mpxId'] + + if 'guid' in info: + video_id = info['guid'] + filter_param = 'byGuid' + elif 'mpxId' in info: + video_id = info['mpxId'] return { '_type': 'url_transparent', 'id': video_id, # http://feed.theplatform.com/f/2E2eJC/nbcnews also works - 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, + 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}), 'ie_key': 'ThePlatformFeed', } From b42a0bf360878025817dba0b71479d509b5df4b4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 17 Dec 2016 21:48:45 +0100 Subject: [PATCH 013/586] [laola1tv] add support embed urls and improve extraction(#11460) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/laola1tv.py | 149 ++++++++++++++--------------- 2 files changed, 74 insertions(+), 80 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 202b971e0..16606a86c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -446,7 +446,10 @@ from .kuwo import ( KuwoMvIE, ) from .la7 import LA7IE -from .laola1tv import Laola1TvIE +from .laola1tv import ( + Laola1TvEmbedIE, + Laola1TvIE, +) from .lci import LCIIE from .lcp import ( LcpPlayIE, diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 2fab38079..37e38dba0 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -1,25 +1,81 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( ExtractorError, - sanitized_Request, unified_strdate, urlencode_postdata, xpath_element, xpath_text, + urljoin, ) +class Laola1TvEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + flash_vars = self._search_regex( + r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars') + get_flashvar = lambda x: self._search_regex(r'%s\s*:\s*"([^"]+)"' % x, flash_vars, x) + + hd_doc = self._download_xml( + 'http://www.laola1.tv/server/hd_video.php', video_id, query={ + 'play': get_flashvar('streamid'), + 'partner': get_flashvar('partnerid'), + 'portal': get_flashvar('portalid'), + 'lang': get_flashvar('sprache'), + 'v5ident': '', + }) + + _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) + title = _v('title', fatal=True) + + data_abo = urlencode_postdata( + dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) + token_url = self._download_json( + 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', + video_id, query={ + 'videoId': _v('id'), + 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), + 'label': _v('label'), + 'area': _v('area'), + }, data=data_abo)['data']['stream-access'][0] + token_doc = self._download_xml( + token_url, video_id, 'Downloading token', + headers=self.geo_verification_headers()) + + token_attrib = xpath_element(token_doc, './/token').attrib + + if token_attrib['status'] != '0': + raise ExtractorError( + 'Token error: %s' % token_attrib['comment'], expected=True) + + formats = self._extract_akamai_formats( + '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']), + video_id) + self._sort_formats(formats) + + categories_str = _v('meta_sports') + categories = categories_str.split(',') if categories_str else [] + is_live = _v('islive') == 'true' + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'upload_date': unified_strdate(_v('time_date')), + 'uploader': _v('meta_organisation'), + 'categories': categories, + 'is_live': is_live, + 'formats': formats, + } + + class Laola1TvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P[a-z]+)-(?P[a-z]+)/(?P[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { @@ -67,85 +123,20 @@ class Laola1TvIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('slug') - kind = mobj.group('kind') - lang = mobj.group('lang') - portal = mobj.group('portal') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) if 'Dieser Livestream ist bereits beendet.' in webpage: raise ExtractorError('This live stream has already finished.', expected=True) - iframe_url = self._search_regex( + iframe_url = urljoin(url, self._search_regex( r']*?id="videoplayer"[^>]*?src="([^"]+)"', - webpage, 'iframe url') - - video_id = self._search_regex( - r'videoid=(\d+)', iframe_url, 'video id') - - iframe = self._download_webpage(compat_urlparse.urljoin( - url, iframe_url), display_id, 'Downloading iframe') - - partner_id = self._search_regex( - r'partnerid\s*:\s*(["\'])(?P.+?)\1', - iframe, 'partner id', group='partner_id') - - hd_doc = self._download_xml( - 'http://www.laola1.tv/server/hd_video.php?%s' - % compat_urllib_parse_urlencode({ - 'play': video_id, - 'partner': partner_id, - 'portal': portal, - 'lang': lang, - 'v5ident': '', - }), display_id) - - _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) - title = _v('title', fatal=True) - - VS_TARGETS = { - 'video': '2', - 'livestream': '17', - } - - req = sanitized_Request( - 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' % - compat_urllib_parse_urlencode({ - 'videoId': video_id, - 'target': VS_TARGETS.get(kind, '2'), - 'label': _v('label'), - 'area': _v('area'), - }), - urlencode_postdata( - dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))) - - token_url = self._download_json(req, display_id)['data']['stream-access'][0] - token_doc = self._download_xml(token_url, display_id, 'Downloading token') - - token_attrib = xpath_element(token_doc, './/token').attrib - token_auth = token_attrib['auth'] - - if token_auth in ('blocked', 'restricted', 'error'): - raise ExtractorError( - 'Token error: %s' % token_attrib['comment'], expected=True) - - formats = self._extract_f4m_formats( - '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth), - video_id, f4m_id='hds') - self._sort_formats(formats) - - categories_str = _v('meta_sports') - categories = categories_str.split(',') if categories_str else [] + webpage, 'iframe url')) return { - 'id': video_id, + '_type': 'url', 'display_id': display_id, - 'title': title, - 'upload_date': unified_strdate(_v('time_date')), - 'uploader': _v('meta_organisation'), - 'categories': categories, - 'is_live': _v('islive') == 'true', - 'formats': formats, + 'url': iframe_url, + 'ie_key': 'Laola1TvEmbed', } From 199a47abba4ce57b6df4cd9f3223837ac6c4ee85 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 10:49:10 +0100 Subject: [PATCH 014/586] [ccma] Add new extractor(closes #11359) --- youtube_dl/extractor/ccma.py | 99 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 100 insertions(+) create mode 100644 youtube_dl/extractor/ccma.py diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py new file mode 100644 index 000000000..39938c9ac --- /dev/null +++ b/youtube_dl/extractor/ccma.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, + clean_html, +) + + +class CCMAIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?Pvideo|audio)/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', + 'md5': '7296ca43977c8ea4469e719c609b0871', + 'info_dict': { + 'id': '5630208', + 'ext': 'mp4', + 'title': 'L\'espot de La Marató de TV3', + 'description': 'md5:f12987f320e2f6e988e9908e4fe97765', + 'timestamp': 1470918540, + 'upload_date': '20160811', + } + }, { + 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', + 'md5': 'fa3e38f269329a278271276330261425', + 'info_dict': { + 'id': '943685', + 'ext': 'mp3', + 'title': 'El Consell de Savis analitza el derbi', + 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', + 'upload_date': '20171205', + 'timestamp': 1512507300, + } + }] + + def _real_extract(self, url): + media_type, media_id = re.match(self._VALID_URL, url).groups() + media_data = {} + formats = [] + profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc'] + for i, profile in enumerate(profiles): + md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ + 'media': media_type, + 'idint': media_id, + 'profile': profile, + }, fatal=False) + if md: + media_data = md + media_url = media_data.get('media', {}).get('url') + if media_url: + formats.append({ + 'format_id': profile, + 'url': media_url, + 'quality': i, + }) + self._sort_formats(formats) + + informacio = media_data['informacio'] + title = informacio['titol'] + durada = informacio.get('durada', {}) + duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) + timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc')) + + subtitles = {} + subtitols = media_data.get('subtitols', {}) + if subtitols: + sub_url = subtitols.get('url') + if sub_url: + subtitles.setdefault( + subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({ + 'url': sub_url, + }) + + thumbnails = [] + imatges = media_data.get('imatges', {}) + if imatges: + thumbnail_url = imatges.get('url') + if thumbnail_url: + thumbnails = [{ + 'url': thumbnail_url, + 'width': int_or_none(imatges.get('amplada')), + 'height': int_or_none(imatges.get('alcada')), + }] + + return { + 'id': media_id, + 'title': title, + 'description': clean_html(informacio.get('descripcio')), + 'duration': duration, + 'timestamp': timestamp, + 'thumnails': thumbnails, + 'subtitles': subtitles, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 16606a86c..c0b10f6d0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -150,6 +150,7 @@ from .cbsnews import ( ) from .cbssports import CBSSportsIE from .ccc import CCCIE +from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE From d5e623aaa18a1a08731e46e3aff6a2a9361b69b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 19:46:57 +0700 Subject: [PATCH 015/586] Credit @pyx for meipai (#10718) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4a6f7e13f..798503ca7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -190,3 +190,4 @@ John Hawkinson Rich Leeper Zhong Jianxin Thor77 +Philip Xu From 52a1d48d9fa1c33d3541b0219d5f7e36e9f66953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 19:48:59 +0700 Subject: [PATCH 016/586] [ChangeLog] Actualize --- ChangeLog | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index a9bfd2711..8fbc39c6a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +version + +Core ++ [extractor/common] Recognize DASH formats in html5 media entries + +Extractors ++ [ccma] Add support for ccma.cat (#11359) +* [laola1tv] Improve extraction ++ [laola1tv] Add support embed URLs (#11460) +* [nbc] Fix extraction for MSNBC videos (#11466) +* [twitch] Adapt to new videos pages URL schema (#11469) ++ [meipai] Add support for meipai.com (#10718) +* [jwplatform] Improve subtitles and duration extraction ++ [ondemandkorea] Add support for ondemandkorea.com (#10772) ++ [vvvvid] Add support for vvvvid.it (#5915) + + version 2016.12.15 Core From f73d7d50749cd8e49f048da62ff418f6ce0bf036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 19:50:33 +0700 Subject: [PATCH 017/586] release 2016.12.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 10 +++++++++- youtube_dl/version.py | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7bf8d6fd4..fffdefa45 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.15 +[debug] youtube-dl version 2016.12.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8fbc39c6a..3ed5da7fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.18 Core + [extractor/common] Recognize DASH formats in html5 media entries diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 226552d4d..b55044520 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -131,6 +131,7 @@ - **cbsnews**: CBS News - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** + - **CCMA** - **CCTV** - **CDA** - **CeskaTelevize** @@ -365,6 +366,7 @@ - **kuwo:song**: 酷我音乐 - **la7.it** - **Laola1Tv** + - **Laola1TvEmbed** - **LCI** - **Lcp** - **LcpPlay** @@ -402,6 +404,7 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **Meipai**: 美拍 - **MelonVOD** - **META** - **metacafe** @@ -524,6 +527,7 @@ - **Odnoklassniki** - **OktoberfestTV** - **on.aol.com** + - **OnDemandKorea** - **onet.tv** - **onet.tv:channel** - **OnionStudios** @@ -785,10 +789,13 @@ - **Tweakers** - **twitch:chapter** - **twitch:clips** - - **twitch:past_broadcasts** - **twitch:profile** - **twitch:stream** - **twitch:video** + - **twitch:videos:all** + - **twitch:videos:highlights** + - **twitch:videos:past-broadcasts** + - **twitch:videos:uploads** - **twitch:vod** - **twitter** - **twitter:amplify** @@ -874,6 +881,7 @@ - **VRT** - **vube**: Vube.com - **VuClip** + - **VVVVID** - **VyboryMos** - **Vzaar** - **Walla** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a4bba4a52..5f06d4b52 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.15' +__version__ = '2016.12.18' From ec79b1de1cda9745e83ba7950d6da8eeb6b53293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 20:56:21 +0700 Subject: [PATCH 018/586] Revert "Credit @pyx for meipai (#10718)" This reverts commit d5e623aaa18a1a08731e46e3aff6a2a9361b69b8. --- AUTHORS | 1 - 1 file changed, 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 798503ca7..4a6f7e13f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -190,4 +190,3 @@ John Hawkinson Rich Leeper Zhong Jianxin Thor77 -Philip Xu From e7b6caef248a834078333c166db5d12c01b102af Mon Sep 17 00:00:00 2001 From: ping Date: Sat, 10 Sep 2016 01:49:56 +0800 Subject: [PATCH 019/586] [viu] New extractor for viu.com --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/viu.py | 253 +++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 youtube_dl/extractor/viu.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c0b10f6d0..cae429f67 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1105,6 +1105,10 @@ from .viki import ( VikiIE, VikiChannelIE, ) +from .viu import ( + ViuIE, + ViuPlaylistIE, +) from .vk import ( VKIE, VKUserVideosIE, diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py new file mode 100644 index 000000000..2cab3495f --- /dev/null +++ b/youtube_dl/extractor/viu.py @@ -0,0 +1,253 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + clean_html, +) + + +class ViuBaseIE(InfoExtractor): + + def _get_viu_auth(self, video_id): + viu_auth_res = self._request_webpage( + 'https://www.viu.com/api/apps/v2/authenticate', video_id, + note='Requesting Viu auth', + query={ + 'acct': 'test', 'appid': 'viu_desktop', 'fmt': 'json', + 'iid': 'guest', 'languageid': 'default', 'platform': 'desktop', + 'userid': 'guest', 'useridtype': 'guest', 'ver': '1.0' + }) + return viu_auth_res.info().get('X-VIU-AUTH') + + +class ViuIE(ViuBaseIE): + IE_NAME = 'viu:show' + _VALID_URL = r'https?://www\.viu\.com/.+/(?:vod|media)/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I', + 'info_dict': { + 'id': '3421', + 'ext': 'mp4', + 'title': 'The Prime Minister and I - Episode 17', + 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Singapore', + }, { + 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90', + 'info_dict': { + 'id': '7123', + 'ext': 'mp4', + 'title': '大人女子 - Episode 10', + 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Hong Kong', + }, { + 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059', + 'info_dict': { + 'id': '1116705532', + 'ext': 'mp4', + 'title': 'Citizen Khan - Episode 1', + 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to India', + }, { + 'url': 'https://www.viu.com/en/media/1130599965', + 'info_dict': { + 'id': '1130599965', + 'ext': 'mp4', + 'title': 'Jealousy Incarnate - Episode 1', + 'description': 'md5:d3d82375cab969415d2720b6894361e9', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Indonesia', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + url, video_id, note='Downloading video page') + + mobj = re.search( + r'
]+?>(?P.+?)
', webpage, flags=re.DOTALL) + + if mobj: + raise ExtractorError(clean_html(mobj.group('err')), expected=True) + + config_js_url = self._search_regex( + r'src=(["\'])(?P.+?/js/config\.js)(?:\?.+?)?\1', webpage, 'config_js', + group='api_url', default=None) + + if not config_js_url: + # content is from ID, IN, MY + video_info = self._download_json( + 'https://www.viu.com/api/clip/load', video_id, + headers={'X-VIU-AUTH': self._get_viu_auth(video_id)}, + query={'appid': 'viu_desktop', 'fmt': 'json', 'id': video_id}, + note='Downloading video info').get('response', {}).get('item', [{}])[0] + + formats = self._extract_m3u8_formats( + video_info['href'], video_id, 'mp4', + m3u8_id='hls', fatal=False) + self._sort_formats(formats) + + subtitles = {} + for key, value in list(video_info.items()): + mobj = re.match(r'^subtitle_(?P[^_]+?)_(?P(vtt|srt))', key) + if not mobj: + continue + if not subtitles.get(mobj.group('lang')): + subtitles[mobj.group('lang')] = [] + subtitles[mobj.group('lang')].append( + {'url': value, 'ext': mobj.group('ext')}) + + title = '%s - Episode %s' % (video_info['moviealbumshowname'], + video_info.get('episodeno')) + description = video_info.get('description') + duration = int_or_none(video_info.get('duration')) + series = video_info.get('moviealbumshowname') + episode_title = video_info.get('title') + episode_num = int_or_none(video_info.get('episodeno')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'series': series, + 'episode': episode_title, + 'episode_number': episode_num, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + } + + # content from HK, SG + config_js = self._download_webpage( + 'http://www.viu.com' + config_js_url, video_id, note='Downloading config js') + + # try to strip away commented code which contains test urls + config_js = re.sub(r'^//.*?$', '', config_js, flags=re.MULTILINE) + config_js = re.sub(r'/\*.*?\*/', '', config_js, flags=re.DOTALL) + + # Slightly different api_url between HK and SG config.js + # http://www.viu.com/ott/hk/v1/js/config.js => '//www.viu.com/ott/hk/index.php?r=' + # http://www.viu.com/ott/sg/v1/js/config.js => 'http://www.viu.com/ott/sg/index.php?r=' + api_url = self._proto_relative_url( + self._search_regex( + r'var\s+api_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?r=)\1', + config_js, 'api_url', group='api_url'), scheme='http:') + + stream_info_url = self._proto_relative_url( + self._search_regex( + r'var\s+video_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?ccs_product_id=)\1', + config_js, 'video_url', group='video_url'), scheme='http:') + + if url.startswith('https://'): + api_url = re.sub('^http://', 'https://', api_url) + + video_info = self._download_json( + api_url + 'vod/ajax-detail&platform_flag_label=web&product_id=' + video_id, + video_id, note='Downloading video info').get('data', {}) + + ccs_product_id = video_info.get('current_product', {}).get('ccs_product_id') + + if not ccs_product_id: + raise ExtractorError('This video is not available in your region.', expected=True) + + stream_info = self._download_json( + stream_info_url + ccs_product_id, video_id, + note='Downloading stream info').get('data', {}).get('stream', {}) + + formats = [] + for vid_format, stream_url in stream_info.get('url', {}).items(): + br = int_or_none(self._search_regex( + r's(?P
[0-9]+)p', vid_format, 'bitrate', group='br')) + formats.append({ + 'format_id': vid_format, + 'url': stream_url, + 'vbr': br, + 'ext': 'mp4', + 'filesize': stream_info.get('size', {}).get(vid_format) + }) + self._sort_formats(formats) + + subtitles = {} + if video_info.get('current_product', {}).get('subtitle', []): + for sub in video_info.get('current_product', {}).get('subtitle', []): + subtitles[sub.get('name')] = [{ + 'url': sub.get('url'), + 'ext': 'srt', + }] + + episode_info = next( + p for p in video_info.get('series', {}).get('product', []) + if p.get('product_id') == video_id) + + title = '%s - Episode %s' % (video_info.get('series', {}).get('name'), + episode_info.get('number')) + description = episode_info.get('description') + thumbnail = episode_info.get('cover_image_url') + duration = int_or_none(stream_info.get('duration')) + series = video_info.get('series', {}).get('name') + episode_title = episode_info.get('synopsis') + episode_num = int_or_none(episode_info.get('number')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'series': series, + 'episode': episode_title, + 'episode_number': episode_num, + 'duration': duration, + 'thumbnail': thumbnail, + 'formats': formats, + 'subtitles': subtitles, + } + + +class ViuPlaylistIE(ViuBaseIE): + IE_NAME = 'viu:playlist' + _VALID_URL = r'https?://www\.viu\.com/.+/listing/(?Pplaylist\-[0-9]+)' + _TEST = { + 'url': 'https://www.viu.com/en/listing/playlist-22461380', + 'info_dict': { + 'id': 'playlist-22461380', + 'title': 'The Good Wife', + }, + 'playlist_count': 16, + 'skip': 'Geo-restricted to Indonesia', + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + playlist_info = self._download_json( + 'https://www.viu.com/api/container/load', playlist_id, + headers={'X-VIU-AUTH': self._get_viu_auth(playlist_id)}, + query={'appid': 'viu_desktop', 'fmt': 'json', 'id': playlist_id}, + note='Downloading playlist info').get('response', {}).get('container') + + name = playlist_info['title'] + entries = [ + self.url_result( + 'https://www.viu.com/en/media/%s' % item['id'], + 'Viu', item['id']) + for item in playlist_info['item'] if item['id']] + + return self.playlist_result(entries, playlist_id, name) From 723103151ead8b22ff4a61d009d16ec26b31248a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 17:15:53 +0100 Subject: [PATCH 020/586] [viu] improve extraction(closes #10607)(closes #11329) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/viu.py | 348 ++++++++++++++--------------- 2 files changed, 171 insertions(+), 178 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cae429f67..d75ea0c92 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1108,6 +1108,7 @@ from .viki import ( from .viu import ( ViuIE, ViuPlaylistIE, + ViuOTTIE, ) from .vk import ( VKIE, diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index 2cab3495f..1a81b4845 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -4,60 +4,53 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, - clean_html, ) class ViuBaseIE(InfoExtractor): - - def _get_viu_auth(self, video_id): + def _real_initialize(self): viu_auth_res = self._request_webpage( - 'https://www.viu.com/api/apps/v2/authenticate', video_id, - note='Requesting Viu auth', - query={ - 'acct': 'test', 'appid': 'viu_desktop', 'fmt': 'json', - 'iid': 'guest', 'languageid': 'default', 'platform': 'desktop', - 'userid': 'guest', 'useridtype': 'guest', 'ver': '1.0' + 'https://www.viu.com/api/apps/v2/authenticate', None, + 'Requesting Viu auth', query={ + 'acct': 'test', + 'appid': 'viu_desktop', + 'fmt': 'json', + 'iid': 'guest', + 'languageid': 'default', + 'platform': 'desktop', + 'userid': 'guest', + 'useridtype': 'guest', + 'ver': '1.0' }) - return viu_auth_res.info().get('X-VIU-AUTH') + self._auth_token = viu_auth_res.info()['X-VIU-AUTH'] + + def _call_api(self, path, *args, **kwargs): + headers = self.geo_verification_headers() + headers.update({ + 'X-VIU-AUTH': self._auth_token + }) + headers.update(kwargs.get('headers', {})) + kwargs['headers'] = headers + response = self._download_json( + 'https://www.viu.com/api/' + path, *args, **kwargs)['response'] + if response.get('status') != 'success': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, response['message']), expected=True) + return response class ViuIE(ViuBaseIE): - IE_NAME = 'viu:show' - _VALID_URL = r'https?://www\.viu\.com/.+/(?:vod|media)/(?P[0-9]+)' + _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P\d+)' _TESTS = [{ - 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I', - 'info_dict': { - 'id': '3421', - 'ext': 'mp4', - 'title': 'The Prime Minister and I - Episode 17', - 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c', - }, - 'params': { - 'skip_download': 'm3u8 download', - }, - 'skip': 'Geo-restricted to Singapore', - }, { - 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90', - 'info_dict': { - 'id': '7123', - 'ext': 'mp4', - 'title': '大人女子 - Episode 10', - 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f', - }, - 'params': { - 'skip_download': 'm3u8 download', - }, - 'skip': 'Geo-restricted to Hong Kong', - }, { 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059', 'info_dict': { 'id': '1116705532', 'ext': 'mp4', - 'title': 'Citizen Khan - Episode 1', + 'title': 'Citizen Khan - Ep 1', 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e', }, 'params': { @@ -81,142 +74,46 @@ class ViuIE(ViuBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, note='Downloading video page') + video_data = self._call_api( + 'clip/load', video_id, 'Downloading video data', query={ + 'appid': 'viu_desktop', + 'fmt': 'json', + 'id': video_id + })['item'][0] - mobj = re.search( - r'
]+?>(?P.+?)
', webpage, flags=re.DOTALL) + title = video_data['title'] - if mobj: - raise ExtractorError(clean_html(mobj.group('err')), expected=True) - - config_js_url = self._search_regex( - r'src=(["\'])(?P.+?/js/config\.js)(?:\?.+?)?\1', webpage, 'config_js', - group='api_url', default=None) - - if not config_js_url: - # content is from ID, IN, MY - video_info = self._download_json( - 'https://www.viu.com/api/clip/load', video_id, - headers={'X-VIU-AUTH': self._get_viu_auth(video_id)}, - query={'appid': 'viu_desktop', 'fmt': 'json', 'id': video_id}, - note='Downloading video info').get('response', {}).get('item', [{}])[0] - - formats = self._extract_m3u8_formats( - video_info['href'], video_id, 'mp4', - m3u8_id='hls', fatal=False) - self._sort_formats(formats) - - subtitles = {} - for key, value in list(video_info.items()): - mobj = re.match(r'^subtitle_(?P[^_]+?)_(?P(vtt|srt))', key) - if not mobj: - continue - if not subtitles.get(mobj.group('lang')): - subtitles[mobj.group('lang')] = [] - subtitles[mobj.group('lang')].append( - {'url': value, 'ext': mobj.group('ext')}) - - title = '%s - Episode %s' % (video_info['moviealbumshowname'], - video_info.get('episodeno')) - description = video_info.get('description') - duration = int_or_none(video_info.get('duration')) - series = video_info.get('moviealbumshowname') - episode_title = video_info.get('title') - episode_num = int_or_none(video_info.get('episodeno')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'series': series, - 'episode': episode_title, - 'episode_number': episode_num, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } - - # content from HK, SG - config_js = self._download_webpage( - 'http://www.viu.com' + config_js_url, video_id, note='Downloading config js') - - # try to strip away commented code which contains test urls - config_js = re.sub(r'^//.*?$', '', config_js, flags=re.MULTILINE) - config_js = re.sub(r'/\*.*?\*/', '', config_js, flags=re.DOTALL) - - # Slightly different api_url between HK and SG config.js - # http://www.viu.com/ott/hk/v1/js/config.js => '//www.viu.com/ott/hk/index.php?r=' - # http://www.viu.com/ott/sg/v1/js/config.js => 'http://www.viu.com/ott/sg/index.php?r=' - api_url = self._proto_relative_url( - self._search_regex( - r'var\s+api_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?r=)\1', - config_js, 'api_url', group='api_url'), scheme='http:') - - stream_info_url = self._proto_relative_url( - self._search_regex( - r'var\s+video_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?ccs_product_id=)\1', - config_js, 'video_url', group='video_url'), scheme='http:') - - if url.startswith('https://'): - api_url = re.sub('^http://', 'https://', api_url) - - video_info = self._download_json( - api_url + 'vod/ajax-detail&platform_flag_label=web&product_id=' + video_id, - video_id, note='Downloading video info').get('data', {}) - - ccs_product_id = video_info.get('current_product', {}).get('ccs_product_id') - - if not ccs_product_id: - raise ExtractorError('This video is not available in your region.', expected=True) - - stream_info = self._download_json( - stream_info_url + ccs_product_id, video_id, - note='Downloading stream info').get('data', {}).get('stream', {}) - - formats = [] - for vid_format, stream_url in stream_info.get('url', {}).items(): - br = int_or_none(self._search_regex( - r's(?P
[0-9]+)p', vid_format, 'bitrate', group='br')) - formats.append({ - 'format_id': vid_format, - 'url': stream_url, - 'vbr': br, - 'ext': 'mp4', - 'filesize': stream_info.get('size', {}).get(vid_format) - }) + m3u8_url = None + url_path = video_data.get('urlpathd') or video_data.get('urlpath') + tdirforwhole = video_data.get('tdirforwhole') + hls_file = video_data.get('hlsfile') + if url_path and tdirforwhole and hls_file: + m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file) + else: + m3u8_url = re.sub( + r'(/hlsc_)[a-z]+(\d+\.m3u8)', + r'\1whe\2', video_data['href']) + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') self._sort_formats(formats) subtitles = {} - if video_info.get('current_product', {}).get('subtitle', []): - for sub in video_info.get('current_product', {}).get('subtitle', []): - subtitles[sub.get('name')] = [{ - 'url': sub.get('url'), - 'ext': 'srt', - }] - - episode_info = next( - p for p in video_info.get('series', {}).get('product', []) - if p.get('product_id') == video_id) - - title = '%s - Episode %s' % (video_info.get('series', {}).get('name'), - episode_info.get('number')) - description = episode_info.get('description') - thumbnail = episode_info.get('cover_image_url') - duration = int_or_none(stream_info.get('duration')) - series = video_info.get('series', {}).get('name') - episode_title = episode_info.get('synopsis') - episode_num = int_or_none(episode_info.get('number')) + for key, value in video_data.items(): + mobj = re.match(r'^subtitle_(?P[^_]+)_(?P(vtt|srt))', key) + if not mobj: + continue + subtitles.setdefault(mobj.group('lang'), []).append({ + 'url': value, + 'ext': mobj.group('ext') + }) return { 'id': video_id, 'title': title, - 'description': description, - 'series': series, - 'episode': episode_title, - 'episode_number': episode_num, - 'duration': duration, - 'thumbnail': thumbnail, + 'description': video_data.get('description'), + 'series': video_data.get('moviealbumshowname'), + 'episode': title, + 'episode_number': int_or_none(video_data.get('episodeno')), + 'duration': int_or_none(video_data.get('duration')), 'formats': formats, 'subtitles': subtitles, } @@ -224,11 +121,11 @@ class ViuIE(ViuBaseIE): class ViuPlaylistIE(ViuBaseIE): IE_NAME = 'viu:playlist' - _VALID_URL = r'https?://www\.viu\.com/.+/listing/(?Pplaylist\-[0-9]+)' + _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P\d+)' _TEST = { 'url': 'https://www.viu.com/en/listing/playlist-22461380', 'info_dict': { - 'id': 'playlist-22461380', + 'id': '22461380', 'title': 'The Good Wife', }, 'playlist_count': 16, @@ -237,17 +134,112 @@ class ViuPlaylistIE(ViuBaseIE): def _real_extract(self, url): playlist_id = self._match_id(url) - playlist_info = self._download_json( - 'https://www.viu.com/api/container/load', playlist_id, - headers={'X-VIU-AUTH': self._get_viu_auth(playlist_id)}, - query={'appid': 'viu_desktop', 'fmt': 'json', 'id': playlist_id}, - note='Downloading playlist info').get('response', {}).get('container') + playlist_data = self._call_api( + 'container/load', playlist_id, + 'Downloading playlist info', query={ + 'appid': 'viu_desktop', + 'fmt': 'json', + 'id': 'playlist-' + playlist_id + })['container'] - name = playlist_info['title'] - entries = [ - self.url_result( - 'https://www.viu.com/en/media/%s' % item['id'], - 'Viu', item['id']) - for item in playlist_info['item'] if item['id']] + entries = [] + for item in playlist_data.get('item', []): + item_id = item.get('id') + if not item_id: + continue + item_id = compat_str(item_id) + entries.append(self.url_result( + 'viu:' + item_id, 'Viu', item_id)) - return self.playlist_result(entries, playlist_id, name) + return self.playlist_result( + entries, playlist_id, playlist_data.get('title')) + + +class ViuOTTIE(InfoExtractor): + IE_NAME = 'viu:ott' + _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I', + 'info_dict': { + 'id': '3421', + 'ext': 'mp4', + 'title': 'A New Beginning', + 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Singapore', + }, { + 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90', + 'info_dict': { + 'id': '7123', + 'ext': 'mp4', + 'title': '這就是我的生活之道', + 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Hong Kong', + }] + + def _real_extract(self, url): + country_code, video_id = re.match(self._VALID_URL, url).groups() + + product_data = self._download_json( + 'http://www.viu.com/ott/%s/index.php' % country_code, video_id, + 'Downloading video info', query={ + 'r': 'vod/ajax-detail', + 'platform_flag_label': 'web', + 'product_id': video_id, + })['data'] + + video_data = product_data.get('current_product') + if not video_data: + raise ExtractorError('This video is not available in your region.', expected=True) + + stream_data = self._download_json( + 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code, + video_id, 'Downloading stream info', query={ + 'ccs_product_id': video_data['ccs_product_id'], + })['data']['stream'] + + stream_sizes = stream_data.get('size', {}) + formats = [] + for vid_format, stream_url in stream_data.get('url', {}).items(): + height = int_or_none(self._search_regex( + r's(\d+)p', vid_format, 'height', default=None)) + formats.append({ + 'format_id': vid_format, + 'url': stream_url, + 'height': height, + 'ext': 'mp4', + 'filesize': int_or_none(stream_sizes.get(vid_format)) + }) + self._sort_formats(formats) + + subtitles = {} + for sub in video_data.get('subtitle', []): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles.setdefault(sub.get('name'), []).append({ + 'url': sub_url, + 'ext': 'srt', + }) + + title = video_data['synopsis'].strip() + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'series': product_data.get('series', {}).get('name'), + 'episode': title, + 'episode_number': int_or_none(video_data.get('number')), + 'duration': int_or_none(stream_data.get('duration')), + 'thumbnail': video_data.get('cover_image_url'), + 'formats': formats, + 'subtitles': subtitles, + } From ed7b333fbfe7bfb0ac0986c6be15d223341a7ac7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 18:24:01 +0100 Subject: [PATCH 021/586] [viu] extract supported hls manifest --- youtube_dl/extractor/viu.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index 1a81b4845..d4861a2fe 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -86,13 +86,17 @@ class ViuIE(ViuBaseIE): m3u8_url = None url_path = video_data.get('urlpathd') or video_data.get('urlpath') tdirforwhole = video_data.get('tdirforwhole') - hls_file = video_data.get('hlsfile') + # #EXT-X-BYTERANGE is not supported by native hls downloader + # and ffmpeg (#10955) + # hls_file = video_data.get('hlsfile') + hls_file = video_data.get('jwhlsfile') if url_path and tdirforwhole and hls_file: m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file) else: - m3u8_url = re.sub( - r'(/hlsc_)[a-z]+(\d+\.m3u8)', - r'\1whe\2', video_data['href']) + # m3u8_url = re.sub( + # r'(/hlsc_)[a-z]+(\d+\.m3u8)', + # r'\1whe\2', video_data['href']) + m3u8_url = video_data['href'] formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') self._sort_formats(formats) From 954529c10fd847d58374dda2a3661f0df2c1d5f6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 21:39:59 +0100 Subject: [PATCH 022/586] [brightcove:new] skip widevine classic videos --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 945cf19e8..ac5f32541 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -548,7 +548,7 @@ class BrightcoveNewIE(InfoExtractor): container = source.get('container') ext = mimetype2ext(source.get('type')) src = source.get('src') - if ext == 'ism': + if ext == 'ism' or container == 'WVM': continue elif ext == 'm3u8' or container == 'M2TS': if not src: From 5aaf012a4eacc50bb5b131f6c26027e391fc379a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 19 Dec 2016 16:27:12 +0100 Subject: [PATCH 023/586] [pbs] fix extraction for geo restricted videos(#7095) --- youtube_dl/extractor/pbs.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index b490ef74c..f1c0cd068 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -350,6 +350,15 @@ class PBSIE(InfoExtractor): 410: 'This video has expired and is no longer available for online streaming.', } + def _real_initialize(self): + cookie = (self._download_json( + 'http://localization.services.pbs.org/localize/auto/cookie/', + None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie') + if cookie: + station = self._search_regex(r'#?s=\["([^"]+)"', cookie, 'station') + if station: + self._set_cookie('.pbs.org', 'pbsol.station', station) + def _extract_webpage(self, url): mobj = re.match(self._VALID_URL, url) @@ -476,7 +485,8 @@ class PBSIE(InfoExtractor): redirect_info = self._download_json( '%s?format=json' % redirect['url'], display_id, - 'Downloading %s video url info' % (redirect_id or num)) + 'Downloading %s video url info' % (redirect_id or num), + headers=self.geo_verification_headers()) if redirect_info['status'] == 'error': raise ExtractorError( From c80db5d3988b31a1f17b2e894099ff16b6b777c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Dec 2016 23:47:45 +0700 Subject: [PATCH 024/586] [nrktv:direkte] Add support for live streams (#11488) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nrk.py | 32 +++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d75ea0c92..e44cf5d85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -655,6 +655,7 @@ from .nrk import ( NRKPlaylistIE, NRKSkoleIE, NRKTVIE, + NRKTVDirekteIE, ) from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c89aac63e..776c40b94 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor): entries = [] + conviva = data.get('convivaStatistics') or {} + live = (data.get('mediaElementType') == 'Live' or + data.get('isLive') is True or conviva.get('isLive')) + + def make_title(t): + return self._live_title(t) if live else t + media_assets = data.get('mediaAssets') if media_assets and isinstance(media_assets, list): def video_id_and_title(idx): @@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor): if not formats: continue self._sort_formats(formats) + + # Some f4m streams may not work with hdcore in fragments' URLs + for f in formats: + extra_param = f.get('extra_param_to_segment_url') + if extra_param and 'hdcore' in extra_param: + del f['extra_param_to_segment_url'] + entry_id, entry_title = video_id_and_title(num) duration = parse_duration(asset.get('duration')) subtitles = {} @@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor): }) entries.append({ 'id': asset.get('carrierId') or entry_id, - 'title': entry_title, + 'title': make_title(entry_title), 'duration': duration, 'subtitles': subtitles, 'formats': formats, @@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor): duration = parse_duration(data.get('duration')) entries = [{ 'id': video_id, - 'title': title, + 'title': make_title(title), 'duration': duration, 'formats': formats, }] @@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor): message_type, message_type)), expected=True) - conviva = data.get('convivaStatistics') or {} series = conviva.get('seriesName') or data.get('seriesTitle') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') @@ -260,6 +273,19 @@ class NRKTVIE(NRKBaseIE): }] +class NRKTVDirekteIE(NRKTVIE): + IE_DESC = 'NRK TV Direkte and NRK Radio Direkte' + _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P[^/?#&]+)' + + _TESTS = [{ + 'url': 'https://tv.nrk.no/direkte/nrk1', + 'only_matching': True, + }, { + 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus', + 'only_matching': True, + }] + + class NRKPlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' From 8ab7e6c4cc93d998a39fda9733587b58f5252999 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 20 Dec 2016 18:45:52 +0800 Subject: [PATCH 025/586] [kaltura] Improve widget ID extraction (closes #11480) --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 14 ++++++++++++++ youtube_dl/extractor/kaltura.py | 5 ++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3ed5da7fe..da0d37f80 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [kaltura] Fix wrong widget ID in some cases (#11480) + + version 2016.12.18 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3949c8bf7..a6a5f193e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -972,6 +972,20 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # Kaltura embedded, some fileExt broken (#11480) + 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics', + 'info_dict': { + 'id': '1_sgtvehim', + 'ext': 'mp4', + 'title': 'Our "Standard Models" of particle physics and cosmology', + 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861', + 'timestamp': 1321158993, + 'upload_date': '20111113', + 'uploader_id': 'kps1', + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 91bc3a0a7..c0ddad6f9 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -107,7 +107,7 @@ class KalturaIE(InfoExtractor): (?P['\"])wid(?P=q1)\s*:\s* (?P['\"])_?(?P(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4), + (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) """, webpage) or re.search( r'''(?xs) @@ -266,6 +266,9 @@ class KalturaIE(InfoExtractor): # skip for now. if f.get('fileExt') == 'chun': continue + if not f.get('fileExt') and f.get('containerFormat') == 'qt': + # QT indicates QuickTime; some videos have broken fileExt + f['fileExt'] = 'mov' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g From 7fe1592073c0a775dcd3ea7fcb400fbcfad624f7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:23:16 +0100 Subject: [PATCH 026/586] [common] fix dash codec information for mixed videos and fragment url construction(#11490) --- test/test_utils.py | 1 + youtube_dl/extractor/common.py | 11 +++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 1cdac82fc..3092db5c1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -461,6 +461,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(urljoin('http://foo.de/', None), None) self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 40f3e2323..58da27025 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -59,6 +59,7 @@ from ..utils import ( parse_m3u8_attributes, extract_attributes, parse_codecs, + urljoin, ) @@ -1631,11 +1632,6 @@ class InfoExtractor(object): extract_Initialization(segment_template) return ms_info - def combine_url(base_url, target_url): - if re.match(r'^https?://', target_url): - return target_url - return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) - mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] for period in mpd_doc.findall(_add_ns('Period')): @@ -1685,12 +1681,11 @@ class InfoExtractor(object): 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), 'asr': int_or_none(representation_attrib.get('audioSamplingRate')), 'fps': int_or_none(representation_attrib.get('frameRate')), - 'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'), - 'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'), 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, 'format_note': 'DASH %s' % content_type, 'filesize': filesize, } + f.update(parse_codecs(representation_attrib.get('codecs'))) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: @@ -1774,7 +1769,7 @@ class InfoExtractor(object): f['fragments'].append({'url': initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) for fragment in f['fragments']: - fragment['url'] = combine_url(base_url, fragment['url']) + fragment['url'] = urljoin(base_url, fragment['url']) try: existing_format = next( fo for fo in formats From d8c507c9e2879b16a429cdd7bded5d308a4cdb10 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:25:05 +0100 Subject: [PATCH 027/586] [vimeo] fix extraction for hls formats and add support for dash formats(closes #11490) --- youtube_dl/extractor/vimeo.py | 39 ++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 51c69a80c..c35cafcc6 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -92,29 +92,30 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _vimeo_sort_formats(self, formats): # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) + self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) def _parse_config(self, config, video_id): + video_data = config['video'] # Extract title - video_title = config['video']['title'] + video_title = video_data['title'] # Extract uploader, uploader_url and uploader_id - video_uploader = config['video'].get('owner', {}).get('name') - video_uploader_url = config['video'].get('owner', {}).get('url') + video_uploader = video_data.get('owner', {}).get('name') + video_uploader_url = video_data.get('owner', {}).get('url') video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None # Extract video thumbnail - video_thumbnail = config['video'].get('thumbnail') + video_thumbnail = video_data.get('thumbnail') if video_thumbnail is None: - video_thumbs = config['video'].get('thumbs') + video_thumbs = video_data.get('thumbs') if video_thumbs and isinstance(video_thumbs, dict): _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video duration - video_duration = int_or_none(config['video'].get('duration')) + video_duration = int_or_none(video_data.get('duration')) formats = [] - config_files = config['video'].get('files') or config['request'].get('files', {}) + config_files = video_data.get('files') or config['request'].get('files', {}) for f in config_files.get('progressive', []): video_url = f.get('url') if not video_url: @@ -127,10 +128,24 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'fps': int_or_none(f.get('fps')), 'tbr': int_or_none(f.get('bitrate')), }) - m3u8_url = config_files.get('hls', {}).get('url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + for files_type in ('hls', 'dash'): + for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items(): + manifest_url = cdn_data.get('url') + if not manifest_url: + continue + format_id = '%s-%s' % (files_type, cdn_name) + if files_type == 'hls': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', + 'm3u8_native', m3u8_id=format_id, + note='Downloading %s m3u8 information' % cdn_name, + fatal=False)) + elif files_type == 'dash': + formats.extend(self._extract_mpd_formats( + manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id, + 'Downloading %s MPD information' % cdn_name, + fatal=False)) subtitles = {} text_tracks = config['request'].get('text_tracks') From b1c357975d554920720db971ea4695259218d8d8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:34:46 +0100 Subject: [PATCH 028/586] [piksel] Add new extractor(closes #11246) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 6 ++ youtube_dl/extractor/piksel.py | 106 +++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 youtube_dl/extractor/piksel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e44cf5d85..11dcaf668 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -699,6 +699,7 @@ from .periscope import ( from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE from .playfm import PlayFMIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a6a5f193e..87daf83f8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -75,6 +75,7 @@ from .facebook import FacebookIE from .soundcloud import SoundcloudIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE +from .piksel import PikselIE class GenericIE(InfoExtractor): @@ -2225,6 +2226,11 @@ class GenericIE(InfoExtractor): if arkena_url: return self.url_result(arkena_url, ArkenaIE.ie_key()) + # Look for Piksel embeds + piksel_url = PikselIE._extract_url(webpage) + if piksel_url: + return self.url_result(piksel_url, PikselIE.ie_key()) + # Look for Limelight embeds mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', webpage) if mobj: diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py new file mode 100644 index 000000000..d44edcdfb --- /dev/null +++ b/youtube_dl/extractor/piksel.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + dict_get, + int_or_none, + unescapeHTML, + parse_iso8601, +) + + +class PikselIE(InfoExtractor): + _VALID_URL = r'https?://player\.piksel\.com/v/(?P[a-z0-9]+)' + _TEST = { + 'url': 'http://player.piksel.com/v/nv60p12f', + 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'info_dict': { + 'id': 'nv60p12f', + 'ext': 'mp4', + 'title': 'فن الحياة - الحلقة 1', + 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', + 'timestamp': 1465231790, + 'upload_date': '20160606', + } + } + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src=["\'](?P(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + app_token = self._search_regex( + r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token') + response = self._download_json( + 'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, + video_id, query={ + 'v': video_id + })['response'] + failure = response.get('failure') + if failure: + raise ExtractorError(response['failure']['reason'], expected=True) + video_data = response['WsProgramResponse']['program']['asset'] + title = video_data['title'] + + formats = [] + + m3u8_url = dict_get(video_data, [ + 'm3u8iPadURL', + 'ipadM3u8Url', + 'm3u8AndroidURL', + 'm3u8iPhoneURL', + 'iphoneM3u8Url']) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + asset_type = dict_get(video_data, ['assetType', 'asset_type']) + for asset_file in video_data.get('assetFiles', []): + # TODO: extract rtmp formats + http_url = asset_file.get('http_url') + if not http_url: + continue + tbr = None + vbr = int_or_none(asset_file.get('videoBitrate'), 1024) + abr = int_or_none(asset_file.get('audioBitrate'), 1024) + if asset_type == 'video': + tbr = vbr + abr + elif asset_type == 'audio': + tbr = abr + + format_id = ['http'] + if tbr: + format_id.append(compat_str(tbr)) + + formats.append({ + 'format_id': '-'.join(format_id), + 'url': unescapeHTML(http_url), + 'vbr': vbr, + 'abr': abr, + 'width': int_or_none(asset_file.get('videoWidth')), + 'height': int_or_none(asset_file.get('videoHeight')), + 'filesize': int_or_none(asset_file.get('filesize')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailUrl'), + 'timestamp': parse_iso8601(video_data.get('dateadd')), + 'formats': formats, + } From f59d1146c0ca523ec03a4c7df8987e82ee5054b2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:52:46 +0100 Subject: [PATCH 029/586] [uktvplay] Add new extractor(closes #11027) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/uktvplay.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/uktvplay.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 11dcaf668..fcfe87f6f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1024,6 +1024,7 @@ from .udemy import ( UdemyCourseIE ) from .udn import UDNEmbedIE +from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE from .unistra import UnistraIE from .uol import UOLIE diff --git a/youtube_dl/extractor/uktvplay.py b/youtube_dl/extractor/uktvplay.py new file mode 100644 index 000000000..2137502a1 --- /dev/null +++ b/youtube_dl/extractor/uktvplay.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class UKTVPlayIE(InfoExtractor): + _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P\d+)' + _TEST = { + 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001', + 'md5': '', + 'info_dict': { + 'id': '2117008346001', + 'ext': 'mp4', + 'title': 'Pincers', + 'description': 'Pincers', + 'uploader_id': '1242911124001', + 'upload_date': '20130124', + 'timestamp': 1359049267, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download MPD manifest'] + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % video_id, + 'BrightcoveNew', video_id) From 3d6761ba92b96934bdbf1792dfacd0368d7b236c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Dec 2016 21:51:11 +0700 Subject: [PATCH 030/586] [vbox7] Fix extraction (closes #11494) --- youtube_dl/extractor/vbox7.py | 75 ++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index a1e0851b7..7fb7574ad 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -4,11 +4,22 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import urlencode_postdata +from ..utils import ExtractorError class Vbox7IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P[\da-fA-F]+)' + _VALID_URL = r'''(?x) + https?:// + (?:[^/]+\.)?vbox7\.com/ + (?: + play:| + (?: + emb/external\.php| + player/ext\.swf + )\?.*?\bvid= + ) + (?P[\da-fA-F]+) + ''' _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -16,6 +27,14 @@ class Vbox7IE(InfoExtractor): 'id': '0946fff23c', 'ext': 'mp4', 'title': 'Борисов: Притеснен съм за бъдещето на България', + 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1470982814, + 'upload_date': '20160812', + 'uploader': 'zdraveibulgaria', + }, + 'params': { + 'proxy': '127.0.0.1:8118', }, }, { 'url': 'http://vbox7.com/play:249bb972c2', @@ -29,6 +48,9 @@ class Vbox7IE(InfoExtractor): }, { 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', 'only_matching': True, + }, { + 'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1', + 'only_matching': True, }] @staticmethod @@ -42,33 +64,40 @@ class Vbox7IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://vbox7.com/play:%s' % video_id, video_id) + response = self._download_json( + 'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id, + video_id) - title = self._html_search_regex( - r'(.+?)', webpage, 'title').split('/')[0].strip() + if 'error' in response: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']), expected=True) - video_url = self._search_regex( - r'src\s*:\s*(["\'])(?P.+?.mp4.*?)\1', - webpage, 'video url', default=None, group='url') + video = response['options'] - thumbnail_url = self._og_search_thumbnail(webpage) - - if not video_url: - info_response = self._download_webpage( - 'http://vbox7.com/play/magare.do', video_id, - 'Downloading info webpage', - data=urlencode_postdata({'as3': '1', 'vid': video_id}), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - final_url, thumbnail_url = map( - lambda x: x.split('=')[1], info_response.split('&')) + title = video['title'] + video_url = video['src'] if '/na.mp4' in video_url: self.raise_geo_restricted() - return { + uploader = video.get('uploader') + + webpage = self._download_webpage( + 'http://vbox7.com/play:%s' % video_id, video_id, fatal=None) + + info = {} + + if webpage: + info = self._search_json_ld( + webpage.replace('"/*@context"', '"@context"'), video_id) + + info.update({ 'id': video_id, - 'url': self._proto_relative_url(video_url, 'http:'), 'title': title, - 'thumbnail': thumbnail_url, - } + 'url': video_url, + 'uploader': uploader, + 'thumbnail': self._proto_relative_url( + info.get('thumbnail') or self._og_search_thumbnail(webpage), + 'http:'), + }) + return info From 1f6a79b0af356a800fd878ef4e8fb180071fa5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Dec 2016 22:36:06 +0700 Subject: [PATCH 031/586] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index da0d37f80..25eda233d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,21 @@ version +Core +* [extractor/common] Improve fragment URL construction for DASH media +* [extractor/common] Fix codec information extraction for mixed audio/video + DASH media (#11490) + Extractors +* [vbox7] Fix extraction (#11494) ++ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027) ++ [piksel] Add support for player.piksel.com (#11246) ++ [vimeo] Add support for DASH formats +* [vimeo] Fix extraction for HLS formats (#11490) * [kaltura] Fix wrong widget ID in some cases (#11480) ++ [nrktv:direkte] Add support for live streams (#11488) +* [pbs] Fix extraction for geo restricted videos (#7095) +* [brightcove:new] Skip widevine classic videos ++ [viu] Add support for viu.com (#10607, #11329) version 2016.12.18 From 90352a80412eabf639046348c0acd5669005120f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Dec 2016 22:39:39 +0700 Subject: [PATCH 032/586] release 2016.12.20 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 6 ++++++ youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fffdefa45..5f95a149e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.18 +[debug] youtube-dl version 2016.12.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 25eda233d..c14bce7d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.20 Core * [extractor/common] Improve fragment URL construction for DASH media diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b55044520..955aa5c68 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -517,6 +517,7 @@ - **NRKPlaylist** - **NRKSkole**: NRK Skole - **NRKTV**: NRK TV and NRK Radio + - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte - **ntv.ru** - **Nuvid** - **NYTimes** @@ -551,6 +552,7 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** + - **Piksel** - **Pinkbike** - **Pladform** - **play.fm** @@ -803,6 +805,7 @@ - **udemy** - **udemy:course** - **UDNEmbed**: 聯合影音 + - **UKTVPlay** - **Unistra** - **uol.com.br** - **uplynk** @@ -867,6 +870,9 @@ - **Vimple**: Vimple - one-click video hosting - **Vine** - **vine:user** + - **Viu** + - **viu:ott** + - **viu:playlist** - **Vivo**: vivo.sx - **vk**: VK - **vk:uservideos**: VK - User's Videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f06d4b52..a0c5c35da 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.18' +__version__ = '2016.12.20' From e029c43bd43ddde448b150b1e0f226e4dd8c9b90 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 18:22:57 +0100 Subject: [PATCH 033/586] [laola1] add support for another extraction scenario(closes #11460) --- youtube_dl/extractor/laola1tv.py | 56 +++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 37e38dba0..3190b187c 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -9,18 +9,41 @@ from ..utils import ( xpath_element, xpath_text, urljoin, + update_url_query, ) class Laola1TvEmbedIE(InfoExtractor): + IE_NAME = 'laola1tv:embed' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P\d+)' + _TEST = { + # flashvars.premium = "false"; + 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024', + 'info_dict': { + 'id': '708065', + 'ext': 'mp4', + 'title': 'MA Long CHN - FAN Zhendong CHN', + 'uploader': 'ITTF - International Table Tennis Federation', + 'upload_date': '20161211', + }, + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) flash_vars = self._search_regex( r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars') - get_flashvar = lambda x: self._search_regex(r'%s\s*:\s*"([^"]+)"' % x, flash_vars, x) + + def get_flashvar(x, *args, **kwargs): + flash_var = self._search_regex( + r'%s\s*:\s*"([^"]+)"' % x, + flash_vars, x, default=None) + if not flash_var: + flash_var = self._search_regex([ + r'flashvars\.%s\s*=\s*"([^"]+)"' % x, + r'%s\s*=\s*"([^"]+)"' % x], + webpage, x, *args, **kwargs) + return flash_var hd_doc = self._download_xml( 'http://www.laola1.tv/server/hd_video.php', video_id, query={ @@ -34,16 +57,26 @@ class Laola1TvEmbedIE(InfoExtractor): _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) title = _v('title', fatal=True) - data_abo = urlencode_postdata( - dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) - token_url = self._download_json( - 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', - video_id, query={ - 'videoId': _v('id'), - 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), - 'label': _v('label'), - 'area': _v('area'), - }, data=data_abo)['data']['stream-access'][0] + token_url = None + premium = get_flashvar('premium', default=None) + if premium: + token_url = update_url_query( + _v('url', fatal=True), { + 'timestamp': get_flashvar('timestamp'), + 'auth': get_flashvar('auth'), + }) + else: + data_abo = urlencode_postdata( + dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) + token_url = self._download_json( + 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', + video_id, query={ + 'videoId': _v('id'), + 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), + 'label': _v('label'), + 'area': _v('area'), + }, data=data_abo)['data']['stream-access'][0] + token_doc = self._download_xml( token_url, video_id, 'Downloading token', headers=self.geo_verification_headers()) @@ -75,6 +108,7 @@ class Laola1TvEmbedIE(InfoExtractor): class Laola1TvIE(InfoExtractor): + IE_NAME = 'laola1tv' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', From bfa1073e113cb7fa8a362112d4eae6dede197efa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 19:49:45 +0100 Subject: [PATCH 034/586] [uplynk] force downloading using hls native downloader(closes #11496) --- youtube_dl/downloader/hls.py | 3 +++ youtube_dl/extractor/uplynk.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7373ec05f..4989abce1 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -65,6 +65,9 @@ class HlsFD(FragmentFD): s = manifest.decode('utf-8', 'ignore') if not self.can_download(s, info_dict): + if info_dict.get('extra_param_to_segment_url'): + self.report_error('pycrypto not found. Please install it.') + return False self.report_warning( 'hlsnative has detected features it does not support, ' 'extraction will be delegated to ffmpeg') diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index 2cd22cf8a..f06bf5b12 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -30,7 +30,9 @@ class UplynkIE(InfoExtractor): def _extract_uplynk_info(self, uplynk_content_url): path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups() display_id = video_id or external_id - formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') + formats = self._extract_m3u8_formats( + 'http://content.uplynk.com/%s.m3u8' % path, + display_id, 'mp4', 'm3u8_native') if session_id: for f in formats: f['extra_param_to_segment_url'] = 'pbs=' + session_id From ae806db6286dc76de6ee53f8f7351ed99bf29bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 21 Dec 2016 22:39:05 +0700 Subject: [PATCH 035/586] [vbox7] Skip malformed JSON-LD (closes #11501) --- youtube_dl/extractor/vbox7.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 7fb7574ad..429893e38 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -89,7 +89,8 @@ class Vbox7IE(InfoExtractor): if webpage: info = self._search_json_ld( - webpage.replace('"/*@context"', '"@context"'), video_id) + webpage.replace('"/*@context"', '"@context"'), video_id, + fatal=False) info.update({ 'id': video_id, From 9c5b5f211535a832af4437930afcb6f4b64748c4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Dec 2016 18:45:01 +0100 Subject: [PATCH 036/586] [rtl2] extract more formats and metadata --- youtube_dl/extractor/rtl2.py | 57 +++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index cb4ee8803..721ee733c 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -2,7 +2,9 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor +from ..utils import int_or_none class RTL2IE(InfoExtractor): @@ -13,7 +15,7 @@ class RTL2IE(InfoExtractor): 'id': 'folge-203-0', 'ext': 'f4v', 'title': 'GRIP sucht den Sommerkönig', - 'description': 'Matthias, Det und Helge treten gegeneinander an.' + 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' }, 'params': { # rtmp download @@ -25,7 +27,7 @@ class RTL2IE(InfoExtractor): 'id': '21040-anna-erwischt-alex', 'ext': 'mp4', 'title': 'Anna erwischt Alex!', - 'description': 'Anna ist Alex\' Tochter bei Köln 50667.' + 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' }, 'params': { # rtmp download @@ -52,34 +54,47 @@ class RTL2IE(InfoExtractor): r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') vivi_id = self._html_search_regex( r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') - info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id - info = self._download_json(info_url, video_id) + info = self._download_json( + 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php', + video_id, query={ + 'vico_id': vico_id, + 'vivi_id': vivi_id, + }) video_info = info['video'] title = video_info['titel'] - description = video_info.get('beschreibung') - thumbnail = video_info.get('image') - download_url = video_info['streamurl'] - download_url = download_url.replace('\\', '') - stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL') - rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + formats = [] + + rtmp_url = video_info.get('streamurl') + if rtmp_url: + rtmp_url = rtmp_url.replace('\\', '') + stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL') + rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': stream_url, + 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', + 'page_url': url, + 'flash_version': 'LNX 11,2,202,429', + 'rtmp_conn': rtmp_conn, + 'no_resume': True, + 'preference': 1, + }) + + m3u8_url = video_info.get('streamurl_hls') + if m3u8_url: + formats.extend(self._extract_akamai_formats(m3u8_url, video_id)) - formats = [{ - 'url': download_url, - 'play_path': stream_url, - 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', - 'page_url': url, - 'flash_version': 'LNX 11,2,202,429', - 'rtmp_conn': rtmp_conn, - 'no_resume': True, - }] self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'thumbnail': thumbnail, - 'description': description, + 'thumbnail': video_info.get('image'), + 'description': video_info.get('beschreibung'), + 'duration': int_or_none(video_info.get('duration')), 'formats': formats, } From f120646f044db8c93976afcfcc7f76f221cb0241 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Dec 2016 20:50:10 +0100 Subject: [PATCH 037/586] [viu] pass geo verification headers to auth request --- youtube_dl/extractor/viu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index d4861a2fe..3fd889c8e 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -25,7 +25,7 @@ class ViuBaseIE(InfoExtractor): 'userid': 'guest', 'useridtype': 'guest', 'ver': '1.0' - }) + }, headers=self.geo_verification_headers()) self._auth_token = viu_auth_res.info()['X-VIU-AUTH'] def _call_api(self, path, *args, **kwargs): From f5a723a78a2d4e395fca89e5b3bed53334b9385e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Dec 2016 20:59:03 +0100 Subject: [PATCH 038/586] [theplatform] pass geo verification headers to smil request(closes #10146) --- youtube_dl/extractor/theplatform.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index cfbf7f4e1..0405bd6b0 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -33,7 +33,9 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) class ThePlatformBaseIE(OnceIE): def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'): - meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'}) + meta = self._download_xml( + smil_url, video_id, note=note, query={'format': 'SMIL'}, + headers=self.geo_verification_headers()) error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') if error_element is not None and error_element.attrib['src'].startswith( 'http://link.theplatform.com/s/errorFiles/Unavailable.'): From a07588369fd330c747464d9ea75ddb861e322a2d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 22 Dec 2016 10:02:56 +0100 Subject: [PATCH 039/586] [common] improve detection for video only formats and m3u8 manifest(fixes #11507) --- youtube_dl/extractor/common.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 58da27025..07d101aef 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1225,7 +1225,7 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }] - audio_groups = set() + audio_in_video_stream = {} last_info = {} last_media = {} for line in m3u8_doc.splitlines(): @@ -1235,10 +1235,11 @@ class InfoExtractor(object): media = parse_m3u8_attributes(line) media_type = media.get('TYPE') if media_type in ('VIDEO', 'AUDIO'): + group_id = media.get('GROUP-ID') media_url = media.get('URI') if media_url: format_id = [] - for v in (media.get('GROUP-ID'), media.get('NAME')): + for v in (group_id, media.get('NAME')): if v: format_id.append(v) f = { @@ -1251,12 +1252,15 @@ class InfoExtractor(object): } if media_type == 'AUDIO': f['vcodec'] = 'none' - audio_groups.add(media['GROUP-ID']) + if group_id and not audio_in_video_stream.get(group_id): + audio_in_video_stream[group_id] = False formats.append(f) else: # When there is no URI in EXT-X-MEDIA let this tag's # data be used by regular URI lines below last_media = media + if media_type == 'AUDIO' and group_id: + audio_in_video_stream[group_id] = True elif line.startswith('#') or not line.strip(): continue else: @@ -1300,7 +1304,7 @@ class InfoExtractor(object): 'abr': abr, }) f.update(parse_codecs(last_info.get('CODECS'))) - if last_info.get('AUDIO') in audio_groups: + if audio_in_video_stream.get(last_info.get('AUDIO')) is False: # TODO: update acodec for for audio only formats with the same GROUP-ID f['acodec'] = 'none' formats.append(f) From ab3091feda7da5d04f1757dd0a2e26aa919086b5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 22 Dec 2016 10:02:56 +0100 Subject: [PATCH 040/586] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index c14bce7d8..6a08ce230 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +* [extractor/common] Improve detection of video-only formats in m3u8 + manifests (#11507) + +Extractors ++ [theplatform] Pass geo verification headers to SMIL request (#10146) ++ [viu] Pass geo verification headers to auth request +* [rtl2] Extract more formats and metadata +* [vbox7] Skip malformed JSON-LD (#11501) +* [uplynk] Force downloading using native HLS downloader (#11496) ++ [laola1] Add support for another extraction scenario (#11460) + + version 2016.12.20 Core From 5e77c0b58eb4bae0804f639fa509d91b29a273ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Dec 2016 22:52:54 +0700 Subject: [PATCH 041/586] release 2016.12.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5f95a149e..693d787e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.20** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.20 +[debug] youtube-dl version 2016.12.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6a08ce230..c45441345 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.22 Core * [extractor/common] Improve detection of video-only formats in m3u8 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 955aa5c68..0b3d794c6 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -365,8 +365,8 @@ - **kuwo:singer**: 酷我音乐 - 歌手 - **kuwo:song**: 酷我音乐 - **la7.it** - - **Laola1Tv** - - **Laola1TvEmbed** + - **laola1tv** + - **laola1tv:embed** - **LCI** - **Lcp** - **LcpPlay** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a0c5c35da..3082ebf66 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.20' +__version__ = '2016.12.22' From 19f37ce4b1e4251a3f53f8a5d3d0605d2526bc81 Mon Sep 17 00:00:00 2001 From: hub2git Date: Thu, 22 Dec 2016 18:25:39 -0800 Subject: [PATCH 042/586] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 840932298..e85fa1555 100644 --- a/README.md +++ b/README.md @@ -932,7 +932,7 @@ If you want to create a build of youtube-dl yourself, you'll need If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. -After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): +After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) 2. Check out the source code with: From e7ac722d6276198c8b88986f06a4e3c55366cb58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Dec 2016 22:01:22 +0700 Subject: [PATCH 043/586] [README.md] Add missing protocols to format selection section --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e85fa1555..71d37e8b0 100644 --- a/README.md +++ b/README.md @@ -638,7 +638,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use - `container`: Name of the container format - - `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native` + - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. From 12da830993f6f42ca309037da0eea161dcca90ec Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Fri, 23 Dec 2016 23:58:09 +0100 Subject: [PATCH 044/586] [acast] Fix broken audio URL and timestamp extraction Before first bling was used now we look for the first bling with type BlingAudio. Before publishingDate was a ms unix timestamp now it is iso8601. --- youtube_dl/extractor/acast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 94ce88c83..eb8d1b669 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -8,6 +8,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + parse_iso8601, OnDemandPagedList, ) @@ -22,7 +23,8 @@ class ACastIE(InfoExtractor): 'id': '57de3baa-4bb0-487e-9418-2692c1277a34', 'ext': 'mp3', 'title': '"Where Are You?": Taipei 101, Taiwan', - 'timestamp': 1196172000000, + 'timestamp': 1196172000, + 'upload_date': '20071127', 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'duration': 211, } @@ -35,11 +37,11 @@ class ACastIE(InfoExtractor): return { 'id': compat_str(cast_data['id']), 'display_id': display_id, - 'url': cast_data['blings'][0]['audio'], + 'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0], 'title': cast_data['name'], 'description': cast_data.get('description'), 'thumbnail': cast_data.get('image'), - 'timestamp': int_or_none(cast_data.get('publishingDate')), + 'timestamp': parse_iso8601(cast_data.get('publishingDate')), 'duration': int_or_none(cast_data.get('duration')), } From 846fd69bacfa739375922491f8ba6cee99941335 Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Sat, 24 Dec 2016 11:59:43 +0100 Subject: [PATCH 045/586] [acast] Add test with multiple blings --- youtube_dl/extractor/acast.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index eb8d1b669..6dace3051 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -16,7 +16,8 @@ from ..utils import ( class ACastIE(InfoExtractor): IE_NAME = 'acast' _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P[^/]+)/(?P[^/#?]+)' - _TEST = { + _TESTS = [{ + # test with one bling 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan', 'md5': 'ada3de5a1e3a2a381327d749854788bb', 'info_dict': { @@ -28,7 +29,20 @@ class ACastIE(InfoExtractor): 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'duration': 211, } - } + }, { + # test with multiple blings + 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', + 'md5': '55c0097badd7095f494c99a172f86501', + 'info_dict': { + 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', + 'ext': 'mp3', + 'title': '2. Raggarmordet - Röster ur det förflutna', + 'timestamp': 1477346700, + 'upload_date': '20161024', + 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', + 'duration': 2797, + } + }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() From d1cd7e0ed9a562dbee6dfcb3601fdecc4158640b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 24 Dec 2016 15:00:23 +0100 Subject: [PATCH 046/586] Credit @wader for #11521 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4a6f7e13f..9e092cccc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -190,3 +190,4 @@ John Hawkinson Rich Leeper Zhong Jianxin Thor77 +Mattias Wadman From 264e77c406a3b14f15aafcd036524cb6fe86aa20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Dec 2016 22:10:54 +0700 Subject: [PATCH 047/586] [twitch] Add support for rechat messages (closes #11524) --- youtube_dl/extractor/twitch.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8de8ec65b..bbf071da3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -22,6 +22,7 @@ from ..utils import ( orderedSet, parse_duration, parse_iso8601, + update_url_query, urlencode_postdata, ) @@ -279,6 +280,18 @@ class TwitchVodIE(TwitchItemBaseIE): if 't' in query: info['start_time'] = parse_duration(query['t'][0]) + if info.get('timestamp') is not None: + info['subtitles'] = { + 'rechat': [{ + 'url': update_url_query( + 'https://rechat.twitch.tv/rechat-messages', { + 'video_id': 'v%s' % item_id, + 'start': info['timestamp'], + }), + 'ext': 'json', + }], + } + return info From 53a664edf4bf713df0159e604bbc131dde5ed1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Dec 2016 22:46:27 +0700 Subject: [PATCH 048/586] [brightcove:legacy] Improve embeds detection (closes #11523) --- youtube_dl/extractor/brightcove.py | 13 ++++++++----- youtube_dl/extractor/generic.py | 24 +++++++++++++++++++++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index ac5f32541..aa2923ccf 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor): """Return a list of all Brightcove URLs from the webpage """ url_m = re.search( - r']+ + content=([\'"])(?Phttps?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2 + ''', webpage) if url_m: - url = unescapeHTML(url_m.group(1)) + url = unescapeHTML(url_m.group('url')) # Some sites don't add it, we can't download with this url, for example: # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ - if 'playerKey' in url or 'videoId' in url: + if 'playerKey' in url or 'videoId' in url or 'idVideo' in url: return [url] matches = re.findall( @@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) # Change the 'videoId' and others field to '@videoPlayer' - url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) # Change bckey (used by bcove.me urls) to playerKey url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 87daf83f8..79d10a1d1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -344,10 +344,10 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' in the - # http requests { + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { @@ -361,6 +361,24 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'add_id': ['BrightcoveLegacy'], + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + }, { # https://github.com/rg3/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', From 4606c34e19af395a1ddd31d2941d4ccd90e5e279 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 25 Dec 2016 01:50:50 +0800 Subject: [PATCH 049/586] [extractor/common] Allow non-lang in subtitles' keys See 264e77c406a3b14f15aafcd036524cb6fe86aa20 --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 07d101aef..6fa7c334e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -189,9 +189,10 @@ class InfoExtractor(object): uploader_url: Full URL to a personal webpage of the video uploader. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format - {language: subformats}. "subformats" is a list sorted from - lower to higher preference, each element is a dictionary - with the "ext" entry and one of: + {tag: subformats}. "tag" is usually a language code, and + "subformats" is a list sorted from lower to higher + preference, each element is a dictionary with the "ext" + entry and one of: * "data": The subtitles file contents * "url": A URL pointing to the subtitles file "ext" will be calculated from URL if missing From b63005f5afb164f8660c23ab62962287eb1e1c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Dec 2016 04:02:29 +0700 Subject: [PATCH 050/586] [rtve:live] Fix extraction (closes #11529) --- youtube_dl/extractor/rtve.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 6a43b036e..746677a24 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -209,7 +209,10 @@ class RTVELiveIE(InfoExtractor): title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time) vidplayer_id = self._search_regex( - r'playerId=player([0-9]+)', webpage, 'internal video ID') + (r'playerId=player([0-9]+)', + r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', + r'data-id=["\'](\d+)'), + webpage, 'internal video ID') png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id png = self._download_webpage(png_url, video_id, 'Downloading url information') m3u8_url = _decrypt_url(png) From 51378d359e790b1c4462c0577fa48dda97dc4c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Dec 2016 23:01:52 +0700 Subject: [PATCH 051/586] [xhamster] Fix duration extraction (closes #11549) --- youtube_dl/extractor/xhamster.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index bd8e1af2e..36a8c9840 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -5,8 +5,8 @@ import re from .common import InfoExtractor from ..utils import ( dict_get, - float_or_none, int_or_none, + parse_duration, unified_strdate, ) @@ -22,7 +22,7 @@ class XHamsterIE(InfoExtractor): 'title': 'FemaleAgent Shy beauty takes the bait', 'upload_date': '20121014', 'uploader': 'Ruseful2011', - 'duration': 893.52, + 'duration': 893, 'age_limit': 18, }, }, { @@ -33,7 +33,7 @@ class XHamsterIE(InfoExtractor): 'title': 'Britney Spears Sexy Booty', 'upload_date': '20130914', 'uploader': 'jojo747400', - 'duration': 200.48, + 'duration': 200, 'age_limit': 18, }, 'params': { @@ -48,7 +48,7 @@ class XHamsterIE(InfoExtractor): 'title': '....', 'upload_date': '20160208', 'uploader': 'parejafree', - 'duration': 72.0, + 'duration': 72, 'age_limit': 18, }, 'params': { @@ -101,9 +101,9 @@ class XHamsterIE(InfoExtractor): r''']+poster=(?P["'])(?P.+?)(?P=q)[^>]*>'''], webpage, 'thumbnail', fatal=False, group='thumbnail') - duration = float_or_none(self._search_regex( - r'(["\'])duration\1\s*:\s*(["\'])(?P.+?)\2', - webpage, 'duration', fatal=False, group='duration')) + duration = parse_duration(self._search_regex( + r'Runtime:\s*\s*([\d:]+)', webpage, + 'duration', fatal=False)) view_count = int_or_none(self._search_regex( r'content=["\']User(?:View|Play)s:(\d+)', From 963bd5ecfc389a02406766c46dff8b8b90f705e6 Mon Sep 17 00:00:00 2001 From: Arjan Verwer Date: Wed, 14 Dec 2016 21:43:54 +0100 Subject: [PATCH 052/586] [showroomlive] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/showroomlive.py | 80 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 youtube_dl/extractor/showroomlive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fcfe87f6f..9c1026ff7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -825,6 +825,7 @@ from .shared import ( VivoIE, ) from .sharesix import ShareSixIE +from .showroomlive import ShowroomLiveIE from .sina import SinaIE from .sixplay import SixPlayIE from .skynewsarabia import ( diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dl/extractor/showroomlive.py new file mode 100644 index 000000000..8bfae510b --- /dev/null +++ b/youtube_dl/extractor/showroomlive.py @@ -0,0 +1,80 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError, compat_urlparse + + +class ShowroomLiveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P[0-9a-zA-Z_]+)' + _TEST = { + 'url': 'https://www.showroom-live.com/48_Nana_Okada', + 'skip': 'Only live broadcasts, can\'t predict test case.', + 'info_dict': { + 'id': '48_Nana_Okada', + 'ext': 'mp4', + 'uploader_id': '48_Nana_Okada', + } + } + + def _real_extract(self, url): + broadcaster_id = self._match_id(url) + + # There is no showroom on these pages. + if broadcaster_id in ['onlive', 'timetable', 'event', 'campaign', 'news', 'ranking']: + raise ExtractorError('URL %s does not contain a showroom' % url) + + # Retrieve the information we need + webpage = self._download_webpage(url, broadcaster_id) + room_id = self._search_regex(r'profile\?room_id\=(\d+)', webpage, 'room_id') + room_url = compat_urlparse.urljoin(url, "/api/room/profile?room_id=%s") % room_id + room = self._download_json(room_url, broadcaster_id) + + is_live = room.get('is_onlive') + if not is_live: + raise ExtractorError('%s their showroom is not live' % broadcaster_id) + + # Prepare and return the information + uploader = room.get('performer_name') or broadcaster_id # performer_name can be an empty string. + title = room.get('room_name', room.get('main_name', "%s's Showroom" % uploader)) + + return { + 'is_live': is_live, + 'id': str(room.get('live_id')), + 'timestamp': room.get('current_live_started_at'), + 'uploader': uploader, + 'uploader_id': broadcaster_id, + 'title': title, + 'description': room.get('description'), + 'formats': self._extract_formats(url, broadcaster_id, room_id) + } + + def _extract_formats(self, url, broadcaster_id, room_id): + formats = [] + + stream_url = compat_urlparse.urljoin(url, "/api/live/streaming_url?room_id=%s") % room_id + streaming_url_list = self._download_json(stream_url, broadcaster_id).get('streaming_url_list', []) + + for stream in streaming_url_list: + if stream.get('type') == "hls": + formats.extend(self._extract_m3u8_formats( + stream.get('url'), + broadcaster_id, + ext='mp4', + m3u8_id='hls', + preference=stream.get('quality', 100), + live=True + )) + elif stream.get('type') == 'rtmp': + url = stream.get('url') + '/' + stream.get('stream_name') + formats.append({ + 'url': url, + 'format_id': 'rtmp', + 'protocol': 'rtmp', + 'ext': 'flv', + 'preference': stream.get('quality', 100), + 'format_note': stream.get('label') + }) + + self._sort_formats(formats) + return formats From df086e74e20e8de6f027c6c551887f5c22b8c637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Dec 2016 00:12:35 +0700 Subject: [PATCH 053/586] [showroomlive] Improve (closes #11458) --- youtube_dl/extractor/showroomlive.py | 120 ++++++++++++++------------- 1 file changed, 62 insertions(+), 58 deletions(-) diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dl/extractor/showroomlive.py index 8bfae510b..efd9d561f 100644 --- a/youtube_dl/extractor/showroomlive.py +++ b/youtube_dl/extractor/showroomlive.py @@ -2,79 +2,83 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ExtractorError, compat_urlparse +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + urljoin, +) -class ShowroomLiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P[0-9a-zA-Z_]+)' +class ShowRoomLiveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P[^/?#&]+)' _TEST = { 'url': 'https://www.showroom-live.com/48_Nana_Okada', - 'skip': 'Only live broadcasts, can\'t predict test case.', - 'info_dict': { - 'id': '48_Nana_Okada', - 'ext': 'mp4', - 'uploader_id': '48_Nana_Okada', - } + 'only_matching': True, } def _real_extract(self, url): broadcaster_id = self._match_id(url) - # There is no showroom on these pages. - if broadcaster_id in ['onlive', 'timetable', 'event', 'campaign', 'news', 'ranking']: - raise ExtractorError('URL %s does not contain a showroom' % url) - - # Retrieve the information we need webpage = self._download_webpage(url, broadcaster_id) - room_id = self._search_regex(r'profile\?room_id\=(\d+)', webpage, 'room_id') - room_url = compat_urlparse.urljoin(url, "/api/room/profile?room_id=%s") % room_id - room = self._download_json(room_url, broadcaster_id) + + room_id = self._search_regex( + (r'SrGlobal\.roomId\s*=\s*(\d+)', + r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id') + + room = self._download_json( + urljoin(url, '/api/room/profile?room_id=%s' % room_id), + broadcaster_id) is_live = room.get('is_onlive') - if not is_live: - raise ExtractorError('%s their showroom is not live' % broadcaster_id) + if is_live is not True: + raise ExtractorError('%s is offline' % broadcaster_id, expected=True) - # Prepare and return the information - uploader = room.get('performer_name') or broadcaster_id # performer_name can be an empty string. - title = room.get('room_name', room.get('main_name', "%s's Showroom" % uploader)) + uploader = room.get('performer_name') or broadcaster_id + title = room.get('room_name') or room.get('main_name') or uploader + + streaming_url_list = self._download_json( + urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id), + broadcaster_id)['streaming_url_list'] + + formats = [] + for stream in streaming_url_list: + stream_url = stream.get('url') + if not stream_url: + continue + stream_type = stream.get('type') + if stream_type == 'hls': + m3u8_formats = self._extract_m3u8_formats( + stream_url, broadcaster_id, ext='mp4', m3u8_id='hls', + live=True) + for f in m3u8_formats: + f['quality'] = int_or_none(stream.get('quality', 100)) + formats.extend(m3u8_formats) + elif stream_type == 'rtmp': + stream_name = stream.get('stream_name') + if not stream_name: + continue + formats.append({ + 'url': stream_url, + 'play_path': stream_name, + 'page_url': url, + 'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf', + 'rtmp_live': True, + 'ext': 'flv', + 'format_id': 'rtmp', + 'format_note': stream.get('label'), + 'quality': int_or_none(stream.get('quality', 100)), + }) + self._sort_formats(formats) return { - 'is_live': is_live, - 'id': str(room.get('live_id')), - 'timestamp': room.get('current_live_started_at'), + 'id': compat_str(room.get('live_id') or broadcaster_id), + 'title': self._live_title(title), + 'description': room.get('description'), + 'timestamp': int_or_none(room.get('current_live_started_at')), 'uploader': uploader, 'uploader_id': broadcaster_id, - 'title': title, - 'description': room.get('description'), - 'formats': self._extract_formats(url, broadcaster_id, room_id) + 'view_count': int_or_none(room.get('view_num')), + 'formats': formats, + 'is_live': True, } - - def _extract_formats(self, url, broadcaster_id, room_id): - formats = [] - - stream_url = compat_urlparse.urljoin(url, "/api/live/streaming_url?room_id=%s") % room_id - streaming_url_list = self._download_json(stream_url, broadcaster_id).get('streaming_url_list', []) - - for stream in streaming_url_list: - if stream.get('type') == "hls": - formats.extend(self._extract_m3u8_formats( - stream.get('url'), - broadcaster_id, - ext='mp4', - m3u8_id='hls', - preference=stream.get('quality', 100), - live=True - )) - elif stream.get('type') == 'rtmp': - url = stream.get('url') + '/' + stream.get('stream_name') - formats.append({ - 'url': url, - 'format_id': 'rtmp', - 'protocol': 'rtmp', - 'ext': 'flv', - 'preference': stream.get('quality', 100), - 'format_note': stream.get('label') - }) - - self._sort_formats(formats) - return formats From 6cf261d882ab296a170473b82737832720fba84a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Dec 2016 00:32:23 +0700 Subject: [PATCH 054/586] [freevideo] Remove extractor (closes #11515) Handled by generic extractor --- youtube_dl/extractor/extractors.py | 3 +-- youtube_dl/extractor/freevideo.py | 38 ------------------------------ 2 files changed, 1 insertion(+), 40 deletions(-) delete mode 100644 youtube_dl/extractor/freevideo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9c1026ff7..811db6219 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -320,7 +320,6 @@ from .francetv import ( ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .freevideo import FreeVideoIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE @@ -825,7 +824,7 @@ from .shared import ( VivoIE, ) from .sharesix import ShareSixIE -from .showroomlive import ShowroomLiveIE +from .showroomlive import ShowRoomLiveIE from .sina import SinaIE from .sixplay import SixPlayIE from .skynewsarabia import ( diff --git a/youtube_dl/extractor/freevideo.py b/youtube_dl/extractor/freevideo.py deleted file mode 100644 index cd8423a6f..000000000 --- a/youtube_dl/extractor/freevideo.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class FreeVideoIE(InfoExtractor): - _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P[^.]+)\.html(?:$|[?#])' - - _TEST = { - 'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html', - 'info_dict': { - 'id': 'vysukany-zadecek-22033', - 'ext': 'mp4', - 'title': 'vysukany-zadecek-22033', - 'age_limit': 18, - }, - 'skip': 'Blocked outside .cz', - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage, handle = self._download_webpage_handle(url, video_id) - if '//www.czechav.com/' in handle.geturl(): - raise ExtractorError( - 'Access to freevideo is blocked from your location', - expected=True) - - video_url = self._search_regex( - r'\s+url: "(http://[a-z0-9-]+.cdn.freevideo.cz/stream/.*?/video.mp4)"', - webpage, 'video URL') - - return { - 'id': video_id, - 'url': video_url, - 'title': video_id, - 'age_limit': 18, - } From 9cdb0a338d3c5c8c7e86ba9039c950c8501b3888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Dec 2016 04:21:49 +0700 Subject: [PATCH 055/586] [vk] Extract from playerParams (closes #11555) --- youtube_dl/extractor/vk.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 1990e7093..1639cd73a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -378,12 +378,22 @@ class VKIE(VKBaseIE): if not data: data = self._parse_json( self._search_regex( - r'\s*({.+?})\s*', info_page, 'json'), - video_id)['player']['params'][0] + r'\s*({.+?})\s*', info_page, 'json', default='{}'), + video_id) + if data: + data = data['player']['params'][0] + + if not data: + data = self._parse_json( + self._search_regex( + r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page, + 'player params'), + video_id)['params'][0] title = unescapeHTML(data['md_title']) - if data.get('live') == 2: + is_live = data.get('live') == 2 + if is_live: title = self._live_title(title) timestamp = unified_timestamp(self._html_search_regex( @@ -408,8 +418,9 @@ class VKIE(VKBaseIE): }) elif format_id == 'hls': formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, - fatal=False, live=True)) + format_url, video_id, 'mp4', + entry_protocol='m3u8' if is_live else 'm3u8_native', + m3u8_id=format_id, fatal=False, live=is_live)) elif format_id == 'rtmp': formats.append({ 'format_id': format_id, @@ -427,6 +438,7 @@ class VKIE(VKBaseIE): 'duration': data.get('duration'), 'timestamp': timestamp, 'view_count': view_count, + 'is_live': is_live, } From 424ed37ec4dd7df16426f9f2f20fb85398bd68c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Dec 2016 04:31:19 +0700 Subject: [PATCH 056/586] [vk] Fix postlive videos extraction --- youtube_dl/extractor/vk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 1639cd73a..6e6c3a0e1 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -245,7 +245,7 @@ class VKIE(VKBaseIE): }, }, { - # finished live stream, live_mp4 + # finished live stream, postlive_mp4 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', 'md5': '90d22d051fccbbe9becfccc615be6791', 'info_dict': { @@ -258,7 +258,7 @@ class VKIE(VKBaseIE): }, }, { - # live stream, hls and rtmp links,most likely already finished live + # live stream, hls and rtmp links, most likely already finished live # stream by the time you are reading this comment 'url': 'https://vk.com/video-140332_456239111', 'only_matching': True, @@ -392,6 +392,8 @@ class VKIE(VKBaseIE): title = unescapeHTML(data['md_title']) + # 2 = live + # 3 = post live (finished live) is_live = data.get('live') == 2 if is_live: title = self._live_title(title) @@ -408,7 +410,8 @@ class VKIE(VKBaseIE): for format_id, format_url in data.items(): if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): continue - if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'): + if (format_id.startswith(('url', 'cache')) or + format_id in ('extra_data', 'live_mp4', 'postlive_mp4')): height = int_or_none(self._search_regex( r'^(?:url|cache)(\d+)', format_id, 'height', default=None)) formats.append({ From 490da94edf39ecc0544e23833a7983c98a9feecf Mon Sep 17 00:00:00 2001 From: Chris Gavin Date: Sat, 31 Dec 2016 12:17:52 +0000 Subject: [PATCH 057/586] [devscripts/buildserver] Remove unreachable except block --- devscripts/buildserver.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index fc99c3213..1344b4d87 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -424,8 +424,6 @@ class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler): self.send_header('Content-Length', len(msg)) self.end_headers() self.wfile.write(msg) - except HTTPError as e: - self.send_response(e.code, str(e)) else: self.send_response(500, 'Unknown build method "%s"' % action) else: From e746021577f27fbc458daa0797f81a9d57c51a2c Mon Sep 17 00:00:00 2001 From: Bagira Date: Thu, 3 Nov 2016 12:53:45 +0100 Subject: [PATCH 058/586] [videa] Add extractor --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/videa.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/videa.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 811db6219..9898b9803 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1064,6 +1064,10 @@ from .vice import ( from .viceland import VicelandIE from .vidbit import VidbitIE from .viddler import ViddlerIE +from .videa import ( + VideaIE, + VideaEmbedIE, +) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py new file mode 100644 index 000000000..3d2e7e31c --- /dev/null +++ b/youtube_dl/extractor/videa.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + xpath_element, + xpath_text, + xpath_attr, + urlencode_postdata, + unescapeHTML, +) + + +class VideaIE(InfoExtractor): + _VALID_URL = r'https?://(?:.+?\.)?videa\.hu/videok/(?P[^#?]+)' + _TESTS = [{ + 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ', + 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', + 'info_dict': { + 'id': '8YfIAjxwWGwT8HVQ', + 'display_id': '8YfIAjxwWGwT8HVQ', + 'ext': 'mp4', + 'title': 'Az őrült kígyász 285 kígyót enged szabadon', + 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', + 'duration': 21, + }, + }, { + 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._download_json("http://videa.hu/oembed/?" + urlencode_postdata({"url": url.split('?')[0], "format": "json"}), video_id) + video_url = self._search_regex( + r'src="(.+?)"', video_data.get('html'), 'embed url') + + return { + '_type': 'url_transparent', + 'url': video_url, + 'ie_key': 'VideaEmbed' + } + +class VideaEmbedIE(InfoExtractor): + _VALID_URL = r'(?Phttps?:)(?P//(?:.+?\.)?videa\.hu)/player(?:\?v=|/v/)(?P[^/#?]+)'; + _TESTS = [{ + 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', + 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', + 'info_dict': { + 'id': '8YfIAjxwWGwT8HVQ', + 'ext': 'mp4', + 'title': 'Az őrült kígyász 285 kígyót enged szabadon', + 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', + 'duration': 21 + }, + }, { + 'url': 'http://videa.hu/player?v=jAHDWfWSJH5XuFhH', + 'only_matching': True, + }]; + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//(?:.+?\.)?videa\.hu/player(?:\?v=|/v/)[^/#?]+)\1', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + protocol, base_url, display_id = re.search(self._VALID_URL, url).groups() + xml = self._download_xml(protocol + base_url + "/flvplayer_get_video_xml.php?v=" + display_id, display_id) + + medias = [] + + for xml_media in xml.findall('video') + xml.findall('audio'): + media_url = protocol + xpath_attr(xml_media, 'versions/version', 'video_url') + media = { + 'id': display_id, + 'ext': 'mp4', + 'title': xpath_text(xml_media, 'title', 'title', True), + 'duration': parse_duration(xpath_text(xml_media, 'duration')), + 'thumbnail': protocol + xpath_text(xml_media, 'still', 'still', True), + 'url': media_url, + } + medias.append(media) + + if len(medias) > 1: + self._downloader.report_warning( + 'found multiple medias; please ' + 'report this with the video URL to http://yt-dl.org/bug') + if not medias: + raise ExtractorError('No media entries found') + return medias[0] From 69677f3ee216b99ada7ac97ddd4d506245541b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 21:58:15 +0700 Subject: [PATCH 059/586] [videa] Improve and simplify (closes #8181, closes #11133) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/videa.py | 121 +++++++++++++---------------- 2 files changed, 57 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9898b9803..3b0bff0d7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1064,10 +1064,7 @@ from .vice import ( from .viceland import VicelandIE from .vidbit import VidbitIE from .viddler import ViddlerIE -from .videa import ( - VideaIE, - VideaEmbedIE, -) +from .videa import VideaIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index 3d2e7e31c..039add86b 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -1,28 +1,32 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, - parse_duration, + mimetype2ext, + parse_codecs, xpath_element, xpath_text, - xpath_attr, - urlencode_postdata, - unescapeHTML, ) class VideaIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?videa\.hu/videok/(?P[^#?]+)' + _VALID_URL = r'''(?x) + https?:// + videa\.hu/ + (?: + videok/(?:[^/]+/)*[^?#&]+-| + player\?.*?\bv=| + player/v/ + ) + (?P[^?#&]+) + ''' _TESTS = [{ 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ', 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', 'info_dict': { 'id': '8YfIAjxwWGwT8HVQ', - 'display_id': '8YfIAjxwWGwT8HVQ', 'ext': 'mp4', 'title': 'Az őrült kígyász 285 kígyót enged szabadon', 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', @@ -31,68 +35,55 @@ class VideaIE(InfoExtractor): }, { 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', 'only_matching': True, + }, { + 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', + 'only_matching': True, + }, { + 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json("http://videa.hu/oembed/?" + urlencode_postdata({"url": url.split('?')[0], "format": "json"}), video_id) - video_url = self._search_regex( - r'src="(.+?)"', video_data.get('html'), 'embed url') + info = self._download_xml( + 'http://videa.hu/videaplayer_get_xml.php', video_id, + query={'v': video_id}) + + video = xpath_element(info, './/video', 'video', fatal=True) + sources = xpath_element(info, './/video_sources', 'sources', fatal=True) + + title = xpath_text(video, './title', fatal=True) + + formats = [] + for source in sources.findall('./video_source'): + source_url = source.text + if not source_url: + continue + f = parse_codecs(source.get('codecs')) + f.update({ + 'url': source_url, + 'ext': mimetype2ext(source.get('mimetype')) or 'mp4', + 'format_id': source.get('name'), + 'width': int_or_none(source.get('width')), + 'height': int_or_none(source.get('height')), + }) + formats.append(f) + self._sort_formats(formats) + + thumbnail = xpath_text(video, './poster_src') + duration = int_or_none(xpath_text(video, './duration')) + + age_limit = None + is_adult = xpath_text(video, './is_adult_content', default=None) + if is_adult: + age_limit = 18 if is_adult == '1' else 0 return { - '_type': 'url_transparent', - 'url': video_url, - 'ie_key': 'VideaEmbed' + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': age_limit, + 'formats': formats, } - -class VideaEmbedIE(InfoExtractor): - _VALID_URL = r'(?Phttps?:)(?P//(?:.+?\.)?videa\.hu)/player(?:\?v=|/v/)(?P[^/#?]+)'; - _TESTS = [{ - 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', - 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', - 'info_dict': { - 'id': '8YfIAjxwWGwT8HVQ', - 'ext': 'mp4', - 'title': 'Az őrült kígyász 285 kígyót enged szabadon', - 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', - 'duration': 21 - }, - }, { - 'url': 'http://videa.hu/player?v=jAHDWfWSJH5XuFhH', - 'only_matching': True, - }]; - - @staticmethod - def _extract_url(webpage): - mobj = re.search( - r']+src=(["\'])(?P(?:https?:)?//(?:.+?\.)?videa\.hu/player(?:\?v=|/v/)[^/#?]+)\1', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - protocol, base_url, display_id = re.search(self._VALID_URL, url).groups() - xml = self._download_xml(protocol + base_url + "/flvplayer_get_video_xml.php?v=" + display_id, display_id) - - medias = [] - - for xml_media in xml.findall('video') + xml.findall('audio'): - media_url = protocol + xpath_attr(xml_media, 'versions/version', 'video_url') - media = { - 'id': display_id, - 'ext': 'mp4', - 'title': xpath_text(xml_media, 'title', 'title', True), - 'duration': parse_duration(xpath_text(xml_media, 'duration')), - 'thumbnail': protocol + xpath_text(xml_media, 'still', 'still', True), - 'url': media_url, - } - medias.append(media) - - if len(medias) > 1: - self._downloader.report_warning( - 'found multiple medias; please ' - 'report this with the video URL to http://yt-dl.org/bug') - if not medias: - raise ExtractorError('No media entries found') - return medias[0] From e186a9ec0394400a6996f98d197d0d14937a60c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 22:04:29 +0700 Subject: [PATCH 060/586] [videa] Add support for videa embeds --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ youtube_dl/extractor/videa.py | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 79d10a1d1..8503966a7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -76,6 +76,7 @@ from .soundcloud import SoundcloudIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE +from .videa import VideaIE class GenericIE(InfoExtractor): @@ -1422,6 +1423,15 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Videa embeds + 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html', + 'info_dict': { + 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style', + 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum', + }, + 'playlist_mincount': 2, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2358,6 +2368,11 @@ class GenericIE(InfoExtractor): if dbtv_urls: return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + # Look for Videa embeds + videa_urls = VideaIE._extract_urls(webpage) + if videa_urls: + return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index 039add86b..311df58f4 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -43,6 +45,12 @@ class VideaIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r']+src=(["\'])(?P(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url) From 3540fe262f6131b220d3af7ee8017e8407e44319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Haberth=C3=BCr?= Date: Mon, 12 Dec 2016 12:13:34 +0100 Subject: [PATCH 061/586] [README.md] Fix spelling and harmonize line length --- README.md | 14 +++++--------- youtube_dl/options.py | 4 ++-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 71d37e8b0..42ca50181 100644 --- a/README.md +++ b/README.md @@ -44,11 +44,7 @@ Or with [MacPorts](https://www.macports.org/): Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). # DESCRIPTION -**youtube-dl** is a command-line program to download videos from -YouTube.com and a few more sites. It requires the Python interpreter, version -2.6, 2.7, or 3.2+, and it is not platform specific. It should work on -your Unix box, on Windows or on Mac OS X. It is released to the public domain, -which means you can modify it, redistribute it or use it however you like. +**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. youtube-dl [OPTIONS] URL [URL...] @@ -744,7 +740,7 @@ Most people asking this question are not aware that youtube-dl now defaults to d ### I get HTTP error 402 when trying to download a video. What's this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### Do I need any other programs? @@ -756,7 +752,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [ Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/). -### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. +### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser. It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`. @@ -1037,7 +1033,7 @@ Assume at this point `meta`'s layout is: } ``` -Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like: +Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like: ```python description = meta.get('summary') # correct @@ -1252,7 +1248,7 @@ We are then presented with a very complicated request when the original problem Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones. -In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. +In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. ### Is anyone going to need the feature? diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 53497fbc6..4144454e5 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -341,7 +341,7 @@ def parseOpts(overrideArguments=None): authentication.add_option( '-2', '--twofactor', dest='twofactor', metavar='TWOFACTOR', - help='Two-factor auth code') + help='Two-factor authentication code') authentication.add_option( '-n', '--netrc', action='store_true', dest='usenetrc', default=False, @@ -469,7 +469,7 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--xattr-set-filesize', dest='xattr_set_filesize', action='store_true', - help='Set file xattribute ytdl.filesize with expected filesize (experimental)') + help='Set file xattribute ytdl.filesize with expected file size (experimental)') downloader.add_option( '--hls-prefer-native', dest='hls_prefer_native', action='store_true', default=None, From 9a0f999585e392f04541e4f6d5c8d26cf18d31a8 Mon Sep 17 00:00:00 2001 From: Robert Smith Date: Sun, 25 Dec 2016 14:44:10 -0800 Subject: [PATCH 062/586] [twitch] Added support for player.twitch.tv URLs (closes #11535) --- youtube_dl/extractor/twitch.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bbf071da3..09efa505b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -206,7 +206,8 @@ class TwitchChapterIE(TwitchItemBaseIE): class TwitchVodIE(TwitchItemBaseIE): IE_NAME = 'twitch:vod' - _VALID_URL = r'%s/[^/]+/v/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL_BASE = r'https?://(?:www\.|player\.)?twitch\.tv' + _VALID_URL = r'%s/(?:[^/]+/v/|\?video=v)(?P\d+)' % _VALID_URL_BASE _ITEM_TYPE = 'vod' _ITEM_SHORTCUT = 'v' @@ -229,6 +230,26 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, + }, { + # player.twitch.tv URL + 'url': 'http://player.twitch.tv/?video=v6528877&t=5m10s', + 'info_dict': { + 'id': 'v6528877', + 'ext': 'mp4', + 'title': 'LCK Summer Split - Week 6 Day 1', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 17208, + 'timestamp': 1435131709, + 'upload_date': '20150624', + 'uploader': 'Riot Games', + 'uploader_id': 'riotgames', + 'view_count': int, + 'start_time': 310, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # Untitled broadcast (title is None) 'url': 'http://www.twitch.tv/belkao_o/v/11230755', From 3f1ce16876931f2aa57b566736d875a760f35f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 22:40:42 +0700 Subject: [PATCH 063/586] [twitch:vod] Improve _VALID_URL (closes #11537) --- youtube_dl/extractor/twitch.py | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 09efa505b..681f1b676 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -206,8 +206,14 @@ class TwitchChapterIE(TwitchItemBaseIE): class TwitchVodIE(TwitchItemBaseIE): IE_NAME = 'twitch:vod' - _VALID_URL_BASE = r'https?://(?:www\.|player\.)?twitch\.tv' - _VALID_URL = r'%s/(?:[^/]+/v/|\?video=v)(?P\d+)' % _VALID_URL_BASE + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?twitch\.tv/[^/]+/v/| + player\.twitch\.tv/\?.*?\bvideo=v + ) + (?P\d+) + ''' _ITEM_TYPE = 'vod' _ITEM_SHORTCUT = 'v' @@ -230,26 +236,6 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, - }, { - # player.twitch.tv URL - 'url': 'http://player.twitch.tv/?video=v6528877&t=5m10s', - 'info_dict': { - 'id': 'v6528877', - 'ext': 'mp4', - 'title': 'LCK Summer Split - Week 6 Day 1', - 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 17208, - 'timestamp': 1435131709, - 'upload_date': '20150624', - 'uploader': 'Riot Games', - 'uploader_id': 'riotgames', - 'view_count': int, - 'start_time': 310, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { # Untitled broadcast (title is None) 'url': 'http://www.twitch.tv/belkao_o/v/11230755', @@ -270,6 +256,9 @@ class TwitchVodIE(TwitchItemBaseIE): 'skip_download': True, }, 'skip': 'HTTP Error 404: Not Found', + }, { + 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877', + 'only_matching': True, }] def _real_extract(self, url): From e66dca5e4aa9e55762a5541dc1b0a332208451ee Mon Sep 17 00:00:00 2001 From: Fabian Stahl Date: Wed, 14 Sep 2016 17:28:17 +0200 Subject: [PATCH 064/586] Add option --config-location A configfile can now be passed to youtube_dl. undo changes Raise parser error if file not found, change to user_conf change metavar hand helptext for --configfile Fix help for --configfile Update help for --configfile Numbering placeholder in configfile error msg minor fix Change option --configfile top --config-file Fix -config-file error --- youtube_dl/__init__.py | 1 + youtube_dl/options.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6850d95e1..1b459a706 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -405,6 +405,7 @@ def _real_main(argv=None): 'postprocessor_args': postprocessor_args, 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, + 'configfile': opts.configfile, } diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 4144454e5..4941abe83 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -178,6 +178,10 @@ def parseOpts(overrideArguments=None): 'When given in the global configuration file /etc/youtube-dl.conf: ' 'Do not read the user configuration in ~/.config/youtube-dl/config ' '(%APPDATA%/youtube-dl/config.txt on Windows)') + general.add_option( + '--config-file', + dest='configfile', metavar='FILE', + help='File to read configuration from.') general.add_option( '--flat-playlist', action='store_const', dest='extract_flat', const='in_playlist', @@ -845,19 +849,29 @@ def parseOpts(overrideArguments=None): return conf command_line_conf = compat_conf(sys.argv[1:]) + opts, args = parser.parse_args(command_line_conf) if '--ignore-config' in command_line_conf: system_conf = [] user_conf = [] + elif '--config-file' in command_line_conf: + if not os.path.isfile(opts.configfile): + parser.error('Config file {0} not found.'.format(opts.configfile)) + else: + user_conf = _readOptions(opts.configfile) + system_conf = [] + else: system_conf = _readOptions('/etc/youtube-dl.conf') if '--ignore-config' in system_conf: user_conf = [] else: user_conf = _readUserConf() + argv = system_conf + user_conf + command_line_conf opts, args = parser.parse_args(argv) + if opts.verbose: write_string('[debug] System config: ' + repr(_hide_login_info(system_conf)) + '\n') write_string('[debug] User config: ' + repr(_hide_login_info(user_conf)) + '\n') From b6ee45e9fada3b1179e7e549ebf1dd8ad0bbca7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 23:41:37 +0700 Subject: [PATCH 065/586] Improve custom config support (closes #10648) --- youtube_dl/__init__.py | 3 +-- youtube_dl/options.py | 40 ++++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1b459a706..dfa4ae839 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -405,8 +405,7 @@ def _real_main(argv=None): 'postprocessor_args': postprocessor_args, 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, - 'configfile': opts.configfile, - + 'config_location': opts.config_location, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 4941abe83..0eb4924b6 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -179,9 +179,9 @@ def parseOpts(overrideArguments=None): 'Do not read the user configuration in ~/.config/youtube-dl/config ' '(%APPDATA%/youtube-dl/config.txt on Windows)') general.add_option( - '--config-file', - dest='configfile', metavar='FILE', - help='File to read configuration from.') + '--config-location', + dest='config_location', metavar='PATH', + help='Location of the configuration file; either the path to the config or its containing directory.') general.add_option( '--flat-playlist', action='store_const', dest='extract_flat', const='in_playlist', @@ -851,30 +851,30 @@ def parseOpts(overrideArguments=None): command_line_conf = compat_conf(sys.argv[1:]) opts, args = parser.parse_args(command_line_conf) - if '--ignore-config' in command_line_conf: - system_conf = [] - user_conf = [] - elif '--config-file' in command_line_conf: - if not os.path.isfile(opts.configfile): - parser.error('Config file {0} not found.'.format(opts.configfile)) - else: - user_conf = _readOptions(opts.configfile) - system_conf = [] + system_conf = user_conf = custom_conf = [] + if '--config-location' in command_line_conf: + location = compat_expanduser(opts.config_location) + if os.path.isdir(location): + location = os.path.join(location, 'youtube-dl.conf') + if not os.path.exists(location): + parser.error('config-location %s does not exist.' % location) + custom_conf = _readOptions(location) + elif '--ignore-config' in command_line_conf: + pass else: system_conf = _readOptions('/etc/youtube-dl.conf') - if '--ignore-config' in system_conf: - user_conf = [] - else: + if '--ignore-config' not in system_conf: user_conf = _readUserConf() argv = system_conf + user_conf + command_line_conf - opts, args = parser.parse_args(argv) - if opts.verbose: - write_string('[debug] System config: ' + repr(_hide_login_info(system_conf)) + '\n') - write_string('[debug] User config: ' + repr(_hide_login_info(user_conf)) + '\n') - write_string('[debug] Command-line args: ' + repr(_hide_login_info(command_line_conf)) + '\n') + for conf_label, conf in ( + ('System config', system_conf), + ('User config', user_conf), + ('Custom config', custom_conf), + ('Command-line args', command_line_conf)): + write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf)))) return parser, opts, args From 1ea0b727c4394612f49a6f781ed2028af17d86ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 23:53:30 +0700 Subject: [PATCH 066/586] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index c45441345..24ded2ca2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core ++ Introduce --config-location option for custom configuration files (#6745, + #10648) + +Extractors ++ [twitch] Add support for player.twitch.tv (#11535, #11537) ++ [videa] Add support for videa.hu (#8181, #11133) +* [vk] Fix postlive videos extraction +* [vk] Extract from playerParams (#11555) +- [freevideo] Remove extractor (#11515) ++ [showroomlive] Add support for showroom-live.com (#11458) +* [xhamster] Fix duration extraction (#11549) +* [rtve:live] Fix extraction (#11529) +* [brightcove:legacy] Improve embeds detection (#11523) ++ [twitch] Add support for rechat messages (#11524) +* [acast] Fix audio and timestamp extraction (#11521) + + version 2016.12.22 Core From dc48a3540487d6e3405721a586a4237dd9e5d19c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 31 Dec 2016 23:58:41 +0700 Subject: [PATCH 067/586] release 2016.12.31 --- ChangeLog | 2 +- README.md | 7 +++++-- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 24ded2ca2..93336e34d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.31 Core + Introduce --config-location option for custom configuration files (#6745, diff --git a/README.md b/README.md index 42ca50181..3bfa2c680 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo configuration in ~/.config/youtube- dl/config (%APPDATA%/youtube-dl/config.txt on Windows) + --config-location PATH Location of the configuration file; either + the path to the config or its containing + directory. --flat-playlist Do not extract the videos of a playlist, only list them. --mark-watched Mark videos watched (YouTube only) @@ -183,7 +186,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo of SIZE. --playlist-reverse Download playlist videos in reverse order --xattr-set-filesize Set file xattribute ytdl.filesize with - expected filesize (experimental) + expected file size (experimental) --hls-prefer-native Use the native HLS downloader instead of ffmpeg --hls-prefer-ffmpeg Use ffmpeg instead of the native HLS @@ -350,7 +353,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo -u, --username USERNAME Login with this account ID -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively. - -2, --twofactor TWOFACTOR Two-factor auth code + -2, --twofactor TWOFACTOR Two-factor authentication code -n, --netrc Use .netrc authentication data --video-password PASSWORD Video password (vimeo, smotri, youku) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0b3d794c6..5598210cd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -263,7 +263,6 @@ - **francetvinfo.fr** - **Freesound** - **freespeech.org** - - **FreeVideo** - **Funimation** - **FunnyOrDie** - **Fusion** @@ -659,6 +658,7 @@ - **Shahid** - **Shared**: shared.sx - **ShareSix** + - **ShowRoomLive** - **Sina** - **SixPlay** - **skynewsarabia:article** @@ -834,6 +834,7 @@ - **ViceShow** - **Vidbit** - **Viddler** + - **Videa** - **video.google:search**: Google Video search - **video.mit.edu** - **VideoDetective** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3082ebf66..ca2258ca6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.22' +__version__ = '2016.12.31' From 1f766b6e7b72124dc3479c7cce270ba2b329a8f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Jan 2017 02:46:18 +0700 Subject: [PATCH 068/586] [arkena] Add support for video.arkena.com (closes #11568) --- youtube_dl/extractor/arkena.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py index d45cae301..50ffb442d 100644 --- a/youtube_dl/extractor/arkena.py +++ b/youtube_dl/extractor/arkena.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, + ExtractorError, float_or_none, int_or_none, mimetype2ext, @@ -15,7 +17,13 @@ from ..utils import ( class ArkenaIE(InfoExtractor): - _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P[^/]+)/[^/]+/(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + video\.arkena\.com/play2/embed/player\?| + play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P[^/]+)/[^/]+/(?P\d+) + ) + ''' _TESTS = [{ 'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411', 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', @@ -37,6 +45,9 @@ class ArkenaIE(InfoExtractor): }, { 'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/', 'only_matching': True, + }, { + 'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled', + 'only_matching': True, }] @staticmethod @@ -53,6 +64,14 @@ class ArkenaIE(InfoExtractor): video_id = mobj.group('id') account_id = mobj.group('account_id') + # Handle http://video.arkena.com/play2/embed/player URL + if not video_id: + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + video_id = qs.get('mediaId', [None])[0] + account_id = qs.get('accountId', [None])[0] + if not video_id or not account_id: + raise ExtractorError('Invalid URL', expected=True) + playlist = self._download_json( 'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_' % (video_id, account_id), From e5e19379be8d2d721178e9b54780cdcce369939c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Jan 2017 15:11:49 +0700 Subject: [PATCH 069/586] [ISSUE_TEMPLATE_tmpl.md] Clarify example URLs constraints for site support request --- .github/ISSUE_TEMPLATE_tmpl.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index ab9968129..df79503d3 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -50,6 +50,8 @@ $ youtube-dl -v - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. + --- ### Description of your *issue*, suggested solution and other information From 966815e1390386948957ec5f269f470e87b85b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Jan 2017 21:25:25 +0700 Subject: [PATCH 070/586] [nrktv:episodes] Add support for episodes (#11571) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nrk.py | 66 ++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b0bff0d7..3017bf56c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -655,6 +655,7 @@ from .nrk import ( NRKSkoleIE, NRKTVIE, NRKTVDirekteIE, + NRKTVEpisodesIE, ) from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 776c40b94..ea7be005a 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -207,7 +207,15 @@ class NRKIE(NRKBaseIE): class NRKTVIE(NRKBaseIE): IE_DESC = 'NRK TV and NRK Radio' - _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' + _EPISODE_RE = r'(?P[a-zA-Z]{4}\d{8})' + _VALID_URL = r'''(?x) + https?:// + (?:tv|radio)\.nrk(?:super)?\.no/ + (?:serie/[^/]+|program)/ + (?![Ee]pisodes)%s + (?:/\d{2}-\d{2}-\d{4})? + (?:\#del=(?P\d+))? + ''' % _EPISODE_RE _API_HOST = 'psapi-we.nrk.no' _TESTS = [{ @@ -286,9 +294,30 @@ class NRKTVDirekteIE(NRKTVIE): }] -class NRKPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' +class NRKPlaylistBaseIE(InfoExtractor): + def _extract_description(self, webpage): + pass + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('nrk:%s' % video_id, NRKIE.ie_key()) + for video_id in re.findall(self._ITEM_RE, webpage) + ] + + playlist_title = self. _extract_title(webpage) + playlist_description = self._extract_description(webpage) + + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) + + +class NRKPlaylistIE(NRKPlaylistBaseIE): + _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' + _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"' _TESTS = [{ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 'info_dict': { @@ -307,23 +336,28 @@ class NRKPlaylistIE(InfoExtractor): 'playlist_count': 5, }] - def _real_extract(self, url): - playlist_id = self._match_id(url) + def _extract_title(self, webpage): + return self._og_search_title(webpage, fatal=False) - webpage = self._download_webpage(url, playlist_id) + def _extract_description(self, webpage): + return self._og_search_description(webpage) - entries = [ - self.url_result('nrk:%s' % video_id, 'NRK') - for video_id in re.findall( - r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', - webpage) - ] - playlist_title = self._og_search_title(webpage) - playlist_description = self._og_search_description(webpage) +class NRKTVEpisodesIE(NRKPlaylistBaseIE): + _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P\d+)' + _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE + _TESTS = [{ + 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031', + 'info_dict': { + 'id': '69031', + 'title': 'Nytt på nytt, sesong: 201210', + }, + 'playlist_count': 4, + }] - return self.playlist_result( - entries, playlist_id, playlist_title, playlist_description) + def _extract_title(self, webpage): + return self._html_search_regex( + r'

([^<]+)

', webpage, 'title', fatal=False) class NRKSkoleIE(InfoExtractor): From 7fc06b6a15d556aca99fa74b9ce9fec9affc03e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Jan 2017 23:36:12 +0700 Subject: [PATCH 071/586] [README.md] Update link to available YoutubeDL options --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3bfa2c680..bd01e2a10 100644 --- a/README.md +++ b/README.md @@ -1148,7 +1148,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From eec45445a8b56219168d57cdc24b54b00a9f389d Mon Sep 17 00:00:00 2001 From: Oskar Cieslik Date: Mon, 2 Jan 2017 00:22:10 +0100 Subject: [PATCH 072/586] Update Readme: Set home in sudo pip install Hi, it's not always a default behaviour, but when you use `sudo pip` you most likely may want to use `-H` flag to set HOME value to directory of the target user :) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd01e2a10..46aa78f66 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex You can also use pip: - sudo pip install --upgrade youtube-dl + sudo -H pip install --upgrade youtube-dl This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. From 295eac6165bae681576db8b8da32437822041b4d Mon Sep 17 00:00:00 2001 From: RPing Date: Sat, 13 Feb 2016 22:30:15 +0800 Subject: [PATCH 073/586] [cntv] Add extractor --- youtube_dl/extractor/cntv.py | 123 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 124 insertions(+) create mode 100644 youtube_dl/extractor/cntv.py diff --git a/youtube_dl/extractor/cntv.py b/youtube_dl/extractor/cntv.py new file mode 100644 index 000000000..0b26ad32d --- /dev/null +++ b/youtube_dl/extractor/cntv.py @@ -0,0 +1,123 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_request + +class CntvIE(InfoExtractor): + IE_NAME = 'cntv' + IE_DESC = '央视网' + _VALID_URL = r'''(?x) + (?: + http://(?:ent|tv|sports|english|chunwan)\.(?:cntv|cctv)\.(?:com|cn)/ + (?: + (?:video|special)/(?:[_A-Za-z0-9]+)|(?P\d+)/(?P\d+)/(?P\d+) + ) + ) + /(?P[A-Za-z0-9]+)(index)?(?:\.shtml|) + ''' + + _TESTS = [{ + 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', + 'md5': 'd0f8c3e043fe43534517ed47c831fd4e', + 'info_dict': { + 'id': '5ecdbeab623f4973b40ff25f18b174e8', + 'ext': 'flv', + 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)', + 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95', + 'uploader': 'songjunjie' + } + },{ + 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', + 'md5': '3bde627de65b6bad064f3e67ec9e71a1', + 'info_dict': { + 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', + 'ext': 'flv', + 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)', + 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。', + 'uploader': 'shujun' + } + },{ + 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', + 'md5': 'f37a56d5f16647eae24c3e618f8d23a2', + 'info_dict': { + 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', + 'ext': 'flv', + 'title': 'NHnews008 ANNUAL POLITICAL SEASON', + 'description': 'Four Comprehensives', + 'uploader': 'zhangyunlei' + } + },{ + 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', + 'md5': '37ad2407b5f28336ddf26aacea570ee5', + 'info_dict': { + 'id': '45336758dd474ef39dc6887f115e0375', + 'ext': 'flv', + 'title': '《爱国之恋》中国梦新歌展播 综艺视频精切', + 'description': 'md5:856836ae2660f000e458ead597b1e2bd', + 'uploader': 'baiyuqing' + } + },{ + 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44', + 'md5': 'd7188ecc2a2f8447c70023895724c31c', + 'info_dict': { + 'id': 'e0210d949f113ddfb38d31f00a4e5c44', + 'ext': 'flv', + 'title': '《传奇故事》 20150409 金钱“魔术”(下)', + 'description': '本期节目主要内容: 2014年10月23日凌晨,浙江余杭警方出动2000多名警力,一举摧毁1040阳光工程传销组织,抓获65名涉嫌传销的组织头目,教育遣返400多名参与传销人员。', + 'uploader': '陆银凯' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_id_regex = [r'var guid[ ="]+([a-zA-Z0-9]+)"', + r'"videoCenterId"[ ,"]+([a-zA-Z0-9]+)"', + r'"changePlayer\(\'([a-zA-Z0-9]+)\'\)\"'] + video_id = self._search_regex(video_id_regex, webpage, 'video_id', default=None) + + if video_id is None: + video_id = self._match_id(url) + + # refer to youku.py + def retrieve_data(req_url, note): + req = compat_urllib_request.Request(req_url) + + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + req.add_header('Ytdl-request-proxy', cn_verification_proxy) + + raw_data = self._download_json(req, video_id, note=note) + return raw_data + + get_info_url = \ + 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do' + \ + '?pid=%s' % video_id + \ + '&tz=-8' + \ + '&from=000tv' + \ + '&url=%s' % url + \ + '&idl=32' + \ + '&idlr=32' + \ + '&modifyed=false' + + data = retrieve_data( + get_info_url, + 'Downloading JSON metadata') + + title = data.get('title') + uploader = data.get('editer_name') + hls_url = data.get('hls_url') + formats = self._extract_m3u8_formats(hls_url, video_id, 'flv') + self._sort_formats(formats) + + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'title': title, + 'uploader': uploader, + 'formats': formats, + 'description': description, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3017bf56c..4cd12562e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -178,6 +178,7 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) +from .cntv import CntvIE from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ( From ce7ccb1caa1c6122329779adfe5993dfd47c4021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Jan 2017 22:53:05 +0700 Subject: [PATCH 074/586] [cctv] Improve and merge with cntv (closes #879, closes #6753, closes #8541) --- youtube_dl/extractor/cctv.py | 156 +++++++++++++++++++++++------ youtube_dl/extractor/cntv.py | 123 ----------------------- youtube_dl/extractor/extractors.py | 1 - 3 files changed, 128 insertions(+), 152 deletions(-) delete mode 100644 youtube_dl/extractor/cntv.py diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py index 72a72cb73..c3d163cde 100644 --- a/youtube_dl/extractor/cctv.py +++ b/youtube_dl/extractor/cctv.py @@ -4,27 +4,84 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import float_or_none +from ..compat import compat_str +from ..utils import ( + float_or_none, + try_get, + unified_timestamp, +) class CCTVIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:.+?\.)? - (?: - cctv\.(?:com|cn)| - cntv\.cn - )/ - (?: - video/[^/]+/(?P[0-9a-f]{32})| - \d{4}/\d{2}/\d{2}/(?PVID[0-9A-Za-z]+) - )''' + IE_DESC = '央视网' + _VALID_URL = r'https?://(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?\.s?html' _TESTS = [{ - 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', - 'md5': '819c7b49fc3927d529fb4cd555621823', + 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', + 'md5': 'd61ec00a493e09da810bf406a078f691', 'info_dict': { - 'id': '454368eb19ad44a1925bf1eb96140a61', + 'id': '5ecdbeab623f4973b40ff25f18b174e8', 'ext': 'mp4', - 'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1', - } + 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)', + 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95', + 'duration': 98, + 'uploader': 'songjunjie', + 'timestamp': 1455279956, + 'upload_date': '20160212', + }, + }, { + 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', + 'info_dict': { + 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', + 'ext': 'mp4', + 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)', + 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。', + 'duration': 37, + 'uploader': 'shujun', + 'timestamp': 1454677291, + 'upload_date': '20160205', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', + 'info_dict': { + 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', + 'ext': 'mp4', + 'title': 'NHnews008 ANNUAL POLITICAL SEASON', + 'description': 'Four Comprehensives', + 'duration': 60, + 'uploader': 'zhangyunlei', + 'timestamp': 1425385521, + 'upload_date': '20150303', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml', + 'info_dict': { + 'id': 'b15f009ff45c43968b9af583fc2e04b2', + 'ext': 'mp4', + 'title': 'Путь,усыпанный космеями Серия 1', + 'description': 'Путь, усыпанный космеями', + 'duration': 2645, + 'uploader': 'renxue', + 'timestamp': 1477479241, + 'upload_date': '20161026', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44', + 'only_matching': True, + }, { + 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', + 'only_matching': True, }, { 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', 'only_matching': True, @@ -34,20 +91,63 @@ class CCTVIE(InfoExtractor): }] def _real_extract(self, url): - video_id, display_id = re.match(self._VALID_URL, url).groups() - if not video_id: - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})', - webpage, 'video_id') - api_data = self._download_json( - 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id) - m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url']) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', + r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', + r'"changePlayer\s*\(\s*["\']([\da-fA-F]+)', + r'"load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'], + webpage, 'video id', default=video_id) + + data = self._download_json( + 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id, + query={ + 'pid': video_id, + 'url': url, + 'idl': 32, + 'idlr': 32, + 'modifyed': 'false', + }) + + title = data['title'] + + formats = [] + + video = data.get('video') + if isinstance(video, dict): + for quality, chapters_key in enumerate(('lowChapters', 'chapters')): + video_url = try_get( + video, lambda x: x[chapters_key][0]['url'], compat_str) + if video_url: + formats.append({ + 'url': video_url, + 'format_id': 'http', + 'quality': quality, + 'preference': -1, + }) + + hls_url = try_get(data, lambda x: x['hls_url'], compat_str) + if hls_url: + hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + + uploader = data.get('editer_name') + description = self._html_search_meta('description', webpage) + timestamp = unified_timestamp(data.get('f_pgmtime')) + duration = float_or_none(try_get(video, lambda x: x['totalLength'])) return { 'id': video_id, - 'title': api_data['title'], - 'formats': self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False), - 'duration': float_or_none(api_data.get('video', {}).get('totalLength')), + 'title': title, + 'description': description, + 'uploader': uploader, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, } diff --git a/youtube_dl/extractor/cntv.py b/youtube_dl/extractor/cntv.py deleted file mode 100644 index 0b26ad32d..000000000 --- a/youtube_dl/extractor/cntv.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_request - -class CntvIE(InfoExtractor): - IE_NAME = 'cntv' - IE_DESC = '央视网' - _VALID_URL = r'''(?x) - (?: - http://(?:ent|tv|sports|english|chunwan)\.(?:cntv|cctv)\.(?:com|cn)/ - (?: - (?:video|special)/(?:[_A-Za-z0-9]+)|(?P\d+)/(?P\d+)/(?P\d+) - ) - ) - /(?P[A-Za-z0-9]+)(index)?(?:\.shtml|) - ''' - - _TESTS = [{ - 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', - 'md5': 'd0f8c3e043fe43534517ed47c831fd4e', - 'info_dict': { - 'id': '5ecdbeab623f4973b40ff25f18b174e8', - 'ext': 'flv', - 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)', - 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95', - 'uploader': 'songjunjie' - } - },{ - 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', - 'md5': '3bde627de65b6bad064f3e67ec9e71a1', - 'info_dict': { - 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', - 'ext': 'flv', - 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)', - 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。', - 'uploader': 'shujun' - } - },{ - 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', - 'md5': 'f37a56d5f16647eae24c3e618f8d23a2', - 'info_dict': { - 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', - 'ext': 'flv', - 'title': 'NHnews008 ANNUAL POLITICAL SEASON', - 'description': 'Four Comprehensives', - 'uploader': 'zhangyunlei' - } - },{ - 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', - 'md5': '37ad2407b5f28336ddf26aacea570ee5', - 'info_dict': { - 'id': '45336758dd474ef39dc6887f115e0375', - 'ext': 'flv', - 'title': '《爱国之恋》中国梦新歌展播 综艺视频精切', - 'description': 'md5:856836ae2660f000e458ead597b1e2bd', - 'uploader': 'baiyuqing' - } - },{ - 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44', - 'md5': 'd7188ecc2a2f8447c70023895724c31c', - 'info_dict': { - 'id': 'e0210d949f113ddfb38d31f00a4e5c44', - 'ext': 'flv', - 'title': '《传奇故事》 20150409 金钱“魔术”(下)', - 'description': '本期节目主要内容: 2014年10月23日凌晨,浙江余杭警方出动2000多名警力,一举摧毁1040阳光工程传销组织,抓获65名涉嫌传销的组织头目,教育遣返400多名参与传销人员。', - 'uploader': '陆银凯' - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_id_regex = [r'var guid[ ="]+([a-zA-Z0-9]+)"', - r'"videoCenterId"[ ,"]+([a-zA-Z0-9]+)"', - r'"changePlayer\(\'([a-zA-Z0-9]+)\'\)\"'] - video_id = self._search_regex(video_id_regex, webpage, 'video_id', default=None) - - if video_id is None: - video_id = self._match_id(url) - - # refer to youku.py - def retrieve_data(req_url, note): - req = compat_urllib_request.Request(req_url) - - cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') - if cn_verification_proxy: - req.add_header('Ytdl-request-proxy', cn_verification_proxy) - - raw_data = self._download_json(req, video_id, note=note) - return raw_data - - get_info_url = \ - 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do' + \ - '?pid=%s' % video_id + \ - '&tz=-8' + \ - '&from=000tv' + \ - '&url=%s' % url + \ - '&idl=32' + \ - '&idlr=32' + \ - '&modifyed=false' - - data = retrieve_data( - get_info_url, - 'Downloading JSON metadata') - - title = data.get('title') - uploader = data.get('editer_name') - hls_url = data.get('hls_url') - formats = self._extract_m3u8_formats(hls_url, video_id, 'flv') - self._sort_formats(formats) - - description = self._html_search_meta('description', webpage) - - return { - 'id': video_id, - 'title': title, - 'uploader': uploader, - 'formats': formats, - 'description': description, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4cd12562e..3017bf56c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -178,7 +178,6 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) -from .cntv import CntvIE from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ( From 327caf661a2b65b4cc09550f471781918cb3b9bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Jan 2017 23:00:37 +0700 Subject: [PATCH 075/586] [cctv] Do not fallback on video id extracted from URL --- youtube_dl/extractor/cctv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py index c3d163cde..f44256d32 100644 --- a/youtube_dl/extractor/cctv.py +++ b/youtube_dl/extractor/cctv.py @@ -99,7 +99,7 @@ class CCTVIE(InfoExtractor): r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', r'"changePlayer\s*\(\s*["\']([\da-fA-F]+)', r'"load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'], - webpage, 'video id', default=video_id) + webpage, 'video id') data = self._download_json( 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id, From 3783a5ccba17f2c7e056a2ac5c9c38a61979e452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Jan 2017 23:15:38 +0700 Subject: [PATCH 076/586] [cctv] Relax _VALID_URL --- youtube_dl/extractor/cctv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py index f44256d32..d271cabad 100644 --- a/youtube_dl/extractor/cctv.py +++ b/youtube_dl/extractor/cctv.py @@ -14,7 +14,7 @@ from ..utils import ( class CCTVIE(InfoExtractor): IE_DESC = '央视网' - _VALID_URL = r'https?://(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?\.s?html' + _VALID_URL = r'https?://(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)' _TESTS = [{ 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', 'md5': 'd61ec00a493e09da810bf406a078f691', From b890caaf21b52aec165422b2a928f68c28d2c575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Jan 2017 23:52:54 +0700 Subject: [PATCH 077/586] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 93336e34d..9243fb2c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +version + +Extractors +* [cctv] Improve extraction (#879, #6753, #8541) ++ [nrktv:episodes] Add support for episodes (#11571) ++ [arkena] Add support for video.arkena.com (#11568) + + version 2016.12.31 Core From 2021b650dd1af1af63689755d91fc161fd8543b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Jan 2017 23:55:04 +0700 Subject: [PATCH 078/586] release 2017.01.02 --- .github/ISSUE_TEMPLATE.md | 8 +++++--- CONTRIBUTING.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 693d787e3..0e3bc1ce0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.22 +[debug] youtube-dl version 2017.01.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} @@ -50,6 +50,8 @@ $ youtube-dl -v - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. + --- ### Description of your *issue*, suggested solution and other information diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 495955bb5..f50f52841 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -58,7 +58,7 @@ We are then presented with a very complicated request when the original problem Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones. -In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. +In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. ### Is anyone going to need the feature? @@ -94,7 +94,7 @@ If you want to create a build of youtube-dl yourself, you'll need If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. -After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): +After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) 2. Check out the source code with: @@ -199,7 +199,7 @@ Assume at this point `meta`'s layout is: } ``` -Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like: +Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like: ```python description = meta.get('summary') # correct diff --git a/ChangeLog b/ChangeLog index 9243fb2c0..eac3b6b05 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.02 Extractors * [cctv] Improve extraction (#879, #6753, #8541) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5598210cd..0e301e8f3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -132,7 +132,7 @@ - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** - **CCMA** - - **CCTV** + - **CCTV**: 央视网 - **CDA** - **CeskaTelevize** - **channel9**: Channel 9 @@ -517,6 +517,7 @@ - **NRKSkole**: NRK Skole - **NRKTV**: NRK TV and NRK Radio - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte + - **NRKTVEpisodes** - **ntv.ru** - **Nuvid** - **NYTimes** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ca2258ca6..8f2228592 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.31' +__version__ = '2017.01.02' From 7a9e066972bef4f10822a81653af9d942f15971e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Jan 2017 01:12:41 +0700 Subject: [PATCH 079/586] [cctv] Relax some video id regexes --- youtube_dl/extractor/cctv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py index d271cabad..ba69c44b8 100644 --- a/youtube_dl/extractor/cctv.py +++ b/youtube_dl/extractor/cctv.py @@ -97,8 +97,8 @@ class CCTVIE(InfoExtractor): video_id = self._search_regex( [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', - r'"changePlayer\s*\(\s*["\']([\da-fA-F]+)', - r'"load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'], + r'changePlayer\s*\(\s*["\']([\da-fA-F]+)', + r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'], webpage, 'video id') data = self._download_json( From 027e23129515e0e64d378ca92b47e584bc6dd181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Jan 2017 01:45:59 +0700 Subject: [PATCH 080/586] [tunein] Add support for embeds (closes #11579) --- youtube_dl/extractor/generic.py | 21 +++++++ youtube_dl/extractor/tunein.py | 107 +++++++++++++++++--------------- 2 files changed, 78 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8503966a7..d71d0dc46 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -73,6 +73,7 @@ from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .soundcloud import SoundcloudIE +from .tunein import TuneInBaseIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE @@ -828,6 +829,21 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 7, }, + # TuneIn station embed + { + 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/', + 'info_dict': { + 'id': '204146', + 'ext': 'mp3', + 'title': 'CNRV', + 'location': 'Paris, France', + 'is_live': True, + }, + 'params': { + # Live stream + 'skip_download': True, + }, + }, # Livestream embed { 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast', @@ -2088,6 +2104,11 @@ class GenericIE(InfoExtractor): if soundcloud_urls: return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) + # Look for tunein player + tunein_urls = TuneInBaseIE._extract_urls(webpage) + if tunein_urls: + return _playlist_from_matches(tunein_urls) + # Look for embedded mtvservices player mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) if mtvservices_url: diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py index ae4cfaec2..7e51de89e 100644 --- a/youtube_dl/extractor/tunein.py +++ b/youtube_dl/extractor/tunein.py @@ -11,6 +11,12 @@ from ..compat import compat_urlparse class TuneInBaseIE(InfoExtractor): _API_BASE_URL = 'http://tunein.com/tuner/tune/' + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\'](?P(?:https?://)?tunein\.com/embed/player/[pst]\d+)', + webpage) + def _real_extract(self, url): content_id = self._match_id(url) @@ -69,82 +75,83 @@ class TuneInClipIE(TuneInBaseIE): _VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P\d+)' _API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s' - _TESTS = [ - { - 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816', - 'md5': '99f00d772db70efc804385c6b47f4e77', - 'info_dict': { - 'id': '816', - 'title': '32m', - 'ext': 'mp3', - }, + _TESTS = [{ + 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816', + 'md5': '99f00d772db70efc804385c6b47f4e77', + 'info_dict': { + 'id': '816', + 'title': '32m', + 'ext': 'mp3', }, - ] + }] class TuneInStationIE(TuneInBaseIE): IE_NAME = 'tunein:station' - _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId\=)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId=|embed/player/s)(?P\d+)' _API_URL_QUERY = '?tuneType=Station&stationId=%s' @classmethod def suitable(cls, url): return False if TuneInClipIE.suitable(url) else super(TuneInStationIE, cls).suitable(url) - _TESTS = [ - { - 'url': 'http://tunein.com/radio/Jazz24-885-s34682/', - 'info_dict': { - 'id': '34682', - 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', - 'ext': 'mp3', - 'location': 'Tacoma, WA', - }, - 'params': { - 'skip_download': True, # live stream - }, + _TESTS = [{ + 'url': 'http://tunein.com/radio/Jazz24-885-s34682/', + 'info_dict': { + 'id': '34682', + 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', + 'ext': 'mp3', + 'location': 'Tacoma, WA', }, - ] + 'params': { + 'skip_download': True, # live stream + }, + }, { + 'url': 'http://tunein.com/embed/player/s6404/', + 'only_matching': True, + }] class TuneInProgramIE(TuneInBaseIE): IE_NAME = 'tunein:program' - _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId\=)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId=|embed/player/p)(?P\d+)' _API_URL_QUERY = '?tuneType=Program&programId=%s' - _TESTS = [ - { - 'url': 'http://tunein.com/radio/Jazz-24-p2506/', - 'info_dict': { - 'id': '2506', - 'title': 'Jazz 24 on 91.3 WUKY-HD3', - 'ext': 'mp3', - 'location': 'Lexington, KY', - }, - 'params': { - 'skip_download': True, # live stream - }, + _TESTS = [{ + 'url': 'http://tunein.com/radio/Jazz-24-p2506/', + 'info_dict': { + 'id': '2506', + 'title': 'Jazz 24 on 91.3 WUKY-HD3', + 'ext': 'mp3', + 'location': 'Lexington, KY', }, - ] + 'params': { + 'skip_download': True, # live stream + }, + }, { + 'url': 'http://tunein.com/embed/player/p191660/', + 'only_matching': True, + }] class TuneInTopicIE(TuneInBaseIE): IE_NAME = 'tunein:topic' - _VALID_URL = r'https?://(?:www\.)?tunein\.com/topic/.*?TopicId\=(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:topic/.*?TopicId=|embed/player/t)(?P\d+)' _API_URL_QUERY = '?tuneType=Topic&topicId=%s' - _TESTS = [ - { - 'url': 'http://tunein.com/topic/?TopicId=101830576', - 'md5': 'c31a39e6f988d188252eae7af0ef09c9', - 'info_dict': { - 'id': '101830576', - 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)', - 'ext': 'mp3', - 'location': 'Belgium', - }, + _TESTS = [{ + 'url': 'http://tunein.com/topic/?TopicId=101830576', + 'md5': 'c31a39e6f988d188252eae7af0ef09c9', + 'info_dict': { + 'id': '101830576', + 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)', + 'ext': 'mp3', + 'location': 'Belgium', }, - ] + }, { + 'url': 'http://tunein.com/embed/player/t101830576/', + 'only_matching': True, + }] class TuneInShortenerIE(InfoExtractor): From 1911d77d28594db62d803e5e5e5e5fb7171adae5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Jan 2017 01:30:40 +0700 Subject: [PATCH 081/586] [cctv] Add support for ncpa-classic.com (closes #11591) --- youtube_dl/extractor/cctv.py | 46 ++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py index ba69c44b8..c76f361c6 100644 --- a/youtube_dl/extractor/cctv.py +++ b/youtube_dl/extractor/cctv.py @@ -14,8 +14,9 @@ from ..utils import ( class CCTVIE(InfoExtractor): IE_DESC = '央视网' - _VALID_URL = r'https?://(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)' + _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)' _TESTS = [{ + # fo.addVariable("videoCenterId","id") 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', 'md5': 'd61ec00a493e09da810bf406a078f691', 'info_dict': { @@ -29,6 +30,7 @@ class CCTVIE(InfoExtractor): 'upload_date': '20160212', }, }, { + # var guid = "id" 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', 'info_dict': { 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', @@ -44,6 +46,7 @@ class CCTVIE(InfoExtractor): 'skip_download': True, }, }, { + # changePlayer('id') 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', 'info_dict': { 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', @@ -59,6 +62,7 @@ class CCTVIE(InfoExtractor): 'skip_download': True, }, }, { + # loadvideo('id') 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml', 'info_dict': { 'id': 'b15f009ff45c43968b9af583fc2e04b2', @@ -73,6 +77,37 @@ class CCTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # var initMyAray = 'id' + 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml', + 'info_dict': { + 'id': 'a194cfa7f18c426b823d876668325946', + 'ext': 'mp4', + 'title': '小泽征尔音乐塾 音乐梦想无国界', + 'duration': 2173, + 'timestamp': 1369248264, + 'upload_date': '20130522', + }, + 'params': { + 'skip_download': True, + }, + }, { + # var ids = ["id"] + 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml', + 'info_dict': { + 'id': 'a8606119a4884588a79d81c02abecc16', + 'ext': 'mp3', + 'title': '来自维也纳的新年贺礼', + 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7', + 'duration': 1578, + 'uploader': 'djy', + 'timestamp': 1482942419, + 'upload_date': '20161228', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', 'only_matching': True, @@ -87,7 +122,7 @@ class CCTVIE(InfoExtractor): 'only_matching': True, }, { 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -98,7 +133,9 @@ class CCTVIE(InfoExtractor): [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', r'changePlayer\s*\(\s*["\']([\da-fA-F]+)', - r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'], + r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)', + r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)', + r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'], webpage, 'video id') data = self._download_json( @@ -138,7 +175,8 @@ class CCTVIE(InfoExtractor): self._sort_formats(formats) uploader = data.get('editer_name') - description = self._html_search_meta('description', webpage) + description = self._html_search_meta( + 'description', webpage, default=None) timestamp = unified_timestamp(data.get('f_pgmtime')) duration = float_or_none(try_get(video, lambda x: x['totalLength'])) From 96d315c2be1c00339b4cb77e80b3d5f63770a735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Jan 2017 05:32:18 +0700 Subject: [PATCH 082/586] [pornhub:playlist] Improve extraction (closes #11594) --- youtube_dl/extractor/pornhub.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 40dbe6967..3eaf56973 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - entries = self._extract_entries(webpage) + # Only process container div with main playlist content skipping + # drop-down menu that uses similar pattern for videos (see + # https://github.com/rg3/youtube-dl/issues/11594). + container = self._search_regex( + r'(?s)(]+class=["\']container.+)', webpage, + 'container', default=webpage) + + entries = self._extract_entries(container) playlist = self._parse_json( self._search_regex( @@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistIE(PornHubPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P\d+)' _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/6201671', + 'url': 'http://www.pornhub.com/playlist/4667351', 'info_dict': { - 'id': '6201671', - 'title': 'P0p4', + 'id': '4667351', + 'title': 'Nataly Hot', }, - 'playlist_mincount': 35, + 'playlist_mincount': 2, }] From b6de53ea8a9e51c1d8a8d704b1530095828ec079 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Sat, 29 Oct 2016 14:06:07 +0200 Subject: [PATCH 083/586] [zdf] Fix extraction --- youtube_dl/extractor/zdf.py | 416 ++++++++++++++++++++---------------- 1 file changed, 230 insertions(+), 186 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 2ef177275..530a61cdf 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -6,207 +6,251 @@ import re from .common import InfoExtractor from ..utils import ( - int_or_none, - unified_strdate, OnDemandPagedList, - xpath_text, determine_ext, - qualities, - float_or_none, - ExtractorError, + parse_iso8601, + ExtractorError ) - +from ..compat import compat_str class ZDFIE(InfoExtractor): - _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P[0-9]+)(?:/[^/?]+)?(?:\?.*)?' + _VALID_URL = r'https?://www\.zdf\.de/.*?/(?P[^/?]*?)\.html' _TESTS = [{ - 'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt', + 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html', 'info_dict': { - 'id': '2037704', - 'ext': 'webm', - 'title': 'ZDFspezial - Ende des Machtpokers', - 'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".', - 'duration': 1022, - 'uploader': 'spezial', - 'uploader_id': '225948', - 'upload_date': '20131127', - }, - 'skip': 'Videos on ZDF.de are depublicised in short order', - }] - - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): - param_groups = {} - for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)): - group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace')) - params = {} - for param in param_group: - params[param.get('name')] = param.get('value') - param_groups[group_id] = params - - formats = [] - for video in smil.findall(self._xpath_ns('.//video', namespace)): - src = video.get('src') - if not src: - continue - bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - group_id = video.get('paramGroup') - param_group = param_groups[group_id] - for proto in param_group['protocols'].split(','): - formats.append({ - 'url': '%s://%s' % (proto, param_group['host']), - 'app': param_group['app'], - 'play_path': src, - 'ext': 'flv', - 'format_id': '%s-%d' % (proto, bitrate), - 'tbr': bitrate, - }) - self._sort_formats(formats) - return formats - - def extract_from_xml_url(self, video_id, xml_url): - doc = self._download_xml( - xml_url, video_id, - note='Downloading video info', - errnote='Failed to download video info') - - status_code = doc.find('./status/statuscode') - if status_code is not None and status_code.text != 'ok': - code = status_code.text - if code == 'notVisibleAnymore': - message = 'Video %s is not available' % video_id - else: - message = '%s returned error: %s' % (self.IE_NAME, code) - raise ExtractorError(message, expected=True) - - title = doc.find('.//information/title').text - description = xpath_text(doc, './/information/detail', 'description') - duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) - uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') - uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') - upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) - subtitles = {} - captions_url = doc.find('.//caption/url') - if captions_url is not None: - subtitles['de'] = [{ - 'url': captions_url.text, - 'ext': 'ttml', - }] - - def xml_to_thumbnails(fnode): - thumbnails = [] - for node in fnode: - thumbnail_url = node.text - if not thumbnail_url: - continue - thumbnail = { - 'url': thumbnail_url, - } - if 'key' in node.attrib: - m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) - if m: - thumbnail['width'] = int(m.group(1)) - thumbnail['height'] = int(m.group(2)) - thumbnails.append(thumbnail) - return thumbnails - - thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) - - format_nodes = doc.findall('.//formitaeten/formitaet') - quality = qualities(['veryhigh', 'high', 'med', 'low']) - - def get_quality(elem): - return quality(xpath_text(elem, 'quality')) - format_nodes.sort(key=get_quality) - format_ids = [] - formats = [] - for fnode in format_nodes: - video_url = fnode.find('url').text - is_available = 'http://www.metafilegenerator' not in video_url - if not is_available: - continue - format_id = fnode.attrib['basetype'] - quality = xpath_text(fnode, './quality', 'quality') - format_m = re.match(r'''(?x) - (?P[^_]+)_(?P[^_]+)_(?P[^_]+)_ - (?P[^_]+)_(?P[^_]+)_(?P[^_]+) - ''', format_id) - - ext = determine_ext(video_url, None) or format_m.group('container') - if ext not in ('smil', 'f4m', 'm3u8'): - format_id = format_id + '-' + quality - if format_id in format_ids: - continue - - if ext == 'meta': - continue - elif ext == 'smil': - formats.extend(self._extract_smil_formats( - video_url, video_id, fatal=False)) - elif ext == 'm3u8': - # the certificates are misconfigured (see - # https://github.com/rg3/youtube-dl/issues/8665) - if video_url.startswith('https://'): - continue - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id=format_id, fatal=False)) - else: - proto = format_m.group('proto').lower() - - abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) - vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) - - width = int_or_none(xpath_text(fnode, './width', 'width')) - height = int_or_none(xpath_text(fnode, './height', 'height')) - - filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize')) - - format_note = '' - if not format_note: - format_note = None - - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'ext': ext, - 'acodec': format_m.group('acodec'), - 'vcodec': format_m.group('vcodec'), - 'abr': abr, - 'vbr': vbr, - 'width': width, - 'height': height, - 'filesize': filesize, - 'format_note': format_note, - 'protocol': proto, - '_available': is_available, - }) - format_ids.append(format_id) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'upload_date': upload_date, - 'formats': formats, - 'subtitles': subtitles, + 'id': 'zdfmediathek-trailer-100', + 'ext': 'mp4', + 'title': 'Trailer ZDFmediathek Supermarkt', } + }] def _real_extract(self, url): video_id = self._match_id(url) - xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id - return self.extract_from_xml_url(video_id, xml_url) + try: + extr_player = ZDFExtractorPlayer(self, url, video_id) + formats = extr_player._real_extract() + except (ExtractorError, KeyError) as e: + self._downloader.report_warning('%s: %s\nusing fallback method (mobile url)' % (type(e).__name__, compat_str(e))) + extr_mobile = ZDFExtractorMobile(self, url, video_id) + formats = extr_mobile._real_extract() + return formats +class ZDFExtractor: + """Super class for the 2 extraction methods""" + def __init__(self, parent, url, video_id): + self.parent = parent + self.url = url + self.video_id = video_id + + def _real_extract(self): + formats = [] + for entry in self._fetch_entries(): + video_url = self._get_video_url(entry) + if not video_url: + continue + format_id = self._get_format_id(entry) + ext = determine_ext(video_url, None) + if ext == 'meta': + continue + if ext == 'm3u8': + formats.extend(self.parent._extract_m3u8_formats( + video_url, self.video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self.parent._extract_f4m_formats( + video_url, self.video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'format_note': self._get_format_note(entry) + }) + self.parent._sort_formats(formats) + + return { + 'id': self.video_id, + 'title': self._get_title(), + 'formats': formats, + 'subtitles': self._get_subtitles(), + 'thumbnail': self._get_thumbnail(), + 'description': self._get_description(), + 'timestamp': self._get_timestamp() + } + +class ZDFExtractorMobile(ZDFExtractor): + """Simple URL extraction method. Disadvantage: fewer formats, no subtitles""" + def __init__(self, parent, url, video_id): + ZDFExtractor.__init__(self, parent, url, video_id) + + def _fetch_entries(self): + meta_data_url = 'https://zdf-cdn.live.cellular.de/mediathekV2/document/' + self.video_id + self.meta_data = self.parent._download_json(meta_data_url, self.video_id, note='Downloading meta data') + return self.meta_data['document']['formitaeten'] + + def _get_title(self): + return self.meta_data['document']['titel'] + + def _get_video_url(self, entry): + return entry['url'] + + def _get_format_id(self, entry): + format_id = entry['type'] + if 'quality' in entry: + format_id += '-' + entry['quality'] + return format_id + + def _get_format_note(self, entry): + return None + + def _get_subtitles(self): + return None + + def _get_description(self): + return self.meta_data['document'].get('beschreibung') + + def _get_timestamp(self): + meta = self.meta_data['meta'] + if meta: + return parse_iso8601(meta.get('editorialDate')) + + def _get_thumbnail(self): + teaser_images = self.meta_data['document'].get('teaserBild') + if teaser_images: + max_res = max(teaser_images, key=int) + return teaser_images[max_res].get('url') + +class ZDFExtractorPlayer(ZDFExtractor): + """Extraction method that requires downloads of several pages. + + Follows the requests of the website.""" + def __init__(self, parent, url, video_id): + ZDFExtractor.__init__(self, parent, url, video_id) + + def _fetch_entries(self): + webpage = self.parent._download_webpage(self.url, self.video_id) + + jsb = self.parent._search_regex(r"data-zdfplayer-jsb='([^']*)'", webpage, 'zdfplayer jsb data') + jsb_json = self.parent._parse_json(jsb, self.video_id) + + configuration_url = 'https://www.zdf.de' + jsb_json['config'] + configuration_json = self.parent._download_json(configuration_url, self.video_id, note='Downloading player configuration') + api_token = configuration_json['apiToken'] + + player_js = self.parent._download_webpage('https://www.zdf.de/ZDFplayer/latest-v2/skins/zdf/zdf-player.js', self.video_id, fatal=False, note='Downloading player script') + if player_js: + player_id = self.parent._search_regex(r'this\.ptmd_player_id="([^"]*)"', player_js, 'player id', fatal=False) + else: + player_id = None + + self.content_json = self.parent._download_json(jsb_json['content'], self.video_id, headers={'Api-Auth': 'Bearer %s' % api_token}, note='Downloading content description') + + main_video_content = self.content_json['mainVideoContent']['http://zdf.de/rels/target'] + meta_data_url = None + if not player_id: + # could not determine player_id => try alternativ generic URL + meta_data_url = main_video_content.get('http://zdf.de/rels/streams/ptmd') + if meta_data_url: + meta_data_url = 'https://api.zdf.de' + meta_data_url + else: + # no generic URL found => 2nd fallback: hardcoded player_id + player_id = 'ngplayer_2_3' + if not meta_data_url: + meta_data_url_template = main_video_content['http://zdf.de/rels/streams/ptmd-template'] + meta_data_url = 'https://api.zdf.de' + meta_data_url_template.replace('{playerId}', player_id) + + self.meta_data = self.parent._download_json(meta_data_url, self.video_id, note='Downloading meta data') + + formats = [] + for p_list_entry in self.meta_data['priorityList']: + for formitaet in p_list_entry['formitaeten']: + for entry in formitaet['qualities']: + yield (formitaet, entry) + + def _get_title(self): + return self.content_json['title'] + + def _get_video_url(self, entry_tuple): + (formitaet, entry) = entry_tuple + tracks = entry['audio'].get('tracks') + if not tracks: + return + if len(tracks) > 1: + self._downloader.report_warning('unexpected input: multiple tracks') + track = tracks[0] + return track['uri'] + + def _get_format_id(self, entry_tuple): + (formitaet, entry) = entry_tuple + facets = self._get_facets(formitaet) + add = '' + if 'adaptive' in facets: + add += 'a' + if 'restriction_useragent' in facets: + add += 'b' + if 'progressive' in facets: + add += 'p' + type_ = formitaet['type'] + format_id = type_ + '-' + if add: + format_id += add + '-' + # named qualities are not very useful for sorting the formats: + # a 'high' m3u8 entry can be better quality than a 'veryhigh' direct mp4 download + format_id += entry['quality'] + return format_id + + def _get_facets(self, formitaet): + facets = formitaet.get('facets') or [] + if formitaet.get('isAdaptive'): + facets.append('adaptive') + return facets + + def _get_format_note(self, entry_tuple): + (formitaet, entry) = entry_tuple + return ', '.join(self._get_facets(formitaet)) + + def _get_subtitles(self): + subtitles = {} + if 'captions' in self.meta_data: + for caption in self.meta_data['captions']: + lang = caption.get('language') + if not lang: + continue + if lang == 'deu': + lang = 'de' + subformat = {'url': caption.get('uri')} + if caption.get('format') == 'webvtt': + subformat['ext'] = 'vtt' + elif caption.get('format') == 'ebu-tt-d-basic-de': + subformat['ext'] = 'ttml' + if not lang in subtitles: + subtitles[lang] = [] + subtitles[lang].append(subformat) + return subtitles + + def _get_description(self): + return self.content_json.get('teasertext') + + def _get_timestamp(self): + return parse_iso8601(self.content_json.get('editorialDate')) + + def _get_thumbnail(self): + teaser_images = self.content_json.get('teaserImageRef') + if teaser_images: + teaser_images_layouts = teaser_images.get('layouts') + if teaser_images_layouts: + if 'original' in teaser_images_layouts: + return teaser_images_layouts['original'] + teasers = {} + for key in teaser_images_layouts: + width = self.parent._search_regex(r'(\d+)x\d+', key, 'teaser width', fatal=False) + if width: + teasers[int(width)] = teaser_images_layouts[key] + if teasers: + best = max(teasers) + return teasers[best] class ZDFChannelIE(InfoExtractor): + _WORKING = False _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P[0-9]+)' _TESTS = [{ 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', From fb47cb5b235badeb40afb2de1bc56d98982e40fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Jan 2017 04:05:27 +0700 Subject: [PATCH 084/586] [zdf] Improve (closes #11055, closes #11063) --- youtube_dl/extractor/zdf.py | 548 ++++++++++++++++++------------------ 1 file changed, 277 insertions(+), 271 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 530a61cdf..12537737c 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -1,306 +1,312 @@ # coding: utf-8 from __future__ import unicode_literals -import functools import re from .common import InfoExtractor -from ..utils import ( - OnDemandPagedList, - determine_ext, - parse_iso8601, - ExtractorError -) from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + NO_DEFAULT, + orderedSet, + parse_codecs, + qualities, + try_get, + unified_timestamp, + update_url_query, + urljoin, +) -class ZDFIE(InfoExtractor): - _VALID_URL = r'https?://www\.zdf\.de/.*?/(?P[^/?]*?)\.html' + +class ZDFBaseIE(InfoExtractor): + def _call_api(self, url, player, referrer, video_id): + return self._download_json( + url, video_id, 'Downloading JSON content', + headers={ + 'Referer': referrer, + 'Api-Auth': 'Bearer %s' % player['apiToken'], + }) + + def _extract_player(self, webpage, video_id, fatal=True): + return self._parse_json( + self._search_regex( + r'(?s)data-zdfplayer-jsb=(["\'])(?P{.+?})\1', webpage, + 'player JSON', default='{}' if not fatal else NO_DEFAULT, + group='json'), + video_id) + + +class ZDFIE(ZDFBaseIE): + _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P[^/?]+)\.html' + _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh') _TESTS = [{ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html', 'info_dict': { 'id': 'zdfmediathek-trailer-100', 'ext': 'mp4', - 'title': 'Trailer ZDFmediathek Supermarkt', + 'title': 'Die neue ZDFmediathek', + 'description': 'md5:3003d36487fb9a5ea2d1ff60beb55e8d', + 'duration': 30, + 'timestamp': 1477627200, + 'upload_date': '20161028', } + }, { + 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html', + 'only_matching': True, + }, { + 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html', + 'only_matching': True, }] + @staticmethod + def _extract_subtitles(src): + subtitles = {} + for caption in try_get(src, lambda x: x['captions'], list) or []: + subtitle_url = caption.get('uri') + if subtitle_url and isinstance(subtitle_url, compat_str): + lang = caption.get('language', 'deu') + subtitles.setdefault(lang, []).append({ + 'url': subtitle_url, + }) + return subtitles + + def _extract_format(self, video_id, formats, format_urls, meta): + format_url = meta.get('url') + if not format_url or not isinstance(format_url, compat_str): + return + if format_url in format_urls: + return + format_urls.add(format_url) + mime_type = meta.get('mimeType') + ext = determine_ext(format_url) + if mime_type == 'application/x-mpegURL' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls', + entry_protocol='m3u8_native', fatal=False)) + elif mime_type == 'application/f4m+xml' or ext == 'f4m': + formats.extend(self._extract_f4m_formats( + update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)) + else: + f = parse_codecs(meta.get('mimeCodec')) + format_id = ['http'] + for p in (meta.get('type'), meta.get('quality')): + if p and isinstance(p, compat_str): + format_id.append(p) + f.update({ + 'url': format_url, + 'format_id': '-'.join(format_id), + 'format_note': meta.get('quality'), + 'language': meta.get('language'), + 'quality': qualities(self._QUALITIES)(meta.get('quality')), + 'preference': -10, + }) + formats.append(f) + + def _extract_entry(self, url, content, video_id): + title = content.get('title') or content['teaserHeadline'] + + t = content['mainVideoContent']['http://zdf.de/rels/target'] + + ptmd_path = t.get('http://zdf.de/rels/streams/ptmd') + + if not ptmd_path: + ptmd_path = t[ + 'http://zdf.de/rels/streams/ptmd-template'].replace( + '{playerId}', 'portal') + + ptmd = self._download_json(urljoin(url, ptmd_path), video_id) + + formats = [] + track_uris = set() + for p in ptmd['priorityList']: + formitaeten = p.get('formitaeten') + if not isinstance(formitaeten, list): + continue + for f in formitaeten: + f_qualities = f.get('qualities') + if not isinstance(f_qualities, list): + continue + for quality in f_qualities: + tracks = try_get(quality, lambda x: x['audio']['tracks'], list) + if not tracks: + continue + for track in tracks: + self._extract_format( + video_id, formats, track_uris, { + 'url': track.get('uri'), + 'type': f.get('type'), + 'mimeType': f.get('mimeType'), + 'quality': quality.get('quality'), + 'language': track.get('language'), + }) + self._sort_formats(formats) + + thumbnails = [] + layouts = try_get( + content, lambda x: x['teaserImageRef']['layouts'], dict) + if layouts: + for layout_key, layout_url in layouts.items(): + if not isinstance(layout_url, compat_str): + continue + thumbnail = { + 'url': layout_url, + 'format_id': layout_key, + } + mobj = re.search(r'(?P\d+)x(?P\d+)', layout_key) + if mobj: + thumbnail.update({ + 'width': int(mobj.group('width')), + 'height': int(mobj.group('height')), + }) + thumbnails.append(thumbnail) + + return { + 'id': video_id, + 'title': title, + 'description': content.get('leadParagraph') or content.get('teasertext'), + 'duration': int_or_none(t.get('duration')), + 'timestamp': unified_timestamp(content.get('editorialDate')), + 'thumbnails': thumbnails, + 'subtitles': self._extract_subtitles(ptmd), + 'formats': formats, + } + + def _extract_regular(self, url, player, video_id): + content = self._call_api(player['content'], player, url, video_id) + return self._extract_entry(player['content'], content, video_id) + + def _extract_mobile(self, video_id): + document = self._download_json( + 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, + video_id)['document'] + + title = document['titel'] + + formats = [] + format_urls = set() + for f in document['formitaeten']: + self._extract_format(video_id, formats, format_urls, f) + self._sort_formats(formats) + + thumbnails = [] + teaser_bild = document.get('teaserBild') + if isinstance(teaser_bild, dict): + for thumbnail_key, thumbnail in teaser_bild.items(): + thumbnail_url = try_get( + thumbnail, lambda x: x['url'], compat_str) + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + 'id': thumbnail_key, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'description': document.get('beschreibung'), + 'duration': int_or_none(document.get('length')), + 'timestamp': unified_timestamp(try_get( + document, lambda x: x['meta']['editorialDate'], compat_str)), + 'thumbnails': thumbnails, + 'subtitles': self._extract_subtitles(document), + 'formats': formats, + } + def _real_extract(self, url): video_id = self._match_id(url) - try: - extr_player = ZDFExtractorPlayer(self, url, video_id) - formats = extr_player._real_extract() - except (ExtractorError, KeyError) as e: - self._downloader.report_warning('%s: %s\nusing fallback method (mobile url)' % (type(e).__name__, compat_str(e))) - extr_mobile = ZDFExtractorMobile(self, url, video_id) - formats = extr_mobile._real_extract() - return formats -class ZDFExtractor: - """Super class for the 2 extraction methods""" - def __init__(self, parent, url, video_id): - self.parent = parent - self.url = url - self.video_id = video_id + webpage = self._download_webpage(url, video_id, fatal=False) + if webpage: + player = self._extract_player(webpage, url, fatal=False) + if player: + return self._extract_regular(url, player, video_id) - def _real_extract(self): - formats = [] - for entry in self._fetch_entries(): - video_url = self._get_video_url(entry) - if not video_url: - continue - format_id = self._get_format_id(entry) - ext = determine_ext(video_url, None) - if ext == 'meta': - continue - if ext == 'm3u8': - formats.extend(self.parent._extract_m3u8_formats( - video_url, self.video_id, 'mp4', m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self.parent._extract_f4m_formats( - video_url, self.video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'format_note': self._get_format_note(entry) - }) - self.parent._sort_formats(formats) + return self._extract_mobile(video_id) - return { - 'id': self.video_id, - 'title': self._get_title(), - 'formats': formats, - 'subtitles': self._get_subtitles(), - 'thumbnail': self._get_thumbnail(), - 'description': self._get_description(), - 'timestamp': self._get_timestamp() - } -class ZDFExtractorMobile(ZDFExtractor): - """Simple URL extraction method. Disadvantage: fewer formats, no subtitles""" - def __init__(self, parent, url, video_id): - ZDFExtractor.__init__(self, parent, url, video_id) - - def _fetch_entries(self): - meta_data_url = 'https://zdf-cdn.live.cellular.de/mediathekV2/document/' + self.video_id - self.meta_data = self.parent._download_json(meta_data_url, self.video_id, note='Downloading meta data') - return self.meta_data['document']['formitaeten'] - - def _get_title(self): - return self.meta_data['document']['titel'] - - def _get_video_url(self, entry): - return entry['url'] - - def _get_format_id(self, entry): - format_id = entry['type'] - if 'quality' in entry: - format_id += '-' + entry['quality'] - return format_id - - def _get_format_note(self, entry): - return None - - def _get_subtitles(self): - return None - - def _get_description(self): - return self.meta_data['document'].get('beschreibung') - - def _get_timestamp(self): - meta = self.meta_data['meta'] - if meta: - return parse_iso8601(meta.get('editorialDate')) - - def _get_thumbnail(self): - teaser_images = self.meta_data['document'].get('teaserBild') - if teaser_images: - max_res = max(teaser_images, key=int) - return teaser_images[max_res].get('url') - -class ZDFExtractorPlayer(ZDFExtractor): - """Extraction method that requires downloads of several pages. - - Follows the requests of the website.""" - def __init__(self, parent, url, video_id): - ZDFExtractor.__init__(self, parent, url, video_id) - - def _fetch_entries(self): - webpage = self.parent._download_webpage(self.url, self.video_id) - - jsb = self.parent._search_regex(r"data-zdfplayer-jsb='([^']*)'", webpage, 'zdfplayer jsb data') - jsb_json = self.parent._parse_json(jsb, self.video_id) - - configuration_url = 'https://www.zdf.de' + jsb_json['config'] - configuration_json = self.parent._download_json(configuration_url, self.video_id, note='Downloading player configuration') - api_token = configuration_json['apiToken'] - - player_js = self.parent._download_webpage('https://www.zdf.de/ZDFplayer/latest-v2/skins/zdf/zdf-player.js', self.video_id, fatal=False, note='Downloading player script') - if player_js: - player_id = self.parent._search_regex(r'this\.ptmd_player_id="([^"]*)"', player_js, 'player id', fatal=False) - else: - player_id = None - - self.content_json = self.parent._download_json(jsb_json['content'], self.video_id, headers={'Api-Auth': 'Bearer %s' % api_token}, note='Downloading content description') - - main_video_content = self.content_json['mainVideoContent']['http://zdf.de/rels/target'] - meta_data_url = None - if not player_id: - # could not determine player_id => try alternativ generic URL - meta_data_url = main_video_content.get('http://zdf.de/rels/streams/ptmd') - if meta_data_url: - meta_data_url = 'https://api.zdf.de' + meta_data_url - else: - # no generic URL found => 2nd fallback: hardcoded player_id - player_id = 'ngplayer_2_3' - if not meta_data_url: - meta_data_url_template = main_video_content['http://zdf.de/rels/streams/ptmd-template'] - meta_data_url = 'https://api.zdf.de' + meta_data_url_template.replace('{playerId}', player_id) - - self.meta_data = self.parent._download_json(meta_data_url, self.video_id, note='Downloading meta data') - - formats = [] - for p_list_entry in self.meta_data['priorityList']: - for formitaet in p_list_entry['formitaeten']: - for entry in formitaet['qualities']: - yield (formitaet, entry) - - def _get_title(self): - return self.content_json['title'] - - def _get_video_url(self, entry_tuple): - (formitaet, entry) = entry_tuple - tracks = entry['audio'].get('tracks') - if not tracks: - return - if len(tracks) > 1: - self._downloader.report_warning('unexpected input: multiple tracks') - track = tracks[0] - return track['uri'] - - def _get_format_id(self, entry_tuple): - (formitaet, entry) = entry_tuple - facets = self._get_facets(formitaet) - add = '' - if 'adaptive' in facets: - add += 'a' - if 'restriction_useragent' in facets: - add += 'b' - if 'progressive' in facets: - add += 'p' - type_ = formitaet['type'] - format_id = type_ + '-' - if add: - format_id += add + '-' - # named qualities are not very useful for sorting the formats: - # a 'high' m3u8 entry can be better quality than a 'veryhigh' direct mp4 download - format_id += entry['quality'] - return format_id - - def _get_facets(self, formitaet): - facets = formitaet.get('facets') or [] - if formitaet.get('isAdaptive'): - facets.append('adaptive') - return facets - - def _get_format_note(self, entry_tuple): - (formitaet, entry) = entry_tuple - return ', '.join(self._get_facets(formitaet)) - - def _get_subtitles(self): - subtitles = {} - if 'captions' in self.meta_data: - for caption in self.meta_data['captions']: - lang = caption.get('language') - if not lang: - continue - if lang == 'deu': - lang = 'de' - subformat = {'url': caption.get('uri')} - if caption.get('format') == 'webvtt': - subformat['ext'] = 'vtt' - elif caption.get('format') == 'ebu-tt-d-basic-de': - subformat['ext'] = 'ttml' - if not lang in subtitles: - subtitles[lang] = [] - subtitles[lang].append(subformat) - return subtitles - - def _get_description(self): - return self.content_json.get('teasertext') - - def _get_timestamp(self): - return parse_iso8601(self.content_json.get('editorialDate')) - - def _get_thumbnail(self): - teaser_images = self.content_json.get('teaserImageRef') - if teaser_images: - teaser_images_layouts = teaser_images.get('layouts') - if teaser_images_layouts: - if 'original' in teaser_images_layouts: - return teaser_images_layouts['original'] - teasers = {} - for key in teaser_images_layouts: - width = self.parent._search_regex(r'(\d+)x\d+', key, 'teaser width', fatal=False) - if width: - teasers[int(width)] = teaser_images_layouts[key] - if teasers: - best = max(teasers) - return teasers[best] - -class ZDFChannelIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P[0-9]+)' +class ZDFChannelIE(ZDFBaseIE): + _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', + 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 'info_dict': { - 'id': '1586442', + 'id': 'das-aktuelle-sportstudio', + 'title': 'das aktuelle sportstudio | ZDF', }, - 'playlist_count': 3, + 'playlist_count': 21, }, { - 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332', - 'only_matching': True, + 'url': 'https://www.zdf.de/dokumentation/planet-e', + 'info_dict': { + 'id': 'planet-e', + 'title': 'planet e.', + }, + 'playlist_count': 4, }, { - 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332', - 'only_matching': True, - }, { - 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off', + 'url': 'https://www.zdf.de/filme/taunuskrimi/', 'only_matching': True, }] - _PAGE_SIZE = 50 - def _fetch_page(self, channel_id, page): - offset = page * self._PAGE_SIZE - xml_url = ( - 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s' - % (offset, self._PAGE_SIZE, channel_id)) - doc = self._download_xml( - xml_url, channel_id, - note='Downloading channel info', - errnote='Failed to download channel info') - - title = doc.find('.//information/title').text - description = doc.find('.//information/detail').text - for asset in doc.findall('.//teasers/teaser'): - a_type = asset.find('./type').text - a_id = asset.find('./details/assetId').text - if a_type not in ('video', 'topic'): - continue - yield { - '_type': 'url', - 'playlist_title': title, - 'playlist_description': description, - 'url': 'zdf:%s:%s' % (a_type, a_id), - } + @classmethod + def suitable(cls, url): + return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) def _real_extract(self, url): channel_id = self._match_id(url) - entries = OnDemandPagedList( - functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE) - return { - '_type': 'playlist', - 'id': channel_id, - 'entries': entries, - } + webpage = self._download_webpage(url, channel_id) + + entries = [ + self.url_result(item_url, ie=ZDFIE.ie_key()) + for item_url in orderedSet(re.findall( + r'data-plusbar-url=["\'](http.+?\.html)', webpage))] + + return self.playlist_result( + entries, channel_id, self._og_search_title(webpage, fatal=False)) + + """ + player = self._extract_player(webpage, channel_id) + + channel_id = self._search_regex( + r'docId\s*:\s*(["\'])(?P(?!\1).+?)\1', webpage, + 'channel id', group='id') + + channel = self._call_api( + 'https://api.zdf.de/content/documents/%s.json' % channel_id, + player, url, channel_id) + + items = [] + for module in channel['module']: + for teaser in try_get(module, lambda x: x['teaser'], list) or []: + t = try_get( + teaser, lambda x: x['http://zdf.de/rels/target'], dict) + if not t: + continue + items.extend(try_get( + t, + lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], + list) or []) + items.extend(try_get( + module, + lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], + list) or []) + + entries = [] + entry_urls = set() + for item in items: + t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) + if not t: + continue + sharing_url = t.get('http://zdf.de/rels/sharing-url') + if not sharing_url or not isinstance(sharing_url, compat_str): + continue + if sharing_url in entry_urls: + continue + entry_urls.add(sharing_url) + entries.append(self.url_result( + sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) + + return self.playlist_result(entries, channel_id, channel.get('title')) + """ From 2b12e340760da32e1ddac63fb1cc4238bcac78da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Jan 2017 04:07:32 +0700 Subject: [PATCH 085/586] [ChangeLog] Actualize --- ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index eac3b6b05..40cbdae3b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +version + +Extractors ++ [zdf] Fix extraction (#11055, #11063) +* [pornhub:playlist] Improve extraction (#11594) ++ [cctv] Add support for ncpa-classic.com (#11591) ++ [tunein] Add support for embeds (#11579) + + version 2017.01.02 Extractors From 7232bb299b69ee0cf8634a0ebf26f15ecb476a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Jan 2017 04:10:15 +0700 Subject: [PATCH 086/586] release 2017.01.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0e3bc1ce0..a78413518 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.02 +[debug] youtube-dl version 2017.01.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 40cbdae3b..a31b676e3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.05 Extractors + [zdf] Fix extraction (#11055, #11063) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8f2228592..2c8e5bcf6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.02' +__version__ = '2017.01.05' From 24d8a75982de3fd2fefd2c0d4a1248d03997d843 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 5 Jan 2017 18:45:26 +0800 Subject: [PATCH 087/586] [discoverygo] Fix JSON data parsing HTMLParser, which is used by extract_attributes, already unescapes attribute values with HTMLParser.unescape. They shouldn't be unescaped again, to there may be parsing errors. Ref: #11219, #11522 --- ChangeLog | 6 ++++++ youtube_dl/extractor/discoverygo.py | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a31b676e3..f3e9e1b5f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [discoverygo] Fix JSON data parsing (#11219, #11522) + + version 2017.01.05 Extractors diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index c4e83b2c3..2042493a8 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -6,7 +6,6 @@ from ..utils import ( extract_attributes, int_or_none, parse_age_limit, - unescapeHTML, ExtractorError, ) @@ -49,7 +48,7 @@ class DiscoveryGoIE(InfoExtractor): webpage, 'video container')) video = self._parse_json( - unescapeHTML(container.get('data-video') or container.get('data-json')), + container.get('data-video') or container.get('data-json'), display_id) title = video['name'] From ec85ded83cbfa652ba94cb080aab52d8b270212a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 2 Jan 2017 20:08:07 +0800 Subject: [PATCH 088/586] Fix "invalid escape sequences" error on Python 3.6 --- README.md | 2 +- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/compat.py | 2 +- youtube_dl/extractor/abcnews.py | 4 +- youtube_dl/extractor/abcotvs.py | 2 +- youtube_dl/extractor/adobetv.py | 2 +- youtube_dl/extractor/airmozilla.py | 2 +- youtube_dl/extractor/allocine.py | 8 +-- youtube_dl/extractor/alphaporno.py | 2 +- youtube_dl/extractor/ard.py | 2 +- youtube_dl/extractor/atresplayer.py | 4 +- youtube_dl/extractor/atttechchannel.py | 2 +- youtube_dl/extractor/audioboom.py | 2 +- youtube_dl/extractor/azubu.py | 4 +- youtube_dl/extractor/bet.py | 4 +- youtube_dl/extractor/bild.py | 2 +- youtube_dl/extractor/bilibili.py | 2 +- youtube_dl/extractor/biobiochiletv.py | 4 +- youtube_dl/extractor/byutv.py | 2 +- youtube_dl/extractor/camdemy.py | 4 +- youtube_dl/extractor/canvas.py | 6 +-- youtube_dl/extractor/carambatv.py | 4 +- youtube_dl/extractor/cbsnews.py | 2 +- youtube_dl/extractor/ccc.py | 4 +- youtube_dl/extractor/cda.py | 4 +- youtube_dl/extractor/ceskatelevize.py | 8 +-- youtube_dl/extractor/channel9.py | 6 +-- youtube_dl/extractor/charlierose.py | 2 +- youtube_dl/extractor/cliphunter.py | 4 +- youtube_dl/extractor/clipsyndicate.py | 2 +- youtube_dl/extractor/clubic.py | 2 +- youtube_dl/extractor/collegerama.py | 2 +- youtube_dl/extractor/comedycentral.py | 2 +- youtube_dl/extractor/coub.py | 2 +- youtube_dl/extractor/crackle.py | 2 +- youtube_dl/extractor/criterion.py | 2 +- youtube_dl/extractor/crooksandliars.py | 2 +- youtube_dl/extractor/crunchyroll.py | 4 +- youtube_dl/extractor/ctsnews.py | 4 +- youtube_dl/extractor/cultureunplugged.py | 2 +- youtube_dl/extractor/dailymotion.py | 2 +- youtube_dl/extractor/daum.py | 8 +-- youtube_dl/extractor/dbtv.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- youtube_dl/extractor/deezer.py | 2 +- youtube_dl/extractor/dhm.py | 4 +- youtube_dl/extractor/digiteka.py | 4 +- youtube_dl/extractor/douyutv.py | 10 ++-- youtube_dl/extractor/dramafever.py | 4 +- youtube_dl/extractor/drbonanza.py | 4 +- youtube_dl/extractor/drtuber.py | 2 +- youtube_dl/extractor/dumpert.py | 2 +- youtube_dl/extractor/eagleplatform.py | 4 +- youtube_dl/extractor/einthusan.py | 4 +- youtube_dl/extractor/eroprofile.py | 4 +- youtube_dl/extractor/escapist.py | 4 +- youtube_dl/extractor/esri.py | 2 +- youtube_dl/extractor/europa.py | 2 +- youtube_dl/extractor/expotv.py | 2 +- youtube_dl/extractor/fc2.py | 2 +- youtube_dl/extractor/firsttv.py | 4 +- youtube_dl/extractor/fivetv.py | 6 +-- youtube_dl/extractor/fktv.py | 2 +- youtube_dl/extractor/foxgay.py | 2 +- youtube_dl/extractor/foxnews.py | 6 +-- youtube_dl/extractor/franceculture.py | 2 +- youtube_dl/extractor/francetv.py | 4 +- youtube_dl/extractor/funimation.py | 6 +-- youtube_dl/extractor/funnyordie.py | 4 +- youtube_dl/extractor/gamersyde.py | 2 +- youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/gamestar.py | 2 +- youtube_dl/extractor/gazeta.py | 2 +- youtube_dl/extractor/generic.py | 22 ++++---- youtube_dl/extractor/giantbomb.py | 2 +- youtube_dl/extractor/giga.py | 2 +- youtube_dl/extractor/glide.py | 2 +- youtube_dl/extractor/godtube.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hbo.py | 4 +- youtube_dl/extractor/hearthisat.py | 4 +- youtube_dl/extractor/heise.py | 2 +- youtube_dl/extractor/hellporno.py | 2 +- youtube_dl/extractor/historicfilms.py | 2 +- youtube_dl/extractor/hitbox.py | 4 +- youtube_dl/extractor/hornbunny.py | 2 +- youtube_dl/extractor/howstuffworks.py | 6 +-- youtube_dl/extractor/huajiao.py | 2 +- youtube_dl/extractor/huffpost.py | 2 +- youtube_dl/extractor/indavideo.py | 4 +- youtube_dl/extractor/instagram.py | 6 +-- youtube_dl/extractor/ir90tv.py | 2 +- youtube_dl/extractor/ivi.py | 6 +-- youtube_dl/extractor/izlesene.py | 4 +- youtube_dl/extractor/jamendo.py | 2 +- youtube_dl/extractor/jove.py | 4 +- youtube_dl/extractor/karrierevideos.py | 4 +- youtube_dl/extractor/keezmovies.py | 2 +- youtube_dl/extractor/ketnet.py | 2 +- youtube_dl/extractor/krasview.py | 2 +- youtube_dl/extractor/kusi.py | 2 +- youtube_dl/extractor/leeco.py | 4 +- youtube_dl/extractor/lemonde.py | 2 +- youtube_dl/extractor/libraryofcongress.py | 2 +- youtube_dl/extractor/libsyn.py | 2 +- youtube_dl/extractor/lifenews.py | 2 +- youtube_dl/extractor/limelight.py | 4 +- youtube_dl/extractor/litv.py | 4 +- youtube_dl/extractor/liveleak.py | 6 +-- youtube_dl/extractor/livestream.py | 2 +- youtube_dl/extractor/lnkgo.py | 4 +- youtube_dl/extractor/lynda.py | 2 +- youtube_dl/extractor/matchtv.py | 2 +- youtube_dl/extractor/mdr.py | 2 +- youtube_dl/extractor/meipai.py | 4 +- youtube_dl/extractor/melonvod.py | 2 +- youtube_dl/extractor/metacafe.py | 2 +- youtube_dl/extractor/mgoon.py | 2 +- youtube_dl/extractor/mgtv.py | 2 +- youtube_dl/extractor/minhateca.py | 2 +- youtube_dl/extractor/ministrygrid.py | 2 +- youtube_dl/extractor/mitele.py | 4 +- youtube_dl/extractor/mixcloud.py | 2 +- youtube_dl/extractor/mlb.py | 8 +-- youtube_dl/extractor/mnet.py | 2 +- youtube_dl/extractor/moevideo.py | 4 +- youtube_dl/extractor/mofosex.py | 2 +- youtube_dl/extractor/mojvideo.py | 2 +- youtube_dl/extractor/motherless.py | 6 +-- youtube_dl/extractor/movieclips.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/movingimage.py | 2 +- youtube_dl/extractor/muenchentv.py | 2 +- youtube_dl/extractor/mwave.py | 4 +- youtube_dl/extractor/myvi.py | 2 +- youtube_dl/extractor/myvideo.py | 2 +- youtube_dl/extractor/nbc.py | 2 +- youtube_dl/extractor/ndr.py | 4 +- youtube_dl/extractor/ndtv.py | 2 +- youtube_dl/extractor/netzkino.py | 2 +- youtube_dl/extractor/nextmedia.py | 12 ++--- youtube_dl/extractor/nfl.py | 4 +- youtube_dl/extractor/nosvideo.py | 4 +- youtube_dl/extractor/nova.py | 10 ++-- youtube_dl/extractor/novamov.py | 12 ++--- youtube_dl/extractor/nowness.py | 6 +-- youtube_dl/extractor/nowtv.py | 12 ++--- youtube_dl/extractor/noz.py | 2 +- youtube_dl/extractor/ntvde.py | 2 +- youtube_dl/extractor/ntvru.py | 10 ++-- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/ondemandkorea.py | 2 +- youtube_dl/extractor/onionstudios.py | 2 +- youtube_dl/extractor/openload.py | 4 +- youtube_dl/extractor/orf.py | 2 +- youtube_dl/extractor/pandoratv.py | 2 +- youtube_dl/extractor/pbs.py | 16 +++--- youtube_dl/extractor/people.py | 2 +- youtube_dl/extractor/pinkbike.py | 2 +- youtube_dl/extractor/pladform.py | 2 +- youtube_dl/extractor/playtvak.py | 10 ++-- youtube_dl/extractor/playvid.py | 2 +- youtube_dl/extractor/playwire.py | 2 +- youtube_dl/extractor/polskieradio.py | 2 +- youtube_dl/extractor/porncom.py | 2 +- youtube_dl/extractor/pornhd.py | 4 +- youtube_dl/extractor/pornotube.py | 2 +- youtube_dl/extractor/pornovoisines.py | 2 +- youtube_dl/extractor/pornoxo.py | 2 +- youtube_dl/extractor/presstv.py | 2 +- youtube_dl/extractor/promptfile.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 2 +- youtube_dl/extractor/qqmusic.py | 6 +-- youtube_dl/extractor/r7.py | 2 +- youtube_dl/extractor/radiobremen.py | 2 +- youtube_dl/extractor/radiode.py | 2 +- youtube_dl/extractor/radiojavan.py | 2 +- youtube_dl/extractor/rai.py | 6 +-- youtube_dl/extractor/rbmaradio.py | 2 +- youtube_dl/extractor/reuters.py | 2 +- youtube_dl/extractor/reverbnation.py | 2 +- youtube_dl/extractor/ro220.py | 2 +- youtube_dl/extractor/rockstargames.py | 2 +- youtube_dl/extractor/roosterteeth.py | 2 +- youtube_dl/extractor/rottentomatoes.py | 2 +- youtube_dl/extractor/rte.py | 6 +-- youtube_dl/extractor/rtlnl.py | 4 +- youtube_dl/extractor/rtp.py | 2 +- youtube_dl/extractor/rts.py | 6 +-- youtube_dl/extractor/rtvnh.py | 2 +- youtube_dl/extractor/rudo.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/ruutu.py | 6 +-- youtube_dl/extractor/savefrom.py | 2 +- youtube_dl/extractor/sbs.py | 2 +- youtube_dl/extractor/screencast.py | 8 +-- youtube_dl/extractor/screencastomatic.py | 2 +- youtube_dl/extractor/screenjunkies.py | 6 +-- youtube_dl/extractor/senateisvp.py | 2 +- youtube_dl/extractor/sendtonews.py | 2 +- youtube_dl/extractor/sexu.py | 2 +- youtube_dl/extractor/slutload.py | 2 +- youtube_dl/extractor/smotri.py | 4 +- youtube_dl/extractor/snotr.py | 4 +- youtube_dl/extractor/soundgasm.py | 2 +- youtube_dl/extractor/spankbang.py | 2 +- youtube_dl/extractor/spankwire.py | 2 +- youtube_dl/extractor/spiegeltv.py | 2 +- youtube_dl/extractor/sport5.py | 2 +- youtube_dl/extractor/sportbox.py | 4 +- youtube_dl/extractor/sportdeutschland.py | 6 +-- youtube_dl/extractor/srgssr.py | 2 +- youtube_dl/extractor/srmediathek.py | 2 +- youtube_dl/extractor/stanfordoc.py | 4 +- youtube_dl/extractor/stitcher.py | 4 +- youtube_dl/extractor/streamable.py | 4 +- youtube_dl/extractor/streetvoice.py | 2 +- youtube_dl/extractor/sunporno.py | 2 +- youtube_dl/extractor/svt.py | 2 +- youtube_dl/extractor/swrmediathek.py | 6 +-- youtube_dl/extractor/tagesschau.py | 12 ++--- youtube_dl/extractor/tass.py | 2 +- youtube_dl/extractor/tdslifeway.py | 2 +- youtube_dl/extractor/teachertube.py | 4 +- youtube_dl/extractor/ted.py | 4 +- youtube_dl/extractor/telegraaf.py | 2 +- youtube_dl/extractor/telemb.py | 4 +- youtube_dl/extractor/telewebion.py | 2 +- youtube_dl/extractor/theplatform.py | 4 +- youtube_dl/extractor/thisamericanlife.py | 2 +- youtube_dl/extractor/tinypic.py | 2 +- youtube_dl/extractor/tnaflix.py | 14 +++--- youtube_dl/extractor/tudou.py | 4 +- youtube_dl/extractor/tumblr.py | 8 +-- youtube_dl/extractor/turbo.py | 2 +- youtube_dl/extractor/tv2.py | 2 +- youtube_dl/extractor/tv4.py | 4 +- youtube_dl/extractor/tvc.py | 8 +-- youtube_dl/extractor/tweakers.py | 2 +- youtube_dl/extractor/twentyfourvideo.py | 2 +- youtube_dl/extractor/twitch.py | 6 +-- youtube_dl/extractor/twitter.py | 12 ++--- youtube_dl/extractor/udn.py | 2 +- youtube_dl/extractor/urort.py | 2 +- youtube_dl/extractor/ustudio.py | 2 +- youtube_dl/extractor/varzesh3.py | 4 +- youtube_dl/extractor/vbox7.py | 4 +- youtube_dl/extractor/vessel.py | 2 +- youtube_dl/extractor/vgtv.py | 10 ++-- youtube_dl/extractor/vidbit.py | 2 +- youtube_dl/extractor/viddler.py | 2 +- youtube_dl/extractor/videomega.py | 2 +- youtube_dl/extractor/videomore.py | 10 ++-- youtube_dl/extractor/vidio.py | 2 +- youtube_dl/extractor/vidme.py | 10 ++-- youtube_dl/extractor/viewlift.py | 8 +-- youtube_dl/extractor/viewster.py | 2 +- youtube_dl/extractor/viidea.py | 12 ++--- youtube_dl/extractor/vimeo.py | 28 +++++------ youtube_dl/extractor/vimple.py | 2 +- youtube_dl/extractor/vodlocker.py | 2 +- youtube_dl/extractor/voicerepublic.py | 2 +- youtube_dl/extractor/vporn.py | 4 +- youtube_dl/extractor/vube.py | 8 +-- youtube_dl/extractor/walla.py | 2 +- youtube_dl/extractor/watchindianporn.py | 2 +- youtube_dl/extractor/webcaster.py | 2 +- youtube_dl/extractor/webofstories.py | 8 +-- youtube_dl/extractor/weiqitv.py | 4 +- youtube_dl/extractor/xbef.py | 2 +- youtube_dl/extractor/xfileshare.py | 6 +-- youtube_dl/extractor/xuite.py | 6 +-- youtube_dl/extractor/yesjapan.py | 2 +- youtube_dl/extractor/yinyuetai.py | 2 +- youtube_dl/extractor/ynet.py | 4 +- youtube_dl/extractor/youporn.py | 4 +- youtube_dl/extractor/yourupload.py | 2 +- youtube_dl/extractor/youtube.py | 50 +++++++++---------- youtube_dl/extractor/zapiks.py | 2 +- youtube_dl/extractor/zdf.py | 2 +- youtube_dl/extractor/zingmp3.py | 2 +- youtube_dl/postprocessor/metadatafromtitle.py | 2 +- youtube_dl/utils.py | 4 +- 283 files changed, 538 insertions(+), 538 deletions(-) diff --git a/README.md b/README.md index 46aa78f66..ead6e3dbf 100644 --- a/README.md +++ b/README.md @@ -961,7 +961,7 @@ After you have ensured this site is distributing its content legally, you can fo 'id': '42', 'ext': 'mp4', 'title': 'Video title goes here', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 53f20ac2c..5d654f55f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1339,7 +1339,7 @@ class YoutubeDL(object): format['format_id'] = compat_str(i) else: # Sanitize format_id from characters used in format selector expression - format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id']) + format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) format_id = format['format_id'] if format_id not in formats_dict: formats_dict[format_id] = [] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 83ee7e257..02abf8c1e 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2344,7 +2344,7 @@ try: from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus except ImportError: # Python 2 _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') - else re.compile('([\x00-\x7f]+)')) + else re.compile(r'([\x00-\x7f]+)')) # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus # implementations from cpython 3.4.3's stdlib. Python 2's version diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index 6ae5d9a96..4f56c4c11 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -23,7 +23,7 @@ class AbcNewsVideoIE(AMPIE): 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', 'duration': 180, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download @@ -59,7 +59,7 @@ class AbcNewsIE(InfoExtractor): 'display_id': 'dramatic-video-rare-death-job-america', 'title': 'Occupational Hazards', 'description': 'Nightline investigates the dangers that lurk at various jobs.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20100428', 'timestamp': 1272412800, }, diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py index 054bb0596..76e98132b 100644 --- a/youtube_dl/extractor/abcotvs.py +++ b/youtube_dl/extractor/abcotvs.py @@ -23,7 +23,7 @@ class ABCOTVSIE(InfoExtractor): 'ext': 'mp4', 'title': 'East Bay museum celebrates vintage synthesizers', 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1421123075, 'upload_date': '20150113', 'uploader': 'Jonathan Bloom', diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 5ae16fa16..008c98e51 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -30,7 +30,7 @@ class AdobeTVIE(AdobeTVBaseIE): 'ext': 'mp4', 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'upload_date': '20110914', 'duration': 60, 'view_count': int, diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py index f8e70f4e5..0e0691879 100644 --- a/youtube_dl/extractor/airmozilla.py +++ b/youtube_dl/extractor/airmozilla.py @@ -20,7 +20,7 @@ class AirMozillaIE(InfoExtractor): 'id': '6x4q2w', 'ext': 'mp4', 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', - 'thumbnail': 're:https?://vid\.ly/(?P[0-9a-z-]+)/poster', + 'thumbnail': r're:https?://vid\.ly/(?P[0-9a-z-]+)/poster', 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', 'timestamp': 1422487800, 'upload_date': '20150128', diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 517b06def..90f11d39f 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -21,7 +21,7 @@ class AllocineIE(InfoExtractor): 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }, { 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', @@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor): 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }, { 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', @@ -43,7 +43,7 @@ class AllocineIE(InfoExtractor): 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }, { 'url': 'http://www.allocine.fr/video/video-19550147/', @@ -53,7 +53,7 @@ class AllocineIE(InfoExtractor): 'ext': 'mp4', 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }] diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dl/extractor/alphaporno.py index c34719d1f..3a6d99f6b 100644 --- a/youtube_dl/extractor/alphaporno.py +++ b/youtube_dl/extractor/alphaporno.py @@ -19,7 +19,7 @@ class AlphaPornoIE(InfoExtractor): 'display_id': 'sensual-striptease-porn-with-samantha-alexandra', 'ext': 'mp4', 'title': 'Sensual striptease porn with Samantha Alexandra', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'timestamp': 1418694611, 'upload_date': '20141216', 'duration': 387, diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 35f3656f1..2d5599456 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -253,7 +253,7 @@ class ARDIE(InfoExtractor): 'duration': 2600, 'title': 'Die Story im Ersten: Mission unter falscher Flagge', 'upload_date': '20140804', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'HTTP Error 404: Not Found', } diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index d2f388964..e3c669830 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -30,7 +30,7 @@ class AtresPlayerIE(InfoExtractor): 'title': 'Especial Solidario de Nochebuena', 'description': 'md5:e2d52ff12214fa937107d21064075bf1', 'duration': 5527.6, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'This video is only available for registered users' }, @@ -43,7 +43,7 @@ class AtresPlayerIE(InfoExtractor): 'title': 'David Bustamante', 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6', 'duration': 1439.0, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { diff --git a/youtube_dl/extractor/atttechchannel.py b/youtube_dl/extractor/atttechchannel.py index b01d35bb2..8f93fb353 100644 --- a/youtube_dl/extractor/atttechchannel.py +++ b/youtube_dl/extractor/atttechchannel.py @@ -14,7 +14,7 @@ class ATTTechChannelIE(InfoExtractor): 'ext': 'flv', 'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use', 'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140127', }, 'params': { diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index d7d1c6306..8fc5f65c6 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -17,7 +17,7 @@ class AudioBoomIE(InfoExtractor): 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans', 'duration': 2245.72, 'uploader': 'Steve Czaban', - 'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', + 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', } }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 1eebf5dfd..3ba2f00d3 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -21,7 +21,7 @@ class AzubuIE(InfoExtractor): 'ext': 'mp4', 'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1', 'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01', - 'thumbnail': 're:^https?://.*\.jpe?g', + 'thumbnail': r're:^https?://.*\.jpe?g', 'timestamp': 1417523507.334, 'upload_date': '20141202', 'duration': 9988.7, @@ -38,7 +38,7 @@ class AzubuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"', 'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af', - 'thumbnail': 're:^https?://.*\.jpe?g', + 'thumbnail': r're:^https?://.*\.jpe?g', 'timestamp': 1410530893.320, 'upload_date': '20140912', 'duration': 172.385, diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 1f8ef0303..d7ceaa85e 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -17,7 +17,7 @@ class BetIE(MTVServicesInfoExtractor): 'description': 'President Obama urges persistence in confronting racism and bias.', 'duration': 1534, 'upload_date': '20141208', - 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', } @@ -37,7 +37,7 @@ class BetIE(MTVServicesInfoExtractor): 'description': 'A BET News special.', 'duration': 1696, 'upload_date': '20141125', - 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', } diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py index 1a0184861..b8dfbd42b 100644 --- a/youtube_dl/extractor/bild.py +++ b/youtube_dl/extractor/bild.py @@ -19,7 +19,7 @@ class BildIE(InfoExtractor): 'ext': 'mp4', 'title': 'Das können die neuen iPads', 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, } } diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 2d174e6f9..5051934ef 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -28,7 +28,7 @@ class BiliBiliIE(InfoExtractor): 'duration': 308.315, 'timestamp': 1398012660, 'upload_date': '20140420', - 'thumbnail': 're:^https?://.+\.jpg', + 'thumbnail': r're:^https?://.+\.jpg', 'uploader': '菊子桑', 'uploader_id': '156160', }, diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index 7608c0a08..b92031c8a 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -19,7 +19,7 @@ class BioBioChileTVIE(InfoExtractor): 'id': 'sobre-camaras-y-camarillas-parlamentarias', 'ext': 'mp4', 'title': 'Sobre Cámaras y camarillas parlamentarias', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Fernando Atria', }, 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', @@ -31,7 +31,7 @@ class BioBioChileTVIE(InfoExtractor): 'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades', 'ext': 'mp4', 'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Piangella Obrador', }, 'params': { diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 4be175d70..8ef089653 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor): 'ext': 'mp4', 'title': 'Season 5 Episode 5', 'description': 'md5:e07269172baff037f8e8bf9956bc9747', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1486.486, }, 'params': { diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index d4e6fbdce..8f0c6c545 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -26,7 +26,7 @@ class CamdemyIE(InfoExtractor): 'id': '5181', 'ext': 'mp4', 'title': 'Ch1-1 Introduction, Signals (02-23-2012)', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'creator': 'ss11spring', 'duration': 1591, 'upload_date': '20130114', @@ -41,7 +41,7 @@ class CamdemyIE(InfoExtractor): 'id': '13885', 'ext': 'mp4', 'title': 'EverCam + Camdemy QuickStart', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', 'creator': 'evercam', 'duration': 318, diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index 2cc539a6c..544c6657c 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -17,7 +17,7 @@ class CanvasIE(InfoExtractor): 'ext': 'mp4', 'title': 'De afspraak veilt voor de Warmste Week', 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 49.02, } }, { @@ -29,7 +29,7 @@ class CanvasIE(InfoExtractor): 'ext': 'mp4', 'title': 'Pieter 0167', 'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2553.08, 'subtitles': { 'nl': [{ @@ -48,7 +48,7 @@ class CanvasIE(InfoExtractor): 'ext': 'mp4', 'title': 'Herbekijk Sorry voor alles', 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 3788.06, }, 'params': { diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py index 66c0f900a..9ba909a91 100644 --- a/youtube_dl/extractor/carambatv.py +++ b/youtube_dl/extractor/carambatv.py @@ -21,7 +21,7 @@ class CarambaTVIE(InfoExtractor): 'id': '191910501', 'ext': 'mp4', 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 2678.31, }, }, { @@ -69,7 +69,7 @@ class CarambaTVPageIE(InfoExtractor): 'id': '475222', 'ext': 'flv', 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', # duration reported by videomore is incorrect 'duration': int, }, diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 91b0f5fa9..17bb9af4f 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -39,7 +39,7 @@ class CBSNewsIE(CBSIE): 'upload_date': '20140404', 'timestamp': 1396650660, 'uploader': 'CBSI-NEW', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 205, 'subtitles': { 'en': [{ diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 8f7f09e22..734702144 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -19,7 +19,7 @@ class CCCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Introduction to Processor Design', 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20131228', 'timestamp': 1388188800, 'duration': 3710, @@ -32,7 +32,7 @@ class CCCIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id') + event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id') event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id) formats = [] diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index e00bdaf66..ae7af2f0e 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -24,7 +24,7 @@ class CDAIE(InfoExtractor): 'height': 720, 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', 'description': 'md5:269ccd135d550da90d1662651fcb9772', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'average_rating': float, 'duration': 39 } @@ -36,7 +36,7 @@ class CDAIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lądowanie na lotnisku na Maderze', 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'crash404', 'view_count': int, 'average_rating': float, diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 4ec79d19d..4f88c31ad 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -25,7 +25,7 @@ class CeskaTelevizeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Hyde Park Civilizace', 'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3350, }, 'params': { @@ -39,7 +39,7 @@ class CeskaTelevizeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Hyde Park Civilizace: Bonus 01 - En', 'description': 'English Subtittles', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 81.3, }, 'params': { @@ -52,7 +52,7 @@ class CeskaTelevizeIE(InfoExtractor): 'info_dict': { 'id': 402, 'ext': 'mp4', - 'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, 'params': { @@ -80,7 +80,7 @@ class CeskaTelevizeIE(InfoExtractor): 'id': '61924494877068022', 'ext': 'mp4', 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 1558.3, }, }], diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 34d4e6156..865dbcaba 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -31,7 +31,7 @@ class Channel9IE(InfoExtractor): 'title': 'Developer Kick-Off Session: Stuff We Love', 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', 'duration': 4576, - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'session_code': 'KOS002', 'session_day': 'Day 1', 'session_room': 'Arena 1A', @@ -47,7 +47,7 @@ class Channel9IE(InfoExtractor): 'title': 'Self-service BI with Power BI - nuclear testing', 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', 'duration': 1540, - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'authors': ['Mike Wilmot'], }, }, { @@ -59,7 +59,7 @@ class Channel9IE(InfoExtractor): 'title': 'Ranges for the Standard Library', 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', 'duration': 5646, - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index 4bf2cf7b0..2d517f231 100644 --- a/youtube_dl/extractor/charlierose.py +++ b/youtube_dl/extractor/charlierose.py @@ -13,7 +13,7 @@ class CharlieRoseIE(InfoExtractor): 'id': '27996', 'ext': 'mp4', 'title': 'Remembering Zaha Hadid', - 'thumbnail': 're:^https?://.*\.jpg\?\d+', + 'thumbnail': r're:^https?://.*\.jpg\?\d+', 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', 'subtitles': { 'en': [{ diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 252c2e846..ab651d1c8 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -30,7 +30,7 @@ class CliphunterIE(InfoExtractor): 'id': '1012420', 'ext': 'flv', 'title': 'Fun Jynx Maze solo', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 18, }, 'skip': 'Video gone', @@ -41,7 +41,7 @@ class CliphunterIE(InfoExtractor): 'id': '2019449', 'ext': 'mp4', 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 18, }, }] diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index 0b6ad895f..6cdb42f5a 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -18,7 +18,7 @@ class ClipsyndicateIE(InfoExtractor): 'ext': 'mp4', 'title': 'Brick Briscoe', 'duration': 612, - 'thumbnail': 're:^https?://.+\.jpg', + 'thumbnail': r're:^https?://.+\.jpg', }, }, { 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index f7ee3a8f8..98f9cb596 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -19,7 +19,7 @@ class ClubicIE(InfoExtractor): 'ext': 'mp4', 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', 'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*', - 'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$', + 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', } }, { 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py index f9e84193d..18c734766 100644 --- a/youtube_dl/extractor/collegerama.py +++ b/youtube_dl/extractor/collegerama.py @@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.', 'description': '', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 7713.088, 'timestamp': 1413309600, 'upload_date': '20141014', diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 0239dfd84..512f072f6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -79,7 +79,7 @@ class ToshIE(MTVServicesInfoExtractor): 'ext': 'mp4', 'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans', 'description': 'Tosh asked fans to share their summer plans.', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', # It's really reported to be published on year 2077 'upload_date': '20770610', 'timestamp': 3390510600, diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py index a901b8d22..5fa1f006b 100644 --- a/youtube_dl/extractor/coub.py +++ b/youtube_dl/extractor/coub.py @@ -20,7 +20,7 @@ class CoubIE(InfoExtractor): 'id': '5u5n1', 'ext': 'mp4', 'title': 'The Matrix Moonwalk', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 4.6, 'timestamp': 1428527772, 'upload_date': '20150408', diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index cc68f1c00..25c5e7d04 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -14,7 +14,7 @@ class CrackleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Everybody Respects A Bloody Nose', 'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 906, 'series': 'Comedians In Cars Getting Coffee', 'season_number': 8, diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index cf6a5d6cb..f7815b905 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -14,7 +14,7 @@ class CriterionIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le Samouraï', 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py index 443eb7691..7fb782db7 100644 --- a/youtube_dl/extractor/crooksandliars.py +++ b/youtube_dl/extractor/crooksandliars.py @@ -16,7 +16,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1428207000, 'upload_date': '20150405', 'uploader': 'Heather', diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8d5b69f68..559044352 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -142,7 +142,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'ext': 'flv', 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11', 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Danny Choo Network', 'upload_date': '20120213', }, @@ -158,7 +158,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'ext': 'mp4', 'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant', 'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'TV TOKYO', 'upload_date': '20160508', }, diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index 83ca90c3b..d565335cf 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -28,7 +28,7 @@ class CtsNewsIE(InfoExtractor): 'ext': 'mp4', 'title': '韓國31歲童顏男 貌如十多歲小孩', 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1378205880, 'upload_date': '20130903', } @@ -41,7 +41,7 @@ class CtsNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'iPhone6熱銷 蘋果財報亮眼', 'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20150128', 'uploader_id': 'TBSCTS', 'uploader': '中華電視公司', diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py index 9f26fa587..bcdf27323 100644 --- a/youtube_dl/extractor/cultureunplugged.py +++ b/youtube_dl/extractor/cultureunplugged.py @@ -21,7 +21,7 @@ class CultureUnpluggedIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Next, Best West', 'description': 'md5:0423cd00833dea1519cf014e9d0903b1', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'creator': 'Coldstream Creative', 'duration': 2203, 'view_count': int, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4a3314ea7..31bf5faf6 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -58,7 +58,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', 'description': 'Several come bundled with the Steam Controller.', - 'thumbnail': 're:^https?:.*\.(?:jpg|png)$', + 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', 'duration': 74, 'timestamp': 1425657362, 'upload_date': '20150306', diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 732b4362a..76f021892 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -32,7 +32,7 @@ class DaumIE(InfoExtractor): 'title': '마크 헌트 vs 안토니오 실바', 'description': 'Mark Hunt vs Antonio Silva', 'upload_date': '20131217', - 'thumbnail': 're:^https?://.*\.(?:jpg|png)', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 2117, 'view_count': int, 'comment_count': int, @@ -45,7 +45,7 @@ class DaumIE(InfoExtractor): 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118', 'description': 'md5:79794514261164ff27e36a21ad229fc5', 'upload_date': '20150604', - 'thumbnail': 're:^https?://.*\.(?:jpg|png)', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 154, 'view_count': int, 'comment_count': int, @@ -61,7 +61,7 @@ class DaumIE(InfoExtractor): 'title': '01-Korean War ( Trouble on the horizon )', 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름', 'upload_date': '20080223', - 'thumbnail': 're:^https?://.*\.(?:jpg|png)', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 249, 'view_count': int, 'comment_count': int, @@ -139,7 +139,7 @@ class DaumClipIE(InfoExtractor): 'title': 'DOTA 2GETHER 시즌2 6회 - 2부', 'description': 'DOTA 2GETHER 시즌2 6회 - 2부', 'upload_date': '20130831', - 'thumbnail': 're:^https?://.*\.(?:jpg|png)', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 3868, 'view_count': int, }, diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index 6d880d43d..f232f0dc5 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -17,7 +17,7 @@ class DBTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'timestamp': 1404039863, 'upload_date': '20140629', 'duration': 69.544, diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 14ba88715..00fbbff2f 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -17,7 +17,7 @@ class DctpTvIE(InfoExtractor): 'title': 'Videoinstallation für eine Kaufhausfassade', 'description': 'Kurzfilm', 'upload_date': '20110407', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, } diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index 7a07f3267..ec87b94db 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor): 'id': '176747451', 'title': 'Best!', 'uploader': 'Anonymous', - 'thumbnail': 're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', + 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', }, 'playlist_count': 30, 'skip': 'Only available in .de', diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index 44e0c5d4d..aee72a6ed 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -17,7 +17,7 @@ class DHMIE(InfoExtractor): 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', 'description': 'md5:1fabd480c153f97b07add61c44407c82', 'duration': 660, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/', @@ -26,7 +26,7 @@ class DHMIE(InfoExtractor): 'id': 'rolle-1', 'ext': 'flv', 'title': 'ROLLE 1', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }] diff --git a/youtube_dl/extractor/digiteka.py b/youtube_dl/extractor/digiteka.py index 7bb79ffda..3dfde0d8c 100644 --- a/youtube_dl/extractor/digiteka.py +++ b/youtube_dl/extractor/digiteka.py @@ -36,7 +36,7 @@ class DigitekaIE(InfoExtractor): 'id': 's8uk0r', 'ext': 'mp4', 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 74, 'upload_date': '20150317', 'timestamp': 1426604939, @@ -50,7 +50,7 @@ class DigitekaIE(InfoExtractor): 'id': 'xvpfp8', 'ext': 'mp4', 'title': 'Two - C\'est La Vie (clip)', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 233, 'upload_date': '20150224', 'timestamp': 1424760500, diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index e366e17e6..2f3c5113e 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -26,8 +26,8 @@ class DouyuTVIE(InfoExtractor): 'display_id': 'iseven', 'ext': 'flv', 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 're:.*m7show@163\.com.*', - 'thumbnail': 're:^https?://.*\.jpg$', + 'description': r're:.*m7show@163\.com.*', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': '7师傅', 'is_live': True, }, @@ -42,7 +42,7 @@ class DouyuTVIE(InfoExtractor): 'ext': 'flv', 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'douyu小漠', 'is_live': True, }, @@ -57,8 +57,8 @@ class DouyuTVIE(InfoExtractor): 'display_id': '17732', 'ext': 'flv', 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 're:.*m7show@163\.com.*', - 'thumbnail': 're:^https?://.*\.jpg$', + 'description': r're:.*m7show@163\.com.*', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': '7师傅', 'is_live': True, }, diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index c11595612..1edd8e7bd 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -76,7 +76,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'episode': 'Episode 1', 'episode_number': 1, - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1404336058, 'upload_date': '20140702', 'duration': 343, @@ -94,7 +94,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', 'episode_number': 4, - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1450213200, 'upload_date': '20151215', 'duration': 5602, diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py index 01271f8f0..79ec212c8 100644 --- a/youtube_dl/extractor/drbonanza.py +++ b/youtube_dl/extractor/drbonanza.py @@ -20,7 +20,7 @@ class DRBonanzaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Talkshowet - Leonard Cohen', 'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca', - 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', 'timestamp': 1295537932, 'upload_date': '20110120', 'duration': 3664, @@ -36,7 +36,7 @@ class DRBonanzaIE(InfoExtractor): 'ext': 'mp3', 'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission', 'description': 'md5:501e5a195749480552e214fbbed16c4e', - 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', 'timestamp': 1223274900, 'upload_date': '20081006', 'duration': 7369, diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 22da8e481..1eca82b3b 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -22,7 +22,7 @@ class DrTuberIE(InfoExtractor): 'like_count': int, 'comment_count': int, 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, } }, { diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index e5aadcd25..c9fc9b5a9 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -21,7 +21,7 @@ class DumpertIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ik heb nieuws voor je', 'description': 'Niet schrikken hoor', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/', diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index c2f593eca..76d39adac 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -31,7 +31,7 @@ class EaglePlatformIE(InfoExtractor): 'ext': 'mp4', 'title': 'Навальный вышел на свободу', 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 87, 'view_count': int, 'age_limit': 0, @@ -45,7 +45,7 @@ class EaglePlatformIE(InfoExtractor): 'id': '12820', 'ext': 'mp4', 'title': "'O Sole Mio", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 216, 'view_count': int, }, diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 443865ad2..6ca07a13d 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -19,7 +19,7 @@ class EinthusanIE(InfoExtractor): 'id': '2447', 'ext': 'mp4', 'title': 'Ek Villain', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:9d29fc91a7abadd4591fb862fa560d93', } }, @@ -30,7 +30,7 @@ class EinthusanIE(InfoExtractor): 'id': '1671', 'ext': 'mp4', 'title': 'Soodhu Kavvuum', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c', } }, diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 297f8a6f5..c08643a17 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -22,7 +22,7 @@ class EroProfileIE(InfoExtractor): 'display_id': 'sexy-babe-softcore', 'ext': 'm4v', 'title': 'sexy babe softcore', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, } }, { @@ -32,7 +32,7 @@ class EroProfileIE(InfoExtractor): 'id': '1133519', 'ext': 'm4v', 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, }, 'skip': 'Requires login', diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index a3d7bbbcb..4d8a3c134 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -45,7 +45,7 @@ class EscapistIE(InfoExtractor): 'ext': 'mp4', 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", 'title': "Breaking Down Baldur's Gate", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 264, 'uploader': 'The Escapist', } @@ -57,7 +57,7 @@ class EscapistIE(InfoExtractor): 'ext': 'mp4', 'description': 'This week, Zero Punctuation reviews Evolve.', 'title': 'Evolve - One vs Multiplayer', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 304, 'uploader': 'The Escapist', } diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py index d4205d7fb..e9dcaeb1d 100644 --- a/youtube_dl/extractor/esri.py +++ b/youtube_dl/extractor/esri.py @@ -22,7 +22,7 @@ class EsriVideoIE(InfoExtractor): 'ext': 'mp4', 'title': 'ArcGIS Online - Developing Applications', 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 185, 'upload_date': '20120419', } diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py index adc43919e..1efc0b2ec 100644 --- a/youtube_dl/extractor/europa.py +++ b/youtube_dl/extractor/europa.py @@ -23,7 +23,7 @@ class EuropaIE(InfoExtractor): 'ext': 'mp4', 'title': 'TRADE - Wikileaks on TTIP', 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20150811', 'duration': 34, 'view_count': int, diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py index ef11962f3..95a897782 100644 --- a/youtube_dl/extractor/expotv.py +++ b/youtube_dl/extractor/expotv.py @@ -17,7 +17,7 @@ class ExpoTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'NYX Butter Lipstick Little Susie', 'description': 'Goes on like butter, but looks better!', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Stephanie S.', 'upload_date': '20150520', 'view_count': int, diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c032d4d02..448647d72 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -133,7 +133,7 @@ class FC2EmbedIE(InfoExtractor): 'id': '201403223kCqB3Ez', 'ext': 'flv', 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, } diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 47673e2d4..c6fb67057 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -26,7 +26,7 @@ class FirstTVIE(InfoExtractor): 'id': '40049', 'ext': 'mp4', 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20150212', 'duration': 2694, }, @@ -37,7 +37,7 @@ class FirstTVIE(InfoExtractor): 'id': '364746', 'ext': 'mp4', 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20160407', 'duration': 179, 'formats': 'mincount:3', diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 13fbc4da2..15736c9fe 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -25,7 +25,7 @@ class FiveTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Россияне выбрали имя для общенациональной платежной системы', 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 180, }, }, { @@ -35,7 +35,7 @@ class FiveTVIE(InfoExtractor): 'ext': 'mp4', 'title': '3D принтер', 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 180, }, }, { @@ -44,7 +44,7 @@ class FiveTVIE(InfoExtractor): 'id': 'glavnoe', 'ext': 'mp4', 'title': 'Итоги недели с 8 по 14 июня 2015 года', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index a3a291599..2958452f4 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -19,7 +19,7 @@ class FKTVIE(InfoExtractor): 'id': '1', 'ext': 'mp4', 'title': 'Folge 1 vom 10. April 2007', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, } diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py index 39174fcec..e887ae488 100644 --- a/youtube_dl/extractor/foxgay.py +++ b/youtube_dl/extractor/foxgay.py @@ -20,7 +20,7 @@ class FoxgayIE(InfoExtractor): 'title': 'Fuck Turkish-style', 'description': 'md5:6ae2d9486921891efe89231ace13ffdf', 'age_limit': 18, - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', }, } diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 229bcb175..dc0662f74 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -22,7 +22,7 @@ class FoxNewsIE(AMPIE): 'duration': 265, 'timestamp': 1304411491, 'upload_date': '20110503', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -36,7 +36,7 @@ class FoxNewsIE(AMPIE): 'duration': 292, 'timestamp': 1417662047, 'upload_date': '20141204', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download @@ -111,7 +111,7 @@ class FoxNewsInsiderIE(InfoExtractor): 'description': 'Is campus censorship getting out of control?', 'timestamp': 1472168725, 'upload_date': '20160825', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 56048ffc2..b98da692c 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -17,7 +17,7 @@ class FranceCultureIE(InfoExtractor): 'display_id': 'rendez-vous-au-pays-des-geeks', 'ext': 'mp3', 'title': 'Rendez-vous au pays des geeks', - 'thumbnail': 're:^https?://.*\\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140301', 'vcodec': 'none', } diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e7068d1ae..48d43ae58 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -168,7 +168,7 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'id': 'NI_173343', 'ext': 'mp4', 'title': 'Les entreprises familiales : le secret de la réussite', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'timestamp': 1433273139, 'upload_date': '20150602', }, @@ -184,7 +184,7 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'ext': 'mp4', 'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"', 'description': 'md5:a3264114c9d29aeca11ced113c37b16c', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'timestamp': 1458300695, 'upload_date': '20160318', }, diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 0ad0d9b6a..eba00cd5a 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -29,7 +29,7 @@ class FunimationIE(InfoExtractor): 'ext': 'mp4', 'title': 'Air - 1 - Breeze', 'description': 'md5:1769f43cd5fc130ace8fd87232207892', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', }, 'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed', }, { @@ -40,7 +40,7 @@ class FunimationIE(InfoExtractor): 'ext': 'mp4', 'title': '.hack//SIGN - 1 - Role Play', 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', }, 'skip': 'Access without user interaction is forbidden by CloudFlare', }, { @@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor): 'ext': 'mp4', 'title': 'Attack on Titan: Junior High - Broadcast Dub Preview', 'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803', - 'thumbnail': 're:https?://.*\.(?:jpg|png)', + 'thumbnail': r're:https?://.*\.(?:jpg|png)', }, 'skip': 'Access without user interaction is forbidden by CloudFlare', }] diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index f2928b5fe..81c0ce9a3 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -17,7 +17,7 @@ class FunnyOrDieIE(InfoExtractor): 'ext': 'mp4', 'title': 'Heart-Shaped Box: Literal Video Version', 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', - 'thumbnail': 're:^http:.*\.jpg$', + 'thumbnail': r're:^http:.*\.jpg$', }, }, { 'url': 'http://www.funnyordie.com/embed/e402820827', @@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor): 'ext': 'mp4', 'title': 'Please Use This Song (Jon Lajoie)', 'description': 'Please use this to sell something. www.jonlajoie.com', - 'thumbnail': 're:^http:.*\.jpg$', + 'thumbnail': r're:^http:.*\.jpg$', }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index d545e01bb..a218a6944 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -20,7 +20,7 @@ class GamersydeIE(InfoExtractor): 'ext': 'mp4', 'duration': 372, 'title': 'Bloodborne - Birth of a hero', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 4e859e09a..682c49e79 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -63,7 +63,7 @@ class GameSpotIE(OnceIE): streams, ('progressive_hd', 'progressive_high', 'progressive_low')) if progressive_url and manifest_url: qualities_basename = self._search_regex( - '/([^/]+)\.csmil/', + r'/([^/]+)\.csmil/', manifest_url, 'qualities basename', default=None) if qualities_basename: QUALITIES_RE = r'((,\d+)+,?)' diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 55a34604a..e607d6ab8 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -18,7 +18,7 @@ class GameStarIE(InfoExtractor): 'ext': 'mp4', 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1406542020, 'upload_date': '20140728', 'duration': 17 diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py index 18ef5c252..57c67a451 100644 --- a/youtube_dl/extractor/gazeta.py +++ b/youtube_dl/extractor/gazeta.py @@ -16,7 +16,7 @@ class GazetaIE(InfoExtractor): 'ext': 'mp4', 'title': '«70–80 процентов гражданских в Донецке на грани голода»', 'description': 'md5:38617526050bd17b234728e7f9620a71', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', }, 'skip': 'video not found', }, { diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d71d0dc46..86dc79307 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -239,7 +239,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tikibad ontruimd wegens brand', 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 33, }, 'params': { @@ -300,7 +300,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 're:^Chris Ziegler takes a look at the\.*', + 'description': r're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, @@ -539,7 +539,7 @@ class GenericIE(InfoExtractor): 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e', 'ext': 'mp4', 'title': 'Ужастики, русский трейлер (2015)', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 153, } }, @@ -759,7 +759,7 @@ class GenericIE(InfoExtractor): 'duration': 48, 'timestamp': 1401537900, 'upload_date': '20140531', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, # Wistia embed @@ -1031,7 +1031,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Навальный вышел на свободу', 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 87, 'view_count': int, 'age_limit': 0, @@ -1045,7 +1045,7 @@ class GenericIE(InfoExtractor): 'id': '12820', 'ext': 'mp4', 'title': "'O Sole Mio", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 216, 'view_count': int, }, @@ -1058,7 +1058,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Тайны перевала Дятлова • 1 серия 2 часть', 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 694, 'age_limit': 0, }, @@ -1070,7 +1070,7 @@ class GenericIE(InfoExtractor): 'id': '3519514', 'ext': 'mp4', 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer', - 'thumbnail': 're:^https?://.*\.png$', + 'thumbnail': r're:^https?://.*\.png$', 'duration': 45.115, }, }, @@ -1153,7 +1153,7 @@ class GenericIE(InfoExtractor): 'id': '300346', 'ext': 'mp4', 'title': '中一中男師變性 全校師生力挺', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download @@ -1199,7 +1199,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Sauvons les abeilles ! - Le débat', 'description': 'md5:d9082128b1c5277987825d684939ca26', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'timestamp': 1434970506, 'upload_date': '20150622', 'uploader': 'Public Sénat', @@ -1213,7 +1213,7 @@ class GenericIE(InfoExtractor): 'id': '2855', 'ext': 'mp4', 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'uploader': 'ClickHole', 'uploader_id': 'clickhole', } diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py index 87cd19147..29b684d35 100644 --- a/youtube_dl/extractor/giantbomb.py +++ b/youtube_dl/extractor/giantbomb.py @@ -23,7 +23,7 @@ class GiantBombIE(InfoExtractor): 'title': 'Quick Look: Destiny: The Dark Below', 'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24', 'duration': 2399, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py index 28eb733e2..5a9992a27 100644 --- a/youtube_dl/extractor/giga.py +++ b/youtube_dl/extractor/giga.py @@ -24,7 +24,7 @@ class GigaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss', 'description': 'md5:afdf5862241aded4718a30dff6a57baf', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 578, 'timestamp': 1414749706, 'upload_date': '20141031', diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index f0d951396..d94dfbf09 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -14,7 +14,7 @@ class GlideIE(InfoExtractor): 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', 'ext': 'mp4', 'title': "Damon's Glide message", - 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', + 'thumbnail': r're:^https?://.*?\.cloudfront\.net/.*\.jpg$', } } diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py index 363dc6608..92efd16b3 100644 --- a/youtube_dl/extractor/godtube.py +++ b/youtube_dl/extractor/godtube.py @@ -23,7 +23,7 @@ class GodTubeIE(InfoExtractor): 'timestamp': 1205712000, 'uploader': 'beverlybmusic', 'upload_date': '20080317', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, ] diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 74e1720ee..377981d3e 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -19,7 +19,7 @@ class GoshgayIE(InfoExtractor): 'id': '299069', 'ext': 'flv', 'title': 'DIESEL SFW XXX Video', - 'thumbnail': 're:^http://.*\.jpg$', + 'thumbnail': r're:^http://.*\.jpg$', 'duration': 80, 'age_limit': 18, } diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index cbf774377..8116ad9bd 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -120,7 +120,7 @@ class HBOIE(HBOBaseIE): 'id': '1437839', 'ext': 'mp4', 'title': 'Ep. 64 Clip: Encryption', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'duration': 1072, } } @@ -141,7 +141,7 @@ class HBOEpisodeIE(HBOBaseIE): 'display_id': 'ep-52-inside-the-episode', 'ext': 'mp4', 'title': 'Ep. 52: Inside the Episode', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'duration': 240, }, }, { diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index 256453882..18c252012 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -25,7 +25,7 @@ class HearThisAtIE(InfoExtractor): 'id': '150939', 'ext': 'wav', 'title': 'Moofi - Dr. Kreep', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1421564134, 'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP', 'upload_date': '20150118', @@ -46,7 +46,7 @@ class HearThisAtIE(InfoExtractor): 'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance', 'upload_date': '20160328', 'timestamp': 1459186146, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'comment_count': int, 'view_count': int, 'like_count': int, diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 278d9f527..1629cdb8d 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -29,7 +29,7 @@ class HeiseIE(InfoExtractor): 'timestamp': 1411812600, 'upload_date': '20140927', 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', } } diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py index 10da14067..0ee8ea712 100644 --- a/youtube_dl/extractor/hellporno.py +++ b/youtube_dl/extractor/hellporno.py @@ -20,7 +20,7 @@ class HellPornoIE(InfoExtractor): 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic', 'ext': 'mp4', 'title': 'Dixie is posing with naked ass very erotic', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, } }, { diff --git a/youtube_dl/extractor/historicfilms.py b/youtube_dl/extractor/historicfilms.py index 6a36933ac..56343e98f 100644 --- a/youtube_dl/extractor/historicfilms.py +++ b/youtube_dl/extractor/historicfilms.py @@ -14,7 +14,7 @@ class HistoricFilmsIE(InfoExtractor): 'ext': 'mov', 'title': 'Historic Films: GP-7', 'description': 'md5:1a86a0f3ac54024e419aba97210d959a', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2096, }, } diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index ff797438d..e21ebb8fb 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -25,7 +25,7 @@ class HitboxIE(InfoExtractor): 'alt_title': 'hitboxlive - Aug 9th #6', 'description': '', 'ext': 'mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 215.1666, 'resolution': 'HD 720p', 'uploader': 'hitboxlive', @@ -163,7 +163,7 @@ class HitboxLiveIE(HitboxIE): if cdn.get('rtmpSubscribe') is True: continue base_url = cdn.get('netConnectionUrl') - host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) + host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) if base_url not in servers: servers.append(base_url) for stream in cdn.get('bitrates'): diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py index 0615f06af..c458a959d 100644 --- a/youtube_dl/extractor/hornbunny.py +++ b/youtube_dl/extractor/hornbunny.py @@ -20,7 +20,7 @@ class HornBunnyIE(InfoExtractor): 'duration': 550, 'age_limit': 18, 'view_count': int, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index 65ba2a48b..2be68abad 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -21,7 +21,7 @@ class HowStuffWorksIE(InfoExtractor): 'title': 'Cool Jobs - Iditarod Musher', 'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.', 'display_id': 'cool-jobs-iditarod-musher', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 161, }, 'skip': 'Video broken', @@ -34,7 +34,7 @@ class HowStuffWorksIE(InfoExtractor): 'title': 'Survival Zone: Food and Water In the Savanna', 'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.', 'display_id': 'survival-zone-food-and-water-in-the-savanna', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -45,7 +45,7 @@ class HowStuffWorksIE(InfoExtractor): 'title': 'Sword Swallowing #1 by Dan Meyer', 'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International ', 'display_id': 'sword-swallowing-1-by-dan-meyer', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py index cec0df09a..4ca275dda 100644 --- a/youtube_dl/extractor/huajiao.py +++ b/youtube_dl/extractor/huajiao.py @@ -20,7 +20,7 @@ class HuajiaoIE(InfoExtractor): 'title': '#新人求关注#', 'description': 're:.*', 'duration': 2424.0, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1475866459, 'upload_date': '20161007', 'uploader': 'Penny_余姿昀', diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py index 059073749..97e36f056 100644 --- a/youtube_dl/extractor/huffpost.py +++ b/youtube_dl/extractor/huffpost.py @@ -52,7 +52,7 @@ class HuffPostIE(InfoExtractor): thumbnails = [] for url in filter(None, data['images'].values()): - m = re.match('.*-([0-9]+x[0-9]+)\.', url) + m = re.match(r'.*-([0-9]+x[0-9]+)\.', url) if not m: continue thumbnails.append({ diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index c6f080484..11cf3c609 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -19,7 +19,7 @@ class IndavideoEmbedIE(InfoExtractor): 'ext': 'mp4', 'title': 'Cicatánc', 'description': '', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'cukiajanlo', 'uploader_id': '83729', 'timestamp': 1439193826, @@ -102,7 +102,7 @@ class IndavideoIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vicces cica', 'description': 'Játszik a tablettel. :D', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Jet_Pack', 'uploader_id': '491217', 'timestamp': 1390821212, diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 196407b06..98f408c18 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -22,7 +22,7 @@ class InstagramIE(InfoExtractor): 'ext': 'mp4', 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1371748545, 'upload_date': '20130620', 'uploader_id': 'naomipq', @@ -38,7 +38,7 @@ class InstagramIE(InfoExtractor): 'id': 'BA-pQFBG8HZ', 'ext': 'mp4', 'title': 'Video by britneyspears', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1453760977, 'upload_date': '20160125', 'uploader_id': 'britneyspears', @@ -169,7 +169,7 @@ class InstagramUserIE(InfoExtractor): 'id': '614605558512799803_462752227', 'ext': 'mp4', 'title': '#Porsche Intelligent Performance.', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'Porsche', 'uploader_id': 'porsche', 'timestamp': 1387486713, diff --git a/youtube_dl/extractor/ir90tv.py b/youtube_dl/extractor/ir90tv.py index 214bcd5b5..d5a3f6fa5 100644 --- a/youtube_dl/extractor/ir90tv.py +++ b/youtube_dl/extractor/ir90tv.py @@ -14,7 +14,7 @@ class Ir90TvIE(InfoExtractor): 'id': '95719', 'ext': 'mp4', 'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218', diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 7c8cb21c2..3d3c15024 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -28,7 +28,7 @@ class IviIE(InfoExtractor): 'title': 'Иван Васильевич меняет профессию', 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f', 'duration': 5498, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', }, @@ -46,7 +46,7 @@ class IviIE(InfoExtractor): 'episode': 'Дело Гольдберга (1 часть)', 'episode_number': 1, 'duration': 2655, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', }, @@ -60,7 +60,7 @@ class IviIE(InfoExtractor): 'title': 'Кукла', 'description': 'md5:ffca9372399976a2d260a407cc74cce6', 'duration': 5599, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', } diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index aa0728abc..b1d72177d 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -29,7 +29,7 @@ class IzleseneIE(InfoExtractor): 'ext': 'mp4', 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi', 'description': 'md5:253753e2655dde93f59f74b572454f6d', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'uploader_id': 'pelikzzle', 'timestamp': int, 'upload_date': '20140702', @@ -44,7 +44,7 @@ class IzleseneIE(InfoExtractor): 'id': '17997', 'ext': 'mp4', 'title': 'Tarkan Dortmund 2006 Konseri', - 'thumbnail': 're:^https://.*\.jpg', + 'thumbnail': r're:^https://.*\.jpg', 'uploader_id': 'parlayankiz', 'timestamp': int, 'upload_date': '20061112', diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index ee9acac09..51d19e67d 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -17,7 +17,7 @@ class JamendoIE(InfoExtractor): 'display_id': 'stories-from-emona-i', 'ext': 'flac', 'title': 'Stories from Emona I', - 'thumbnail': 're:^https?://.*\.jpg' + 'thumbnail': r're:^https?://.*\.jpg' } } diff --git a/youtube_dl/extractor/jove.py b/youtube_dl/extractor/jove.py index cf73cd753..f9a034b78 100644 --- a/youtube_dl/extractor/jove.py +++ b/youtube_dl/extractor/jove.py @@ -21,7 +21,7 @@ class JoveIE(InfoExtractor): 'ext': 'mp4', 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation', 'description': 'md5:015dd4509649c0908bc27f049e0262c6', - 'thumbnail': 're:^https?://.*\.png$', + 'thumbnail': r're:^https?://.*\.png$', 'upload_date': '20110523', } }, @@ -33,7 +33,7 @@ class JoveIE(InfoExtractor): 'ext': 'mp4', 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment', 'description': 'md5:35ff029261900583970c4023b70f1dc9', - 'thumbnail': 're:^https?://.*\.png$', + 'thumbnail': r're:^https?://.*\.png$', 'upload_date': '20140802', } }, diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index c05263e61..4e9eb67bf 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -20,7 +20,7 @@ class KarriereVideosIE(InfoExtractor): 'ext': 'flv', 'title': 'AltenpflegerIn', 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2', - 'thumbnail': 're:^http://.*\.png', + 'thumbnail': r're:^http://.*\.png', }, 'params': { # rtmp download @@ -34,7 +34,7 @@ class KarriereVideosIE(InfoExtractor): 'ext': 'flv', 'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"', 'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33', - 'thumbnail': 're:^http://.*\.png', + 'thumbnail': r're:^http://.*\.png', }, 'params': { # rtmp download diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 588a4d0ec..e83115e2a 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -27,7 +27,7 @@ class KeezMoviesIE(InfoExtractor): 'display_id': 'petite-asian-lady-mai-playing-in-bathtub', 'ext': 'mp4', 'title': 'Petite Asian Lady Mai Playing In Bathtub', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, 'age_limit': 18, } diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py index eb0a16008..fb9c2dbd4 100644 --- a/youtube_dl/extractor/ketnet.py +++ b/youtube_dl/extractor/ketnet.py @@ -13,7 +13,7 @@ class KetnetIE(InfoExtractor): 'ext': 'mp4', 'title': 'Gluur mee op de filmset en op Pennenzakkenrock', 'description': 'Gluur mee met Ghost Rockers op de filmset', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index cf8876fa1..d27d052ff 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -23,7 +23,7 @@ class KrasViewIE(InfoExtractor): 'title': 'Снег, лёд, заносы', 'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.', 'duration': 27, - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', }, 'params': { 'skip_download': 'Not accessible from Travis CI server', diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py index 2e66e8cf9..6a7e3baa7 100644 --- a/youtube_dl/extractor/kusi.py +++ b/youtube_dl/extractor/kusi.py @@ -27,7 +27,7 @@ class KUSIIE(InfoExtractor): 'duration': 223.586, 'upload_date': '20160826', 'timestamp': 1472233118, - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' }, }, { 'url': 'http://kusi.com/video?clipId=12203019', diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index c48a5aad1..4321f90c8 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -386,8 +386,8 @@ class LetvCloudIE(InfoExtractor): return formats def _real_extract(self, url): - uu_mobj = re.search('uu=([\w]+)', url) - vu_mobj = re.search('vu=([\w]+)', url) + uu_mobj = re.search(r'uu=([\w]+)', url) + vu_mobj = re.search(r'vu=([\w]+)', url) if not uu_mobj or not vu_mobj: raise ExtractorError('Invalid URL: %s' % url, expected=True) diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py index be66fff03..42568f315 100644 --- a/youtube_dl/extractor/lemonde.py +++ b/youtube_dl/extractor/lemonde.py @@ -12,7 +12,7 @@ class LemondeIE(InfoExtractor): 'id': 'lqm3kl', 'ext': 'mp4', 'title': "Comprendre l'affaire Bygmalion en 5 minutes", - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 320, 'upload_date': '20160119', 'timestamp': 1453194778, diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 0a94366fd..40295a30b 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -25,7 +25,7 @@ class LibraryOfCongressIE(InfoExtractor): 'id': '90716351', 'ext': 'mp4', 'title': "Pa's trip to Mars", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 0, 'view_count': int, }, diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index d375695f5..4750b03a3 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -41,7 +41,7 @@ class LibsynIE(InfoExtractor): formats = [{ 'url': media_url, - } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))] + } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))] podcast_title = self._search_regex( r'

([^<]+)

', webpage, 'podcast title', default=None) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index afce2010e..42e263bfa 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -176,7 +176,7 @@ class LifeEmbedIE(InfoExtractor): 'id': 'e50c2dec2867350528e2574c899b8291', 'ext': 'mp4', 'title': 'e50c2dec2867350528e2574c899b8291', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', } }, { # with 1080p diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index b7bfa7a6d..905a0e85f 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -164,7 +164,7 @@ class LimelightMediaIE(LimelightBaseIE): 'ext': 'mp4', 'title': 'HaP and the HB Prince Trailer', 'description': 'md5:8005b944181778e313d95c1237ddb640', - 'thumbnail': 're:^https?://.*\.jpeg$', + 'thumbnail': r're:^https?://.*\.jpeg$', 'duration': 144.23, 'timestamp': 1244136834, 'upload_date': '20090604', @@ -181,7 +181,7 @@ class LimelightMediaIE(LimelightBaseIE): 'id': 'a3e00274d4564ec4a9b29b9466432335', 'ext': 'mp4', 'title': '3Play Media Overview Video', - 'thumbnail': 're:^https?://.*\.jpeg$', + 'thumbnail': r're:^https?://.*\.jpeg$', 'duration': 78.101, 'timestamp': 1338929955, 'upload_date': '20120605', diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index ded717cf2..337b1b15c 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -31,7 +31,7 @@ class LiTVIE(InfoExtractor): 'id': 'VOD00041610', 'ext': 'mp4', 'title': '花千骨第1集', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f', 'episode_number': 1, }, @@ -80,7 +80,7 @@ class LiTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) program_info = self._parse_json(self._search_regex( - 'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), + r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), video_id) season_list = list(program_info.get('seasonList', {}).values()) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index b84e4dd6c..c7de65353 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -18,7 +18,7 @@ class LiveLeakIE(InfoExtractor): 'description': 'extremely bad day for this guy..!', 'uploader': 'ljfriel2', 'title': 'Most unlucky car accident', - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' } }, { 'url': 'http://www.liveleak.com/view?i=f93_1390833151', @@ -29,7 +29,7 @@ class LiveLeakIE(InfoExtractor): 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.', 'uploader': 'ARD_Stinkt', 'title': 'German Television does first Edward Snowden Interview (ENGLISH)', - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' } }, { 'url': 'http://www.liveleak.com/view?i=4f7_1392687779', @@ -52,7 +52,7 @@ class LiveLeakIE(InfoExtractor): 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.', 'uploader': 'bony333', 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' } }, { # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521 diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index bc7894bf1..c863413bf 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -37,7 +37,7 @@ class LivestreamIE(InfoExtractor): 'duration': 5968.0, 'like_count': int, 'view_count': int, - 'thumbnail': 're:^http://.*\.jpg$' + 'thumbnail': r're:^http://.*\.jpg$' } }, { 'url': 'http://new.livestream.com/tedx/cityenglish', diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index fd23b0b43..068378c9c 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -22,7 +22,7 @@ class LnkGoIE(InfoExtractor): 'description': 'md5:d82a5e36b775b7048617f263a0e3475e', 'age_limit': 7, 'duration': 3019, - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' }, 'params': { 'skip_download': True, # HLS download @@ -37,7 +37,7 @@ class LnkGoIE(InfoExtractor): 'description': 'md5:7352d113a242a808676ff17e69db6a69', 'age_limit': 18, 'duration': 346, - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' }, 'params': { 'skip_download': True, # HLS download diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index f4dcfd93f..da94eab56 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -73,7 +73,7 @@ class LyndaBaseIE(InfoExtractor): # Already logged in if any(re.search(p, signin_page) for p in ( - 'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): return # Step 2: submit email diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index 33b0b539f..bc9933a81 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -14,7 +14,7 @@ class MatchTVIE(InfoExtractor): 'info_dict': { 'id': 'matchtv-live', 'ext': 'flv', - 'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, 'params': { diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 2100583df..6e4290aad 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -72,7 +72,7 @@ class MDRIE(InfoExtractor): data_url = self._search_regex( r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1', - webpage, 'data url', group='url').replace('\/', '/') + webpage, 'data url', group='url').replace(r'\/', '/') doc = self._download_xml( compat_urlparse.urljoin(url, data_url), video_id) diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index 35914fd4b..c8eacb4f4 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -21,7 +21,7 @@ class MeipaiIE(InfoExtractor): 'ext': 'mp4', 'title': '#葉子##阿桑##余姿昀##超級女聲#', 'description': '#葉子##阿桑##余姿昀##超級女聲#', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 152, 'timestamp': 1465492420, 'upload_date': '20160609', @@ -38,7 +38,7 @@ class MeipaiIE(InfoExtractor): 'ext': 'mp4', 'title': '姿昀和善願 練歌練琴啦😁😁😁', 'description': '姿昀和善願 練歌練琴啦😁😁😁', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 5975, 'timestamp': 1474311799, 'upload_date': '20160919', diff --git a/youtube_dl/extractor/melonvod.py b/youtube_dl/extractor/melonvod.py index 2c80b3ba8..bd8cf13ab 100644 --- a/youtube_dl/extractor/melonvod.py +++ b/youtube_dl/extractor/melonvod.py @@ -16,7 +16,7 @@ class MelonVODIE(InfoExtractor): 'id': '50158734', 'ext': 'mp4', 'title': "Jessica 'Wonderland' MV Making Film", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'artist': 'Jessica (제시카)', 'upload_date': '20161212', 'duration': 203, diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index e6e7659a1..9880924e6 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -133,7 +133,7 @@ class MetacafeIE(InfoExtractor): video_id, display_id = re.match(self._VALID_URL, url).groups() # the video may come from an external site - m_external = re.match('^(\w{2})-(.*)$', video_id) + m_external = re.match(r'^(\w{2})-(.*)$', video_id) if m_external is not None: prefix, ext_id = m_external.groups() # Check if video comes from YouTube diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py index 94bc87b00..7bb473900 100644 --- a/youtube_dl/extractor/mgoon.py +++ b/youtube_dl/extractor/mgoon.py @@ -27,7 +27,7 @@ class MgoonIE(InfoExtractor): 'upload_date': '20131220', 'ext': 'mp4', 'title': 'md5:543aa4c27a4931d371c3f433e8cebebc', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index e0bb5d208..659ede8c2 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -18,7 +18,7 @@ class MGTVIE(InfoExtractor): 'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗', 'description': '我是歌手第四季双年巅峰会', 'duration': 7461, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { # no tbr extracted from stream_url diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py index e6730b75a..dccc54249 100644 --- a/youtube_dl/extractor/minhateca.py +++ b/youtube_dl/extractor/minhateca.py @@ -19,7 +19,7 @@ class MinhatecaIE(InfoExtractor): 'id': '125848331', 'ext': 'mp4', 'title': 'youtube-dl test video', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'filesize_approx': 1530000, 'duration': 9, 'view_count': int, diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index 10190d5f6..8ad9239c5 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -17,7 +17,7 @@ class MinistryGridIE(InfoExtractor): 'id': '3453494717001', 'ext': 'mp4', 'title': 'The Gospel by Numbers', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'upload_date': '20140410', 'description': 'Coming soon from T4G 2014!', 'uploader_id': '2034960640001', diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index f577836be..8984d3b8d 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -90,7 +90,7 @@ class MiTeleIE(InfoExtractor): 'season_id': 'diario_de_t14_11981', 'episode': 'Programa 144', 'episode_number': 3, - 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, 'add_ie': ['Ooyala'], @@ -108,7 +108,7 @@ class MiTeleIE(InfoExtractor): 'season_id': 'cuarto_milenio_t06_12715', 'episode': 'Programa 226', 'episode_number': 24, - 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'duration': 7313, }, 'params': { diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 202c05dcb..4ba2310fd 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -34,7 +34,7 @@ class MixcloudIE(InfoExtractor): 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'uploader': 'Daniel Holbach', 'uploader_id': 'dholbach', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, 'like_count': int, }, diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index e242b897f..59cd4b838 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -37,7 +37,7 @@ class MLBIE(InfoExtractor): 'duration': 66, 'timestamp': 1405980600, 'upload_date': '20140721', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -51,7 +51,7 @@ class MLBIE(InfoExtractor): 'duration': 46, 'timestamp': 1405105800, 'upload_date': '20140711', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -65,7 +65,7 @@ class MLBIE(InfoExtractor): 'duration': 488, 'timestamp': 1405399936, 'upload_date': '20140715', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -79,7 +79,7 @@ class MLBIE(InfoExtractor): 'duration': 52, 'timestamp': 1405390722, 'upload_date': '20140715', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py index e3f42e7bd..6a85dcbd5 100644 --- a/youtube_dl/extractor/mnet.py +++ b/youtube_dl/extractor/mnet.py @@ -22,7 +22,7 @@ class MnetIE(InfoExtractor): 'timestamp': 1451564040, 'age_limit': 0, 'thumbnails': 'mincount:5', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'ext': 'flv', }, 'params': { diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 91ee9c4e9..44bcc4982 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -30,7 +30,7 @@ class MoeVideoIE(InfoExtractor): 'ext': 'flv', 'title': 'Sink cut out machine', 'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'width': 540, 'height': 360, 'duration': 179, @@ -46,7 +46,7 @@ class MoeVideoIE(InfoExtractor): 'ext': 'flv', 'title': 'Operacion Condor.', 'description': 'md5:7e68cb2fcda66833d5081c542491a9a3', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'width': 480, 'height': 296, 'duration': 6027, diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index e3bbe5aa8..54716f5c7 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -18,7 +18,7 @@ class MofosexIE(KeezMoviesIE): 'display_id': 'amateur-teen-playing-and-masturbating-318131', 'ext': 'mp4', 'title': 'amateur teen playing and masturbating', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20121114', 'view_count': int, 'like_count': int, diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dl/extractor/mojvideo.py index 0ba435dc5..165e658c9 100644 --- a/youtube_dl/extractor/mojvideo.py +++ b/youtube_dl/extractor/mojvideo.py @@ -20,7 +20,7 @@ class MojvideoIE(InfoExtractor): 'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic', 'ext': 'mp4', 'title': 'V avtu pred mano rdečelaska - Alfi Nipič', - 'thumbnail': 're:^http://.*\.jpg$', + 'thumbnail': r're:^http://.*\.jpg$', 'duration': 242, } } diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 5e1a8a71a..6fe3b6049 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -23,7 +23,7 @@ class MotherlessIE(InfoExtractor): 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], 'upload_date': '20100913', 'uploader_id': 'famouslyfuckedup', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'age_limit': 18, } }, { @@ -37,7 +37,7 @@ class MotherlessIE(InfoExtractor): 'game', 'hairy'], 'upload_date': '20140622', 'uploader_id': 'Sulivana7x', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'age_limit': 18, }, 'skip': '404', @@ -51,7 +51,7 @@ class MotherlessIE(InfoExtractor): 'categories': ['superheroine heroine superher'], 'upload_date': '20140827', 'uploader_id': 'shade0230', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'age_limit': 18, } }, { diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py index 30c206f9b..5453da1ac 100644 --- a/youtube_dl/extractor/movieclips.py +++ b/youtube_dl/extractor/movieclips.py @@ -20,7 +20,7 @@ class MovieClipsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Warcraft Trailer 1', 'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1446843055, 'upload_date': '20151106', 'uploader': 'Movieclips', diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index 478e39967..85cc6e22f 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -16,7 +16,7 @@ class MoviezineIE(InfoExtractor): 'ext': 'mp4', 'title': 'Oculus - Trailer 1', 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, } diff --git a/youtube_dl/extractor/movingimage.py b/youtube_dl/extractor/movingimage.py index bb789c32e..4f62d628a 100644 --- a/youtube_dl/extractor/movingimage.py +++ b/youtube_dl/extractor/movingimage.py @@ -18,7 +18,7 @@ class MovingImageIE(InfoExtractor): 'title': 'SHETLAND WOOL', 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04', 'duration': 900, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, } diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py index d9f176136..2cc2bf229 100644 --- a/youtube_dl/extractor/muenchentv.py +++ b/youtube_dl/extractor/muenchentv.py @@ -22,7 +22,7 @@ class MuenchenTVIE(InfoExtractor): 'ext': 'mp4', 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$' }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index fea1caf47..a67276596 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -18,7 +18,7 @@ class MwaveIE(InfoExtractor): 'id': '168859', 'ext': 'flv', 'title': '[M COUNTDOWN] SISTAR - SHAKE IT', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'M COUNTDOWN', 'duration': 206, 'view_count': int, @@ -70,7 +70,7 @@ class MwaveMeetGreetIE(InfoExtractor): 'id': '173294', 'ext': 'flv', 'title': '[MEET&GREET] Park BoRam', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Mwave', 'duration': 3634, 'view_count': int, diff --git a/youtube_dl/extractor/myvi.py b/youtube_dl/extractor/myvi.py index 4c65be122..621ae74a7 100644 --- a/youtube_dl/extractor/myvi.py +++ b/youtube_dl/extractor/myvi.py @@ -27,7 +27,7 @@ class MyviIE(SprutoBaseIE): 'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43', 'ext': 'mp4', 'title': 'хозяин жизни', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 25, }, }, { diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 6d447a493..6bb64eb63 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor): else: video_playpath = '' - video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') + video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') video_swfobj = compat_urllib_parse_unquote(video_swfobj) video_title = self._html_search_regex("(.*?)", diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 4e96e78c3..434a94de4 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -276,7 +276,7 @@ class NBCNewsIE(ThePlatformIE): 'ext': 'mp4', 'title': 'The chaotic GOP immigration vote', 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1406937606, 'upload_date': '20140802', 'uploader': 'NBCU-NEWS', diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index e3b0da2e9..07528d140 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -302,7 +302,7 @@ class NDREmbedIE(NDREmbedBaseIE): 'info_dict': { 'id': 'livestream217', 'ext': 'flv', - 'title': 're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, 'upload_date': '20150910', }, @@ -367,7 +367,7 @@ class NJoyEmbedIE(NDREmbedBaseIE): 'info_dict': { 'id': 'webradioweltweit100', 'ext': 'mp3', - 'title': 're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, 'uploader': 'njoy', 'upload_date': '20150810', diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py index 96528f649..255f60878 100644 --- a/youtube_dl/extractor/ndtv.py +++ b/youtube_dl/extractor/ndtv.py @@ -21,7 +21,7 @@ class NDTVIE(InfoExtractor): 'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', 'upload_date': '20131208', 'duration': 1327, - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', }, } diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py index 0d165a82a..aec3026b1 100644 --- a/youtube_dl/extractor/netzkino.py +++ b/youtube_dl/extractor/netzkino.py @@ -25,7 +25,7 @@ class NetzkinoIE(InfoExtractor): 'comments': 'mincount:3', 'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28', 'upload_date': '20120813', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'timestamp': 1344858571, 'age_limit': 12, }, diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index dee9056d3..c900f232a 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -15,7 +15,7 @@ class NextMediaIE(InfoExtractor): 'id': '53109199', 'ext': 'mp4', 'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:28222b9912b6665a21011b034c70fcc7', 'timestamp': 1415456273, 'upload_date': '20141108', @@ -76,7 +76,7 @@ class NextMediaActionNewsIE(NextMediaIE): 'id': '19009428', 'ext': 'mp4', 'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659', 'timestamp': 1421791200, 'upload_date': '20150120', @@ -101,7 +101,7 @@ class AppleDailyIE(NextMediaIE): 'id': '36354694', 'ext': 'mp4', 'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4', 'upload_date': '20150128', } @@ -112,7 +112,7 @@ class AppleDailyIE(NextMediaIE): 'id': '550549', 'ext': 'mp4', 'title': '不滿被踩腳 山東兩大媽一路打下車', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:175b4260c1d7c085993474217e4ab1b4', 'upload_date': '20150128', } @@ -123,7 +123,7 @@ class AppleDailyIE(NextMediaIE): 'id': '5003671', 'ext': 'mp4', 'title': '20正妹熱舞 《刀龍傳說Online》火辣上市', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd', 'upload_date': '20150128', }, @@ -150,7 +150,7 @@ class AppleDailyIE(NextMediaIE): 'id': '35770334', 'ext': 'mp4', 'title': '咖啡占卜測 XU裝熟指數', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', 'upload_date': '20140417', }, diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 3930d16f1..460deb162 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -72,7 +72,7 @@ class NFLIE(InfoExtractor): 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', 'upload_date': '20140921', 'timestamp': 1411337580, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', @@ -84,7 +84,7 @@ class NFLIE(InfoExtractor): 'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', 'upload_date': '20131229', 'timestamp': 1388354455, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py index eab816e49..53c500c35 100644 --- a/youtube_dl/extractor/nosvideo.py +++ b/youtube_dl/extractor/nosvideo.py @@ -17,7 +17,7 @@ _x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'}) class NosVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \ - '(?:embed/|\?v=)(?P[A-Za-z0-9]{12})/?' + r'(?:embed/|\?v=)(?P[A-Za-z0-9]{12})/?' _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml' _FILE_DELETED_REGEX = r'File Not Found' _TEST = { @@ -27,7 +27,7 @@ class NosVideoIE(InfoExtractor): 'id': 'mu8fle7g7rpq', 'ext': 'mp4', 'title': 'big_buck_bunny_480p_surround-fix.avi.mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 103952345..06cb8cb3f 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -21,7 +21,7 @@ class NovaIE(InfoExtractor): 'ext': 'flv', 'title': 'Duel: Michal Hrdlička a Petr Suchoň', 'description': 'md5:d0cc509858eee1b1374111c588c6f5d5', - 'thumbnail': 're:^https?://.*\.(?:jpg)', + 'thumbnail': r're:^https?://.*\.(?:jpg)', }, 'params': { # rtmp download @@ -36,7 +36,7 @@ class NovaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Podzemní nemocnice v pražské Krči', 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', - 'thumbnail': 're:^https?://.*\.(?:jpg)', + 'thumbnail': r're:^https?://.*\.(?:jpg)', } }, { 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove', @@ -46,7 +46,7 @@ class NovaIE(InfoExtractor): 'ext': 'flv', 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', 'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags - 'thumbnail': 're:^https?://.*\.(?:jpg)', + 'thumbnail': r're:^https?://.*\.(?:jpg)', }, 'params': { # rtmp download @@ -58,7 +58,7 @@ class NovaIE(InfoExtractor): 'id': '1756858', 'ext': 'flv', 'title': 'Televizní noviny - 30. 5. 2015', - 'thumbnail': 're:^https?://.*\.(?:jpg)', + 'thumbnail': r're:^https?://.*\.(?:jpg)', 'upload_date': '20150530', }, 'params': { @@ -72,7 +72,7 @@ class NovaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zaklínač 3: Divoký hon', 'description': 're:.*Pokud se stejně jako my nemůžete.*', - 'thumbnail': 're:https?://.*\.jpg(\?.*)?', + 'thumbnail': r're:https?://.*\.jpg(\?.*)?', 'upload_date': '20150521', }, 'params': { diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 3bbd47355..829c71960 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -24,7 +24,7 @@ class NovaMovIE(InfoExtractor): ) (?P[a-z\d]{13}) ''' - _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'} + _VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'} _HOST = 'www.novamov.com' @@ -104,7 +104,7 @@ class WholeCloudIE(NovaMovIE): IE_NAME = 'wholecloud' IE_DESC = 'WholeCloud' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': '(?:wholecloud\.net|movshare\.(?:net|sx|ag))'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'} _HOST = 'www.wholecloud.net' @@ -128,7 +128,7 @@ class NowVideoIE(NovaMovIE): IE_NAME = 'nowvideo' IE_DESC = 'NowVideo' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'} _HOST = 'www.nowvideo.to' @@ -152,7 +152,7 @@ class VideoWeedIE(NovaMovIE): IE_NAME = 'videoweed' IE_DESC = 'VideoWeed' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'} _HOST = 'www.videoweed.es' @@ -176,7 +176,7 @@ class CloudTimeIE(NovaMovIE): IE_NAME = 'cloudtime' IE_DESC = 'CloudTime' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'cloudtime\.to'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'} _HOST = 'www.cloudtime.to' @@ -190,7 +190,7 @@ class AuroraVidIE(NovaMovIE): IE_NAME = 'auroravid' IE_DESC = 'AuroraVid' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'auroravid\.to'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'} _HOST = 'www.auroravid.to' diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index 7e5346316..b6c5ee6e4 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -62,7 +62,7 @@ class NownessIE(NownessBaseIE): 'ext': 'mp4', 'title': 'Candor: The Art of Gesticulation', 'description': 'Candor: The Art of Gesticulation', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1446745676, 'upload_date': '20151105', 'uploader_id': '2385340575001', @@ -76,7 +76,7 @@ class NownessIE(NownessBaseIE): 'ext': 'mp4', 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1407315371, 'upload_date': '20140806', 'uploader_id': '2385340575001', @@ -91,7 +91,7 @@ class NownessIE(NownessBaseIE): 'ext': 'mp4', 'title': 'Bleu, Blanc, Rouge - A Godard Supercut', 'description': 'md5:f0ea5f1857dffca02dbd37875d742cec', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'upload_date': '20150607', 'uploader': 'Cinema Sem Lei', 'uploader_id': 'cinemasemlei', diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 916a102bf..e43b37136 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -83,7 +83,7 @@ class NowTVIE(NowTVBaseIE): 'ext': 'flv', 'title': 'Inka Bause stellt die neuen Bauern vor', 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1432580700, 'upload_date': '20150525', 'duration': 2786, @@ -101,7 +101,7 @@ class NowTVIE(NowTVBaseIE): 'ext': 'flv', 'title': 'Berlin - Tag & Nacht (Folge 934)', 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1432666800, 'upload_date': '20150526', 'duration': 2641, @@ -119,7 +119,7 @@ class NowTVIE(NowTVBaseIE): 'ext': 'flv', 'title': 'Hals- und Beinbruch', 'description': 'md5:b50d248efffe244e6f56737f0911ca57', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1432415400, 'upload_date': '20150523', 'duration': 2742, @@ -137,7 +137,7 @@ class NowTVIE(NowTVBaseIE): 'ext': 'flv', 'title': 'Angst!', 'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1222632900, 'upload_date': '20080928', 'duration': 3025, @@ -155,7 +155,7 @@ class NowTVIE(NowTVBaseIE): 'ext': 'flv', 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch', 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1432751700, 'upload_date': '20150527', 'duration': 1083, @@ -173,7 +173,7 @@ class NowTVIE(NowTVBaseIE): 'ext': 'flv', 'title': "Büro-Fall / Chihuahua 'Joel'", 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1432408200, 'upload_date': '20150523', 'duration': 3092, diff --git a/youtube_dl/extractor/noz.py b/youtube_dl/extractor/noz.py index c47a33d15..ccafd7723 100644 --- a/youtube_dl/extractor/noz.py +++ b/youtube_dl/extractor/noz.py @@ -24,7 +24,7 @@ class NozIE(InfoExtractor): 'duration': 215, 'title': '3:2 - Deutschland gewinnt Badminton-Länderspiel in Melle', 'description': 'Vor rund 370 Zuschauern gewinnt die deutsche Badminton-Nationalmannschaft am Donnerstag ein EM-Vorbereitungsspiel gegen Frankreich in Melle. Video Moritz Frankenberg.', - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', }, }] diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index d28a81542..101a5374c 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -22,7 +22,7 @@ class NTVDeIE(InfoExtractor): 'info_dict': { 'id': '14438086', 'ext': 'mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus', 'alt_title': 'Winterchaos auf deutschen Straßen', 'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.', diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index 7d7a785ab..4f9cedb84 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -21,7 +21,7 @@ class NTVRuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', 'duration': 136, }, }, { @@ -32,7 +32,7 @@ class NTVRuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', 'duration': 172, }, }, { @@ -43,7 +43,7 @@ class NTVRuIE(InfoExtractor): 'ext': 'mp4', 'title': '«Сегодня». 21 марта 2014 года. 16:00', 'description': '«Сегодня». 21 марта 2014 года. 16:00', - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', 'duration': 1496, }, }, { @@ -54,7 +54,7 @@ class NTVRuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Остросюжетный фильм «Кома»', 'description': 'Остросюжетный фильм «Кома»', - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', 'duration': 5592, }, }, { @@ -65,7 +65,7 @@ class NTVRuIE(InfoExtractor): 'ext': 'mp4', 'title': '«Дело врачей»: «Деревце жизни»', 'description': '«Дело врачей»: «Деревце жизни»', - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', 'duration': 2590, }, }] diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index 50fbbc79c..a914068f9 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -13,7 +13,7 @@ class OktoberfestTVIE(InfoExtractor): 'id': 'hb-zelt', 'ext': 'mp4', 'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'is_live': True, }, 'params': { diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index c3e830c23..de1d6b08a 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -16,7 +16,7 @@ class OnDemandKoreaIE(JWPlatformBaseIE): 'id': 'ask-us-anything-e43', 'ext': 'mp4', 'title': 'Ask Us Anything : E43', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { 'skip_download': 'm3u8 download' diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py index 6fb1a3fcc..1d336cf30 100644 --- a/youtube_dl/extractor/onionstudios.py +++ b/youtube_dl/extractor/onionstudios.py @@ -22,7 +22,7 @@ class OnionStudiosIE(InfoExtractor): 'id': '2937', 'ext': 'mp4', 'title': 'Hannibal charges forward, stops for a cocktail', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'The A.V. Club', 'uploader_id': 'the-av-club', }, diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 8c5ec72d9..2ce9f3826 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -19,7 +19,7 @@ class OpenloadIE(InfoExtractor): 'id': 'kUEfGclsU9o', 'ext': 'mp4', 'title': 'skyrim_no-audio_1080.mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'https://openload.co/embed/rjC09fkPLYs', @@ -27,7 +27,7 @@ class OpenloadIE(InfoExtractor): 'id': 'rjC09fkPLYs', 'ext': 'mp4', 'title': 'movie.mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'subtitles': { 'en': [{ 'ext': 'vtt', diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index b4cce7ea9..1e2c54e68 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -247,7 +247,7 @@ class ORFIPTVIE(InfoExtractor): 'title': 'Weitere Evakuierungen um Vulkan Calbuco', 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', 'duration': 68.197, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20150425', }, } diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index cbb1968d3..89c95fffb 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -26,7 +26,7 @@ class PandoraTVIE(InfoExtractor): 'ext': 'flv', 'title': '頭を撫でてくれる?', 'description': '頭を撫でてくれる?', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 39, 'upload_date': '20151218', 'uploader': 'カワイイ動物まとめ', diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index f1c0cd068..6baed773f 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -236,7 +236,7 @@ class PBSIE(InfoExtractor): 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full', 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b', 'duration': 6559, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -249,7 +249,7 @@ class PBSIE(InfoExtractor): 'description': 'md5:c741d14e979fc53228c575894094f157', 'title': 'NOVA - Killer Typhoon', 'duration': 3172, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140122', 'age_limit': 10, }, @@ -270,7 +270,7 @@ class PBSIE(InfoExtractor): 'title': 'American Experience - Death and the Civil War, Chapter 1', 'description': 'md5:67fa89a9402e2ee7d08f53b920674c18', 'duration': 682, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -286,7 +286,7 @@ class PBSIE(InfoExtractor): 'title': 'FRONTLINE - United States of Secrets (Part One)', 'description': 'md5:55756bd5c551519cc4b7703e373e217e', 'duration': 6851, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -302,7 +302,7 @@ class PBSIE(InfoExtractor): 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", 'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5', 'duration': 1480, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -315,7 +315,7 @@ class PBSIE(InfoExtractor): 'title': 'FRONTLINE - The Atomic Artists', 'description': 'md5:f677e4520cfacb4a5ce1471e31b57800', 'duration': 723, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -330,7 +330,7 @@ class PBSIE(InfoExtractor): 'ext': 'mp4', 'title': 'FRONTLINE - Netanyahu at War', 'duration': 6852, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'formats': 'mincount:8', }, }, @@ -568,7 +568,7 @@ class PBSIE(InfoExtractor): # Try turning it to 'program - title' naming scheme if possible alt_title = info.get('program', {}).get('title') if alt_title: - info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) + info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title']) description = info.get('description') or info.get( 'program', {}).get('description') or description diff --git a/youtube_dl/extractor/people.py b/youtube_dl/extractor/people.py index 9ecdbc13b..6ca95715e 100644 --- a/youtube_dl/extractor/people.py +++ b/youtube_dl/extractor/people.py @@ -14,7 +14,7 @@ class PeopleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”', 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 246.318, 'timestamp': 1458720585, 'upload_date': '20160323', diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py index a52210fab..6a4580d54 100644 --- a/youtube_dl/extractor/pinkbike.py +++ b/youtube_dl/extractor/pinkbike.py @@ -23,7 +23,7 @@ class PinkbikeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Brandon Semenuk - RAW 100', 'description': 'Official release: www.redbull.ca/rupertwalker', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 100, 'upload_date': '20150406', 'uploader': 'revelco', diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py index 77e1211d6..e38c7618e 100644 --- a/youtube_dl/extractor/pladform.py +++ b/youtube_dl/extractor/pladform.py @@ -34,7 +34,7 @@ class PladformIE(InfoExtractor): 'ext': 'mp4', 'title': 'Тайны перевала Дятлова • 1 серия 2 часть', 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 694, 'age_limit': 0, }, diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index 1e8096a25..391e1bd09 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -25,7 +25,7 @@ class PlaytvakIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vyžeňte vosy a sršně ze zahrady', 'description': 'md5:f93d398691044d303bc4a3de62f3e976', - 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', 'duration': 279, 'timestamp': 1438732860, 'upload_date': '20150805', @@ -38,7 +38,7 @@ class PlaytvakIE(InfoExtractor): 'ext': 'flv', 'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', - 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', 'is_live': True, }, 'params': { @@ -52,7 +52,7 @@ class PlaytvakIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se', 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2', - 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', 'duration': 39, 'timestamp': 1438969140, 'upload_date': '20150807', @@ -66,7 +66,7 @@ class PlaytvakIE(InfoExtractor): 'ext': 'mp4', 'title': 'Táhni! Demonstrace proti imigrantům budila emoce', 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c', - 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', 'timestamp': 1439052180, 'upload_date': '20150808', 'is_live': False, @@ -79,7 +79,7 @@ class PlaytvakIE(InfoExtractor): 'ext': 'mp4', 'title': 'Recesisté udělali z billboardu kolotoč', 'description': 'md5:7369926049588c3989a66c9c1a043c4c', - 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', 'timestamp': 1415725500, 'upload_date': '20141111', 'is_live': False, diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 79c2db085..4aef186ea 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -34,7 +34,7 @@ class PlayvidIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', 'age_limit': 18, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }] diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 0bc743118..4d96a10a7 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -18,7 +18,7 @@ class PlaywireIE(InfoExtractor): 'id': '3353705', 'ext': 'mp4', 'title': 'S04_RM_UCL_Rus', - 'thumbnail': 're:^https?://.*\.png$', + 'thumbnail': r're:^https?://.*\.png$', 'duration': 145.94, }, }, { diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index 5ff173774..2ac1fcb0b 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -36,7 +36,7 @@ class PolskieRadioIE(InfoExtractor): 'timestamp': 1456594200, 'upload_date': '20160227', 'duration': 2364, - 'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$' + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' }, }], }, { diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py index d85e0294d..8218c7d3b 100644 --- a/youtube_dl/extractor/porncom.py +++ b/youtube_dl/extractor/porncom.py @@ -22,7 +22,7 @@ class PornComIE(InfoExtractor): 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', 'ext': 'mp4', 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 551, 'view_count': int, 'age_limit': 18, diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 8df12eec0..842317e6c 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -21,7 +21,7 @@ class PornHdIE(InfoExtractor): 'ext': 'mp4', 'title': 'Restroom selfie masturbation', 'description': 'md5:3748420395e03e31ac96857a8f125b2b', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, 'age_limit': 18, } @@ -35,7 +35,7 @@ class PornHdIE(InfoExtractor): 'ext': 'mp4', 'title': 'Sierra loves doing laundry', 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, 'age_limit': 18, }, diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 63816c358..1b5b9a320 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -19,7 +19,7 @@ class PornotubeIE(InfoExtractor): 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0', 'categories': ['Adult Humor', 'Blondes'], 'uploader': 'Alpha Blue Archives', - 'thumbnail': 're:^https?://.*\\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1417582800, 'age_limit': 18, } diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 58f557e39..b6b71069d 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -23,7 +23,7 @@ class PornoVoisinesIE(InfoExtractor): 'ext': 'mp4', 'title': 'Recherche appartement', 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140925', 'duration': 120, 'view_count': int, diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 3c9087f2d..1a0cce7e0 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -20,7 +20,7 @@ class PornoXOIE(JWPlatformBaseIE): 'display_id': 'striptease-from-sexy-secretary', 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', 'categories': list, # NSFW - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, } } diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py index 2da93ed34..b5c279203 100644 --- a/youtube_dl/extractor/presstv.py +++ b/youtube_dl/extractor/presstv.py @@ -19,7 +19,7 @@ class PressTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Organic mattresses used to clean waste water', 'upload_date': '20160409', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' } } diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index d40cca06f..23ac93d7e 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -20,7 +20,7 @@ class PromptFileIE(InfoExtractor): 'id': '86D1CE8462-576CAAE416', 'ext': 'mp4', 'title': 'oceans.mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 30478f979..03e1b1f7f 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -394,7 +394,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') playlist = self._parse_json( self._search_regex( - 'var\s+contentResources\s*=\s*(\[.+?\]);\s*]+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + r']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', webpage) if mobj: return mobj.group('url') diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index ce631b46c..2b830cf47 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -14,7 +14,7 @@ class RUHDIE(InfoExtractor): 'ext': 'divx', 'title': 'КОТ бааааам', 'description': 'классный кот)', - 'thumbnail': 're:^http://.*\.jpg$', + 'thumbnail': r're:^http://.*\.jpg$', } } diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 6db3e3e93..f12bc5614 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -23,7 +23,7 @@ class RuutuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114, 'age_limit': 0, }, @@ -36,7 +36,7 @@ class RuutuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Superpesis: katso koko kausi Ruudussa', 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 40, 'age_limit': 0, }, @@ -49,7 +49,7 @@ class RuutuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Osa 1: Mikael Jungner', 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, }, }, diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py index 5b7367b94..30f9cf824 100644 --- a/youtube_dl/extractor/savefrom.py +++ b/youtube_dl/extractor/savefrom.py @@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor): 'upload_date': '20120816', 'uploader': 'Howcast', 'uploader_id': 'Howcast', - 'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*', + 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*', }, 'params': { 'skip_download': True diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 43131fb7e..845712a76 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -22,7 +22,7 @@ class SBSIE(InfoExtractor): 'ext': 'mp4', 'title': 'Dingo Conservation (The Feed)', 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'duration': 308, 'timestamp': 1408613220, 'upload_date': '20140821', diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index ed9de9648..62a6a8337 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -21,7 +21,7 @@ class ScreencastIE(InfoExtractor): 'ext': 'm4v', 'title': 'Color Measurement with Ocean Optics Spectrometers', 'description': 'md5:240369cde69d8bed61349a199c5fb153', - 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', @@ -31,7 +31,7 @@ class ScreencastIE(InfoExtractor): 'ext': 'mov', 'title': 'The Amadeus Spectrometer', 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', - 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://www.screencast.com/t/aAB3iowa', @@ -41,7 +41,7 @@ class ScreencastIE(InfoExtractor): 'ext': 'mp4', 'title': 'Google Earth Export', 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', - 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://www.screencast.com/t/X3ddTrYh', @@ -51,7 +51,7 @@ class ScreencastIE(InfoExtractor): 'ext': 'wmv', 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', - 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', } }, { 'url': 'http://screencast.com/t/aAB3iowa', diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py index 7a88a42cd..94a2a37d2 100644 --- a/youtube_dl/extractor/screencastomatic.py +++ b/youtube_dl/extractor/screencastomatic.py @@ -14,7 +14,7 @@ class ScreencastOMaticIE(JWPlatformBaseIE): 'id': 'c2lD3BeOPl', 'ext': 'mp4', 'title': 'Welcome to 3-4 Philosophy @ DECV!', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.', 'duration': 369.163, } diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index 02e574cd8..50aea65cb 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -20,7 +20,7 @@ class ScreenJunkiesIE(InfoExtractor): 'display_id': 'best-quentin-tarantino-movie', 'ext': 'mp4', 'title': 'Best Quentin Tarantino Movie', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3671, 'age_limit': 13, 'tags': list, @@ -32,7 +32,7 @@ class ScreenJunkiesIE(InfoExtractor): 'display_id': 'honest-trailers-the-dark-knight', 'ext': 'mp4', 'title': "Honest Trailers: 'The Dark Knight'", - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'age_limit': 10, 'tags': list, }, @@ -44,7 +44,7 @@ class ScreenJunkiesIE(InfoExtractor): 'display_id': 'knocking-dead-ep-1-the-show-so-far', 'ext': 'mp4', 'title': 'Knocking Dead Ep 1: State of The Dead Recap', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3307, 'age_limit': 13, 'tags': list, diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 35540c082..387a4f7f6 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -55,7 +55,7 @@ class SenateISVPIE(InfoExtractor): 'id': 'judiciary031715', 'ext': 'mp4', 'title': 'Integrated Senate Video Player', - 'thumbnail': 're:^https?://.*\.(?:jpg|png)$', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', }, 'params': { # m3u8 download diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py index 2dbe490bb..941a82e1a 100644 --- a/youtube_dl/extractor/sendtonews.py +++ b/youtube_dl/extractor/sendtonews.py @@ -29,7 +29,7 @@ class SendtoNewsIE(JWPlatformBaseIE): 'title': 'Recap: CLE 5, LAA 4', 'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win', 'duration': 57.343, - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'upload_date': '20160815', 'timestamp': 1471221961, }, diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py index a99b2a8e7..5e22ea730 100644 --- a/youtube_dl/extractor/sexu.py +++ b/youtube_dl/extractor/sexu.py @@ -14,7 +14,7 @@ class SexuIE(InfoExtractor): 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b', 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403', 'categories': list, # NSFW - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, } } diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 18cc7721e..7145d285a 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -13,7 +13,7 @@ class SlutloadIE(InfoExtractor): 'ext': 'mp4', 'title': 'virginie baisee en cam', 'age_limit': 18, - 'thumbnail': 're:https?://.*?\.jpg' + 'thumbnail': r're:https?://.*?\.jpg' } } diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index def46abda..370fa8879 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -81,7 +81,7 @@ class SmotriIE(InfoExtractor): 'uploader': 'psavari1', 'uploader_id': 'psavari1', 'upload_date': '20081103', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { 'videopassword': '223322', @@ -117,7 +117,7 @@ class SmotriIE(InfoExtractor): 'uploader': 'вАся', 'uploader_id': 'asya_prosto', 'upload_date': '20081218', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 18, }, 'params': { diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index 4819fe5b4..f77354748 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -22,7 +22,7 @@ class SnotrIE(InfoExtractor): 'duration': 248, 'filesize_approx': 40700000, 'description': 'A drone flying through Fourth of July Fireworks', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'expected_warnings': ['description'], }, { @@ -34,7 +34,7 @@ class SnotrIE(InfoExtractor): 'duration': 126, 'filesize_approx': 8500000, 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }] diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index 3a4ddf57e..e004e2c5a 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -27,7 +27,7 @@ class SoundgasmIE(InfoExtractor): webpage = self._download_webpage(url, display_id) audio_url = self._html_search_regex( r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') - audio_id = re.split('\/|\.', audio_url)[-2] + audio_id = re.split(r'\/|\.', audio_url)[-2] description = self._html_search_regex( r'(?s)
  • Description:\s(.*?)<\/li>', webpage, 'description', fatal=False) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 186d22b7d..123c33ac3 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor): 'ext': 'mp4', 'title': 'fantasy solo', 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, } diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 92a7120a3..44d8fa52f 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -85,7 +85,7 @@ class SpankwireIE(InfoExtractor): r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage) heights = [int(video[0]) for video in videos] video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos])) - if webpage.find('flashvars\.encrypted = "true"') != -1: + if webpage.find(r'flashvars\.encrypted = "true"') != -1: password = self._search_regex( r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ') diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index 034bd47ff..e1cfb8698 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -18,7 +18,7 @@ class SpiegeltvIE(InfoExtractor): 'ext': 'm4v', 'title': 'Flug MH370', 'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', - 'thumbnail': 're:http://.*\.jpg$', + 'thumbnail': r're:http://.*\.jpg$', }, 'params': { # m3u8 download diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py index 7e6783306..a417b5a4e 100644 --- a/youtube_dl/extractor/sport5.py +++ b/youtube_dl/extractor/sport5.py @@ -41,7 +41,7 @@ class Sport5IE(InfoExtractor): webpage = self._download_webpage(url, media_id) - video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id') + video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id') metadata = self._download_xml( 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index e5c28ae89..b512cd20f 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -21,7 +21,7 @@ class SportBoxIE(InfoExtractor): 'ext': 'mp4', 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140928', }, 'params': { @@ -73,7 +73,7 @@ class SportBoxEmbedIE(InfoExtractor): 'id': '211355', 'ext': 'mp4', 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index a9927f6e2..a3c35a899 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -20,8 +20,8 @@ class SportDeutschlandIE(InfoExtractor): 'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen', 'categories': ['Badminton'], 'view_count': int, - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV', 'timestamp': int, 'upload_date': 're:^201408[23][0-9]$', }, @@ -38,7 +38,7 @@ class SportDeutschlandIE(InfoExtractor): 'timestamp': 1408976060, 'duration': 2732, 'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, 'categories': ['Li-Ning Badminton WM 2014'], diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 847d3c08f..47aa887cc 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -153,7 +153,7 @@ class SRGSSRPlayIE(InfoExtractor): 'uploader': '19h30', 'upload_date': '20141201', 'timestamp': 1417458600, - 'thumbnail': 're:^https?://.*\.image', + 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, 'params': { diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index b03272f7a..28baf901c 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -20,7 +20,7 @@ class SRMediathekIE(ARDMediathekIE): 'ext': 'mp4', 'title': 'sportarena (26.10.2014)', 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'no longer available', }, { diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index 4a3d8bb8f..cce65fb10 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): r'(?s)([^<]+)', coursepage, 'description', fatal=False) - links = orderedSet(re.findall('', coursepage)) + links = orderedSet(re.findall(r'', coursepage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] @@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor): rootpage = self._download_webpage(rootURL, info['id'], errnote='Unable to download course info page') - links = orderedSet(re.findall('', rootpage)) + links = orderedSet(re.findall(r'', rootpage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] diff --git a/youtube_dl/extractor/stitcher.py b/youtube_dl/extractor/stitcher.py index 0f8782d03..97d1ff681 100644 --- a/youtube_dl/extractor/stitcher.py +++ b/youtube_dl/extractor/stitcher.py @@ -22,7 +22,7 @@ class StitcherIE(InfoExtractor): 'title': 'Machine Learning Mastery and Cancer Clusters', 'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3', 'duration': 1604, - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', }, }, { 'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true', @@ -33,7 +33,7 @@ class StitcherIE(InfoExtractor): 'title': "The CW's 'Crazy Ex-Girlfriend'", 'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17', 'duration': 2235, - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 2c26fa689..e973c867c 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -21,7 +21,7 @@ class StreamableIE(InfoExtractor): 'id': 'dnd1', 'ext': 'mp4', 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'uploader': 'teabaker', 'timestamp': 1454964157.35115, 'upload_date': '20160208', @@ -37,7 +37,7 @@ class StreamableIE(InfoExtractor): 'id': 'moo', 'ext': 'mp4', 'title': '"Please don\'t eat me!"', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'timestamp': 1426115495, 'upload_date': '20150311', 'duration': 12, diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dl/extractor/streetvoice.py index e529051d1..91612c7f2 100644 --- a/youtube_dl/extractor/streetvoice.py +++ b/youtube_dl/extractor/streetvoice.py @@ -16,7 +16,7 @@ class StreetVoiceIE(InfoExtractor): 'ext': 'mp3', 'title': '輸', 'description': 'Crispy脆樂團 - 輸', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 260, 'upload_date': '20091018', 'uploader': 'Crispy脆樂團', diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index ef9be7926..68051169b 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -21,7 +21,7 @@ class SunPornoIE(InfoExtractor): 'ext': 'mp4', 'title': 'md5:0a400058e8105d39e35c35e7c5184164', 'description': 'md5:a31241990e1bd3a64e72ae99afb325fb', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 302, 'age_limit': 18, } diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index fb0a4b24e..10cf80885 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -129,7 +129,7 @@ class SVTPlayIE(SVTBaseIE): 'ext': 'mp4', 'title': 'Flygplan till Haile Selassie', 'duration': 3527, - 'thumbnail': 're:^https?://.*[\.-]jpg$', + 'thumbnail': r're:^https?://.*[\.-]jpg$', 'age_limit': 0, 'subtitles': { 'sv': [{ diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py index 6d69f7686..c9902ed8a 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -18,7 +18,7 @@ class SWRMediathekIE(InfoExtractor): 'ext': 'mp4', 'title': 'SWR odysso', 'description': 'md5:2012e31baad36162e97ce9eb3f157b8a', - 'thumbnail': 're:^http:.*\.jpg$', + 'thumbnail': r're:^http:.*\.jpg$', 'duration': 2602, 'upload_date': '20140515', 'uploader': 'SWR Fernsehen', @@ -32,7 +32,7 @@ class SWRMediathekIE(InfoExtractor): 'ext': 'mp4', 'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen', 'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'duration': 5305, 'upload_date': '20140516', 'uploader': 'SWR Fernsehen', @@ -46,7 +46,7 @@ class SWRMediathekIE(InfoExtractor): 'ext': 'mp3', 'title': 'Saša Stanišic: Vor dem Fest', 'description': 'md5:5b792387dc3fbb171eb709060654e8c9', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'duration': 3366, 'upload_date': '20140520', 'uploader': 'SWR 2', diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 8670cee28..c351b7545 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -23,7 +23,7 @@ class TagesschauPlayerIE(InfoExtractor): 'id': '179517', 'ext': 'mp4', 'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', 'formats': 'mincount:6', }, }, { @@ -33,7 +33,7 @@ class TagesschauPlayerIE(InfoExtractor): 'id': '29417', 'ext': 'mp3', 'title': 'Trabi - Bye, bye Rennpappe', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', 'formats': 'mincount:2', }, }, { @@ -135,7 +135,7 @@ class TagesschauIE(InfoExtractor): 'ext': 'mp4', 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', 'description': '18.07.2015 20:10 Uhr', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', @@ -145,7 +145,7 @@ class TagesschauIE(InfoExtractor): 'ext': 'mp4', 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', 'description': 'md5:695c01bfd98b7e313c501386327aea59', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { # exclusive audio @@ -156,7 +156,7 @@ class TagesschauIE(InfoExtractor): 'ext': 'mp3', 'title': 'Trabi - Bye, bye Rennpappe', 'description': 'md5:8687dda862cbbe2cfb2df09b56341317', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { # audio in article @@ -167,7 +167,7 @@ class TagesschauIE(InfoExtractor): 'ext': 'mp3', 'title': 'Viele Baustellen für neuen BND-Chef', 'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html', diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py index 5293393ef..6d336da78 100644 --- a/youtube_dl/extractor/tass.py +++ b/youtube_dl/extractor/tass.py @@ -21,7 +21,7 @@ class TassIE(InfoExtractor): 'ext': 'mp4', 'title': 'Посетителям московского зоопарка показали красную панду', 'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { diff --git a/youtube_dl/extractor/tdslifeway.py b/youtube_dl/extractor/tdslifeway.py index 4d1f5c801..101c6ee31 100644 --- a/youtube_dl/extractor/tdslifeway.py +++ b/youtube_dl/extractor/tdslifeway.py @@ -13,7 +13,7 @@ class TDSLifewayIE(InfoExtractor): 'id': '3453494717001', 'ext': 'mp4', 'title': 'The Gospel by Numbers', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'upload_date': '20140410', 'description': 'Coming soon from T4G 2014!', 'uploader_id': '2034960640001', diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index df5d5556f..f14713a78 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -24,7 +24,7 @@ class TeacherTubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Measures of dispersion from a frequency table', 'description': 'Measures of dispersion from a frequency table', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }, { 'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064', @@ -34,7 +34,7 @@ class TeacherTubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'How to Make Paper Dolls _ Paper Art Projects', 'description': 'Learn how to make paper dolls in this simple', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }, { 'url': 'http://www.teachertube.com/music.php?music_id=8805', diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 451cde76d..1b1afab32 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -47,7 +47,7 @@ class TEDIE(InfoExtractor): 'id': 'tSVI8ta_P4w', 'ext': 'mp4', 'title': 'Vishal Sikka: The beauty and power of algorithms', - 'thumbnail': 're:^https?://.+\.jpg', + 'thumbnail': r're:^https?://.+\.jpg', 'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4', 'upload_date': '20140122', 'uploader_id': 'TEDInstitute', @@ -189,7 +189,7 @@ class TEDIE(InfoExtractor): 'format_id': '%s-%sk' % (format_id, bitrate), 'tbr': bitrate, }) - if re.search('\d+k', h264_url): + if re.search(r'\d+k', h264_url): http_url = h264_url elif format_id == 'rtmp': streamer = talk_info.get('streamer') diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 58078c531..0f576c1ab 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -17,7 +17,7 @@ class TelegraafIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tikibad ontruimd wegens brand', 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 33, }, 'params': { diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py index 1bbd0e7bd..9bcac4ec0 100644 --- a/youtube_dl/extractor/telemb.py +++ b/youtube_dl/extractor/telemb.py @@ -19,7 +19,7 @@ class TeleMBIE(InfoExtractor): 'ext': 'mp4', 'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages', 'description': 'md5:bc5225f47b17c309761c856ad4776265', - 'thumbnail': 're:^http://.*\.(?:jpg|png)$', + 'thumbnail': r're:^http://.*\.(?:jpg|png)$', } }, { @@ -32,7 +32,7 @@ class TeleMBIE(InfoExtractor): 'ext': 'mp4', 'title': 'Havré - Incendie mortel - Les reportages', 'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a', - 'thumbnail': 're:^http://.*\.(?:jpg|png)$', + 'thumbnail': r're:^http://.*\.(?:jpg|png)$', } }, ] diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py index 7786b2813..1207b1a1b 100644 --- a/youtube_dl/extractor/telewebion.py +++ b/youtube_dl/extractor/telewebion.py @@ -13,7 +13,7 @@ class TelewebionIE(InfoExtractor): 'id': '1263668', 'ext': 'mp4', 'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, }, 'params': { diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0405bd6b0..192d8fa29 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -156,7 +156,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'title': 'iPhone Siri’s sassy response to a math question has people talking', 'description': 'md5:a565d1deadd5086f3331d57298ec6333', 'duration': 83.0, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1435752600, 'upload_date': '20150701', 'uploader': 'NBCU-NEWS', @@ -297,7 +297,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): 'ext': 'mp4', 'title': 'The Biden factor: will Joe run in 2016?', 'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140208', 'timestamp': 1391824260, 'duration': 467.0, diff --git a/youtube_dl/extractor/thisamericanlife.py b/youtube_dl/extractor/thisamericanlife.py index 36493a5de..91e45f2c3 100644 --- a/youtube_dl/extractor/thisamericanlife.py +++ b/youtube_dl/extractor/thisamericanlife.py @@ -13,7 +13,7 @@ class ThisAmericanLifeIE(InfoExtractor): 'ext': 'm4a', 'title': '487: Harper High School, Part One', 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'http://www.thisamericanlife.org/play_full.php?play=487', diff --git a/youtube_dl/extractor/tinypic.py b/youtube_dl/extractor/tinypic.py index c43cace24..bc2def508 100644 --- a/youtube_dl/extractor/tinypic.py +++ b/youtube_dl/extractor/tinypic.py @@ -34,7 +34,7 @@ class TinyPicIE(InfoExtractor): webpage = self._download_webpage(url, video_id, 'Downloading page') mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P[\da-z]+)"\);\n' - '\s+fo\.addVariable\("s",\s"(?P\d+)"\);', webpage) + r'\s+fo\.addVariable\("s",\s"(?P\d+)"\);', webpage) if mobj is None: raise ExtractorError('Video %s does not exist' % video_id, expected=True) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 77d56b8ca..7e6ec3430 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -91,7 +91,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): formats = [] def extract_video_url(vl): - return re.sub('speed=\d+', 'speed=', unescapeHTML(vl.text)) + return re.sub(r'speed=\d+', 'speed=', unescapeHTML(vl.text)) video_link = cfg_xml.find('./videoLink') if video_link is not None: @@ -174,7 +174,7 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE): 'display_id': '6538', 'ext': 'mp4', 'title': 'Educational xxx video', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, }, 'params': { @@ -209,7 +209,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): 'display_id': 'Carmella-Decesare-striptease', 'ext': 'mp4', 'title': 'Carmella Decesare - striptease', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'duration': 91, 'age_limit': 18, 'categories': ['Porn Stars'], @@ -224,7 +224,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): 'ext': 'mp4', 'title': 'Educational xxx video', 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'duration': 164, 'age_limit': 18, 'uploader': 'bobwhite39', @@ -250,7 +250,7 @@ class EMPFlixIE(TNAFlixNetworkBaseIE): 'ext': 'mp4', 'title': 'Amateur Finger Fuck', 'description': 'Amateur solo finger fucking.', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'duration': 83, 'age_limit': 18, 'uploader': 'cwbike', @@ -280,7 +280,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE): 'ext': 'mp4', 'title': 'Experienced MILF Amazing Handjob', 'description': 'Experienced MILF giving an Amazing Handjob', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, 'uploader': 'darvinfred06', 'view_count': int, @@ -298,7 +298,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE): 'ext': 'flv', 'title': 'Jeune Couple Russe', 'description': 'Amateur', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, 'uploader': 'whiskeyjar', 'view_count': int, diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index bb8b8e234..2aae55e7e 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -23,7 +23,7 @@ class TudouIE(InfoExtractor): 'id': '159448201', 'ext': 'f4v', 'title': '卡马乔国足开大脚长传冲吊集锦', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1372113489000, 'description': '卡马乔卡家军,开大脚先进战术不完全集锦!', 'duration': 289.04, @@ -36,7 +36,7 @@ class TudouIE(InfoExtractor): 'id': '117049447', 'ext': 'f4v', 'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1349207518000, 'description': 'md5:294612423894260f2dcd5c6c04fe248b', 'duration': 5478.33, diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index ebe411e12..786143525 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -17,7 +17,7 @@ class TumblrIE(InfoExtractor): 'ext': 'mp4', 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', 'description': 'md5:37db8211e40b50c7c44e95da14f630b7', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', } }, { 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all', @@ -27,7 +27,7 @@ class TumblrIE(InfoExtractor): 'ext': 'mp4', 'title': '5SOS STRUM ;]', 'description': 'md5:dba62ac8639482759c8eb10ce474586a', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', } }, { 'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video', @@ -37,7 +37,7 @@ class TumblrIE(InfoExtractor): 'ext': 'mp4', 'title': 'HD Video Testing \u2014 Test description for my HD video', 'description': 'md5:97cc3ab5fcd27ee4af6356701541319c', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, 'params': { 'format': 'hd', @@ -92,7 +92,7 @@ class TumblrIE(InfoExtractor): 'title': 'Video by victoriassecret', 'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat', 'uploader_id': 'victoriassecret', - 'thumbnail': 're:^https?://.*\.jpg' + 'thumbnail': r're:^https?://.*\.jpg' }, 'add_ie': ['Instagram'], }] diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py index 7ae63a499..25aa9c58e 100644 --- a/youtube_dl/extractor/turbo.py +++ b/youtube_dl/extractor/turbo.py @@ -24,7 +24,7 @@ class TurboIE(InfoExtractor): 'duration': 3715, 'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ', 'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index bd28267b0..d5071e8a5 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -126,7 +126,7 @@ class TV2ArticleIE(InfoExtractor): if not assets: # New embed pattern - for v in re.findall('TV2ContentboxVideo\(({.+?})\)', webpage): + for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage): video = self._parse_json( v, playlist_id, transform_source=js_to_json, fatal=False) if not video: diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 5d2d8f132..29f62b970 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -33,7 +33,7 @@ class TV4IE(InfoExtractor): 'id': '2491650', 'ext': 'mp4', 'title': 'Kalla Fakta 5 (english subtitles)', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': int, 'upload_date': '20131125', }, @@ -45,7 +45,7 @@ class TV4IE(InfoExtractor): 'id': '3054113', 'ext': 'mp4', 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.', 'timestamp': int, 'upload_date': '20150130', diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 4065354dd..008f64cc2 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -19,7 +19,7 @@ class TVCIE(InfoExtractor): 'id': '74622', 'ext': 'mp4', 'title': 'События. "События". Эфир от 22.05.2015 14:30', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1122, }, } @@ -72,7 +72,7 @@ class TVCArticleIE(InfoExtractor): 'ext': 'mp4', 'title': 'События. "События". Эфир от 22.05.2015 14:30', 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1122, }, }, { @@ -82,7 +82,7 @@ class TVCArticleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках', 'description': 'md5:f2098f71e21f309e89f69b525fd9846e', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 278, }, }, { @@ -92,7 +92,7 @@ class TVCArticleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ещё не поздно. Эфир от 03.08.2013', 'description': 'md5:51fae9f3f8cfe67abce014e428e5b027', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 3316, }, }] diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index 7a9386cde..2b10d9bca 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -18,7 +18,7 @@ class TweakersIE(InfoExtractor): 'ext': 'mp4', 'title': 'New Nintendo 3DS XL - Op alle fronten beter', 'description': 'md5:3789b21fed9c0219e9bcaacd43fab280', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'duration': 386, 'uploader_id': 's7JeEm', } diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index af92b713b..1093a3829 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -22,7 +22,7 @@ class TwentyFourVideoIE(InfoExtractor): 'ext': 'mp4', 'title': 'Эротика каменного века', 'description': 'Как смотрели порно в каменном веке.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'SUPERTELO', 'duration': 31, 'timestamp': 1275937857, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 681f1b676..6d67bda86 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -223,7 +223,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'id': 'v6528877', 'ext': 'mp4', 'title': 'LCK Summer Split - Week 6 Day 1', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 17208, 'timestamp': 1435131709, 'upload_date': '20150624', @@ -243,7 +243,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'id': 'v11230755', 'ext': 'mp4', 'title': 'Untitled Broadcast', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1638, 'timestamp': 1439746708, 'upload_date': '20150816', @@ -550,7 +550,7 @@ class TwitchClipsIE(InfoExtractor): 'id': 'AggressiveCobraPoooound', 'ext': 'mp4', 'title': 'EA Play 2016 Live from the Novo Theatre', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'creator': 'EA', 'uploader': 'stereotype_', 'uploader_id': 'stereotype_', diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index ac0b221b4..37e3bc412 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -34,7 +34,7 @@ class TwitterCardIE(TwitterBaseIE): 'id': '560070183650213889', 'ext': 'mp4', 'title': 'Twitter Card', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 30.033, } }, @@ -45,7 +45,7 @@ class TwitterCardIE(TwitterBaseIE): 'id': '623160978427936768', 'ext': 'mp4', 'title': 'Twitter Card', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 80.155, }, }, @@ -82,7 +82,7 @@ class TwitterCardIE(TwitterBaseIE): 'id': '705235433198714880', 'ext': 'mp4', 'title': 'Twitter web player', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', }, }, { 'url': 'https://twitter.com/i/videos/752274308186120192', @@ -201,7 +201,7 @@ class TwitterIE(InfoExtractor): 'id': '643211948184596480', 'ext': 'mp4', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', @@ -217,7 +217,7 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai', 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"', - 'thumbnail': 're:^https?://.*\.png', + 'thumbnail': r're:^https?://.*\.png', 'uploader': 'Gifs', 'uploader_id': 'giphz', }, @@ -257,7 +257,7 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'JG', 'uploader_id': 'jaydingeer', }, diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 57dd73aef..daf45d0b4 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -23,7 +23,7 @@ class UDNEmbedIE(InfoExtractor): 'id': '300040', 'ext': 'mp4', 'title': '生物老師男變女 全校挺"做自己"', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download diff --git a/youtube_dl/extractor/urort.py b/youtube_dl/extractor/urort.py index 8872cfcb2..8f6edab4b 100644 --- a/youtube_dl/extractor/urort.py +++ b/youtube_dl/extractor/urort.py @@ -21,7 +21,7 @@ class UrortIE(InfoExtractor): 'id': '33124-24', 'ext': 'mp3', 'title': 'The Bomb', - 'thumbnail': 're:^https?://.+\.jpg', + 'thumbnail': r're:^https?://.+\.jpg', 'uploader': 'Gerilja', 'uploader_id': 'Gerilja', 'upload_date': '20100323', diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dl/extractor/ustudio.py index 3484a2046..56509beed 100644 --- a/youtube_dl/extractor/ustudio.py +++ b/youtube_dl/extractor/ustudio.py @@ -22,7 +22,7 @@ class UstudioIE(InfoExtractor): 'ext': 'mp4', 'title': 'San Francisco: Golden Gate Bridge', 'description': 'md5:23925500697f2c6d4830e387ba51a9be', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20111107', 'uploader': 'Tony Farley', } diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py index 84698371a..f474ed73f 100644 --- a/youtube_dl/extractor/varzesh3.py +++ b/youtube_dl/extractor/varzesh3.py @@ -22,7 +22,7 @@ class Varzesh3IE(InfoExtractor): 'ext': 'mp4', 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا', 'description': 'فصل ۲۰۱۵-۲۰۱۴', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'HTTP 404 Error', }, { @@ -67,7 +67,7 @@ class Varzesh3IE(InfoExtractor): webpage, display_id, default=None) if video_id is None: video_id = self._search_regex( - 'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id', + r'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id', default=display_id) return { diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 429893e38..bef639462 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -28,7 +28,7 @@ class Vbox7IE(InfoExtractor): 'ext': 'mp4', 'title': 'Борисов: Притеснен съм за бъдещето на България', 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1470982814, 'upload_date': '20160812', 'uploader': 'zdraveibulgaria', @@ -56,7 +56,7 @@ class Vbox7IE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - ']+src=(?P["\'])(?P(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)', + r']+src=(?P["\'])(?P(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)', webpage) if mobj: return mobj.group('url') diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 6b9c227db..80a643dfe 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -24,7 +24,7 @@ class VesselIE(InfoExtractor): 'id': 'HDN7G5UMs', 'ext': 'mp4', 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20150317', 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', 'timestamp': int, diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 3b38ac700..8a574bc26 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -61,7 +61,7 @@ class VGTVIE(XstreamIE): 'ext': 'mp4', 'title': 'Hevnen er søt: Episode 10 - Abu', 'description': 'md5:e25e4badb5f544b04341e14abdc72234', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 648.000, 'timestamp': 1404626400, 'upload_date': '20140706', @@ -76,7 +76,7 @@ class VGTVIE(XstreamIE): 'ext': 'flv', 'title': 'OPPTAK: VGTV følger EM-kvalifiseringen', 'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 9103.0, 'timestamp': 1410113864, 'upload_date': '20140907', @@ -96,7 +96,7 @@ class VGTVIE(XstreamIE): 'ext': 'mp4', 'title': 'V75 fra Solvalla 30.05.15', 'description': 'md5:b3743425765355855f88e096acc93231', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 25966, 'timestamp': 1432975582, 'upload_date': '20150530', @@ -200,7 +200,7 @@ class VGTVIE(XstreamIE): format_info = { 'url': mp4_url, } - mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url) + mobj = re.search(r'(\d+)_(\d+)_(\d+)', mp4_url) if mobj: tbr = int(mobj.group(3)) format_info.update({ @@ -246,7 +246,7 @@ class BTArticleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Alrekstad internat', 'description': 'md5:dc81a9056c874fedb62fc48a300dac58', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 191, 'timestamp': 1289991323, 'upload_date': '20101117', diff --git a/youtube_dl/extractor/vidbit.py b/youtube_dl/extractor/vidbit.py index e7ac5a842..91f45b7cc 100644 --- a/youtube_dl/extractor/vidbit.py +++ b/youtube_dl/extractor/vidbit.py @@ -20,7 +20,7 @@ class VidbitIE(InfoExtractor): 'ext': 'mp4', 'title': 'Intro to VidBit', 'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7', - 'thumbnail': 're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'upload_date': '20160618', 'view_count': int, 'comment_count': int, diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 8d92aee87..67808e7e6 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -26,7 +26,7 @@ class ViddlerIE(InfoExtractor): 'timestamp': 1335371429, 'upload_date': '20120425', 'duration': 100.89, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, 'comment_count': int, 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'], diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 4f0dcd18c..c02830ddd 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -19,7 +19,7 @@ class VideoMegaIE(InfoExtractor): 'id': 'AOSQBJYKIDDIKYJBQSOA', 'ext': 'mp4', 'title': '1254207', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600', diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 7f2566586..9b56630de 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -23,7 +23,7 @@ class VideomoreIE(InfoExtractor): 'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук', 'series': 'Кино в деталях', 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 2910, 'view_count': int, 'comment_count': int, @@ -37,7 +37,7 @@ class VideomoreIE(InfoExtractor): 'title': 'Молодежка 2 сезон 40 серия', 'series': 'Молодежка', 'episode': '40 серия', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 2809, 'view_count': int, 'comment_count': int, @@ -53,7 +53,7 @@ class VideomoreIE(InfoExtractor): 'ext': 'flv', 'title': 'Промо Команда проиграла из-за Бакина?', 'episode': 'Команда проиграла из-за Бакина?', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 29, 'age_limit': 16, 'view_count': int, @@ -145,7 +145,7 @@ class VideomoreVideoIE(InfoExtractor): 'ext': 'flv', 'title': 'Ёлки 3', 'description': '', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 5579, 'age_limit': 6, 'view_count': int, @@ -168,7 +168,7 @@ class VideomoreVideoIE(InfoExtractor): 'ext': 'flv', 'title': '1 серия. Здравствуй, Аквавилль!', 'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 754, 'age_limit': 6, 'view_count': int, diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index 6898042de..4e4b4e38c 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -18,7 +18,7 @@ class VidioIE(InfoExtractor): 'ext': 'mp4', 'title': 'DJ_AMBRED - Booyah (Live 2015)', 'description': 'md5:27dc15f819b6a78a626490881adbadf8', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 149, 'like_count': int, }, diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index b1156d531..e9ff336c4 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -23,7 +23,7 @@ class VidmeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fishing for piranha - the easy way', 'description': 'source: https://www.facebook.com/photo.php?v=312276045600871', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1406313244, 'upload_date': '20140725', 'age_limit': 0, @@ -39,7 +39,7 @@ class VidmeIE(InfoExtractor): 'id': 'Gc6M', 'ext': 'mp4', 'title': 'O Mere Dil ke chain - Arnav and Khushi VM', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1441211642, 'upload_date': '20150902', 'uploader': 'SunshineM', @@ -61,7 +61,7 @@ class VidmeIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Carver', 'description': 'md5:e9c24870018ae8113be936645b93ba3c', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1433203629, 'upload_date': '20150602', 'uploader': 'Thomas', @@ -82,7 +82,7 @@ class VidmeIE(InfoExtractor): 'id': 'Wmur', 'ext': 'mp4', 'title': 'naked smoking & stretching', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1430931613, 'upload_date': '20150506', 'uploader': 'naked-yogi', @@ -115,7 +115,7 @@ class VidmeIE(InfoExtractor): 'id': 'e5g', 'ext': 'mp4', 'title': 'Video upload (e5g)', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1401480195, 'upload_date': '20140530', 'uploader': None, diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py index 19500eba8..18735cfb2 100644 --- a/youtube_dl/extractor/viewlift.py +++ b/youtube_dl/extractor/viewlift.py @@ -14,7 +14,7 @@ from ..utils import ( class ViewLiftBaseIE(InfoExtractor): - _DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv' + _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv' class ViewLiftEmbedIE(ViewLiftBaseIE): @@ -110,7 +110,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'ext': 'mp4', 'title': 'Lost for Life', 'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 4489, 'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals'] } @@ -123,7 +123,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'ext': 'mp4', 'title': 'India', 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 979, 'categories': ['Documentary', 'Sports', 'Politics'] } @@ -160,7 +160,7 @@ class ViewLiftIE(ViewLiftBaseIE): snag = self._parse_json( self._search_regex( - 'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'), + r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'), display_id) for item in snag: diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index a93196a07..52dd95e2f 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -157,7 +157,7 @@ class ViewsterIE(InfoExtractor): formats.extend(m3u8_formats) else: qualities_basename = self._search_regex( - '/([^/]+)\.csmil/', + r'/([^/]+)\.csmil/', manifest_url, 'qualities basename', default=None) if not qualities_basename: continue diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index a4f914d14..4adcd1830 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -40,7 +40,7 @@ class ViideaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Automatics, robotics and biocybernetics', 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'timestamp': 1372349289, 'upload_date': '20130627', 'duration': 565, @@ -58,7 +58,7 @@ class ViideaIE(InfoExtractor): 'ext': 'flv', 'title': 'NLP at Google', 'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'timestamp': 1284375600, 'upload_date': '20100913', 'duration': 5352, @@ -74,7 +74,7 @@ class ViideaIE(InfoExtractor): 'id': '23181', 'title': 'Deep Learning Summer School, Montreal 2015', 'description': 'md5:0533a85e4bd918df52a01f0e1ebe87b7', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'timestamp': 1438560000, }, 'playlist_count': 30, @@ -85,7 +85,7 @@ class ViideaIE(InfoExtractor): 'id': '9737', 'display_id': 'mlss09uk_bishop_ibi', 'title': 'Introduction To Bayesian Inference', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'timestamp': 1251622800, }, 'playlist': [{ @@ -94,7 +94,7 @@ class ViideaIE(InfoExtractor): 'display_id': 'mlss09uk_bishop_ibi_part1', 'ext': 'wmv', 'title': 'Introduction To Bayesian Inference (Part 1)', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'duration': 4622, 'timestamp': 1251622800, 'upload_date': '20090830', @@ -105,7 +105,7 @@ class ViideaIE(InfoExtractor): 'display_id': 'mlss09uk_bishop_ibi_part2', 'ext': 'wmv', 'title': 'Introduction To Bayesian Inference (Part 2)', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', 'duration': 5641, 'timestamp': 1251622800, 'upload_date': '20090830', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c35cafcc6..37e1da70d 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -205,7 +205,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 'description': 'md5:2d3305bad981a06ff79f027f19865021', 'upload_date': '20121220', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user7108434', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', 'uploader_id': 'user7108434', 'uploader': 'Filippo Valsorda', 'duration': 10, @@ -218,7 +218,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'info_dict': { 'id': '68093876', 'ext': 'mp4', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/openstreetmapus', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus', 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', @@ -235,7 +235,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 'uploader': 'The BLN & Business of Software', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware', 'uploader_id': 'theblnbusinessofsoftware', 'duration': 3610, 'description': None, @@ -250,7 +250,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl password protected test video', 'upload_date': '20130614', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user18948128', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, @@ -268,7 +268,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'Key & Peele: Terrorist Interrogation', 'description': 'md5:8678b246399b070816b12313e8b4eb5c', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/atencio', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', 'upload_date': '20130927', @@ -284,7 +284,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'title': 'The New Vimeo Player (You Know, For Videos)', 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 'upload_date': '20131015', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/staff', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff', 'uploader_id': 'staff', 'uploader': 'Vimeo Staff', 'duration': 62, @@ -299,7 +299,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'Pier Solar OUYA Official Trailer', 'uploader': 'Tulio Gonçalves', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user28849593', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593', 'uploader_id': 'user28849593', }, }, @@ -312,7 +312,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute', 'uploader': 'The DMCI', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/dmci', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci', 'uploader_id': 'dmci', 'upload_date': '20111220', 'description': 'md5:ae23671e82d05415868f7ad1aec21147', @@ -327,7 +327,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'Vimeo Tribute: The Shining', 'uploader': 'Casey Donahue', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', @@ -346,7 +346,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'The Reluctant Revolutionary', 'uploader': '10Ft Films', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms', 'uploader_id': 'tenfootfilms', }, 'params': { @@ -497,7 +497,7 @@ class VimeoIE(VimeoBaseInfoExtractor): except RegexNotFoundError: # For pro videos or player.vimeo.com urls # We try to find out to which variable is assigned the config dic - m_variable_name = re.search('(\w)\.video\.id', webpage) + m_variable_name = re.search(r'(\w)\.video\.id', webpage) if m_variable_name is not None: config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1)) else: @@ -626,7 +626,7 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'המעבדה - במאי יותם פלדמן', 'uploader': 'גם סרטים', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, }, { @@ -637,7 +637,7 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'Rävlock, rätt läte på rätt plats', 'uploader': 'Lindroth & Norin', - 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847', 'uploader_id': 'user14430847', }, 'params': { @@ -857,7 +857,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn', 'uploader': 'DevWeek Events', 'duration': 2773, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader_id': 'user22258446', } }, { diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 7fd9b777b..c74b43766 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -37,7 +37,7 @@ class VimpleIE(SprutoBaseIE): 'ext': 'mp4', 'title': 'Sunset', 'duration': 20, - 'thumbnail': 're:https?://.*?\.jpg', + 'thumbnail': r're:https?://.*?\.jpg', }, }, { 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index bbfa6e5f2..02c9617d2 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -20,7 +20,7 @@ class VodlockerIE(InfoExtractor): 'id': 'e8wvyzz4sl42', 'ext': 'mp4', 'title': 'Germany vs Brazil', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }] diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 4f1a99a89..59e1359c4 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -26,7 +26,7 @@ class VoiceRepublicIE(InfoExtractor): 'ext': 'm4a', 'title': 'Watching the Watchers: Building a Sousveillance State', 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.', - 'thumbnail': 're:^https?://.*\.(?:png|jpg)$', + 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', 'duration': 1800, 'view_count': int, } diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index e22900f8d..858ac9e71 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -23,7 +23,7 @@ class VpornIE(InfoExtractor): 'ext': 'mp4', 'title': 'Violet on her 19th birthday', 'description': 'Violet dances in front of the camera which is sure to get you horny.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'kileyGrope', 'categories': ['Masturbation', 'Teen'], 'duration': 393, @@ -41,7 +41,7 @@ class VpornIE(InfoExtractor): 'ext': 'mp4', 'title': 'Hana Shower', 'description': 'Hana showers at the bathroom.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Hmmmmm', 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], 'duration': 588, diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 10ca6acb1..8ce3a6b81 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -26,7 +26,7 @@ class VubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Best Drummer Ever [HD]', 'description': 'md5:2d63c4b277b85c2277761c2cf7337d71', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'William', 'timestamp': 1406876915, 'upload_date': '20140801', @@ -45,7 +45,7 @@ class VubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Chiara Grispo - Price Tag by Jessie J', 'description': 'md5:8ea652a1f36818352428cb5134933313', - 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$', + 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$', 'uploader': 'Chiara.Grispo', 'timestamp': 1388743358, 'upload_date': '20140103', @@ -65,7 +65,7 @@ class VubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'My 7 year old Sister and I singing "Alive" by Krewella', 'description': 'md5:40bcacb97796339f1690642c21d56f4a', - 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$', + 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$', 'uploader': 'Seraina', 'timestamp': 1396492438, 'upload_date': '20140403', @@ -84,7 +84,7 @@ class VubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Frozen - Let It Go Cover by Siren Gene', 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.', - 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', + 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', 'uploader': 'Siren', 'timestamp': 1395448018, 'upload_date': '20140322', diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py index 8b9488340..cbb548672 100644 --- a/youtube_dl/extractor/walla.py +++ b/youtube_dl/extractor/walla.py @@ -20,7 +20,7 @@ class WallaIE(InfoExtractor): 'ext': 'flv', 'title': 'וואן דיירקשן: ההיסטריה', 'description': 'md5:de9e2512a92442574cdb0913c49bc4d8', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3600, }, 'params': { diff --git a/youtube_dl/extractor/watchindianporn.py b/youtube_dl/extractor/watchindianporn.py index 5d3b5bdb4..ed099beea 100644 --- a/youtube_dl/extractor/watchindianporn.py +++ b/youtube_dl/extractor/watchindianporn.py @@ -22,7 +22,7 @@ class WatchIndianPornIE(InfoExtractor): 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera', 'ext': 'mp4', 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'LoveJay', 'upload_date': '20160428', 'duration': 226, diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py index 7486cb347..e4b65f54f 100644 --- a/youtube_dl/extractor/webcaster.py +++ b/youtube_dl/extractor/webcaster.py @@ -20,7 +20,7 @@ class WebcasterIE(InfoExtractor): 'id': 'c8cefd240aa593681c8d068cff59f407_hd', 'ext': 'mp4', 'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126', diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py index 7aea47ed5..1eb1f6702 100644 --- a/youtube_dl/extractor/webofstories.py +++ b/youtube_dl/extractor/webofstories.py @@ -19,7 +19,7 @@ class WebOfStoriesIE(InfoExtractor): 'id': '4536', 'ext': 'mp4', 'title': 'The temperature of the sun', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Hans Bethe talks about calculating the temperature of the sun', 'duration': 238, } @@ -30,7 +30,7 @@ class WebOfStoriesIE(InfoExtractor): 'id': '55908', 'ext': 'mp4', 'title': 'The story of Gemmata obscuriglobus', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', 'duration': 169, }, @@ -42,7 +42,7 @@ class WebOfStoriesIE(InfoExtractor): 'id': '54215', 'ext': 'mp4', 'title': '"A Leg to Stand On"', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Oliver Sacks talks about the death and resurrection of a limb', 'duration': 97, }, @@ -134,7 +134,7 @@ class WebOfStoriesPlaylistIE(InfoExtractor): entries = [ self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories') - for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage)) + for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage)) ] title = self._search_regex( diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index 8e09156c2..7e0befd39 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -37,11 +37,11 @@ class WeiqiTVIE(InfoExtractor): page = self._download_webpage(url, media_id) info_json_str = self._search_regex( - 'var\s+video\s*=\s*(.+});', page, 'info json str') + r'var\s+video\s*=\s*(.+});', page, 'info json str') info_json = self._parse_json(info_json_str, media_id) letvcloud_url = self._search_regex( - 'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url') + r'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url') return { '_type': 'url_transparent', diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index e4a2baad2..4c41e98b2 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -14,7 +14,7 @@ class XBefIE(InfoExtractor): 'ext': 'mp4', 'title': 'md5:7358a9faef8b7b57acda7c04816f170e', 'age_limit': 18, - 'thumbnail': 're:^http://.*\.jpg', + 'thumbnail': r're:^http://.*\.jpg', } } diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index de344bad2..e616adce3 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -44,7 +44,7 @@ class XFileShareIE(InfoExtractor): 'id': '06y9juieqpmi', 'ext': 'mp4', 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, }, { 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', @@ -56,7 +56,7 @@ class XFileShareIE(InfoExtractor): 'id': '3rso4kdn6f9m', 'ext': 'mp4', 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', } }, { 'url': 'http://movpod.in/0wguyyxi1yca', @@ -67,7 +67,7 @@ class XFileShareIE(InfoExtractor): 'id': '3ivfabn7573c', 'ext': 'mp4', 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': r're:http://.*\.jpg', }, 'skip': 'Video removed', }, { diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 4b9c1ee9c..e0818201a 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -24,7 +24,7 @@ class XuiteIE(InfoExtractor): 'id': '3860914', 'ext': 'mp3', 'title': '孤單南半球-歐德陽', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 247.246, 'timestamp': 1314932940, 'upload_date': '20110902', @@ -40,7 +40,7 @@ class XuiteIE(InfoExtractor): 'id': '25925099', 'ext': 'mp4', 'title': 'BigBuckBunny_320x180', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 596.458, 'timestamp': 1454242500, 'upload_date': '20160131', @@ -58,7 +58,7 @@ class XuiteIE(InfoExtractor): 'ext': 'mp4', 'title': '暗殺教室 02', 'description': '字幕:【極影字幕社】', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1384.907, 'timestamp': 1421481240, 'upload_date': '20150117', diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dl/extractor/yesjapan.py index 112a6c030..681338c96 100644 --- a/youtube_dl/extractor/yesjapan.py +++ b/youtube_dl/extractor/yesjapan.py @@ -21,7 +21,7 @@ class YesJapanIE(InfoExtractor): 'ext': 'mp4', 'timestamp': 1416391590, 'upload_date': '20141119', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', } } diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py index 834d860af..1fd8d35c6 100644 --- a/youtube_dl/extractor/yinyuetai.py +++ b/youtube_dl/extractor/yinyuetai.py @@ -18,7 +18,7 @@ class YinYueTaiIE(InfoExtractor): 'title': '少女时代_PARTY_Music Video Teaser', 'creator': '少女时代', 'duration': 25, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'http://v.yinyuetai.com/video/h5/2322376', diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 0d943c343..c4ae4d88e 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -17,7 +17,7 @@ class YnetIE(InfoExtractor): 'id': 'L-11659-99244', 'ext': 'flv', 'title': 'איש לא יודע מאיפה באנו', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', } }, { 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', @@ -25,7 +25,7 @@ class YnetIE(InfoExtractor): 'id': 'L-8859-84418', 'ext': 'flv', 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין", - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', } } ] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 0265a64a7..34ab878a4 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -24,7 +24,7 @@ class YouPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'Sex Ed: Is It Safe To Masturbate Daily?', 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ask Dan And Jennifer', 'upload_date': '20101221', 'average_rating': int, @@ -43,7 +43,7 @@ class YouPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'Big Tits Awesome Brunette On amazing webcam show', 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Unknown', 'upload_date': '20111125', 'average_rating': int, diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 4e25d6f22..4ce327845 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -19,7 +19,7 @@ class YourUploadIE(InfoExtractor): 'id': '14i14h', 'ext': 'mp4', 'title': 'BigBuckBunny_320x180.mp4', - 'thumbnail': 're:^https?://.*\.jpe?g', + 'thumbnail': r're:^https?://.*\.jpe?g', } }, { diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bd24a2838..335568a10 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -376,7 +376,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -403,7 +403,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'iconic ep', 'iconic', 'love', 'it'], 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', 'license': 'Standard YouTube License', 'creator': 'Icona Pop', } @@ -420,7 +420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:64249768eec3bc4276236606ea996373', 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', 'license': 'Standard YouTube License', 'creator': 'Justin Timberlake', 'age_limit': 18, @@ -437,7 +437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 'uploader': 'SET India', 'uploader_id': 'setindia', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', 'license': 'Standard YouTube License', 'age_limit': 18, } @@ -451,7 +451,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -472,7 +472,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'upload_date': '20121002', 'uploader_id': '8KVIDEO', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 'description': '', 'uploader': '8KVIDEO', 'license': 'Standard YouTube License', @@ -531,7 +531,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20100909', 'uploader': 'The Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', 'license': 'Standard YouTube License', 'title': 'Burning Everyone\'s Koran', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', @@ -544,10 +544,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'HtVdAasjOgU', 'ext': 'mp4', 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', - 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', + 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', 'license': 'Standard YouTube License', 'age_limit': 18, @@ -563,7 +563,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', 'upload_date': '20110629', 'license': 'Standard YouTube License', 'age_limit': 18, @@ -577,7 +577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20100430', 'uploader_id': 'deadmau5', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', 'creator': 'deadmau5', 'description': 'md5:12c56784b8032162bb936a5f76d55360', 'uploader': 'deadmau5', @@ -597,7 +597,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20150827', 'uploader_id': 'olympic', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympic', @@ -616,7 +616,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'stretched_ratio': 16 / 9., 'upload_date': '20110310', 'uploader_id': 'AllenMeow', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 'uploader': '孫艾倫', 'license': 'Standard YouTube License', @@ -650,7 +650,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:116377fd2963b81ec4ce64b542173306', 'upload_date': '20150625', 'uploader_id': 'dorappi2000', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', 'license': 'Standard YouTube License', 'formats': 'mincount:32', @@ -693,7 +693,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -705,7 +705,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -717,7 +717,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -729,7 +729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }], @@ -769,7 +769,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader': 'IronSoulElf', 'license': 'Standard YouTube License', 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan', @@ -810,7 +810,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 'upload_date': '20150127', 'uploader_id': 'BerkmanCenter', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 'uploader': 'BerkmanCenter', 'license': 'Creative Commons Attribution license (reuse allowed)', }, @@ -829,7 +829,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20151119', 'uploader': 'Bernie 2016', 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', 'license': 'Creative Commons Attribution license (reuse allowed)', }, 'params': { @@ -856,7 +856,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150811', 'uploader': 'FlixMatrix', 'uploader_id': 'FlixMatrixKaravan', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', 'license': 'Standard YouTube License', }, 'params': { @@ -1877,7 +1877,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': "Smiley's People 01 detective, Adventure Series, Action", 'uploader': 'STREEM', 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', 'upload_date': '20150526', 'license': 'Standard YouTube License', 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', @@ -1898,7 +1898,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'Small Scale Baler and Braiding Rugs', 'uploader': 'Backus-Page House Museum', 'uploader_id': 'backuspagemuseum', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum', 'upload_date': '20161008', 'license': 'Standard YouTube License', 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a', @@ -2186,7 +2186,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): 'title': 'The Young Turks - Live Main Show', 'uploader': 'The Young Turks', 'uploader_id': 'TheYoungTurks', - 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', 'upload_date': '20150715', 'license': 'Standard YouTube License', 'description': 'md5:438179573adcdff3c97ebb1ee632b891', diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py index 22a9a57e8..bacb82eee 100644 --- a/youtube_dl/extractor/zapiks.py +++ b/youtube_dl/extractor/zapiks.py @@ -24,7 +24,7 @@ class ZapiksIE(InfoExtractor): 'ext': 'mp4', 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 528, 'timestamp': 1359044972, 'upload_date': '20130124', diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 12537737c..a365923fb 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -266,7 +266,7 @@ class ZDFChannelIE(ZDFBaseIE): return self.playlist_result( entries, channel_id, self._og_search_title(webpage, fatal=False)) - """ + r""" player = self._extract_player(webpage, channel_id) channel_id = self._search_regex( diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index 0f0e9d0eb..adfdcaabf 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -95,7 +95,7 @@ class ZingMp3IE(ZingMp3BaseInfoExtractor): 'id': 'ZWZB9WAB', 'title': 'Xa Mãi Xa', 'ext': 'mp3', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { 'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html', diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index 920573da9..164edd3a8 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -12,7 +12,7 @@ class MetadataFromTitlePP(PostProcessor): self._titleregex = self.format_to_regex(titleformat) def format_to_regex(self, fmt): - """ + r""" Converts a string like '%(title)s - %(artist)s' to a regex like diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 528d87bb9..39dd6c49f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -501,7 +501,7 @@ def sanitize_path(s): if drive_or_unc: norm_path.pop(0) sanitized_path = [ - path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part) + path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part) for path_part in norm_path] if drive_or_unc: sanitized_path.insert(0, drive_or_unc + os.path.sep) @@ -1183,7 +1183,7 @@ def date_from_str(date_str): return today if date_str == 'yesterday': return today - datetime.timedelta(days=1) - match = re.match('(now|today)(?P[+-])(?P

    ', - webpage, 'title') + title = self._html_search_regex(r'

    ([^<]+)

    ', webpage, 'title') - video_id = self._search_regex( - r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) + player_params = extract_attributes(self._search_regex( + r'(]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters')) - m3u8_url = self._download_webpage( - 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' - % video_id, video_id, headers={'Referer': url}) - formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native') + page_id = self._html_search_regex( + ']+data-pageid="([^"]+)"', webpage, 'page ID') + video_data = self._download_json( + 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id, + data=urlencode_postdata({ + 'xEvent': 'UIVideoPlayer.PingOutcome', + 'xJson': json.dumps({ + 'EJOutcomes': player_params['data-ejpingables'], + 'NativeHLS': False + }), + 'arcVersion': 3, + 'appVersion': 59, + 'gorilla.csrf.Token': page_id, + }))['Data'] - description = self._html_search_meta('description', webpage) + if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'): + raise ExtractorError( + 'Download rate reached. Please try again later.', expected=True) + + ej_links = self._decrypt(video_data['EJLinks'], video_id) + + formats = [] + + m3u8_url = ej_links.get('HLSLink') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + + mp4_url = ej_links.get('MP4Link') + if mp4_url: + formats.append({ + 'url': mp4_url, + }) + + self._sort_formats(formats) + + description = get_elements_by_class('synopsis', webpage)[0] thumbnail = self._html_search_regex( - r'''''', - webpage, "thumbnail url", fatal=False) + r''']+src=(["'])(?P(?!\1).+?/moviecovers/(?!\1).+?)\1''', + webpage, 'thumbnail url', fatal=False, group='url') if thumbnail is not None: - thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..')) + thumbnail = compat_urlparse.urljoin(url, thumbnail) return { 'id': video_id, From 0dac7cbb092c804f1548c4a60f15ac29a7db06b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 12 Feb 2017 17:24:45 +0100 Subject: [PATCH 356/586] [hotstar] improve extraction(closes #12096) - extract all qualities - detect drm protected videos - extract more metadata --- youtube_dl/extractor/hotstar.py | 46 +++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index f05d765d6..3a7a66a34 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor): 'only_matching': True, }] - _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s' - _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' - - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): - json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None): + json_data = super(HotStarIE, self)._download_json( + url_or_request, video_id, note, fatal=fatal, query=query) if json_data['resultCode'] != 'OK': if fatal: raise ExtractorError(json_data['errorDescription']) @@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - self._GET_CONTENT_TEMPLATE % video_id, - video_id)['contentInfo'][0] + 'http://account.hotstar.com/AVS/besc', video_id, query={ + 'action': 'GetAggregatedContentDetails', + 'channel': 'PCTV', + 'contentId': video_id, + })['contentInfo'][0] + title = video_data['episodeTitle'] + + if video_data.get('encrypted') == 'Y': + raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - # PCTV for extracting f4m manifest - for f in ('TABLET',): + for f in ('JIO',): format_data = self._download_json( - self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'), - video_id, 'Downloading %s JSON metadata' % f, fatal=False) + 'http://getcdn.hotstar.com/AVS/besc', + video_id, 'Downloading %s JSON metadata' % f, + fatal=False, query={ + 'action': 'GetCDN', + 'asJson': 'Y', + 'channel': f, + 'id': video_id, + 'type': 'VOD', + }) if format_data: - format_url = format_data['src'] + format_url = format_data.get('src') + if not format_url: + continue ext = determine_ext(format_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + m3u8_id='hls', fatal=False)) elif ext == 'f4m': # produce broken files continue @@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor): return { 'id': video_id, - 'title': video_data['episodeTitle'], + 'title': title, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': int_or_none(video_data.get('broadcastDate')), 'formats': formats, + 'episode': title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('contentTitle'), } From 1e2c3f61fc952620a52a8a3a79bcd1a6f7d8ecae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Feb 2017 03:33:23 +0700 Subject: [PATCH 357/586] [travis] Separate builds for core and download --- .travis.yml | 7 ++++++- devscripts/run_tests.sh | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 4833c76e9..8ba93ec02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: - "3.5" - "3.6" sudo: false -script: nosetests test --verbose +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +before_script: + - chmod +x ./devscripts/run_tests.sh +script: ./devscripts/run_tests.sh notifications: email: - filippo.valsorda@gmail.com diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100644 index 000000000..7f4c1e083 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" + +test_set="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set From 9dad94185367cdfde0de21cd8e595094cbe31acc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 11:43:20 +0100 Subject: [PATCH 358/586] [disney] improve extraction - add support for more urls - detect expired videos - skip Adobe Flash Access protected videos closes #4975 closes #11000 closes #11882 closes #11936 --- youtube_dl/extractor/disney.py | 60 ++++++++++++++++++++++++++++----- youtube_dl/extractor/generic.py | 13 ------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py index 396873c6d..939d1338c 100644 --- a/youtube_dl/extractor/disney.py +++ b/youtube_dl/extractor/disney.py @@ -9,13 +9,15 @@ from ..utils import ( unified_strdate, compat_str, determine_ext, + ExtractorError, ) class DisneyIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})''' + https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})|(?:[^/]+/)?(?P[^/?#]+))''' _TESTS = [{ + # Disney.EmbedVideo 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', 'info_dict': { 'id': '545ed1857afee5a0ec239977', @@ -28,6 +30,20 @@ class DisneyIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + # Grill.burger + 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', + 'info_dict': { + 'id': '5454e9f4e9804a552e3524c8', + 'ext': 'mp4', + 'title': '"Intro" Featurette: Rogue One: A Star Wars Story', + 'upload_date': '20170104', + 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', 'only_matching': True, @@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor): }, { 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', 'only_matching': True, + }, { + 'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677', + 'only_matching': True, + }, { + 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', + 'only_matching': True, }] def _real_extract(self, url): - domain, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'http://%s/embed/%s' % (domain, video_id), video_id) - video_data = self._parse_json(self._search_regex( - r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] + domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( + r'Grill\.burger\s*=\s*({.+})\s*:', + webpage, 'grill data')) + page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video') + video_data = page_data['data'][0] + else: + webpage = self._download_webpage( + 'http://%s/embed/%s' % (domain, video_id), video_id) + page_data = self._parse_json(self._search_regex( + r'Disney\.EmbedVideo\s*=\s*({.+});', + webpage, 'embed data'), video_id) + video_data = page_data['video'] for external in video_data.get('externals', []): if external.get('source') == 'vevo': return self.url_result('vevo:' + external['data_id'], 'Vevo') + video_id = video_data['id'] title = video_data['title'] formats = [] for flavor in video_data.get('flavors', []): flavor_format = flavor.get('format') flavor_url = flavor.get('url') - if not flavor_url or not re.match(r'https?://', flavor_url): + if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access': continue tbr = int_or_none(flavor.get('bitrate')) if tbr == 99999: formats.extend(self._extract_m3u8_formats( - flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) + flavor_url, video_id, 'mp4', + m3u8_id=flavor_format, fatal=False)) continue format_id = [] if flavor_format: @@ -88,6 +128,10 @@ class DisneyIE(InfoExtractor): 'ext': ext, 'vcodec': 'none' if (width == 0 and height == 0) else None, }) + if not formats and video_data.get('expired'): + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), + expected=True) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c233f038..494cc3c84 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -991,19 +991,6 @@ class GenericIE(InfoExtractor): 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', }, }, - # Kaltura embed protected with referrer - { - 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', - 'info_dict': { - 'id': '1_g4fbemnq', - 'ext': 'mp4', - 'title': 'Violetta - Achter De Schermen - Ruggero', - 'description': 'Achter de schermen met Ruggero', - 'timestamp': 1435133761, - 'upload_date': '20150624', - 'uploader_id': 'echojecka', - }, - }, # Kaltura embed with single quotes { 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', From 1de9f78e71214e130b5882662cdcd716b737e6ca Mon Sep 17 00:00:00 2001 From: Sergey M Date: Mon, 13 Feb 2017 18:56:05 +0800 Subject: [PATCH 359/586] [travis] Separate builds for core and download --- .travis.yml | 7 ++++++- devscripts/run_tests.sh | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 4833c76e9..8ba93ec02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: - "3.5" - "3.6" sudo: false -script: nosetests test --verbose +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +before_script: + - chmod +x ./devscripts/run_tests.sh +script: ./devscripts/run_tests.sh notifications: email: - filippo.valsorda@gmail.com diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100644 index 000000000..7f4c1e083 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" + +test_set="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set From 454e5cdb17dd4e77f3d387045b083f3d3ed61ae0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 14:28:30 +0100 Subject: [PATCH 360/586] [limelight] add support referer protected videos --- youtube_dl/extractor/generic.py | 9 ++++++--- youtube_dl/extractor/limelight.py | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 494cc3c84..a2b0298ec 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2337,8 +2337,9 @@ class GenericIE(InfoExtractor): 'Channel': 'channel', 'ChannelList': 'channel_list', } - return self.url_result('limelight:%s:%s' % ( - lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + return self.url_result(smuggle_url('limelight:%s:%s' % ( + lm[mobj.group(1)], mobj.group(2)), {'source_url': url}), + 'Limelight%s' % mobj.group(1), mobj.group(2)) mobj = re.search( r'''(?sx) @@ -2348,7 +2349,9 @@ class GenericIE(InfoExtractor): value=(["\'])(?:(?!\3).)*mediaId=(?P[a-z0-9]{32}) ''', webpage) if mobj: - return self.url_result('limelight:media:%s' % mobj.group('id')) + return self.url_result(smuggle_url( + 'limelight:media:%s' % mobj.group('id'), + {'source_url': url}), 'LimelightMedia', mobj.group('id')) # Look for AdobeTVVideo embeds mobj = re.search( diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index e635f3c4d..a3712665b 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + unsmuggle_url, ) @@ -15,20 +16,23 @@ class LimelightBaseIE(InfoExtractor): _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' - def _call_playlist_service(self, item_id, method, fatal=True): + def _call_playlist_service(self, item_id, method, fatal=True, referer=None): + headers = {} + if referer: + headers['Referer'] = referer return self._download_json( self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal) + item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) def _call_api(self, organization_id, item_id, method): return self._download_json( self._API_URL % (organization_id, self._API_PATH, item_id, method), item_id, 'Downloading API %s JSON' % method) - def _extract(self, item_id, pc_method, mobile_method, meta_method): - pc = self._call_playlist_service(item_id, pc_method) + def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None): + pc = self._call_playlist_service(item_id, pc_method, referer=referer) metadata = self._call_api(pc['orgId'], item_id, meta_method) - mobile = self._call_playlist_service(item_id, mobile_method, fatal=False) + mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer) return pc, mobile, metadata def _extract_info(self, streams, mobile_urls, properties): @@ -207,10 +211,13 @@ class LimelightMediaIE(LimelightBaseIE): _API_PATH = 'media' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) pc, mobile, metadata = self._extract( - video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties') + video_id, 'getPlaylistByMediaId', + 'getMobilePlaylistByMediaId', 'properties', + smuggled_data.get('source_url')) return self._extract_info( pc['playlistItems'][0].get('streams', []), @@ -247,11 +254,13 @@ class LimelightChannelIE(LimelightBaseIE): _API_PATH = 'channels' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) channel_id = self._match_id(url) pc, mobile, medias = self._extract( channel_id, 'getPlaylistByChannelId', - 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media') + 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', + 'media', smuggled_data.get('source_url')) entries = [ self._extract_info( From 89c6691f9d130ec63552a6ece4743caa572fc962 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 15:08:48 +0100 Subject: [PATCH 361/586] [bellmedia] accept longer video id(closes #12114) --- youtube_dl/extractor/bellmedia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index 32326ed9e..1f5b6ed92 100644 --- a/youtube_dl/extractor/bellmedia.py +++ b/youtube_dl/extractor/bellmedia.py @@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor): space )\.ca| much\.com - )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6})''' + )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor): }, { 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', + 'only_matching': True, }] _DOMAINS = { 'thecomedynetwork': 'comedy', From 6e5956e6ba32c5e4d186e79fbaff0842818ae56b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Feb 2017 23:17:48 +0700 Subject: [PATCH 362/586] [lemonde] Fallback delegate extraction to generic extractor (closes #12115, closes #12116) --- youtube_dl/extractor/lemonde.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py index 42568f315..3306892e8 100644 --- a/youtube_dl/extractor/lemonde.py +++ b/youtube_dl/extractor/lemonde.py @@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P[^/]+)\.html' _TESTS = [{ 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html', - 'md5': '01fb3c92de4c12c573343d63e163d302', + 'md5': 'da120c8722d8632eec6ced937536cc98', 'info_dict': { 'id': 'lqm3kl', 'ext': 'mp4', 'title': "Comprendre l'affaire Bygmalion en 5 minutes", 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 320, + 'duration': 309, 'upload_date': '20160119', 'timestamp': 1453194778, 'uploader_id': '3pmkp', }, + }, { + # standard iframe embed + 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html', + 'info_dict': { + 'id': 'uzsxms', + 'ext': 'mp4', + 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?", + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 325, + 'upload_date': '20161021', + 'timestamp': 1477044540, + 'uploader_id': '3pmkp', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html', 'only_matching': True, + }, { + # YouTube embeds + 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor): digiteka_url = self._proto_relative_url(self._search_regex( r'url\s*:\s*(["\'])(?P(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1', - webpage, 'digiteka url', group='url')) - return self.url_result(digiteka_url, 'Digiteka') + webpage, 'digiteka url', group='url', default=None)) + + if digiteka_url: + return self.url_result(digiteka_url, 'Digiteka') + + return self.url_result(url, 'Generic') From f6d6ca1db3020e7c7771880d0c4b58fdf732a8d5 Mon Sep 17 00:00:00 2001 From: Vobe Date: Sat, 11 Feb 2017 21:11:55 +0100 Subject: [PATCH 363/586] [xtube] Improve title extraction --- youtube_dl/extractor/xtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 11717fe98..ed3a37649 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -53,7 +53,7 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/video-watch/-%s' % video_id + url = 'http://www.xtube.com/watch.php?v=%s' % video_id req = sanitized_Request(url) req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') @@ -73,7 +73,7 @@ class XTubeIE(InfoExtractor): self._sort_formats(formats) title = self._search_regex( - (r'

    (?P[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), + (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), webpage, 'title', group='title') description = self._search_regex( r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) From 085f169ffebc17ec8b2bfc63aec8f5df57c7bdcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Feb 2017 23:44:43 +0700 Subject: [PATCH 364/586] [xtube] Fix extraction for both kinds of video id (closes #12088) --- youtube_dl/extractor/xtube.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index ed3a37649..5584674a0 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -44,6 +44,9 @@ class XTubeIE(InfoExtractor): }, { 'url': 'xtube:625837', 'only_matching': True, + }, { + 'url': 'xtube:kVTUy_G222_', + 'only_matching': True, }] def _real_extract(self, url): @@ -53,11 +56,16 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/watch.php?v=%s' % video_id - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') - webpage = self._download_webpage(req, display_id) + if video_id.isdigit() and len(video_id) < 11: + url_pattern = 'http://www.xtube.com/video-watch/-%s' + else: + url_pattern = 'http://www.xtube.com/watch.php?v=%s' + + webpage = self._download_webpage( + url_pattern % video_id, display_id, headers={ + 'Cookie': 'age_verified=1; cookiesAccepted=1', + }) sources = self._parse_json(self._search_regex( r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),', From 50de3dbad39d0b8cc1529113894f146f6f3f24b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:00:06 +0700 Subject: [PATCH 365/586] [zdf] Fix extraction (closes #12117) --- youtube_dl/extractor/zdf.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index a365923fb..523bb5c95 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -20,9 +20,9 @@ from ..utils import ( class ZDFBaseIE(InfoExtractor): - def _call_api(self, url, player, referrer, video_id): + def _call_api(self, url, player, referrer, video_id, item): return self._download_json( - url, video_id, 'Downloading JSON content', + url, video_id, 'Downloading JSON %s' % item, headers={ 'Referer': referrer, 'Api-Auth': 'Bearer %s' % player['apiToken'], @@ -104,7 +104,7 @@ class ZDFIE(ZDFBaseIE): }) formats.append(f) - def _extract_entry(self, url, content, video_id): + def _extract_entry(self, url, player, content, video_id): title = content.get('title') or content['teaserHeadline'] t = content['mainVideoContent']['http://zdf.de/rels/target'] @@ -116,7 +116,8 @@ class ZDFIE(ZDFBaseIE): 'http://zdf.de/rels/streams/ptmd-template'].replace( '{playerId}', 'portal') - ptmd = self._download_json(urljoin(url, ptmd_path), video_id) + ptmd = self._call_api( + urljoin(url, ptmd_path), player, url, video_id, 'metadata') formats = [] track_uris = set() @@ -174,8 +175,9 @@ class ZDFIE(ZDFBaseIE): } def _extract_regular(self, url, player, video_id): - content = self._call_api(player['content'], player, url, video_id) - return self._extract_entry(player['content'], content, video_id) + content = self._call_api( + player['content'], player, url, video_id, 'content') + return self._extract_entry(player['content'], player, content, video_id) def _extract_mobile(self, video_id): document = self._download_json( From cedf08ff54d192a0e32ecb3b943f50299cda7ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:07:35 +0700 Subject: [PATCH 366/586] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 089449dfb..d651f8880 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,17 @@ Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) Extractor +* [zdf] Fix extraction (#12117) +* [xtube] Fix extraction for both kinds of video id (#12088) +* [xtube] Improve title extraction (#12088) ++ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116) +* [bellmedia] Allow video id longer than 6 characters (#12114) ++ [limelight] Add support for referer protected videos +* [disney] Improve extraction (#4975, #11000, #11882, #11936) +* [hotstar] Improve extraction (#12096) * [einthusan] Fix extraction (#11416) ++ [aenetworks] Add support for lifetimemovieclub.com (#12097) +* [youtube] Fix parsing codecs (#12091) version 2017.02.11 From 58a65ba852443075fe38a3ef74798de05dd57bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:09:18 +0700 Subject: [PATCH 367/586] release 2017.02.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7bd301cc8..32aa55d83 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.11 +[debug] youtube-dl version 2017.02.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d651f8880..9242b3eee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.14 Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1f84acfea..3e7e7c0bf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.11' +__version__ = '2017.02.14' From fcca0d53a8fa47614a39a433a3da7d1ab1d88ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Caletka?= <ondrej@caletka.cz> Date: Tue, 14 Feb 2017 15:57:17 +0100 Subject: [PATCH 368/586] [ceskatelevize] Quick fix to revert to using old HLS-based playlist This fixes recent changes in iVysilani. Proper patch should migrate to MPEG-DASH version, which is now the default. --- youtube_dl/extractor/ceskatelevize.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 4f88c31ad..0f1453b99 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -21,10 +21,10 @@ class CeskaTelevizeIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { - 'id': '61924494876951776', + 'id': '61924494877246241', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace', - 'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', + 'title': 'Hyde Park Civilizace: Život v Grónsku', + 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3350, }, @@ -121,6 +121,7 @@ class CeskaTelevizeIE(InfoExtractor): req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') req.add_header('X-Requested-With', 'XMLHttpRequest') + req.add_header('User-agent', 'Mozilla/5.0') req.add_header('Referer', url) playlistpage = self._download_json(req, playlist_id) From 5cb2d36c82abf3b753910afe3013b274e31a247a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 22:56:39 +0700 Subject: [PATCH 369/586] [ceskatelevize] Extract DASH formats (closes #12119, closes #12133) --- youtube_dl/extractor/ceskatelevize.py | 142 +++++++++++++++----------- 1 file changed, 83 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 0f1453b99..e08bf264c 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -13,6 +13,7 @@ from ..utils import ( float_or_none, sanitized_Request, urlencode_postdata, + USER_AGENTS, ) @@ -114,71 +115,94 @@ class CeskaTelevizeIE(InfoExtractor): 'requestSource': 'iVysilani', } - req = sanitized_Request( - 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - data=urlencode_postdata(data)) - - req.add_header('Content-type', 'application/x-www-form-urlencoded') - req.add_header('x-addr', '127.0.0.1') - req.add_header('X-Requested-With', 'XMLHttpRequest') - req.add_header('User-agent', 'Mozilla/5.0') - req.add_header('Referer', url) - - playlistpage = self._download_json(req, playlist_id) - - playlist_url = playlistpage['url'] - if playlist_url == 'error_region': - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - - req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) - req.add_header('Referer', url) - - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - - playlist = self._download_json(req, playlist_id)['playlist'] - playlist_len = len(playlist) - entries = [] - for item in playlist: - is_live = item.get('type') == 'LIVE' - formats = [] - for format_id, stream_url in item['streamUrls'].items(): - formats.extend(self._extract_m3u8_formats( - stream_url, playlist_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', - fatal=False)) - self._sort_formats(formats) - item_id = item.get('id') or item['assetId'] - title = item['title'] + for user_agent in (None, USER_AGENTS['Safari']): + req = sanitized_Request( + 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + data=urlencode_postdata(data)) - duration = float_or_none(item.get('duration')) - thumbnail = item.get('previewImageUrl') + req.add_header('Content-type', 'application/x-www-form-urlencoded') + req.add_header('x-addr', '127.0.0.1') + req.add_header('X-Requested-With', 'XMLHttpRequest') + if user_agent: + req.add_header('User-Agent', user_agent) + req.add_header('Referer', url) - subtitles = {} - if item.get('type') == 'VOD': - subs = item.get('subtitles') - if subs: - subtitles = self.extract_subtitles(episode_id, subs) + playlistpage = self._download_json(req, playlist_id, fatal=False) - if playlist_len == 1: - final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) - else: - final_title = '%s (%s)' % (playlist_title, title) + if not playlistpage: + continue - entries.append({ - 'id': item_id, - 'title': final_title, - 'description': playlist_description if playlist_len == 1 else None, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - }) + playlist_url = playlistpage['url'] + if playlist_url == 'error_region': + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + + req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) + req.add_header('Referer', url) + + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) + + playlist = self._download_json(req, playlist_id, fatal=False) + if not playlist: + continue + + playlist = playlist.get('playlist') + if not isinstance(playlist, list): + continue + + playlist_len = len(playlist) + + for num, item in enumerate(playlist): + is_live = item.get('type') == 'LIVE' + formats = [] + for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'playerType=flash' in stream_url: + formats.extend(self._extract_m3u8_formats( + stream_url, playlist_id, 'mp4', + entry_protocol='m3u8' if is_live else 'm3u8_native', + fatal=False)) + else: + formats.extend(self._extract_mpd_formats( + stream_url, playlist_id, fatal=False)) + + if user_agent and len(entries) == playlist_len: + entries[num]['formats'].extend(formats) + continue + + item_id = item.get('id') or item['assetId'] + title = item['title'] + + duration = float_or_none(item.get('duration')) + thumbnail = item.get('previewImageUrl') + + subtitles = {} + if item.get('type') == 'VOD': + subs = item.get('subtitles') + if subs: + subtitles = self.extract_subtitles(episode_id, subs) + + if playlist_len == 1: + final_title = playlist_title or title + if is_live: + final_title = self._live_title(final_title) + else: + final_title = '%s (%s)' % (playlist_title, title) + + entries.append({ + 'id': item_id, + 'title': final_title, + 'description': playlist_description if playlist_len == 1 else None, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + }) + + for e in entries: + self._sort_formats(e['formats']) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From 9a372f14b422de15acf91e25a90375688b2ba3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 23:52:41 +0700 Subject: [PATCH 370/586] [pornhub] Extract video URL from tv platform site (#12007, #12129) --- youtube_dl/extractor/pornhub.py | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 818d99c1f..7a2737032 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -2,27 +2,27 @@ from __future__ import unicode_literals import itertools -import os +# import os import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, - compat_urllib_parse_urlparse, + # compat_urllib_parse_unquote, + # compat_urllib_parse_unquote_plus, + # compat_urllib_parse_urlparse, ) from ..utils import ( ExtractorError, int_or_none, js_to_json, orderedSet, - sanitized_Request, + # sanitized_Request, str_to_int, ) -from ..aes import ( - aes_decrypt_text -) +# from ..aes import ( +# aes_decrypt_text +# ) class PornHubIE(InfoExtractor): @@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - req = sanitized_Request( - 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + def dl_webpage(platform): + return self._download_webpage( + 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id, + video_id, headers={ + 'Cookie': 'age_verified=1; platform=%s' % platform, + }) + + webpage = dl_webpage('pc') error_msg = self._html_search_regex( r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>', @@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + tv_webpage = dl_webpage('tv') + + video_url = self._search_regex( + r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage, + 'video url', group='url') + + title = self._search_regex( + r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) + # video_title from flashvars contains whitespace instead of non-ASCII (see # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. - title = self._html_search_meta( + title = title or self._html_search_meta( 'twitter:title', webpage, default=None) or self._search_regex( (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', @@ -156,6 +169,7 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') + """ video_variables = {} for video_variablename, quote, video_variable in re.findall( r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): @@ -197,6 +211,7 @@ class PornHubIE(InfoExtractor): 'height': height, }) self._sort_formats(formats) + """ page_params = self._parse_json(self._search_regex( r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', @@ -209,6 +224,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, + 'url': video_url, 'uploader': video_uploader, 'title': title, 'thumbnail': thumbnail, @@ -217,7 +233,7 @@ class PornHubIE(InfoExtractor): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - 'formats': formats, + # 'formats': formats, 'age_limit': 18, 'tags': tags, 'categories': categories, From 22ce9ad2bdad2bf79b22f82cfff7f58156c9d349 Mon Sep 17 00:00:00 2001 From: Marek Rusinowski <marekrusinowski@gmail.com> Date: Mon, 13 Feb 2017 21:42:26 +0100 Subject: [PATCH 371/586] [vod.pl] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vodpl.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/vodpl.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 76ad7c40b..657e45e6f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1147,6 +1147,7 @@ from .vlive import ( VLiveChannelIE ) from .vodlocker import VodlockerIE +from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voxmedia import VoxMediaIE diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py new file mode 100644 index 000000000..f612347ce --- /dev/null +++ b/youtube_dl/extractor/vodpl.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .onet import OnetBaseIE +from ..utils import clean_html + + +class VODPlIE(OnetBaseIE): + _VALID_URL = r'https?://vod\.pl/(?:.*/)?(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', + 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', + 'info_dict': { + 'id': '3ep3jns', + 'ext': 'mp4', + 'title': 'Chłopaki nie płaczą', + 'description': 'Kuba Brenner aby pomóc swojemu nieśmiałemu przyjacielowi Oskarowi wynajmuje w agencji towarzyskiej dwie panie. Po upojnej nocy okazuje się, że chłopcy nie byli przygotowani finansowo. "Opiekun artystyczny" dziewczyn zabiera w ramach rekompensaty drogocenną rzeźbę należącą do wujka Oskara. Kłopoty chłopców zaczynają się, gdy Kuba udaje się do agencji aby wykupić figurkę i trafia w sam środek mafijnej transakcji... Idiotyczny przypadek sprawia, że w klubie dochodzi do strzelaniny podczas której Grucha i Bolec zostają ranni, ginie również walizka z pieniędzmi... Podejrzenie pada na Kubę.', + 'timestamp': 1463415154, + 'duration': 5765, + 'upload_date': '20160516', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mvp_id = self._search_mvp_id(webpage) + + info_dict = self._extract_from_id(mvp_id, webpage) + info_dict.update({ + 'id': video_id, + 'description': clean_html(info_dict['description']).strip().replace('\r', '\n') + }) + + return info_dict From 6092ccd05844976ea946ba5277f2b00ccb5c7920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 00:52:31 +0700 Subject: [PATCH 372/586] [vodpl] Make more robust and add another test (closes #12122) --- youtube_dl/extractor/vodpl.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py index f612347ce..9e919708e 100644 --- a/youtube_dl/extractor/vodpl.py +++ b/youtube_dl/extractor/vodpl.py @@ -2,35 +2,31 @@ from __future__ import unicode_literals from .onet import OnetBaseIE -from ..utils import clean_html class VODPlIE(OnetBaseIE): - _VALID_URL = r'https?://vod\.pl/(?:.*/)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', 'info_dict': { 'id': '3ep3jns', 'ext': 'mp4', 'title': 'Chłopaki nie płaczą', - 'description': 'Kuba Brenner aby pomóc swojemu nieśmiałemu przyjacielowi Oskarowi wynajmuje w agencji towarzyskiej dwie panie. Po upojnej nocy okazuje się, że chłopcy nie byli przygotowani finansowo. "Opiekun artystyczny" dziewczyn zabiera w ramach rekompensaty drogocenną rzeźbę należącą do wujka Oskara. Kłopoty chłopców zaczynają się, gdy Kuba udaje się do agencji aby wykupić figurkę i trafia w sam środek mafijnej transakcji... Idiotyczny przypadek sprawia, że w klubie dochodzi do strzelaniny podczas której Grucha i Bolec zostają ranni, ginie również walizka z pieniędzmi... Podejrzenie pada na Kubę.', + 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224', 'timestamp': 1463415154, 'duration': 5765, 'upload_date': '20160516', }, - } + }, { + 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - mvp_id = self._search_mvp_id(webpage) - - info_dict = self._extract_from_id(mvp_id, webpage) - info_dict.update({ - 'id': video_id, - 'description': clean_html(info_dict['description']).strip().replace('\r', '\n') - }) - + info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage) + info_dict['id'] = video_id return info_dict From d31aa74fdb3f69071ba869feba03525f67e974f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 00:58:18 +0700 Subject: [PATCH 373/586] [onetmvp] Add shortcut extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/onet.py | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 657e45e6f..b2ee0c1b0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -694,6 +694,7 @@ from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, OnetChannelIE, + OnetMVPIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 0a501b3e5..46bad492a 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor): return self._search_regex( r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id') - def _extract_from_id(self, video_id, webpage): + def _extract_from_id(self, video_id, webpage=None): response = self._download_json( 'http://qi.ckm.onetapi.pl/', video_id, query={ @@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor): meta = video.get('meta', {}) - title = self._og_search_title(webpage, default=None) or meta['title'] - description = self._og_search_description(webpage, default=None) or meta.get('description') + title = (self._og_search_title( + webpage, default=None) if webpage else None) or meta['title'] + description = (self._og_search_description( + webpage, default=None) if webpage else None) or meta.get('description') duration = meta.get('length') or meta.get('lenght') timestamp = parse_iso8601(meta.get('addDate'), ' ') @@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor): } +class OnetMVPIE(OnetBaseIE): + _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)' + + _TEST = { + 'url': 'onetmvp:381027.1509591944', + 'only_matching': True, + } + + def _real_extract(self, url): + return self._extract_from_id(self._match_id(url)) + + class OnetIE(OnetBaseIE): _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)' IE_NAME = 'onet.tv' From 43a3d9edfcdad8eb33758c4a7f4f912322001b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 01:14:06 +0700 Subject: [PATCH 374/586] [onetpl] Add support for onet.pl (closes #10507) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/onet.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b2ee0c1b0..be3688d5a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -695,6 +695,7 @@ from .onet import ( OnetIE, OnetChannelIE, OnetMVPIE, + OnetPlIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 46bad492a..801aadbff 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -181,3 +181,35 @@ class OnetChannelIE(OnetBaseIE): channel_title = strip_or_none(get_element_by_class('o_channelName', webpage)) channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage)) return self.playlist_result(entries, channel_id, channel_title, channel_description) + + +class OnetPlIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' + IE_NAME = 'onet.pl' + + _TESTS = [{ + 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly', + 'md5': 'b94021eb56214c3969380388b6e73cb0', + 'info_dict': { + 'id': '1561707.1685479', + 'ext': 'mp4', + 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu', + 'description': 'md5:61fb0740084d2d702ea96512a03585b4', + 'upload_date': '20170214', + 'timestamp': 1487078046, + }, + }, { + 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mvp_id = self._search_regex( + r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id') + + return self.url_result( + 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id) From 04a741232f8e03cc91a3539066c66aed802076b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 01:23:55 +0700 Subject: [PATCH 375/586] [onetpl] Add support for businessinsider.com.pl and plejada.pl --- youtube_dl/extractor/onet.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 801aadbff..94f57990b 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -184,7 +184,7 @@ class OnetChannelIE(OnetBaseIE): class OnetPlIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' IE_NAME = 'onet.pl' _TESTS = [{ @@ -201,6 +201,15 @@ class OnetPlIE(InfoExtractor): }, { 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', 'only_matching': True, + }, { + 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e', + 'only_matching': True, + }, { + 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk', + 'only_matching': True, + }, { + 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89', + 'only_matching': True, }] def _real_extract(self, url): From 3021cf83b7cd45283fd1a72859e46f44e67ce7bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 02:08:32 +0700 Subject: [PATCH 376/586] [pinkbike] Fix uploader extraction (closes #12054) --- youtube_dl/extractor/pinkbike.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py index 6a4580d54..9f3501f77 100644 --- a/youtube_dl/extractor/pinkbike.py +++ b/youtube_dl/extractor/pinkbike.py @@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor): 'video:duration', webpage, 'duration')) uploader = self._search_regex( - r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False) + r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, + 'uploader', fatal=False) upload_date = unified_strdate(self._search_regex( r'class="fullTime"[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)) From 1bd05345ea4b91598ec04b8e0d33fd14f9e2eddc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 15 Feb 2017 14:18:50 +0100 Subject: [PATCH 377/586] [amcnetworks] fix extraction(closes #12127) --- youtube_dl/extractor/amcnetworks.py | 30 ++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 87c803e94..b71d1a093 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE): 'mbr': 'true', 'manifest': 'm3u', } - media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url') + media_url = self._search_regex( + r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', + webpage, 'media url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id) + r'link\.theplatform\.com/s/([^?]+)', + media_url, 'theplatform_path'), display_id) info = self._parse_theplatform_metadata(theplatform_metadata) video_id = theplatform_metadata['pid'] title = theplatform_metadata['title'] rating = theplatform_metadata['ratings'][0]['rating'] - auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') + auth_required = self._search_regex( + r'window\.authRequired\s*=\s*(true|false);', + webpage, 'auth required') if auth_required == 'true': - requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id') - resource = self._get_mvpd_resource(requestor_id, title, video_id, rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource) + requestor_id = self._search_regex( + r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', + webpage, 'requestor id') + resource = self._get_mvpd_resource( + requestor_id, title, video_id, rating) + query['auth'] = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) media_url = update_url_query(media_url, query) - formats, subtitles = self._extract_theplatform_smil(media_url, video_id) + formats, subtitles = self._extract_theplatform_smil( + media_url, video_id) self._sort_formats(formats) info.update({ 'id': video_id, @@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE): if ns_keys: ns = list(ns_keys)[0] series = theplatform_metadata.get(ns + '$show') - season_number = int_or_none(theplatform_metadata.get(ns + '$season')) + season_number = int_or_none( + theplatform_metadata.get(ns + '$season')) episode = theplatform_metadata.get(ns + '$episodeTitle') - episode_number = int_or_none(theplatform_metadata.get(ns + '$episode')) + episode_number = int_or_none( + theplatform_metadata.get(ns + '$episode')) if season_number: title = 'Season %d - %s' % (season_number, title) if series: From db13c16ef8968613680e2bbc85f373c3e74faf98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:12:10 +0700 Subject: [PATCH 378/586] [utils] Add support for quoted string literals in --match-filter (closes #8050, closes #12142, closes #12144) --- test/test_YoutubeDL.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 9 +++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8bf00bea9..d07c35be8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 from __future__ import unicode_literals @@ -606,6 +607,8 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', + 'uploader': "變態妍字幕版 太妍 тест", + 'creator': "тест ' 123 ' тест--", } second = { 'id': '2', @@ -616,6 +619,7 @@ class TestYoutubeDL(unittest.TestCase): 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', + 'uploader': "тест 123", } videos = [first, second] @@ -656,6 +660,26 @@ class TestYoutubeDL(unittest.TestCase): res = get_videos(f) self.assertEqual(res, ['1']) + f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('creator = "тест \' 123 \' тест--"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") + res = get_videos(f) + self.assertEqual(res, []) + def test_playlist_items_selection(self): entries = [{ 'id': compat_str(i), diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1279a9042..07c07be6f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2383,6 +2383,7 @@ def _match_one(filter_part, dct): \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| + (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| (?P<strval>(?![0-9.])[a-z0-9A-Z]*) ) \s*$ @@ -2391,7 +2392,8 @@ def _match_one(filter_part, dct): if m: op = COMPARISON_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) - if (m.group('strval') is not None or + if (m.group('quotedstrval') is not None or + m.group('strval') is not None or # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see @@ -2401,7 +2403,10 @@ def _match_one(filter_part, dct): if m.group('op') not in ('=', '!='): raise ValueError( 'Operator %s does not support string values!' % m.group('op')) - comparison_value = m.group('strval') or m.group('intval') + comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') + quote = m.group('quote') + if quote is not None: + comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: try: comparison_value = int(m.group('intval')) From 398dea321001b99ac4ad28d3d60a5317c4a439d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:20:46 +0700 Subject: [PATCH 379/586] [test_YoutubeDL] Fix invalid escape sequences --- test/test_YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index d07c35be8..2cfcf743a 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -541,10 +541,10 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(ydl._format_note({}), '') assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, - }), '^\s*10k$') + }), r'^\s*10k$') assertRegexpMatches(self, ydl._format_note({ 'fps': 30, - }), '^30fps$') + }), r'^30fps$') def test_postprocessors(self): filename = 'post-processor-testfile.mp4' From 099cfdb770f458de7cfdf3e814fbb9f43db217ea Mon Sep 17 00:00:00 2001 From: Anisse Astier <anisse@astier.eu> Date: Wed, 15 Feb 2017 17:28:31 +0100 Subject: [PATCH 380/586] [devscripts/run_tests.sh] Change permission for script to 755 --- .travis.yml | 2 -- devscripts/run_tests.sh | 0 2 files changed, 2 deletions(-) mode change 100644 => 100755 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 8ba93ec02..f41e11137 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,8 +11,6 @@ sudo: false env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download -before_script: - - chmod +x ./devscripts/run_tests.sh script: ./devscripts/run_tests.sh notifications: email: diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh old mode 100644 new mode 100755 From de4d378c0cd9035d4ab93dc6826a17c76f388641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:38:00 +0700 Subject: [PATCH 381/586] [ceskatelevize] Prefix format ids --- youtube_dl/extractor/ceskatelevize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e08bf264c..1b16e5aaa 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -162,10 +162,10 @@ class CeskaTelevizeIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', entry_protocol='m3u8' if is_live else 'm3u8_native', - fatal=False)) + m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_mpd_formats( - stream_url, playlist_id, fatal=False)) + stream_url, playlist_id, mpd_id='dash', fatal=False)) if user_agent and len(entries) == playlist_len: entries[num]['formats'].extend(formats) From eafaeb226a277008fb8df72bf0326f2b369ff6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:04:15 +0700 Subject: [PATCH 382/586] [ceskatelevize] Lower priority for audio description sources (#12119) --- youtube_dl/extractor/ceskatelevize.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 1b16e5aaa..b1dfacf80 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -159,13 +159,19 @@ class CeskaTelevizeIE(InfoExtractor): formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): if 'playerType=flash' in stream_url: - formats.extend(self._extract_m3u8_formats( + stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', entry_protocol='m3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls-%s' % format_id, fatal=False) else: - formats.extend(self._extract_mpd_formats( - stream_url, playlist_id, mpd_id='dash', fatal=False)) + stream_formats = self._extract_mpd_formats( + stream_url, playlist_id, + mpd_id='dash-%s' % format_id, fatal=False) + # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031 + if format_id == 'audioDescription': + for f in stream_formats: + f['source_preference'] = -10 + formats.extend(stream_formats) if user_agent and len(entries) == playlist_len: entries[num]['formats'].extend(formats) From 3aa25395aa02b7a33e0fbf6d38e39fffee268255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:08:56 +0700 Subject: [PATCH 383/586] [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9242b3eee..912e1bbdc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core ++ [utils] Add support for quoted string literals in --match-filter (#8050, + #12142, #12144) + +Extractors +* [ceskatelevize] Lower priority for audio description sources (#12119) +* [amcnetworks] Fix extraction (#12127) +* [pinkbike] Fix uploader extraction (#12054) ++ [onetpl] Add support for businessinsider.com.pl and plejada.pl ++ [onetpl] Add support for onet.pl (#10507) ++ [onetmvp] Add shortcut extractor ++ [vodpl] Add support for vod.pl (#12122) ++ [pornhub] Extract video URL from tv platform site (#12007, #12129) ++ [ceskatelevize] Extract DASH formats (#12119, #12133) + + version 2017.02.14 Core From 2480b056c137e514662b70053ec2df1391b6c2ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:10:04 +0700 Subject: [PATCH 384/586] release 2017.02.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 32aa55d83..06711f73b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.14 +[debug] youtube-dl version 2017.02.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 912e1bbdc..8ef8a8307 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.16 Core + [utils] Add support for quoted string literals in --match-filter (#8050, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3e84f1237..5a436e8f7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -546,8 +546,10 @@ - **OktoberfestTV** - **on.aol.com** - **OnDemandKorea** + - **onet.pl** - **onet.tv** - **onet.tv:channel** + - **OnetMVP** - **OnionStudios** - **Ooyala** - **OoyalaExternal** @@ -900,6 +902,7 @@ - **vlive** - **vlive:channel** - **Vodlocker** + - **VODPl** - **VODPlatform** - **VoiceRepublic** - **VoxMedia** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3e7e7c0bf..323e80954 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.14' +__version__ = '2017.02.16' From b898f0a173fa040ddf95dbd97650cec07a8f19f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 04:57:42 +0700 Subject: [PATCH 385/586] [elpais] Fix typo and improve extraction (closes #12139) --- youtube_dl/extractor/elpais.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py index 99e00cf3c..b89f6db62 100644 --- a/youtube_dl/extractor/elpais.py +++ b/youtube_dl/extractor/elpais.py @@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor): 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas', 'upload_date': '20170127', }, + }, { + 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html', + 'info_dict': { + 'id': '1487062137_075943', + 'ext': 'mp4', + 'title': 'Disyuntivas', + 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218', + 'upload_date': '20170214', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor): video_url = prefix + video_suffix thumbnail_suffix = self._search_regex( r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", - webpage, 'thumbnail URL', fatal=False) + webpage, 'thumbnail URL', default=None) thumbnail = ( None if thumbnail_suffix is None - else prefix + thumbnail_suffix) + else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage) title = self._html_search_regex( - (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title', - r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'), - webpage, 'title') + (r"tituloVideo\s*=\s*'([^']+)'", + r'<h2 class="entry-header entry-title.*?>(.*?)</h2>', + r'<h1[^>]+class="titulo"[^>]*>([^<]+)'), + webpage, 'title', default=None) or self._og_search_title(webpage) upload_date = unified_strdate(self._search_regex( r'<p class="date-header date-int updated"\s+title="([^"]+)">', webpage, 'upload date', default=None) or self._html_search_meta( From a4a554a79354981fcab55de8eaab7b95a40bbb48 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Feb 2017 23:42:36 +0800 Subject: [PATCH 386/586] [generic] Try parsing JWPlayer embedded videos (closes #12030) --- ChangeLog | 6 ++ youtube_dl/extractor/archiveorg.py | 4 +- youtube_dl/extractor/common.py | 118 ++++++++++++++++++++ youtube_dl/extractor/generic.py | 20 ++++ youtube_dl/extractor/jwplatform.py | 132 +---------------------- youtube_dl/extractor/ondemandkorea.py | 4 +- youtube_dl/extractor/pornhub.py | 44 -------- youtube_dl/extractor/pornoxo.py | 4 +- youtube_dl/extractor/rentv.py | 3 +- youtube_dl/extractor/rudo.py | 4 +- youtube_dl/extractor/screencastomatic.py | 4 +- youtube_dl/extractor/sendtonews.py | 4 +- youtube_dl/extractor/thisav.py | 4 +- youtube_dl/extractor/tvnoe.py | 4 +- youtube_dl/extractor/vidzi.py | 4 +- youtube_dl/extractor/wimp.py | 4 +- 16 files changed, 166 insertions(+), 197 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8ef8a8307..4e69b03d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [generic] Support complex JWPlayer embedded videos (#12030) + + version 2017.02.16 Core diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 486dff82d..e21045bed 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( unified_strdate, clean_html, ) -class ArchiveOrgIE(JWPlatformBaseIE): +class ArchiveOrgIE(InfoExtractor): IE_NAME = 'archive.org' IE_DESC = 'archive.org videos' _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9681453ca..f6ff56eda 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -40,6 +40,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, int_or_none, + js_to_json, parse_iso8601, RegexNotFoundError, sanitize_filename, @@ -2073,6 +2074,123 @@ class InfoExtractor(object): }) return formats + @staticmethod + def _find_jwplayer_data(webpage): + mobj = re.search( + r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', + webpage) + if mobj: + return mobj.group('options') + + def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + jwplayer_data = self._parse_json( + self._find_jwplayer_data(webpage), video_id, + transform_source=js_to_json) + return self._parse_jwplayer_data( + jwplayer_data, video_id, *args, **kwargs) + + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + # JWPlayer backward compatibility: flattened playlists + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if 'playlist' not in jwplayer_data: + jwplayer_data = {'playlist': [jwplayer_data]} + + entries = [] + + # JWPlayer backward compatibility: single playlist item + # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], list): + jwplayer_data['playlist'] = [jwplayer_data['playlist']] + + for video_data in jwplayer_data['playlist']: + # JWPlayer backward compatibility: flattened sources + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 + if 'sources' not in video_data: + video_data['sources'] = [video_data] + + this_video_id = video_id or video_data['mediaid'] + + formats = [] + for source in video_data['sources']: + source_url = self._proto_relative_url(source['file']) + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + source_type = source.get('type') or '' + ext = mimetype2ext(source_type) or determine_ext(source_url) + if source_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, this_video_id, mpd_id=mpd_id, fatal=False)) + # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 + elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + formats.append({ + 'url': source_url, + 'vcodec': 'none', + 'ext': ext, + }) + else: + height = int_or_none(source.get('height')) + if height is None: + # Often no height is provided but there is a label in + # format like 1080p. + height = int_or_none(self._search_regex( + r'^(\d{3,})[pP]$', source.get('label') or '', + 'height', default=None)) + a_format = { + 'url': source_url, + 'width': int_or_none(source.get('width')), + 'height': height, + 'ext': ext, + } + if source_url.startswith('rtmp'): + a_format['ext'] = 'flv' + + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url_parts = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) + self._sort_formats(formats) + + subtitles = {} + tracks = video_data.get('tracks') + if tracks and isinstance(tracks, list): + for track in tracks: + if track.get('kind') != 'captions': + continue + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) + + entries.append({ + 'id': this_video_id, + 'title': video_data['title'] if require_title else video_data.get('title'), + 'description': video_data.get('description'), + 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'timestamp': int_or_none(video_data.get('pubdate')), + 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), + 'subtitles': subtitles, + 'formats': formats, + }) + if len(entries) == 1: + return entries[0] + else: + return self.playlist_result(entries) + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a2b0298ec..3db31debe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -20,6 +20,7 @@ from ..utils import ( float_or_none, HEADRequest, is_html, + js_to_json, orderedSet, sanitized_Request, smuggle_url, @@ -961,6 +962,16 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # Complex jwplayer + { + 'url': 'http://www.indiedb.com/games/king-machine/videos', + 'info_dict': { + 'id': 'videos', + 'ext': 'mp4', + 'title': 'king machine trailer 1', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -2488,6 +2499,15 @@ class GenericIE(InfoExtractor): self._sort_formats(entry['formats']) return self.playlist_result(entries) + jwplayer_data_str = self._find_jwplayer_data(webpage) + if jwplayer_data_str: + try: + jwplayer_data = self._parse_json( + jwplayer_data_str, video_id, transform_source=js_to_json) + return self._parse_jwplayer_data(jwplayer_data, video_id) + except ExtractorError: + pass + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index aff7ab49a..33d55f770 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,139 +4,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - js_to_json, - mimetype2ext, - urljoin, -) -class JWPlatformBaseIE(InfoExtractor): - @staticmethod - def _find_jwplayer_data(webpage): - # TODO: Merge this with JWPlayer-related codes in generic.py - - mobj = re.search( - r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', - webpage) - if mobj: - return mobj.group('options') - - def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): - jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id, - transform_source=js_to_json) - return self._parse_jwplayer_data( - jwplayer_data, video_id, *args, **kwargs) - - def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, - m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - - entries = [] - - # JWPlayer backward compatibility: single playlist item - # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] - - for video_data in jwplayer_data['playlist']: - # JWPlayer backward compatibility: flattened sources - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 - if 'sources' not in video_data: - video_data['sources'] = [video_data] - - this_video_id = video_id or video_data['mediaid'] - - formats = [] - for source in video_data['sources']: - source_url = self._proto_relative_url(source['file']) - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - source_url, this_video_id, mpd_id=mpd_id, fatal=False)) - # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): - formats.append({ - 'url': source_url, - 'vcodec': 'none', - 'ext': ext, - }) - else: - height = int_or_none(source.get('height')) - if height is None: - # Often no height is provided but there is a label in - # format like 1080p. - height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', - 'height', default=None)) - a_format = { - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': height, - 'ext': ext, - } - if source_url.startswith('rtmp'): - a_format['ext'] = 'flv' - - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as - # of jwplayer.flash.swf - rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) - if len(rtmp_url_parts) == 3: - rtmp_url, prefix, play_path = rtmp_url_parts - a_format.update({ - 'url': rtmp_url, - 'play_path': prefix + play_path, - }) - if rtmp_params: - a_format.update(rtmp_params) - formats.append(a_format) - self._sort_formats(formats) - - subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if track.get('kind') != 'captions': - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) - - entries.append({ - 'id': this_video_id, - 'title': video_data['title'] if require_title else video_data.get('title'), - 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), - 'timestamp': int_or_none(video_data.get('pubdate')), - 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), - 'subtitles': subtitles, - 'formats': formats, - }) - if len(entries) == 1: - return entries[0] - else: - return self.playlist_result(entries) - - -class JWPlatformIE(JWPlatformBaseIE): +class JWPlatformIE(InfoExtractor): _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _TEST = { 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index de1d6b08a..dcd157777 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, js_to_json, ) -class OnDemandKoreaIE(JWPlatformBaseIE): +class OnDemandKoreaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7a2737032..9b413590a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -169,50 +169,6 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - """ - video_variables = {} - for video_variablename, quote, video_variable in re.findall( - r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): - video_variables[video_variablename] = video_variable - - video_urls = [] - for encoded_video_url in re.findall( - r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage): - for varname, varval in video_variables.items(): - encoded_video_url = encoded_video_url.replace(varname, varval) - video_urls.append(re.sub(r'[\s+]', '', encoded_video_url)) - - if webpage.find('"encrypted":true') != -1: - password = compat_urllib_parse_unquote_plus( - self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) - video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) - - formats = [] - for video_url in video_urls: - path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] - format = path.split('/')[5].split('_')[:2] - format = '-'.join(format) - - m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format) - if m is None: - height = None - tbr = None - else: - height = int(m.group('height')) - tbr = int(m.group('tbr')) - - formats.append({ - 'url': video_url, - 'ext': extension, - 'format': format, - 'format_id': format, - 'tbr': tbr, - 'height': height, - }) - self._sort_formats(formats) - """ - page_params = self._parse_json(self._search_regex( r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', webpage, 'page parameters', group='data', default='{}'), diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 1a0cce7e0..2831368b6 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( str_to_int, ) -class PornoXOIE(JWPlatformBaseIE): +class PornoXOIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index 422c02cff..d338b3a93 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -2,11 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .jwplatform import JWPlatformBaseIE from ..compat import compat_str -class RENTVIE(JWPlatformBaseIE): +class RENTVIE(InfoExtractor): _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://ren.tv/video/epizod/118577', diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 3bfe934d8..51644011e 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( js_to_json, get_element_by_class, @@ -11,7 +11,7 @@ from ..utils import ( ) -class RudoIE(JWPlatformBaseIE): +class RudoIE(InfoExtractor): _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' _TEST = { diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py index 94a2a37d2..b5e76c9af 100644 --- a/youtube_dl/extractor/screencastomatic.py +++ b/youtube_dl/extractor/screencastomatic.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import js_to_json -class ScreencastOMaticIE(JWPlatformBaseIE): +class ScreencastOMaticIE(InfoExtractor): _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)' _TEST = { 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py index 9880a5a78..9d9652949 100644 --- a/youtube_dl/extractor/sendtonews.py +++ b/youtube_dl/extractor/sendtonews.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( float_or_none, parse_iso8601, @@ -14,7 +14,7 @@ from ..utils import ( ) -class SendtoNewsIE(JWPlatformBaseIE): +class SendtoNewsIE(InfoExtractor): _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)' _TEST = { diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 4473a3c77..b7b3568cb 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import remove_end -class ThisAVIE(JWPlatformBaseIE): +class ThisAVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _TESTS = [{ 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index 6d5c74826..1a5b76bf2 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( clean_html, get_element_by_class, @@ -9,7 +9,7 @@ from ..utils import ( ) -class TVNoeIE(JWPlatformBaseIE): +class TVNoeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.tvnoe.cz/video/10362', diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 9950c62ad..1f1828fce 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( decode_packed_codes, js_to_json, @@ -12,7 +12,7 @@ from ..utils import ( ) -class VidziIE(JWPlatformBaseIE): +class VidziIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://vidzi.tv/cghql9yq6emu.html', diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 54eb51427..c022fb33e 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals +from .common import InfoExtractor from .youtube import YoutubeIE -from .jwplatform import JWPlatformBaseIE -class WimpIE(JWPlatformBaseIE): +class WimpIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.wimp.com/maru-is-exhausted/', From 4cead6a614b5a293e78dce5cd5eda7476f83985d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 22:02:01 +0700 Subject: [PATCH 387/586] [einthusan] Relax _VALID_URL (closes #12141, closes #12159) --- youtube_dl/extractor/einthusan.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 8a2a17b63..3f6268637 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -18,8 +18,8 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', 'info_dict': { @@ -29,7 +29,10 @@ class EinthusanIE(InfoExtractor): 'description': 'md5:33ef934c82a671a94652a9b4e54d931b', 'thumbnail': r're:^https?://.*\.jpg$', } - } + }, { + 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', + 'only_matching': True, + }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js def _decrypt(self, encrypted_data, video_id): From fef51645d6c224f898ff6f44d041a458d21e8547 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 17 Feb 2017 23:13:51 +0800 Subject: [PATCH 388/586] [theplatform] Recognize URLs with whitespaces (closes #12044) --- ChangeLog | 1 + youtube_dl/extractor/generic.py | 7 ++++++- youtube_dl/extractor/theplatform.py | 6 ++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4e69b03d0..d5fe3dd5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [theplatform] Recognize URLs with whitespaces (#12044) + [generic] Support complex JWPlayer embedded videos (#12030) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3db31debe..9868ca6d0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1501,7 +1501,12 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [VideoPressIE.ie_key()], - } + }, + { + # ThePlatform embedded with whitespaces in URLs + 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', + 'only_matching': True, + }, # { # # TODO: find another test # # http://schema.org/VideoObject diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 5c5987c6a..9a424b1c6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): if m: return [m.group('url')] + # Are whitesapces ignored in URLs? + # https://github.com/rg3/youtube-dl/issues/12044 matches = re.findall( - r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) if matches: - return list(zip(*matches))[1] + return [re.sub(r'\s', '', list(zip(*matches))[1][0])] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): From d94badc755228ee3159b9b499aa718d27fa472ed Mon Sep 17 00:00:00 2001 From: Vijay Singh <sudovijay@users.noreply.github.com> Date: Tue, 7 Feb 2017 10:32:45 +0530 Subject: [PATCH 389/586] [openload] Semifix extraction (closes #10408) just updated the code. i don't do much python still i tried to convert my code. lemme know if there is any prob with it --- youtube_dl/extractor/openload.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32289d897..bd1120fd8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,17 +75,20 @@ class OpenloadIE(InfoExtractor): '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>', webpage, 'openload ID') - first_three_chars = int(float(ol_id[0:][:3])) - fifth_char = int(float(ol_id[3:5])) - urlcode = '' - num = 5 + first_two_chars = int(float(ol_id[0:][:2])) + urlcode = {} + num = 2 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) + - first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) + key = int(float(ol_id[num + 3:][:2])) + urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) num += 5 + + sorted(urlcode, key=lambda key: urlcode[key]) - video_url = 'https://openload.co/stream/' + urlcode + urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) + + video_url = 'https://openload.co/stream/' + urllink title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From 90fad0e74cd8079246c5f3d8150650b5f65f998b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 22:31:16 +0700 Subject: [PATCH 390/586] [openload] Fix extraction (closes #12002) --- youtube_dl/extractor/openload.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index bd1120fd8..10896c442 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -76,19 +76,16 @@ class OpenloadIE(InfoExtractor): webpage, 'openload ID') first_two_chars = int(float(ol_id[0:][:2])) - urlcode = {} + urlcode = [] num = 2 while num < len(ol_id): key = int(float(ol_id[num + 3:][:2])) - urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) + urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) num += 5 - - sorted(urlcode, key=lambda key: urlcode[key]) - urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) - - video_url = 'https://openload.co/stream/' + urllink + video_url = 'https://openload.co/stream/' + ''.join( + [value for _, value in sorted(urlcode, key=lambda x: x[0])]) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From c2bde5d08163ce46548ea60333750a0a74a8fe44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <TRox1972@noreply.github.com> Date: Mon, 9 Jan 2017 18:22:53 +0100 Subject: [PATCH 391/586] [ellentv] Improve --- youtube_dl/extractor/ellentv.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 74bbc5c51..e0a13dd76 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -1,13 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import json - from .common import InfoExtractor -from ..utils import ( - ExtractorError, - NO_DEFAULT, -) +from .kaltura import KalturaIE +from ..utils import NO_DEFAULT class EllenTVIE(InfoExtractor): @@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor): if partner_id and kaltura_id: break - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) class EllenTVClipsIE(InfoExtractor): @@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor): 'id': 'meryl-streep-vanessa-hudgens', 'title': 'Meryl Streep, Vanessa Hudgens', }, - 'playlist_mincount': 7, + 'playlist_mincount': 5, } def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - playlist = self._extract_playlist(webpage) + playlist = self._extract_playlist(webpage, playlist_id) return { '_type': 'playlist', @@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor): 'entries': self._extract_entries(playlist) } - def _extract_playlist(self, webpage): + def _extract_playlist(self, webpage, playlist_id): json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json') - try: - return json.loads('[{' + json_string + '}]') - except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + return self._parse_json('[{' + json_string + '}]', playlist_id) def _extract_entries(self, playlist): return [ self.url_result( 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']), - 'Kaltura') + KalturaIE.ie_key(), video_id=item['kaltura_entry_id']) for item in playlist] From db76c30c6ecb5d198a72f1807163c9b69771bba1 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher <tobias-git@23.gs> Date: Tue, 7 Jun 2016 23:42:56 +0200 Subject: [PATCH 392/586] [heise] Support videos embedded in any article. --- youtube_dl/extractor/heise.py | 119 +++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 1629cdb8d..a5ec0fae9 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -11,54 +11,115 @@ from ..utils import ( class HeiseIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?:www\.)?heise\.de/video/artikel/ - .+?(?P<id>[0-9]+)\.html(?:$|[?#]) + https?://(?:www\.)?heise\.de/.+?(?P<id>[0-9]+)\.html(?:$|[?#]) ''' - _TEST = { - 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404147', - 'ext': 'mp4', - 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" + _TESTS = [ + { + 'url': ( + 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' ), - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': r're:^https?://.*\.jpe?g$', - } - } + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': ( + "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411812600, + 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2403911', + 'ext': 'mp4', + 'title': ( + "c't uplink 3.3: Owncloud, Tastaturen, Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411803000, + 'upload_date': '20140927', + 'description': "In c't uplink erklären wir in dieser Woche, wie man mit Owncloud die Kontrolle über die eigenen Daten behält. Darüber hinaus erklären wir, dass zur Wahl der richtigen Tastatur mehr gehört, als man denkt und wie Smartphones uns weiter verraten.", + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404251', + 'ext': 'mp4', + 'title': ( + "c't uplink: Owncloud, Tastaturen, Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411811400, + 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 sprechen wir über Owncloud und wie man sich damit von Cloudanbietern emanzipieren kann. Außerdem erklären wir, woran man alles beim Kauf einer Tastatur denken sollte und was Smartphones nun über uns verraten.', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html' + ), + 'md5': '0616c9297d9c989f9b2a23b483b408c3', + 'info_dict': { + 'id': '3214137', + 'ext': 'mp4', + 'title': ( + "c\u2019t zockt \u201eGlitchspace\u201c, \u201eThe Mind's Eclipse\u201c und \u201eWindowframe\u201c." + ), + 'format_id': 'mp4_720p', + 'timestamp': 1464011220, + 'upload_date': '20160523', + 'description': "Unsere Spiele-Tipps der Woche: Das Puzzle-Adventure Glitchspace, das Jump&Run-Spiel Windowframe und The Mind's Eclipse", + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + + ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) container_id = self._search_regex( - r'<div class="videoplayerjw".*?data-container="([0-9]+)"', + r'<div class="videoplayerjw"[^>]*data-container="([0-9]+)"', webpage, 'container ID') sequenz_id = self._search_regex( - r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"', + r'<div class="videoplayerjw"[^>]*data-sequenz="([0-9]+)"', webpage, 'sequenz ID') data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) doc = self._download_xml(data_url, video_id) info = { 'id': video_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': doc.find('.//{http://rss.jwpcdn.com/}image').text, 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)), - 'description': self._og_search_description(webpage), + self._html_search_meta('date', webpage)) } - title = self._html_search_meta('fulltitle', webpage) - if title: - info['title'] = title - else: - info['title'] = self._og_search_title(webpage) + title = self._html_search_meta('fulltitle', webpage, default=None) + if not title or title == "c't": + title = self._search_regex( + r'<div class="videoplayerjw"[^>]*data-title="([^"]+)"', + webpage, 'video title') + info['title'] = title + + desc = self._og_search_description(webpage, default=None) + if not desc: + desc = self._html_search_meta('description', webpage) + info['description'] = desc formats = [] for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): From bad4ccdb5db7c00865d433558ddfcdfdbd499343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:09:40 +0700 Subject: [PATCH 393/586] [heise] Improve (closes #9725) --- youtube_dl/extractor/heise.py | 146 +++++++++++----------------------- 1 file changed, 48 insertions(+), 98 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index a5ec0fae9..382f32771 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -6,120 +6,58 @@ from ..utils import ( determine_ext, int_or_none, parse_iso8601, + xpath_text, ) class HeiseIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?heise\.de/.+?(?P<id>[0-9]+)\.html(?:$|[?#]) - ''' - _TESTS = [ - { - 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404147', - 'ext': 'mp4', - 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2403911', - 'ext': 'mp4', - 'title': ( - "c't uplink 3.3: Owncloud, Tastaturen, Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411803000, - 'upload_date': '20140927', - 'description': "In c't uplink erklären wir in dieser Woche, wie man mit Owncloud die Kontrolle über die eigenen Daten behält. Darüber hinaus erklären wir, dass zur Wahl der richtigen Tastatur mehr gehört, als man denkt und wie Smartphones uns weiter verraten.", - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404251', - 'ext': 'mp4', - 'title': ( - "c't uplink: Owncloud, Tastaturen, Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411811400, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 sprechen wir über Owncloud und wie man sich damit von Cloudanbietern emanzipieren kann. Außerdem erklären wir, woran man alles beim Kauf einer Tastatur denken sollte und was Smartphones nun über uns verraten.', - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html' - ), - 'md5': '0616c9297d9c989f9b2a23b483b408c3', - 'info_dict': { - 'id': '3214137', - 'ext': 'mp4', - 'title': ( - "c\u2019t zockt \u201eGlitchspace\u201c, \u201eThe Mind's Eclipse\u201c und \u201eWindowframe\u201c." - ), - 'format_id': 'mp4_720p', - 'timestamp': 1464011220, - 'upload_date': '20160523', - 'description': "Unsere Spiele-Tipps der Woche: Das Puzzle-Adventure Glitchspace, das Jump&Run-Spiel Windowframe und The Mind's Eclipse", - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - - ] + _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' + _TESTS = [{ + 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone", + 'format_id': 'mp4_720p', + 'timestamp': 1411812600, + 'upload_date': '20140927', + 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, { + 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', + 'only_matching': True, + }, { + 'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom', + 'only_matching': True, + }, { + 'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) container_id = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-container="([0-9]+)"', + r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') sequenz_id = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-sequenz="([0-9]+)"', + r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) - doc = self._download_xml(data_url, video_id) - - info = { - 'id': video_id, - 'thumbnail': doc.find('.//{http://rss.jwpcdn.com/}image').text, - 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)) - } title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": title = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-title="([^"]+)"', - webpage, 'video title') - info['title'] = title + r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') - desc = self._og_search_description(webpage, default=None) - if not desc: - desc = self._html_search_meta('description', webpage) - info['description'] = desc + doc = self._download_xml( + 'http://www.heise.de/videout/feed', video_id, query={ + 'container': container_id, + 'sequenz': sequenz_id, + }) formats = [] for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): @@ -135,6 +73,18 @@ class HeiseIE(InfoExtractor): 'height': height, }) self._sort_formats(formats) - info['formats'] = formats - return info + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or + self._og_search_thumbnail(webpage)), + 'timestamp': parse_iso8601( + self._html_search_meta('date', webpage)), + 'formats': formats, + } From 2c1f442c2bb4de65479f2e6c2f81c5741445184e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:18:26 +0700 Subject: [PATCH 394/586] [options] Add missing spaces --- youtube_dl/options.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 349f44778..2fea99ff2 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -298,14 +298,14 @@ def parseOpts(overrideArguments=None): metavar='FILTER', dest='match_filter', default=None, help=( 'Generic video filter. ' - 'Specify any key (see help for -o for a list of available keys) to' - ' match if the key is present, ' - '!key to check if the key is not present,' + 'Specify any key (see help for -o for a list of available keys) to ' + 'match if the key is present, ' + '!key to check if the key is not present, ' 'key > NUMBER (like "comment_count > 12", also works with ' '>=, <, <=, !=, =) to compare against a number, and ' '& to require multiple matches. ' - 'Values which are not known are excluded unless you' - ' put a question mark (?) after the operator.' + 'Values which are not known are excluded unless you ' + 'put a question mark (?) after the operator. ' 'For example, to only match videos that have been liked more than ' '100 times and disliked less than 50 times (or the dislike ' 'functionality is not available at the given service), but who ' From cf3704c132800809caacc6ce89afa87f0dfae487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:47:54 +0700 Subject: [PATCH 395/586] [ChangeLog] Actualize --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index d5fe3dd5b..00ee0a5a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,13 @@ version <unreleased> Extractors +* [heise] Improve extraction (#9725) +* [ellentv] Improve (#11653) +* [openload] Fix extraction (#10408, #12002) + [theplatform] Recognize URLs with whitespaces (#12044) +* [einthusan] Relax URL regular expression (#12141, #12159) + [generic] Support complex JWPlayer embedded videos (#12030) +* [elpais] Improve extraction (#12139) version 2017.02.16 From 28e35f50702a8841b4caf072a546ff06ca63db96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:59:56 +0700 Subject: [PATCH 396/586] release 2017.02.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 06711f73b..6f1361b32 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.16 +[debug] youtube-dl version 2017.02.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 00ee0a5a9..2c90f791d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.17 Extractors * [heise] Improve extraction (#9725) diff --git a/README.md b/README.md index 89876bd7a..c2a1a6b02 100644 --- a/README.md +++ b/README.md @@ -137,13 +137,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --match-filter FILTER Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to - check if the key is not present,key > + check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) - after the operator.For example, to only + after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 323e80954..530e1856b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.16' +__version__ = '2017.02.17' From 70bcc444a990ee9ca3daab6f3dc2d5d58a948ba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 18 Feb 2017 09:52:43 +0100 Subject: [PATCH 397/586] [viceland] improve info extraction and update test --- youtube_dl/extractor/vice.py | 6 +++--- youtube_dl/extractor/viceland.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 8a00c8fee..f0a7fd739 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -70,10 +70,10 @@ class ViceBaseIE(AdobePassIE): 'url': uplynk_preplay_url, 'id': video_id, 'title': title, - 'description': base.get('body'), + 'description': base.get('body') or base.get('display_body'), 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), - 'timestamp': int_or_none(video_data.get('created_at')), + 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating')), 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 0eff055a6..87f9216b5 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -7,16 +7,16 @@ from .vice import ViceBaseIE class VicelandIE(ViceBaseIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)' _TEST = { - 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', + 'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316', 'info_dict': { - 'id': '57608447973ee7705f6fbd4e', + 'id': '588a70d0dba8a16007de7316', 'ext': 'mp4', - 'title': 'CYBERWAR (Trailer)', - 'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.', + 'title': 'TRAPPED (Series Trailer)', + 'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf', 'age_limit': 14, - 'timestamp': 1466008539, - 'upload_date': '20160615', - 'uploader_id': '11', + 'timestamp': 1485474122, + 'upload_date': '20170126', + 'uploader_id': '57a204098cb727dec794c6a3', 'uploader': 'Viceland', }, 'params': { From bdabbc220c60ea6be50c9b1058405b636f70fb71 Mon Sep 17 00:00:00 2001 From: Alex Monk <krenair@gmail.com> Date: Wed, 17 Aug 2016 21:13:28 +0100 Subject: [PATCH 398/586] [metacafe] Bypass family filter If you don't send this user=ffilter: false cookie, it will 301 redirect you to a page asking about it, and then the title check will fail. --- youtube_dl/extractor/metacafe.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 9880924e6..adbd44fd1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -50,6 +50,18 @@ class MetacafeIE(InfoExtractor): }, 'skip': 'Page is temporarily unavailable.', }, + # metacafe video with family filter + { + 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/', + 'md5': 'b06082c5079bbdcde677a6291fbdf376', + 'info_dict': { + 'id': '2155630', + 'ext': 'mp4', + 'title': 'Adult Art By David Hart #156', + 'uploader': 'hartistry', + 'description': 'Adult Art By David Hart. All the Art Works presented here are not in the possession of the American Artist, David John Hart. The paintings are in collections worldwide of individuals, countries, art museums, foundations and charities.', + } + }, # AnyClip video { 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/', @@ -148,8 +160,9 @@ class MetacafeIE(InfoExtractor): # AnyClip videos require the flashversion cookie so that we get the link # to the mp4 file headers = {} + headers['Cookie'] = 'user=%7B%22ffilter%22%3Afalse%7D;'; if video_id.startswith('an-'): - headers['Cookie'] = 'flashVersion=0;' + headers['Cookie'] += ' flashVersion=0;' # Retrieve video webpage to extract further information webpage = self._download_webpage(url, video_id, headers=headers) From f75caf059eb7a1a156921124cbf4b720fea526e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 19:58:25 +0700 Subject: [PATCH 399/586] [metacafe] Improve (closes #10371) --- youtube_dl/extractor/metacafe.py | 38 +++++++++++--------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index adbd44fd1..28f59f63c 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -6,12 +6,12 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, ExtractorError, int_or_none, - urlencode_postdata, get_element_by_attribute, mimetype2ext, ) @@ -57,10 +57,13 @@ class MetacafeIE(InfoExtractor): 'info_dict': { 'id': '2155630', 'ext': 'mp4', - 'title': 'Adult Art By David Hart #156', - 'uploader': 'hartistry', - 'description': 'Adult Art By David Hart. All the Art Works presented here are not in the possession of the American Artist, David John Hart. The paintings are in collections worldwide of individuals, countries, art museums, foundations and charities.', - } + 'title': 'Adult Art By David Hart 156', + 'uploader': '63346', + 'description': 'md5:9afac8fc885252201ad14563694040fc', + }, + 'params': { + 'skip_download': True, + }, }, # AnyClip video { @@ -124,22 +127,6 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): self.to_screen('Retrieving disclaimer') - def _confirm_age(self): - # Retrieve disclaimer - self.report_disclaimer() - self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer') - - # Confirm age - self.report_age_confirmation() - self._download_webpage( - self._FILTER_POST, None, False, 'Unable to confirm age', - data=urlencode_postdata({ - 'filters': '0', - 'submit': "Continue - I'm over 18", - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - def _real_extract(self, url): # Extract id and simplified title from URL video_id, display_id = re.match(self._VALID_URL, url).groups() @@ -155,14 +142,15 @@ class MetacafeIE(InfoExtractor): if prefix == 'cb': return self.url_result('theplatform:%s' % ext_id, 'ThePlatform') - # self._confirm_age() + headers = { + # Disable family filter + 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False}) + } # AnyClip videos require the flashversion cookie so that we get the link # to the mp4 file - headers = {} - headers['Cookie'] = 'user=%7B%22ffilter%22%3Afalse%7D;'; if video_id.startswith('an-'): - headers['Cookie'] += ' flashVersion=0;' + headers['Cookie'] += 'flashVersion=0; ' # Retrieve video webpage to extract further information webpage = self._download_webpage(url, video_id, headers=headers) From a2e3286676606103601f9499154ad465037314d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 20:21:37 +0700 Subject: [PATCH 400/586] [thisav] Add support for html5 media (closes #11771) --- youtube_dl/extractor/thisav.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index b7b3568cb..33683b139 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -10,6 +10,7 @@ from ..utils import remove_end class ThisAVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _TESTS = [{ + # jwplayer 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', 'md5': '0480f1ef3932d901f0e0e719f188f19b', 'info_dict': { @@ -20,6 +21,7 @@ class ThisAVIE(InfoExtractor): 'uploader_id': 'dj7970' } }, { + # html5 media 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html', 'md5': 'ba90c076bd0f80203679e5b60bf523ee', 'info_dict': { @@ -48,8 +50,12 @@ class ThisAVIE(InfoExtractor): }], } else: - info_dict = self._extract_jwplayer_data( - webpage, video_id, require_title=False) + entries = self._parse_html5_media_entries(url, webpage, video_id) + if entries: + info_dict = entries[0] + else: + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) uploader = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', webpage, 'uploader name', fatal=False) From 02d9b82a233abcb778f3f8601b229f996fd7df94 Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Wed, 11 Jan 2017 18:49:40 +0100 Subject: [PATCH 401/586] [tvn24] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvn24.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/tvn24.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index be3688d5a..55b4782d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1009,6 +1009,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py new file mode 100644 index 000000000..225ee4a6a --- /dev/null +++ b/youtube_dl/extractor/tvn24.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TVN24IE(InfoExtractor): + _VALID_URL = r'http://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html' + _TEST = { + 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', + 'md5': 'fbdec753d7bc29d96036808275f2130c', + 'info_dict': { + 'id': '1584444', + 'ext': 'mp4', + 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'thumbnail': 're:http://.*[.]jpeg', + } + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._html_search_regex(r'\bdata-poster="(.+?)"', webpage, 'data-poster') + share_params = self._html_search_regex(r'\bdata-share-params="(.+?)"', webpage, 'data-share-params') + share_params = self._parse_json(share_params, page_id) + video_id = share_params['id'] + quality_data = self._html_search_regex(r'\bdata-quality="(.+?)"', webpage, 'data-quality') + quality_data = self._parse_json(quality_data, page_id) + formats = [] + for format_id, url in quality_data.items(): + formats.append({ + 'format_id': format_id, + 'height': int(format_id.rstrip('p')), + 'url': url, + 'ext': 'mp4', + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } From e84888b4322abd2e2a74e8a89b7942a68dd0b6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 23:34:09 +0700 Subject: [PATCH 402/586] [tvn24] Improve extraction (closes #11679) --- youtube_dl/extractor/tvn24.py | 59 ++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 225ee4a6a..12ed6039c 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + unescapeHTML, +) class TVN24IE(InfoExtractor): - _VALID_URL = r'http://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html' - _TEST = { + _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html' + _TESTS = [{ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'md5': 'fbdec753d7bc29d96036808275f2130c', 'info_dict': { @@ -16,28 +20,53 @@ class TVN24IE(InfoExtractor): 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', 'thumbnail': 're:http://.*[.]jpeg', } - } + }, { + 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', + 'only_matching': True, + }, { + 'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html', + 'only_matching': True, + }, { + 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html', + 'only_matching': True, + }] def _real_extract(self, url): - page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._html_search_regex(r'\bdata-poster="(.+?)"', webpage, 'data-poster') - share_params = self._html_search_regex(r'\bdata-share-params="(.+?)"', webpage, 'data-share-params') - share_params = self._parse_json(share_params, page_id) - video_id = share_params['id'] - quality_data = self._html_search_regex(r'\bdata-quality="(.+?)"', webpage, 'data-quality') - quality_data = self._parse_json(quality_data, page_id) + + def extract_json(attr, name, fatal=True): + return self._parse_json( + self._search_regex( + r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage, + name, group='json', fatal=fatal) or '{}', + video_id, transform_source=unescapeHTML, fatal=fatal) + + quality_data = extract_json('data-quality', 'formats') + formats = [] for format_id, url in quality_data.items(): formats.append({ - 'format_id': format_id, - 'height': int(format_id.rstrip('p')), 'url': url, - 'ext': 'mp4', + 'format_id': format_id, + 'height': int_or_none(format_id.rstrip('p')), }) self._sort_formats(formats) + + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_regex( + r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage, + 'thumbnail', group='url') + + share_params = extract_json( + 'data-share-params', 'share params', fatal=False) + if isinstance(share_params, dict): + video_id = share_params.get('id') or video_id + return { 'id': video_id, 'title': title, From ac33accd96279ee541952aaa4f0bb72b4f76b9ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 23:59:26 +0700 Subject: [PATCH 403/586] [options] Mention quoted string literals for --match-filter --- youtube_dl/options.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2fea99ff2..deff54324 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -302,8 +302,10 @@ def parseOpts(overrideArguments=None): 'match if the key is present, ' '!key to check if the key is not present, ' 'key > NUMBER (like "comment_count > 12", also works with ' - '>=, <, <=, !=, =) to compare against a number, and ' - '& to require multiple matches. ' + '>=, <, <=, !=, =) to compare against a number, ' + 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) ' + 'to match against a string literal ' + 'and & to require multiple matches. ' 'Values which are not known are excluded unless you ' 'put a question mark (?) after the operator. ' 'For example, to only match videos that have been liked more than ' From 049a0f4d6da55f4062658da7593363147c92f4a8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 18 Feb 2017 21:07:09 +0100 Subject: [PATCH 404/586] [brightcove:legacy] restrict videoPlayer value(closes #12040) --- youtube_dl/extractor/brightcove.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5c6e99da1..27685eed0 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -191,6 +191,10 @@ class BrightcoveLegacyIE(InfoExtractor): # These fields hold the id of the video videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') if videoPlayer is not None: + if isinstance(videoPlayer, list): + videoPlayer = videoPlayer[0] + if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')): + return None params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') if linkBase is not None: From bf5b9d859a1f2a68fda0dc57eb839448c7571dfa Mon Sep 17 00:00:00 2001 From: Pierre Mdawar <p.mdawar@gmail.com> Date: Mon, 17 Oct 2016 14:38:37 +0300 Subject: [PATCH 405/586] [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions --- youtube_dl/utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 07c07be6f..3f9e592e3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -701,7 +701,12 @@ def bug_reports_message(): return msg -class ExtractorError(Exception): +class YoutubeDLError(Exception): + """Base exception for YoutubeDL errors.""" + pass + + +class ExtractorError(YoutubeDLError): """Error during info extraction.""" def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): @@ -742,7 +747,7 @@ class RegexNotFoundError(ExtractorError): pass -class DownloadError(Exception): +class DownloadError(YoutubeDLError): """Download Error exception. This exception may be thrown by FileDownloader objects if they are not @@ -756,7 +761,7 @@ class DownloadError(Exception): self.exc_info = exc_info -class SameFileError(Exception): +class SameFileError(YoutubeDLError): """Same File exception. This exception will be thrown by FileDownloader objects if they detect @@ -765,7 +770,7 @@ class SameFileError(Exception): pass -class PostProcessingError(Exception): +class PostProcessingError(YoutubeDLError): """Post Processing exception. This exception may be raised by PostProcessor's .run() method to @@ -773,15 +778,16 @@ class PostProcessingError(Exception): """ def __init__(self, msg): + super(PostProcessingError, self).__init__(msg) self.msg = msg -class MaxDownloadsReached(Exception): +class MaxDownloadsReached(YoutubeDLError): """ --max-downloads limit has been reached. """ pass -class UnavailableVideoError(Exception): +class UnavailableVideoError(YoutubeDLError): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -790,7 +796,7 @@ class UnavailableVideoError(Exception): pass -class ContentTooShortError(Exception): +class ContentTooShortError(YoutubeDLError): """Content Too Short exception. This exception may be raised by FileDownloader objects when a file they @@ -799,12 +805,15 @@ class ContentTooShortError(Exception): """ def __init__(self, downloaded, expected): + super(ContentTooShortError, self).__init__( + 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected) + ) # Both in bytes self.downloaded = downloaded self.expected = expected -class XAttrMetadataError(Exception): +class XAttrMetadataError(YoutubeDLError): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code @@ -820,7 +829,7 @@ class XAttrMetadataError(Exception): self.reason = 'NOT_SUPPORTED' -class XAttrUnavailableError(Exception): +class XAttrUnavailableError(YoutubeDLError): pass From 773f291dcbce486fefe24e1abd29735d374d0a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:49:58 +0700 Subject: [PATCH 406/586] Add experimental geo restriction bypass mechanism Based on faking X-Forwarded-For HTTP header --- youtube_dl/YoutubeDL.py | 17 +++ youtube_dl/__init__.py | 2 + youtube_dl/extractor/common.py | 48 +++++- youtube_dl/options.py | 12 ++ youtube_dl/utils.py | 267 +++++++++++++++++++++++++++++++++ 5 files changed, 340 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a7bf5a1b0..ebace6b57 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -56,6 +56,8 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, + GeoRestrictedError, + ISO3166Utils, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -272,6 +274,13 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. + bypass_geo_restriction: + Bypass geographic restriction via faking X-Forwarded-For + HTTP header (experimental) + bypass_geo_restriction_as_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing via faking + X-Forwarded-For HTTP header (experimental) The following options determine which downloader is picked: external_downloader: Executable of the external downloader to call. @@ -707,6 +716,14 @@ class YoutubeDL(object): return self.process_ie_result(ie_result, download, extra_info) else: return ie_result + except GeoRestrictedError as e: + msg = e.msg + if e.countries: + msg += '\nThis video is available in %s.' % ', '.join( + map(ISO3166Utils.short2full, e.countries)) + msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' + self.report_error(msg) + break except ExtractorError as e: # An error we somewhat expected self.report_error(compat_str(e), e.format_traceback()) break diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5c5b8094b..94f461a78 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -414,6 +414,8 @@ def _real_main(argv=None): 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'config_location': opts.config_location, + 'bypass_geo_restriction': opts.bypass_geo_restriction, + 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f6ff56eda..96815099d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -6,6 +6,7 @@ import hashlib import json import netrc import os +import random import re import socket import sys @@ -39,6 +40,8 @@ from ..utils import ( ExtractorError, fix_xml_ampersands, float_or_none, + GeoRestrictedError, + GeoUtils, int_or_none, js_to_json, parse_iso8601, @@ -320,17 +323,25 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + _BYPASS_GEO attribute may be set to False in order to disable + geo restriction bypass mechanisms for a particular extractor. + Though it won't disable explicit geo restriction bypass based on + country code provided with bypass_geo_restriction_as_country. + Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ _ready = False _downloader = None + _x_forwarded_for_ip = None + _BYPASS_GEO = True _WORKING = True def __init__(self, downloader=None): """Constructor. Receives an optional downloader.""" self._ready = False + self._x_forwarded_for_ip = None self.set_downloader(downloader) @classmethod @@ -359,6 +370,10 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" + if not self._x_forwarded_for_ip: + country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) + if country_code: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if not self._ready: self._real_initialize() self._ready = True @@ -366,8 +381,22 @@ class InfoExtractor(object): def extract(self, url): """Extracts URL information and returns it in list of dicts.""" try: - self.initialize() - return self._real_extract(url) + for _ in range(2): + try: + self.initialize() + return self._real_extract(url) + except GeoRestrictedError as e: + if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and + self._BYPASS_GEO and + self._downloader.params.get('bypass_geo_restriction', True) and + not self._x_forwarded_for_ip and + e.countries): + self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + continue + raise except ExtractorError: raise except compat_http_client.IncompleteRead as e: @@ -434,6 +463,15 @@ class InfoExtractor(object): if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] + # Some sites check X-Forwarded-For HTTP header in order to figure out + # the origin of the client behind proxy. This allows bypassing geo + # restriction by faking this header's value to IP that belongs to some + # geo unrestricted country. We will do so once we encounter any + # geo restriction error. + if self._x_forwarded_for_ip: + if 'X-Forwarded-For' not in headers: + headers['X-Forwarded-For'] = self._x_forwarded_for_ip + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) if urlh is False: assert not fatal @@ -609,10 +647,8 @@ class InfoExtractor(object): expected=True) @staticmethod - def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): - raise ExtractorError( - '%s. You might want to use --proxy to workaround.' % msg, - expected=True) + def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None): + raise GeoRestrictedError(msg, countries=countries) # Methods for following #608 @staticmethod diff --git a/youtube_dl/options.py b/youtube_dl/options.py index deff54324..2e194f6dc 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -549,6 +549,18 @@ def parseOpts(overrideArguments=None): 'Upper bound of a range for randomized sleep before each download ' '(maximum possible number of seconds to sleep). Must only be used ' 'along with --min-sleep-interval.')) + workarounds.add_option( + '--bypass-geo', + action='store_true', dest='bypass_geo_restriction', default=True, + help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + workarounds.add_option( + '--no-bypass-geo', + action='store_false', dest='bypass_geo_restriction', default=True, + help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + workarounds.add_option( + '--bypass-geo-as-country', metavar='CODE', + dest='bypass_geo_restriction_as_country', default=None, + help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3f9e592e3..4e76b6b7b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -23,6 +23,7 @@ import operator import os import pipes import platform +import random import re import socket import ssl @@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError): pass +class GeoRestrictedError(ExtractorError): + """Geographic restriction Error exception. + + This exception may be thrown when a video is not available from your + geographic location due to geographic restrictions imposed by a website. + """ + def __init__(self, msg, countries=None): + super(GeoRestrictedError, self).__init__(msg, expected=True) + self.msg = msg + self.countries = countries + + class DownloadError(YoutubeDLError): """Download Error exception. @@ -3027,6 +3040,260 @@ class ISO3166Utils(object): return cls._country_map.get(code.upper()) +class GeoUtils(object): + # Major IPv4 address blocks per country + _country_ip_map = { + 'AD': '85.94.160.0/19', + 'AE': '94.200.0.0/13', + 'AF': '149.54.0.0/17', + 'AG': '209.59.64.0/18', + 'AI': '204.14.248.0/21', + 'AL': '46.99.0.0/16', + 'AM': '46.70.0.0/15', + 'AO': '105.168.0.0/13', + 'AP': '159.117.192.0/21', + 'AR': '181.0.0.0/12', + 'AS': '202.70.112.0/20', + 'AT': '84.112.0.0/13', + 'AU': '1.128.0.0/11', + 'AW': '181.41.0.0/18', + 'AZ': '5.191.0.0/16', + 'BA': '31.176.128.0/17', + 'BB': '65.48.128.0/17', + 'BD': '114.130.0.0/16', + 'BE': '57.0.0.0/8', + 'BF': '129.45.128.0/17', + 'BG': '95.42.0.0/15', + 'BH': '37.131.0.0/17', + 'BI': '154.117.192.0/18', + 'BJ': '137.255.0.0/16', + 'BL': '192.131.134.0/24', + 'BM': '196.12.64.0/18', + 'BN': '156.31.0.0/16', + 'BO': '161.56.0.0/16', + 'BQ': '161.0.80.0/20', + 'BR': '152.240.0.0/12', + 'BS': '24.51.64.0/18', + 'BT': '119.2.96.0/19', + 'BW': '168.167.0.0/16', + 'BY': '178.120.0.0/13', + 'BZ': '179.42.192.0/18', + 'CA': '99.224.0.0/11', + 'CD': '41.243.0.0/16', + 'CF': '196.32.200.0/21', + 'CG': '197.214.128.0/17', + 'CH': '85.0.0.0/13', + 'CI': '154.232.0.0/14', + 'CK': '202.65.32.0/19', + 'CL': '152.172.0.0/14', + 'CM': '165.210.0.0/15', + 'CN': '36.128.0.0/10', + 'CO': '181.240.0.0/12', + 'CR': '201.192.0.0/12', + 'CU': '152.206.0.0/15', + 'CV': '165.90.96.0/19', + 'CW': '190.88.128.0/17', + 'CY': '46.198.0.0/15', + 'CZ': '88.100.0.0/14', + 'DE': '53.0.0.0/8', + 'DJ': '197.241.0.0/17', + 'DK': '87.48.0.0/12', + 'DM': '192.243.48.0/20', + 'DO': '152.166.0.0/15', + 'DZ': '41.96.0.0/12', + 'EC': '186.68.0.0/15', + 'EE': '90.190.0.0/15', + 'EG': '156.160.0.0/11', + 'ER': '196.200.96.0/20', + 'ES': '88.0.0.0/11', + 'ET': '196.188.0.0/14', + 'EU': '2.16.0.0/13', + 'FI': '91.152.0.0/13', + 'FJ': '144.120.0.0/16', + 'FM': '119.252.112.0/20', + 'FO': '88.85.32.0/19', + 'FR': '90.0.0.0/9', + 'GA': '41.158.0.0/15', + 'GB': '25.0.0.0/8', + 'GD': '74.122.88.0/21', + 'GE': '31.146.0.0/16', + 'GF': '161.22.64.0/18', + 'GG': '62.68.160.0/19', + 'GH': '45.208.0.0/14', + 'GI': '85.115.128.0/19', + 'GL': '88.83.0.0/19', + 'GM': '160.182.0.0/15', + 'GN': '197.149.192.0/18', + 'GP': '104.250.0.0/19', + 'GQ': '105.235.224.0/20', + 'GR': '94.64.0.0/13', + 'GT': '168.234.0.0/16', + 'GU': '168.123.0.0/16', + 'GW': '197.214.80.0/20', + 'GY': '181.41.64.0/18', + 'HK': '113.252.0.0/14', + 'HN': '181.210.0.0/16', + 'HR': '93.136.0.0/13', + 'HT': '148.102.128.0/17', + 'HU': '84.0.0.0/14', + 'ID': '39.192.0.0/10', + 'IE': '87.32.0.0/12', + 'IL': '79.176.0.0/13', + 'IM': '5.62.80.0/20', + 'IN': '117.192.0.0/10', + 'IO': '203.83.48.0/21', + 'IQ': '37.236.0.0/14', + 'IR': '2.176.0.0/12', + 'IS': '82.221.0.0/16', + 'IT': '79.0.0.0/10', + 'JE': '87.244.64.0/18', + 'JM': '72.27.0.0/17', + 'JO': '176.29.0.0/16', + 'JP': '126.0.0.0/8', + 'KE': '105.48.0.0/12', + 'KG': '158.181.128.0/17', + 'KH': '36.37.128.0/17', + 'KI': '103.25.140.0/22', + 'KM': '197.255.224.0/20', + 'KN': '198.32.32.0/19', + 'KP': '175.45.176.0/22', + 'KR': '175.192.0.0/10', + 'KW': '37.36.0.0/14', + 'KY': '64.96.0.0/15', + 'KZ': '2.72.0.0/13', + 'LA': '115.84.64.0/18', + 'LB': '178.135.0.0/16', + 'LC': '192.147.231.0/24', + 'LI': '82.117.0.0/19', + 'LK': '112.134.0.0/15', + 'LR': '41.86.0.0/19', + 'LS': '129.232.0.0/17', + 'LT': '78.56.0.0/13', + 'LU': '188.42.0.0/16', + 'LV': '46.109.0.0/16', + 'LY': '41.252.0.0/14', + 'MA': '105.128.0.0/11', + 'MC': '88.209.64.0/18', + 'MD': '37.246.0.0/16', + 'ME': '178.175.0.0/17', + 'MF': '74.112.232.0/21', + 'MG': '154.126.0.0/17', + 'MH': '117.103.88.0/21', + 'MK': '77.28.0.0/15', + 'ML': '154.118.128.0/18', + 'MM': '37.111.0.0/17', + 'MN': '49.0.128.0/17', + 'MO': '60.246.0.0/16', + 'MP': '202.88.64.0/20', + 'MQ': '109.203.224.0/19', + 'MR': '41.188.64.0/18', + 'MS': '208.90.112.0/22', + 'MT': '46.11.0.0/16', + 'MU': '105.16.0.0/12', + 'MV': '27.114.128.0/18', + 'MW': '105.234.0.0/16', + 'MX': '187.192.0.0/11', + 'MY': '175.136.0.0/13', + 'MZ': '197.218.0.0/15', + 'NA': '41.182.0.0/16', + 'NC': '101.101.0.0/18', + 'NE': '197.214.0.0/18', + 'NF': '203.17.240.0/22', + 'NG': '105.112.0.0/12', + 'NI': '186.76.0.0/15', + 'NL': '145.96.0.0/11', + 'NO': '84.208.0.0/13', + 'NP': '36.252.0.0/15', + 'NR': '203.98.224.0/19', + 'NU': '49.156.48.0/22', + 'NZ': '49.224.0.0/14', + 'OM': '5.36.0.0/15', + 'PA': '186.72.0.0/15', + 'PE': '186.160.0.0/14', + 'PF': '123.50.64.0/18', + 'PG': '124.240.192.0/19', + 'PH': '49.144.0.0/13', + 'PK': '39.32.0.0/11', + 'PL': '83.0.0.0/11', + 'PM': '70.36.0.0/20', + 'PR': '66.50.0.0/16', + 'PS': '188.161.0.0/16', + 'PT': '85.240.0.0/13', + 'PW': '202.124.224.0/20', + 'PY': '181.120.0.0/14', + 'QA': '37.210.0.0/15', + 'RE': '139.26.0.0/16', + 'RO': '79.112.0.0/13', + 'RS': '178.220.0.0/14', + 'RU': '5.136.0.0/13', + 'RW': '105.178.0.0/15', + 'SA': '188.48.0.0/13', + 'SB': '202.1.160.0/19', + 'SC': '154.192.0.0/11', + 'SD': '154.96.0.0/13', + 'SE': '78.64.0.0/12', + 'SG': '152.56.0.0/14', + 'SI': '188.196.0.0/14', + 'SK': '78.98.0.0/15', + 'SL': '197.215.0.0/17', + 'SM': '89.186.32.0/19', + 'SN': '41.82.0.0/15', + 'SO': '197.220.64.0/19', + 'SR': '186.179.128.0/17', + 'SS': '105.235.208.0/21', + 'ST': '197.159.160.0/19', + 'SV': '168.243.0.0/16', + 'SX': '190.102.0.0/20', + 'SY': '5.0.0.0/16', + 'SZ': '41.84.224.0/19', + 'TC': '65.255.48.0/20', + 'TD': '154.68.128.0/19', + 'TG': '196.168.0.0/14', + 'TH': '171.96.0.0/13', + 'TJ': '85.9.128.0/18', + 'TK': '27.96.24.0/21', + 'TL': '180.189.160.0/20', + 'TM': '95.85.96.0/19', + 'TN': '197.0.0.0/11', + 'TO': '175.176.144.0/21', + 'TR': '78.160.0.0/11', + 'TT': '186.44.0.0/15', + 'TV': '202.2.96.0/19', + 'TW': '120.96.0.0/11', + 'TZ': '156.156.0.0/14', + 'UA': '93.72.0.0/13', + 'UG': '154.224.0.0/13', + 'US': '3.0.0.0/8', + 'UY': '167.56.0.0/13', + 'UZ': '82.215.64.0/18', + 'VA': '212.77.0.0/19', + 'VC': '24.92.144.0/20', + 'VE': '186.88.0.0/13', + 'VG': '172.103.64.0/18', + 'VI': '146.226.0.0/16', + 'VN': '14.160.0.0/11', + 'VU': '202.80.32.0/20', + 'WF': '117.20.32.0/21', + 'WS': '202.4.32.0/19', + 'YE': '134.35.0.0/16', + 'YT': '41.242.116.0/22', + 'ZA': '41.0.0.0/11', + 'ZM': '165.56.0.0/13', + 'ZW': '41.85.192.0/19', + } + + @classmethod + def random_ipv4(cls, code): + block = cls._country_ip_map.get(code.upper()) + if not block: + return None + addr, preflen = block.split('/') + addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] + addr_max = addr_min | (0xffffffff >> int(preflen)) + return socket.inet_ntoa( + compat_struct_pack('!I', random.randint(addr_min, addr_max))) + + class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): def __init__(self, proxies=None): # Set default handlers From d392005a795a6cf85fda3c0f982254f8a2731e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:16 +0700 Subject: [PATCH 407/586] [dramafever] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/dramafever.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index bcd9fe2a0..755db806a 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -116,8 +116,9 @@ class DramaFeverIE(DramaFeverBaseIE): 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): - raise ExtractorError( - 'Currently unavailable in your country.', expected=True) + self.raise_geo_restricted( + msg='Currently unavailable in your country', + countries=['US', 'CA']) raise series_id, episode_number = video_id.split('.') From e633f21a96f37a96e8ef0fd4d6c1e4d3c0b41fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:33 +0700 Subject: [PATCH 408/586] [go] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/go.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index f28e6fbf5..ec902c670 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -101,6 +101,10 @@ class GoIE(AdobePassIE): video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) errors = entitlement.get('errors', {}).get('errors', []) if errors: + for error in errors: + if error.get('code') == 1002: + self.raise_geo_restricted( + error['message'], countries=['US']) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] From 28200e654b8051cadca12e51bd57f77e1ff0a4ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:51 +0700 Subject: [PATCH 409/586] [itv] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/itv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index b0d860452..aabde15f3 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -98,7 +98,10 @@ class ITVIE(InfoExtractor): headers=headers, data=etree.tostring(req_env)) playlist = xpath_element(resp_env, './/Playlist') if playlist is None: + fault_code = xpath_text(resp_env, './/faultcode') fault_string = xpath_text(resp_env, './/faultstring') + if fault_code == 'InvalidGeoRegion': + self.raise_geo_restricted(msg=fault_string, countries=['GB']) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) From ff4007891fde74212eb0898bb04c14b2de92ed03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:59 +0700 Subject: [PATCH 410/586] [nrk] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/nrk.py | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index fc3c0cd3c..78ece33e1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import random import re from .common import InfoExtractor @@ -15,25 +14,6 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): - _faked_ip = None - - def _download_webpage_handle(self, *args, **kwargs): - # NRK checks X-Forwarded-For HTTP header in order to figure out the - # origin of the client behind proxy. This allows to bypass geo - # restriction by faking this header's value to some Norway IP. - # We will do so once we encounter any geo restriction error. - if self._faked_ip: - # NB: str is intentional - kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip - return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs) - - def _fake_ip(self): - # Use fake IP from 37.191.128.0/17 in order to workaround geo - # restriction - def octet(lb=0, ub=255): - return random.randint(lb, ub) - self._faked_ip = '37.191.%d.%d' % (octet(128), octet()) - def _real_extract(self, url): video_id = self._match_id(url) @@ -44,8 +24,6 @@ class NRKBaseIE(InfoExtractor): title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id - http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {} - entries = [] conviva = data.get('convivaStatistics') or {} @@ -90,7 +68,6 @@ class NRKBaseIE(InfoExtractor): 'duration': duration, 'subtitles': subtitles, 'formats': formats, - 'http_headers': http_headers, }) if not entries: @@ -107,19 +84,16 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - message_type = data.get('messageType', '') - # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* - if 'IsGeoBlocked' in message_type and not self._faked_ip: - self.report_warning( - 'Video is geo restricted, trying to fake IP') - self._fake_ip() - return self._real_extract(url) - MESSAGES = { 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', 'ProgramRightsHasExpired': 'Programmet har gått ut', 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', } + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type: + self.raise_geo_restricted( + msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) raise ExtractorError( '%s said: %s' % (self.IE_NAME, MESSAGES.get( message_type, message_type)), From 01b1aa9ff408ce15b8bbea08dbc190f3282141a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:11 +0700 Subject: [PATCH 411/586] [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/ondemandkorea.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index dcd157777..0c85d549e 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -35,7 +35,8 @@ class OnDemandKoreaIE(InfoExtractor): if 'msg_block_01.png' in webpage: self.raise_geo_restricted( - 'This content is not available in your region') + msg='This content is not available in your region', + countries=['US', 'CA']) if 'This video is only available to ODK PLUS members.' in webpage: raise ExtractorError( From 8ab8066cf08352ad336c3ff594d0ac27f6c809c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:20 +0700 Subject: [PATCH 412/586] [pbs] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/pbs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 6baed773f..64f47bae3 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -489,11 +489,12 @@ class PBSIE(InfoExtractor): headers=self.geo_verification_headers()) if redirect_info['status'] == 'error': + message = self._ERRORS.get( + redirect_info['http_code'], redirect_info['message']) + if redirect_info['http_code'] == 403: + self.raise_geo_restricted(msg=message, countries=['US']) raise ExtractorError( - '%s said: %s' % ( - self.IE_NAME, - self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])), - expected=True) + '%s said: %s' % (self.IE_NAME, message), expected=True) format_url = redirect_info.get('url') if not format_url: From 04d906eae3071e37049cfcd2a02e9079b72a265c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:33 +0700 Subject: [PATCH 413/586] [svt] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/svt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 10cf80885..f2a2200bf 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -38,7 +38,8 @@ class SVTBaseIE(InfoExtractor): 'url': vurl, }) if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): - self.raise_geo_restricted('This video is only available in Sweden') + self.raise_geo_restricted( + 'This video is only available in Sweden', countries=['SE']) self._sort_formats(formats) subtitles = {} From 89cc7fe7705b6534f434b514265a0507b70ef40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:42 +0700 Subject: [PATCH 414/586] [vbox7] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/vbox7.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index bef639462..f86d804c1 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -78,7 +78,7 @@ class Vbox7IE(InfoExtractor): video_url = video['src'] if '/na.mp4' in video_url: - self.raise_geo_restricted() + self.raise_geo_restricted(countries=['BG']) uploader = video.get('uploader') From 71631862f4de5a10223642ebdbd5e10db374d270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:23 +0700 Subject: [PATCH 415/586] [srgssr] Improve geo restriction detection --- youtube_dl/extractor/srgssr.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 319a48a7a..a35a0a538 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -14,6 +14,7 @@ from ..utils import ( class SRGSSRIE(InfoExtractor): _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' + _BYPASS_GEO = False _ERRORS = { 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.', @@ -40,8 +41,11 @@ class SRGSSRIE(InfoExtractor): media_id)[media_type.capitalize()] if media_data.get('block') and media_data['block'] in self._ERRORS: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, self._ERRORS[media_data['block']]), expected=True) + message = self._ERRORS[media_data['block']] + if media_data['block'] == 'GEOBLOCK': + self.raise_geo_restricted(msg=message, countries=['CH']) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) return media_data From 80b59020e02e9c61f74f8f8f8891f9745667edb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:31 +0700 Subject: [PATCH 416/586] [vgtv] Improve geo restriction detection --- youtube_dl/extractor/vgtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 8a574bc26..1709fd6bb 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -14,6 +14,7 @@ from ..utils import ( class VGTVIE(XstreamIE): IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' + _BYPASS_GEO = False _HOST_TO_APPNAME = { 'vgtv.no': 'vgtv', @@ -217,7 +218,7 @@ class VGTVIE(XstreamIE): properties = try_get( data, lambda x: x['streamConfiguration']['properties'], list) if properties and 'geoblocked' in properties: - raise self.raise_geo_restricted() + raise self.raise_geo_restricted(countries=['NO']) self._sort_formats(info['formats']) From 5d3fbf77d96ade64c645b6942979c0b99aa4d775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:39 +0700 Subject: [PATCH 417/586] [viki] Improve geo restriction detection --- youtube_dl/extractor/viki.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 9c48701c1..68a74e246 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -27,6 +27,7 @@ class VikiBaseIE(InfoExtractor): _APP_VERSION = '2.2.5.1428709186' _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' + _BYPASS_GEO = False _NETRC_MACHINE = 'viki' _token = None @@ -77,8 +78,11 @@ class VikiBaseIE(InfoExtractor): def _check_errors(self, data): for reason, status in data.get('blocking', {}).items(): if status and reason in self._ERRORS: + message = self._ERRORS[reason] + if reason == 'geo': + self.raise_geo_restricted(msg=message) raise ExtractorError('%s said: %s' % ( - self.IE_NAME, self._ERRORS[reason]), expected=True) + self.IE_NAME, message), expected=True) def _real_initialize(self): self._login() From 18a0defab063523cd76a30be2dd5a80e9f9172d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 20:26:43 +0700 Subject: [PATCH 418/586] [utils] Make random_ipv4 return unicode string --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4e76b6b7b..cbf7639c5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3290,8 +3290,8 @@ class GeoUtils(object): addr, preflen = block.split('/') addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] addr_max = addr_min | (0xffffffff >> int(preflen)) - return socket.inet_ntoa( - compat_struct_pack('!I', random.randint(addr_min, addr_max))) + return compat_str(socket.inet_ntoa( + compat_struct_pack('!I', random.randint(addr_min, addr_max)))) class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): From 0016b84e16965a07c52946c4672363153e8b18a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 21:06:07 +0700 Subject: [PATCH 419/586] Add faked X-Forwarded-For to formats' HTTP headers --- youtube_dl/YoutubeDL.py | 14 ++++++++++++++ youtube_dl/extractor/common.py | 5 ++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ebace6b57..1c04e46c1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -864,8 +864,14 @@ class YoutubeDL(object): if self.params.get('playlistrandom', False): random.shuffle(entries) + x_forwarded_for = ie_result.get('__x_forwarded_for_ip') + for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) + # This __x_forwarded_for_ip thing is a bit ugly but requires + # minimal changes + if x_forwarded_for: + entry['__x_forwarded_for_ip'] = x_forwarded_for extra = { 'n_entries': n_entries, 'playlist': playlist, @@ -1250,6 +1256,11 @@ class YoutubeDL(object): if cookies: res['Cookie'] = cookies + if 'X-Forwarded-For' not in res: + x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') + if x_forwarded_for_ip: + res['X-Forwarded-For'] = x_forwarded_for_ip + return res def _calc_cookies(self, info_dict): @@ -1392,6 +1403,9 @@ class YoutubeDL(object): full_format_info = info_dict.copy() full_format_info.update(format) format['http_headers'] = self._calc_headers(full_format_info) + # Remove private housekeeping stuff + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] # TODO Central sorting goes here diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 96815099d..c1f7f28a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -384,7 +384,10 @@ class InfoExtractor(object): for _ in range(2): try: self.initialize() - return self._real_extract(url) + ie_result = self._real_extract(url) + if self._x_forwarded_for_ip: + ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip + return ie_result except GeoRestrictedError as e: if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and self._BYPASS_GEO and From 0a840f584c3f1fedb6957c05587dec697143f2d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 01:53:41 +0700 Subject: [PATCH 420/586] Rename bypass geo restriction options --- youtube_dl/YoutubeDL.py | 5 ++--- youtube_dl/__init__.py | 4 ++-- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/options.py | 12 ++++++------ 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1c04e46c1..68000dea2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -274,10 +274,9 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. - bypass_geo_restriction: - Bypass geographic restriction via faking X-Forwarded-For + geo_bypass: Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental) - bypass_geo_restriction_as_country: + geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for explicit geographic restriction bypassing via faking X-Forwarded-For HTTP header (experimental) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 94f461a78..f91d29a7b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -414,8 +414,8 @@ def _real_main(argv=None): 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'config_location': opts.config_location, - 'bypass_geo_restriction': opts.bypass_geo_restriction, - 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, + 'geo_bypass': opts.geo_bypass, + 'geo_bypass_country': opts.geo_bypass_country, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c1f7f28a0..6eb6a25b8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -326,7 +326,7 @@ class InfoExtractor(object): _BYPASS_GEO attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on - country code provided with bypass_geo_restriction_as_country. + country code provided with geo_bypass_country. Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. @@ -371,7 +371,7 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" if not self._x_forwarded_for_ip: - country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) + country_code = self._downloader.params.get('geo_bypass_country', None) if country_code: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if not self._ready: @@ -389,9 +389,9 @@ class InfoExtractor(object): ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip return ie_result except GeoRestrictedError as e: - if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and + if (not self._downloader.params.get('geo_bypass_country', None) and self._BYPASS_GEO and - self._downloader.params.get('bypass_geo_restriction', True) and + self._downloader.params.get('geo_bypass', True) and not self._x_forwarded_for_ip and e.countries): self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2e194f6dc..ae3f50754 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -550,16 +550,16 @@ def parseOpts(overrideArguments=None): '(maximum possible number of seconds to sleep). Must only be used ' 'along with --min-sleep-interval.')) workarounds.add_option( - '--bypass-geo', - action='store_true', dest='bypass_geo_restriction', default=True, + '--geo-bypass', + action='store_true', dest='geo_bypass', default=True, help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') workarounds.add_option( - '--no-bypass-geo', - action='store_false', dest='bypass_geo_restriction', default=True, + '--no-geo-bypass', + action='store_false', dest='geo_bypass', default=True, help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') workarounds.add_option( - '--bypass-geo-as-country', metavar='CODE', - dest='bypass_geo_restriction_as_country', default=None, + '--geo-bypass-country', metavar='CODE', + dest='geo_bypass_country', default=None, help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From 4248dad92bd87650c791194276296b148f668e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 03:53:23 +0700 Subject: [PATCH 421/586] Improve geo bypass mechanism * Rename options to preffixly match with --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors * Implement faking IP right away for sites with known geo restriction --- youtube_dl/extractor/common.py | 59 +++++++++++++++++++-------- youtube_dl/extractor/dramafever.py | 3 +- youtube_dl/extractor/go.py | 3 +- youtube_dl/extractor/itv.py | 4 +- youtube_dl/extractor/nrk.py | 4 +- youtube_dl/extractor/ondemandkorea.py | 3 +- youtube_dl/extractor/pbs.py | 5 ++- youtube_dl/extractor/srgssr.py | 6 ++- youtube_dl/extractor/svt.py | 4 +- youtube_dl/extractor/vbox7.py | 3 +- youtube_dl/extractor/vgtv.py | 5 ++- youtube_dl/extractor/viki.py | 2 +- youtube_dl/utils.py | 2 +- 13 files changed, 72 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6eb6a25b8..272da74b6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -323,10 +323,15 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. - _BYPASS_GEO attribute may be set to False in order to disable + _GEO_BYPASS attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on - country code provided with geo_bypass_country. + country code provided with geo_bypass_country. (experimental) + + _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted + countries for this extractor. One of these countries will be used by + geo restriction bypass mechanism right away in order to bypass + geo restriction, of course, if the mechanism is not disabled. (experimental) Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. @@ -335,7 +340,8 @@ class InfoExtractor(object): _ready = False _downloader = None _x_forwarded_for_ip = None - _BYPASS_GEO = True + _GEO_BYPASS = True + _GEO_COUNTRIES = None _WORKING = True def __init__(self, downloader=None): @@ -370,14 +376,28 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" - if not self._x_forwarded_for_ip: - country_code = self._downloader.params.get('geo_bypass_country', None) - if country_code: - self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) + self.__initialize_geo_bypass() if not self._ready: self._real_initialize() self._ready = True + def __initialize_geo_bypass(self): + if not self._x_forwarded_for_ip: + country_code = self._downloader.params.get('geo_bypass_country', None) + # If there is no explicit country for geo bypass specified and + # the extractor is known to be geo restricted let's fake IP + # as X-Forwarded-For right away. + if (not country_code and + self._GEO_BYPASS and + self._downloader.params.get('geo_bypass', True) and + self._GEO_COUNTRIES): + country_code = random.choice(self._GEO_COUNTRIES) + if country_code: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) + if self._downloader.params.get('verbose', False): + self._downloader.to_stdout( + '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + def extract(self, url): """Extracts URL information and returns it in list of dicts.""" try: @@ -389,16 +409,8 @@ class InfoExtractor(object): ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip return ie_result except GeoRestrictedError as e: - if (not self._downloader.params.get('geo_bypass_country', None) and - self._BYPASS_GEO and - self._downloader.params.get('geo_bypass', True) and - not self._x_forwarded_for_ip and - e.countries): - self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) - if self._x_forwarded_for_ip: - self.report_warning( - 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) - continue + if self.__maybe_fake_ip_and_retry(e.countries): + continue raise except ExtractorError: raise @@ -407,6 +419,19 @@ class InfoExtractor(object): except (KeyError, StopIteration) as e: raise ExtractorError('An extractor error has occurred.', cause=e) + def __maybe_fake_ip_and_retry(self, countries): + if (not self._downloader.params.get('geo_bypass_country', None) and + self._GEO_BYPASS and + self._downloader.params.get('geo_bypass', True) and + not self._x_forwarded_for_ip and + countries): + self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries)) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + return True + return False + def set_downloader(self, downloader): """Sets the downloader for this IE.""" self._downloader = downloader diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 755db806a..e7abc8889 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -20,6 +20,7 @@ from ..utils import ( class DramaFeverBaseIE(AMPIE): _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' + _GEO_COUNTRIES = ['US', 'CA'] _CONSUMER_SECRET = 'DA59dtVXYLxajktV' @@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE): if isinstance(e.cause, compat_HTTPError): self.raise_geo_restricted( msg='Currently unavailable in your country', - countries=['US', 'CA']) + countries=self._GEO_COUNTRIES) raise series_id, episode_number = video_id.split('.') diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index ec902c670..b205bfc7c 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -37,6 +37,7 @@ class GoIE(AdobePassIE): } } _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) + _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -104,7 +105,7 @@ class GoIE(AdobePassIE): for error in errors: if error.get('code') == 1002: self.raise_geo_restricted( - error['message'], countries=['US']) + error['message'], countries=self._GEO_COUNTRIES) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index aabde15f3..021c6b278 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -24,6 +24,7 @@ from ..utils import ( class ITVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' + _GEO_COUNTRIES = ['GB'] _TEST = { 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { @@ -101,7 +102,8 @@ class ITVIE(InfoExtractor): fault_code = xpath_text(resp_env, './/faultcode') fault_string = xpath_text(resp_env, './/faultstring') if fault_code == 'InvalidGeoRegion': - self.raise_geo_restricted(msg=fault_string, countries=['GB']) + self.raise_geo_restricted( + msg=fault_string, countries=self._GEO_COUNTRIES) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 78ece33e1..13af9ed1f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -14,6 +14,7 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['NO'] def _real_extract(self, url): video_id = self._match_id(url) @@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor): # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* if 'IsGeoBlocked' in message_type: self.raise_geo_restricted( - msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) + msg=MESSAGES.get('ProgramIsGeoBlocked'), + countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, MESSAGES.get( message_type, message_type)), diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index 0c85d549e..df1ce3c1d 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -10,6 +10,7 @@ from ..utils import ( class OnDemandKoreaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' + _GEO_COUNTRIES = ['US', 'CA'] _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', 'info_dict': { @@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor): if 'msg_block_01.png' in webpage: self.raise_geo_restricted( msg='This content is not available in your region', - countries=['US', 'CA']) + countries=self._GEO_COUNTRIES) if 'This video is only available to ODK PLUS members.' in webpage: raise ExtractorError( diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 64f47bae3..3e51b4dd7 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -193,6 +193,8 @@ class PBSIE(InfoExtractor): ) ''' % '|'.join(list(zip(*_STATIONS))[0]) + _GEO_COUNTRIES = ['US'] + _TESTS = [ { 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', @@ -492,7 +494,8 @@ class PBSIE(InfoExtractor): message = self._ERRORS.get( redirect_info['http_code'], redirect_info['message']) if redirect_info['http_code'] == 403: - self.raise_geo_restricted(msg=message, countries=['US']) + self.raise_geo_restricted( + msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, message), expected=True) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index a35a0a538..bb73eb1d5 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -14,7 +14,8 @@ from ..utils import ( class SRGSSRIE(InfoExtractor): _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' - _BYPASS_GEO = False + _GEO_BYPASS = False + _GEO_COUNTRIES = ['CH'] _ERRORS = { 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.', @@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor): if media_data.get('block') and media_data['block'] in self._ERRORS: message = self._ERRORS[media_data['block']] if media_data['block'] == 'GEOBLOCK': - self.raise_geo_restricted(msg=message, countries=['CH']) + self.raise_geo_restricted( + msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, message), expected=True) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index f2a2200bf..9e2c9fcc6 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -13,6 +13,7 @@ from ..utils import ( class SVTBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['SE'] def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: @@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor): }) if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): self.raise_geo_restricted( - 'This video is only available in Sweden', countries=['SE']) + 'This video is only available in Sweden', + countries=self._GEO_COUNTRIES) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index f86d804c1..8152acefd 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor): ) (?P<id>[\da-fA-F]+) ''' + _GEO_COUNTRIES = ['BG'] _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor): video_url = video['src'] if '/na.mp4' in video_url: - self.raise_geo_restricted(countries=['BG']) + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) uploader = video.get('uploader') diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 1709fd6bb..0f8c156a7 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -14,7 +14,7 @@ from ..utils import ( class VGTVIE(XstreamIE): IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' - _BYPASS_GEO = False + _GEO_BYPASS = False _HOST_TO_APPNAME = { 'vgtv.no': 'vgtv', @@ -218,7 +218,8 @@ class VGTVIE(XstreamIE): properties = try_get( data, lambda x: x['streamConfiguration']['properties'], list) if properties and 'geoblocked' in properties: - raise self.raise_geo_restricted(countries=['NO']) + raise self.raise_geo_restricted( + countries=[host.rpartition('.')[-1].partition('/')[0].upper()]) self._sort_formats(info['formats']) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 68a74e246..e9c8bf824 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor): _APP_VERSION = '2.2.5.1428709186' _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' - _BYPASS_GEO = False + _GEO_BYPASS = False _NETRC_MACHINE = 'viki' _token = None diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cbf7639c5..17b83794a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3291,7 +3291,7 @@ class GeoUtils(object): addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] addr_max = addr_min | (0xffffffff >> int(preflen)) return compat_str(socket.inet_ntoa( - compat_struct_pack('!I', random.randint(addr_min, addr_max)))) + compat_struct_pack('!L', random.randint(addr_min, addr_max)))) class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): From 0aa10994f452b4ca978baf124df0cb2239d49305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 03:58:17 +0700 Subject: [PATCH 422/586] [options] Move geo restriction related options to separate section --- youtube_dl/options.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ae3f50754..2c880d06a 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -228,17 +228,29 @@ def parseOpts(overrideArguments=None): action='store_const', const='::', dest='source_address', help='Make all connections via IPv6', ) - network.add_option( + + geo = optparse.OptionGroup(parser, 'Geo Restriction') + geo.add_option( '--geo-verification-proxy', dest='geo_verification_proxy', default=None, metavar='URL', help='Use this proxy to verify the IP address for some geo-restricted sites. ' - 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.' - ) - network.add_option( + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.') + geo.add_option( '--cn-verification-proxy', dest='cn_verification_proxy', default=None, metavar='URL', - help=optparse.SUPPRESS_HELP, - ) + help=optparse.SUPPRESS_HELP) + geo.add_option( + '--geo-bypass', + action='store_true', dest='geo_bypass', default=True, + help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + geo.add_option( + '--no-geo-bypass', + action='store_false', dest='geo_bypass', default=True, + help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + geo.add_option( + '--geo-bypass-country', metavar='CODE', + dest='geo_bypass_country', default=None, + help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') selection = optparse.OptionGroup(parser, 'Video Selection') selection.add_option( @@ -549,18 +561,6 @@ def parseOpts(overrideArguments=None): 'Upper bound of a range for randomized sleep before each download ' '(maximum possible number of seconds to sleep). Must only be used ' 'along with --min-sleep-interval.')) - workarounds.add_option( - '--geo-bypass', - action='store_true', dest='geo_bypass', default=True, - help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') - workarounds.add_option( - '--no-geo-bypass', - action='store_false', dest='geo_bypass', default=True, - help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') - workarounds.add_option( - '--geo-bypass-country', metavar='CODE', - dest='geo_bypass_country', default=None, - help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( @@ -848,6 +848,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(general) parser.add_option_group(network) + parser.add_option_group(geo) parser.add_option_group(selection) parser.add_option_group(downloader) parser.add_option_group(filesystem) From 553f6dbac7afac84994eae18f551799f807d1503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 04:18:22 +0700 Subject: [PATCH 423/586] [downloader/dash] Honor HTTP headers when downloading fragments For example, https://www.oppetarkiv.se/video/1196142/natten-ar-dagens-mor --- youtube_dl/downloader/dash.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8437dde30..e2ddc369e 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -43,7 +43,10 @@ class DashSegmentsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': segment_url}) + success = ctx['dl'].download(target_filename, { + 'url': segment_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') From de64e23c5663ceb4f62264077a7993d13ace0d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 04:18:36 +0700 Subject: [PATCH 424/586] [downloader/ism] Honor HTTP headers when downloading fragments --- youtube_dl/downloader/ism.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 93cac5e98..63a636cb7 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -238,7 +238,10 @@ class IsmFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': segment_url}) + success = ctx['dl'].download(target_filename, { + 'url': segment_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') From f1a78ee4ef3bfd8e7ff06a3014d96c3cf11b4d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 06:16:00 +0700 Subject: [PATCH 425/586] [tv4] Switch to hls3 protocol (closes #12177) --- youtube_dl/extractor/tv4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index ad79db92b..7918e3d86 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -80,7 +80,7 @@ class TV4IE(InfoExtractor): subtitles = {} formats = [] # http formats are linked with unresolvable host - for kind in ('hls', ''): + for kind in ('hls3', ''): data = self._download_json( 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, video_id, 'Downloading sources JSON', query={ From c58b7ffef43f60fa6a183c849cfdca42e36eae0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 06:24:38 +0700 Subject: [PATCH 426/586] [tv4] Bypass geo restriction and improve detection --- youtube_dl/extractor/tv4.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 7918e3d86..7aeb2c620 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -24,6 +24,7 @@ class TV4IE(InfoExtractor): sport/| ) )(?P<id>[0-9]+)''' + _GEO_COUNTRIES = ['SE'] _TESTS = [ { 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', @@ -71,10 +72,6 @@ class TV4IE(InfoExtractor): 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') - # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info.get('is_geo_restricted'): - self.report_warning('This content might not be available in your country due to licensing restrictions.') - title = info['title'] subtitles = {} @@ -113,6 +110,10 @@ class TV4IE(InfoExtractor): 'url': manifest_url, 'ext': 'vtt', }]}) + + if not formats and info.get('is_geo_restricted'): + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + self._sort_formats(formats) return { From 8936f68a0ba3284c88ec619fb4cc22eb0499e7f3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 21 Oct 2015 00:37:28 +0800 Subject: [PATCH 427/586] [travis] Run tests in parallel [test_download] Print test names in case of network errors [test_download] Add comments for nose parameters [test_download] Modify outtmpl to prevent info JSON filename conflicts Thanks @jaimeMF for the idea. [travis] Only download tests should be run in parallel --- devscripts/run_tests.sh | 4 +++- test/test_download.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index 7f4c1e083..c60807215 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -3,6 +3,7 @@ DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" test_set="" +multiprocess_args="" case "$YTDL_TEST_SET" in core) @@ -10,10 +11,11 @@ case "$YTDL_TEST_SET" in ;; download) test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + multiprocess_args="--processes=4 --process-timeout=540" ;; *) break ;; esac -nosetests test --verbose $test_set +nosetests test --verbose $test_set $multiprocess_args diff --git a/test/test_download.py b/test/test_download.py index 463952989..30034f978 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -65,6 +65,10 @@ defs = gettestcases() class TestDownload(unittest.TestCase): + # Parallel testing in nosetests. See + # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html + _multiprocess_shared_ = True + maxDiff = None def setUp(self): @@ -73,7 +77,7 @@ class TestDownload(unittest.TestCase): # Dynamically generate tests -def generator(test_case): +def generator(test_case, tname): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) @@ -102,6 +106,7 @@ def generator(test_case): return params = get_params(test_case.get('params', {})) + params['outtmpl'] = tname + '_' + params['outtmpl'] if is_playlist and 'playlist' not in test_case: params.setdefault('extract_flat', 'in_playlist') params.setdefault('skip_download', True) @@ -146,7 +151,7 @@ def generator(test_case): raise if try_num == RETRIES: - report_warning('Failed due to network errors, skipping...') + report_warning('%s failed due to network errors, skipping...' % tname) return print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) @@ -221,12 +226,12 @@ def generator(test_case): # And add them to TestDownload for n, test_case in enumerate(defs): - test_method = generator(test_case) tname = 'test_' + str(test_case['name']) i = 1 while hasattr(TestDownload, tname): tname = 'test_%s_%d' % (test_case['name'], i) i += 1 + test_method = generator(test_case, tname) test_method.__name__ = str(tname) setattr(TestDownload, test_method.__name__, test_method) del test_method From 983e9b774643fc588fbfb51d314381025ffac248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 00:59:31 +0700 Subject: [PATCH 428/586] [nrk] Update _API_HOST and relax _VALID_URL --- youtube_dl/extractor/nrk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 13af9ed1f..7b98626f2 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -164,12 +164,12 @@ class NRKIE(NRKBaseIE): https?:// (?: (?:www\.)?nrk\.no/video/PS\*| - v8-psapi\.nrk\.no/mediaelement/ + v8[-.]psapi\.nrk\.no/mediaelement/ ) ) - (?P<id>[^/?#&]+) + (?P<id>[^?#&]+) ''' - _API_HOST = 'v8.psapi.nrk.no' + _API_HOST = 'v8-psapi.nrk.no' _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', @@ -195,6 +195,9 @@ class NRKIE(NRKBaseIE): }, { 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, + }, { + 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70', + 'only_matching': True, }, { 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, From 8ffb8e63fe2853f9e51420ba224db428f1241c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 01:00:53 +0700 Subject: [PATCH 429/586] [prosiebensat1] Throw ExtractionError on unsupported page type (closes #12180) --- youtube_dl/extractor/prosiebensat1.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 5091d8456..1245309a7 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -424,3 +424,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): return self._extract_clip(url, webpage) elif page_type == 'playlist': return self._extract_playlist(url, webpage) + else: + raise ExtractorError( + 'Unsupported page type %s' % page_type, expected=True) From c78dd3549155d4cb8f70707c1b4085f9f974db2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 02:25:39 +0700 Subject: [PATCH 430/586] [nrk] PEP 8 --- youtube_dl/extractor/nrk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7b98626f2..7fe79cb53 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -15,6 +15,7 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): _GEO_COUNTRIES = ['NO'] + def _real_extract(self, url): video_id = self._match_id(url) From 6d4c259765de86bdb8a10e71bfbc7b6e196f6967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 02:25:55 +0700 Subject: [PATCH 431/586] [svt] PEP 8 --- youtube_dl/extractor/svt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 9e2c9fcc6..1b5afb73e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -14,6 +14,7 @@ from ..utils import ( class SVTBaseIE(InfoExtractor): _GEO_COUNTRIES = ['SE'] + def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: From 2cc7fcd338e8690a5c211b95fb9e0dcdc5d98ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 03:06:52 +0700 Subject: [PATCH 432/586] [commonmistakes] Disable UnicodeBOM extractor test for python 3.2 --- youtube_dl/extractor/commonmistakes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py index d3ed4a9a4..79f7a9cd1 100644 --- a/youtube_dl/extractor/commonmistakes.py +++ b/youtube_dl/extractor/commonmistakes.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import sys + from .common import InfoExtractor from ..utils import ExtractorError @@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor): IE_DESC = False _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' - _TESTS = [{ + # Disable test for python 3.2 since BOM is broken in re in this version + # (see https://github.com/rg3/youtube-dl/issues/9751) + _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 'only_matching': True, }] From 82f662182b9ade630b37af81ebf8ae7ae6468898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 23:16:14 +0700 Subject: [PATCH 433/586] [iprima] Modernize --- youtube_dl/extractor/iprima.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 0fe576883..7afa2def0 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -8,7 +8,6 @@ from .common import InfoExtractor from ..utils import ( determine_ext, js_to_json, - sanitized_Request, ) @@ -38,11 +37,13 @@ class IPrimaIE(InfoExtractor): video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') - req = sanitized_Request( - 'http://play.iprima.cz/prehravac/init?_infuse=1' - '&_ts=%s&productId=%s' % (round(time.time()), video_id)) - req.add_header('Referer', url) - playerpage = self._download_webpage(req, video_id, note='Downloading player') + playerpage = self._download_webpage( + 'http://play.iprima.cz/prehravac/init', + video_id, note='Downloading player', query={ + '_infuse': 1, + '_ts': round(time.time()), + 'productId': video_id, + }, headers={'Referer': url}) formats = [] From da42ff066811490064e0c3039b9db5c0e9a69f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 23:17:19 +0700 Subject: [PATCH 434/586] [iprima] Improve geo restriction detection and disable geo bypass --- youtube_dl/extractor/iprima.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 7afa2def0..a29e6a5ba 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -13,6 +13,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)' + _GEO_BYPASS = False _TESTS = [{ 'url': 'http://play.iprima.cz/gondici-s-r-o-33', @@ -28,6 +29,10 @@ class IPrimaIE(InfoExtractor): }, { 'url': 'http://play.iprima.cz/particka/particka-92', 'only_matching': True, + }, { + # geo restricted + 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', + 'only_matching': True, }] def _real_extract(self, url): @@ -83,7 +88,7 @@ class IPrimaIE(InfoExtractor): extract_formats(src) if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: - self.raise_geo_restricted() + self.raise_geo_restricted(countries=['CZ']) self._sort_formats(formats) From 3ccdde8cb76cacb7b2b64469ca51d3b1877da1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 23:21:15 +0700 Subject: [PATCH 435/586] [extractor/common] Emphasize geo bypass APIs are experimental --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 272da74b6..1ae264722 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -333,6 +333,9 @@ class InfoExtractor(object): geo restriction bypass mechanism right away in order to bypass geo restriction, of course, if the mechanism is not disabled. (experimental) + NB: both these geo attributes are experimental and may change in future + or be completely removed. + Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ From 6926304472d4598f095abc7115ca0f36068271d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 00:54:43 +0700 Subject: [PATCH 436/586] [spankbang] Make uploader optional (closes #12193) --- youtube_dl/extractor/spankbang.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 123c33ac3..3394c7e6b 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -23,6 +23,10 @@ class SpankBangIE(InfoExtractor): # 480p only 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang', 'only_matching': True, + }, { + # no uploader + 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', + 'only_matching': True, }] def _real_extract(self, url): @@ -48,7 +52,7 @@ class SpankBangIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._search_regex( r'class="user"[^>]*><img[^>]+>([^<]+)', - webpage, 'uploader', fatal=False) + webpage, 'uploader', default=None) age_limit = self._rta_search(webpage) From 890d44b005c3073442064a847f2e0204619a8b47 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 20 Feb 2017 19:00:06 +0100 Subject: [PATCH 437/586] [adobepass] add support for Time Warner Cable(closes #12191) --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 12eeab271..4d655bd5e 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -31,6 +31,11 @@ MSO_INFO = { 'username_field': 'user', 'password_field': 'passwd', }, + 'TWC': { + 'name': 'Time Warner Cable | Spectrum', + 'username_field': 'Ecom_User_ID', + 'password_field': 'Ecom_Password', + }, 'thr030': { 'name': '3 Rivers Communications' }, From e469ab25280433781881d0c3ea6fd423ac5fea71 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 14:38:00 +0100 Subject: [PATCH 438/586] [ninecninemedia] use geo bypass mechanism --- youtube_dl/extractor/ninecninemedia.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py index ec4d675e2..d9943fc2c 100644 --- a/youtube_dl/extractor/ninecninemedia.py +++ b/youtube_dl/extractor/ninecninemedia.py @@ -19,6 +19,7 @@ class NineCNineMediaBaseIE(InfoExtractor): class NineCNineMediaStackIE(NineCNineMediaBaseIE): IE_NAME = '9c9media:stack' + _GEO_COUNTRIES = ['CA'] _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)' def _real_extract(self, url): From e39b5d4ab83de7a466c6d4c9528d385758566b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:00:43 +0700 Subject: [PATCH 439/586] [extractor/common] Allow calling _initialize_geo_bypass from extractors (#11970) --- youtube_dl/extractor/common.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1ae264722..86aff3312 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -379,12 +379,31 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" - self.__initialize_geo_bypass() + self._initialize_geo_bypass(self._GEO_COUNTRIES) if not self._ready: self._real_initialize() self._ready = True - def __initialize_geo_bypass(self): + def _initialize_geo_bypass(self, countries): + """ + Initialize geo restriction bypass mechanism. + + This method is used to initialize geo bypass mechanism based on faking + X-Forwarded-For HTTP header. A random country from provided country list + is selected and a random IP brlonging to this country is generated. This + IP will be passed as X-Forwarded-For HTTP header in all subsequent + HTTP requests. + Method does nothing if no countries are specified. + + This method will be used for initial geo bypass mechanism initialization + during the instance initialization with _GEO_COUNTRIES. + + You may also manually call it from extractor's code if geo countries + information is not available beforehand (e.g. obtained during + extraction) or due to some another reason. + """ + if not countries: + return if not self._x_forwarded_for_ip: country_code = self._downloader.params.get('geo_bypass_country', None) # If there is no explicit country for geo bypass specified and @@ -393,8 +412,8 @@ class InfoExtractor(object): if (not country_code and self._GEO_BYPASS and self._downloader.params.get('geo_bypass', True) and - self._GEO_COUNTRIES): - country_code = random.choice(self._GEO_COUNTRIES) + countries): + country_code = random.choice(countries) if country_code: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._downloader.params.get('verbose', False): From dc0a869e5ee7a75218a759706bb11f17c4de6b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:05:31 +0700 Subject: [PATCH 440/586] [extractor/common] Fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 86aff3312..6d4789d96 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -390,7 +390,7 @@ class InfoExtractor(object): This method is used to initialize geo bypass mechanism based on faking X-Forwarded-For HTTP header. A random country from provided country list - is selected and a random IP brlonging to this country is generated. This + is selected and a random IP belonging to this country is generated. This IP will be passed as X-Forwarded-For HTTP header in all subsequent HTTP requests. Method does nothing if no countries are specified. From 336a76551b92db1c040cbf3c4a9b1857e125ad45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:09:41 +0700 Subject: [PATCH 441/586] [extractor/common] Do not quit _initialize_geo_bypass on empty countries --- youtube_dl/extractor/common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6d4789d96..a34fbbc9b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -393,7 +393,6 @@ class InfoExtractor(object): is selected and a random IP belonging to this country is generated. This IP will be passed as X-Forwarded-For HTTP header in all subsequent HTTP requests. - Method does nothing if no countries are specified. This method will be used for initial geo bypass mechanism initialization during the instance initialization with _GEO_COUNTRIES. @@ -402,8 +401,6 @@ class InfoExtractor(object): information is not available beforehand (e.g. obtained during extraction) or due to some another reason. """ - if not countries: - return if not self._x_forwarded_for_ip: country_code = self._downloader.params.get('geo_bypass_country', None) # If there is no explicit country for geo bypass specified and From eea0716cae1290fe08faea89e24a58ec91098638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:14:33 +0700 Subject: [PATCH 442/586] [extractor/common] Print origin country for fake IP --- youtube_dl/extractor/common.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a34fbbc9b..4252d6825 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -415,7 +415,8 @@ class InfoExtractor(object): self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._downloader.params.get('verbose', False): self._downloader.to_stdout( - '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + '[debug] Using fake IP %s (%s) as X-Forwarded-For.' + % (self._x_forwarded_for_ip, country_code.upper())) def extract(self, url): """Extracts URL information and returns it in list of dicts.""" @@ -444,10 +445,12 @@ class InfoExtractor(object): self._downloader.params.get('geo_bypass', True) and not self._x_forwarded_for_ip and countries): - self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries)) + country_code = random.choice(countries) + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: self.report_warning( - 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' + % (self._x_forwarded_for_ip, country_code.upper())) return True return False From 159aaaa9d09ce5843ec843d6e10030e229709e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:46:58 +0700 Subject: [PATCH 443/586] [ChangeLog] Actualize --- ChangeLog | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2c90f791d..2b02994e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +version <unreleased> + +Core +* [extractor/common] Allow calling _initialize_geo_bypass from extractors + (#11970) ++ [adobepass] Add support for Time Warner Cable (#12191) ++ [travis] Run tests in parallel ++ [downloader/ism] Honor HTTP headers when downloading fragments ++ [downloader/dash] Honor HTTP headers when downloading fragments ++ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4 ++ Add option --geo-bypass-country for explicit geo bypass on behalf of + specified country ++ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass ++ Add experimental geo restriction bypass mechanism based on faking + X-Forwarded-For HTTP header ++ [utils] Introduce GeoRestrictedError for geo restricted videos ++ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions + +Extractors ++ [ninecninemedia] Use geo bypass mechanism +* [spankbang] Make uploader optional (#12193) ++ [iprima] Improve geo restriction detection and disable geo bypass +* [iprima] Modernize +* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2 ++ [prosiebensat1] Throw ExtractionError on unsupported page type (#12180) +* [nrk] Update _API_HOST and relax _VALID_URL ++ [tv4] Bypass geo restriction and improve detection +* [tv4] Switch to hls3 protocol (#12177) ++ [viki] Improve geo restriction detection ++ [vgtv] Improve geo restriction detection ++ [srgssr] Improve geo restriction detection ++ [vbox7] Improve geo restriction detection and use geo bypass mechanism ++ [svt] Improve geo restriction detection and use geo bypass mechanism ++ [pbs] Improve geo restriction detection and use geo bypass mechanism ++ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism ++ [nrk] Improve geo restriction detection and use geo bypass mechanism ++ [itv] Improve geo restriction detection and use geo bypass mechanism ++ [go] Improve geo restriction detection and use geo bypass mechanism ++ [dramafever] Improve geo restriction detection and use geo bypass mechanism +* [brightcove:legacy] Restrict videoPlayer value (#12040) ++ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679) ++ [thisav] Add support for HTML5 media (#11771) +* [metacafe] Bypass family filter (#10371) +* [viceland] Improve info extraction + + version 2017.02.17 Extractors From 8c6c88c7dae595d5cb7d5926eb00fbaf40103f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:48:24 +0700 Subject: [PATCH 444/586] release 2017.02.21 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 34 +++++++++++++++++++++++----------- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6f1361b32..8b6d14fa2 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.17 +[debug] youtube-dl version 2017.02.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 2b02994e0..a479d274f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.21 Core * [extractor/common] Allow calling _initialize_geo_bypass from extractors diff --git a/README.md b/README.md index c2a1a6b02..1eccfd287 100644 --- a/README.md +++ b/README.md @@ -99,11 +99,21 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --source-address IP Client-side IP address to bind to -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 + +## Geo Restriction: --geo-verification-proxy URL Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. + --geo-bypass Bypass geographic restriction via faking + X-Forwarded-For HTTP header (experimental) + --no-geo-bypass Do not bypass geographic restriction via + faking X-Forwarded-For HTTP header + (experimental) + --geo-bypass-country CODE Force bypass geographic restriction with + explicitly provided two-letter ISO 3166-2 + country code (experimental) ## Video Selection: --playlist-start NUMBER Playlist video to start at (default is 1) @@ -140,17 +150,19 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare - against a number, and & to require multiple - matches. Values which are not known are - excluded unless you put a question mark (?) - after the operator. For example, to only - match videos that have been liked more than - 100 times and disliked less than 50 times - (or the dislike functionality is not - available at the given service), but who - also have a description, use --match-filter - "like_count > 100 & dislike_count <? 50 & - description" . + against a number, key = 'LITERAL' (like + "uploader = 'Mike Smith'", also works with + !=) to match against a string literal and & + to require multiple matches. Values which + are not known are excluded unless you put a + question mark (?) after the operator. For + example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & + dislike_count <? 50 & description" . --no-playlist Download only the video, if the URL refers to a video and a playlist. --yes-playlist Download the playlist, if the URL refers to diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5a436e8f7..1eb9c2cdd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -804,6 +804,7 @@ - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** + - **TVN24** - **TVNoe** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 530e1856b..a85aebaa3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.17' +__version__ = '2017.02.21' From 3444844b04ae482edc5a353d9125b45ba47cd8d8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:47:14 +0100 Subject: [PATCH 445/586] [limelight] extract PlaylistService errors --- youtube_dl/extractor/limelight.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a3712665b..422be2528 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( determine_ext, float_or_none, int_or_none, unsmuggle_url, + ExtractorError, ) @@ -20,9 +22,17 @@ class LimelightBaseIE(InfoExtractor): headers = {} if referer: headers['Referer'] = referer - return self._download_json( - self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) + try: + return self._download_json( + self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), + item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission'] + if error == 'CountryDisabled': + self.raise_geo_restricted() + raise ExtractorError(error, expected=True) + raise def _call_api(self, organization_id, item_id, method): return self._download_json( @@ -213,6 +223,7 @@ class LimelightMediaIE(LimelightBaseIE): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) pc, mobile, metadata = self._extract( video_id, 'getPlaylistByMediaId', From 33dc173cdc84efbc1f794033480af0e4af459891 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:50:36 +0100 Subject: [PATCH 446/586] [telequebec] use geo bypass mechanism --- youtube_dl/extractor/telequebec.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 4043fcb92..82d73c31d 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + smuggle_url, +) class TeleQuebecIE(InfoExtractor): @@ -28,7 +31,7 @@ class TeleQuebecIE(InfoExtractor): return { '_type': 'url_transparent', 'id': media_id, - 'url': 'limelight:media:' + media_data['streamInfo']['sourceId'], + 'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}), 'title': media_data['title'], 'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), 'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), From 86466a8b6f313d0d8c80823e8e61215ac16046a5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:50:53 +0100 Subject: [PATCH 447/586] [cbc] use geo bypass mechanism --- youtube_dl/extractor/cbc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index cf678e7f8..4d93c7744 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -195,6 +195,7 @@ class CBCPlayerIE(InfoExtractor): class CBCWatchBaseIE(InfoExtractor): _device_id = None _device_token = None + _GEO_COUNTRIES = ['CA'] _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' _NS_MAP = { 'media': 'http://search.yahoo.com/mrss/', From 7345d6d465c4889ae06672a11f8b6e491b0b7fe4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:51:40 +0100 Subject: [PATCH 448/586] [tfo] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/tfo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py index 6f1eeac57..0e2370cd8 100644 --- a/youtube_dl/extractor/tfo.py +++ b/youtube_dl/extractor/tfo.py @@ -8,10 +8,12 @@ from ..utils import ( HEADRequest, ExtractorError, int_or_none, + clean_html, ) class TFOIE(InfoExtractor): + _GEO_COUNTRIES = ['CA'] _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)' _TEST = { 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', @@ -36,7 +38,9 @@ class TFOIE(InfoExtractor): 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value, }) if infos.get('success') == 0: - raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True) + if infos.get('code') == 'ErrGeoBlocked': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True) video_data = infos['data'] return { From fc320a40d97ed0c439a8d2ace9cfad94a74ff635 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 18:14:55 +0100 Subject: [PATCH 449/586] Revert "[cbc] use geo bypass mechanism" This reverts commit 86466a8b6f313d0d8c80823e8e61215ac16046a5. --- youtube_dl/extractor/cbc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 4d93c7744..cf678e7f8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -195,7 +195,6 @@ class CBCPlayerIE(InfoExtractor): class CBCWatchBaseIE(InfoExtractor): _device_id = None _device_token = None - _GEO_COUNTRIES = ['CA'] _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' _NS_MAP = { 'media': 'http://search.yahoo.com/mrss/', From 31615ac279ac60fbd3925995de2eed69a4b3976a Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: [PATCH 450/586] [viewster] use geo verifcation headers --- youtube_dl/extractor/viewster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 52dd95e2f..fcf0cb100 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -86,7 +86,9 @@ class ViewsterIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) # Get 'api_token' cookie - self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id) + self._request_webpage( + HEADRequest('http://www.viewster.com/'), + video_id, headers=self.geo_verification_headers()) cookies = self._get_cookies('http://www.viewster.com/') self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value) From abd29a2cedaab096e920ed5be9c480921cfacf0a Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 19:37:26 +0100 Subject: [PATCH 451/586] [crackle] use geo bypass mechanism --- youtube_dl/extractor/crackle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 377fb45e9..f919ed208 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -6,6 +6,7 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): + _GEO_COUNTRIES = ['US'] _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', From 139d8ac106dc173d27cf20361b649c0dbc5f9b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 01:50:34 +0700 Subject: [PATCH 452/586] [setup] Add python 3.6 classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index ce6dd1870..b8c3e0925 100644 --- a/setup.py +++ b/setup.py @@ -130,6 +130,7 @@ setup( 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From 0d427c83047778d2984df5594b96f119ec7f8771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 01:51:27 +0700 Subject: [PATCH 453/586] [setup] Actualize maintainer info --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b8c3e0925..67d6633ed 100644 --- a/setup.py +++ b/setup.py @@ -107,8 +107,8 @@ setup( url='https://github.com/rg3/youtube-dl', author='Ricardo Garcia', author_email='ytdl@yt-dl.org', - maintainer='Philipp Hagemeister', - maintainer_email='phihag@phihag.de', + maintainer='Sergey M.', + maintainer_email='dstftw@gmail.com', packages=[ 'youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader', From 71e9577b94a4792a330e9bdab4674c6893ea5bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 21:19:52 +0700 Subject: [PATCH 454/586] [24video] Add support for 24video.tube (closes #12217) --- youtube_dl/extractor/twentyfourvideo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index a983ebf05..f3541b654 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,7 +12,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -37,6 +37,9 @@ class TwentyFourVideoIE(InfoExtractor): }, { 'url': 'http://www.24video.me/video/view/1044982', 'only_matching': True, + }, { + 'url': 'http://www.24video.tube/video/view/2363750', + 'only_matching': True, }] def _real_extract(self, url): From a86e4160880e0747c5a0b774fe8d3b0cbb2990ae Mon Sep 17 00:00:00 2001 From: Tobias Florek <me@ibotty.net> Date: Wed, 22 Feb 2017 15:28:09 +0100 Subject: [PATCH 455/586] [vidzi] Add support for vidzi.cc --- youtube_dl/extractor/vidzi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 1f1828fce..b642caf22 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -13,7 +13,7 @@ from ..utils import ( class VidziIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://vidzi.tv/cghql9yq6emu.html', 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', From 58ad6995cd3974eb9cff64f3fa8d34ec68cb6a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 21:29:53 +0700 Subject: [PATCH 456/586] [vidzi] Add test for #12213 --- youtube_dl/extractor/vidzi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index b642caf22..d0556297e 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -29,6 +29,9 @@ class VidziIE(InfoExtractor): }, { 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', 'skip_download': True, + }, { + 'url': 'http://vidzi.cc/cghql9yq6emu.html', + 'skip_download': True, }] def _real_extract(self, url): From 527ef85fe9bc65ed676ab855ee386c7cce8716ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 21:49:30 +0700 Subject: [PATCH 457/586] [dailymotion] Make comment count optional (closes #12209) Not served anymore --- youtube_dl/extractor/dailymotion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 31bf5faf6..b312401dc 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -66,7 +66,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader_id': 'xijv66', 'age_limit': 0, 'view_count': int, - 'comment_count': int, } }, # Vevo video @@ -140,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): view_count = str_to_int(view_count_str) comment_count = int_or_none(self._search_regex( r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"', - webpage, 'comment count', fatal=False)) + webpage, 'comment count', default=None)) player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 From b5869560a44caaa06b317302425fb472169c2d28 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Feb 2017 00:08:45 +0800 Subject: [PATCH 458/586] [crunchyroll] Fix descriptions with double quotes (closes #12124) --- ChangeLog | 6 ++++++ youtube_dl/extractor/crunchyroll.py | 23 +++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index a479d274f..e57e7fece 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [crunchyroll] Fix descriptions with double quotes (#12124) + + version 2017.02.21 Core diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 109d1c5a8..d2b87442d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -123,7 +123,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { 'id': '645513', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', 'description': 'md5:2d17137920c64f2f49981a7797d275ef', 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', @@ -192,6 +192,21 @@ class CrunchyrollIE(CrunchyrollBaseIE): # geo-restricted (US), 18+ maturity wall, non-premium available 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617', 'only_matching': True, + }, { + # A description with double quotes + 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080', + 'info_dict': { + 'id': '535080', + 'ext': 'mp4', + 'title': '11eyes Episode 1 – Piros éjszaka - Red Night', + 'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".', + 'uploader': 'Marvelous AQL Inc.', + 'upload_date': '20091021', + }, + 'params': { + # Just test metadata extraction + 'skip_download': True, + }, }] _FORMAT_IDS = { @@ -362,9 +377,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>', webpage, 'video_title') video_title = re.sub(r' {2,}', ' ', video_title) - video_description = self._html_search_regex( - r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id, - webpage, 'description', default=None) + video_description = self._parse_json(self._html_search_regex( + r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, + webpage, 'description', default='{}'), video_id).get('description') if video_description: video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) video_upload_date = self._html_search_regex( From 63a29b6118d147404b5fff63d82f098c4bfa3ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 23:45:01 +0700 Subject: [PATCH 459/586] [ChangeLog] Actualize --- ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index e57e7fece..14db5ac13 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,15 @@ version <unreleased> Extractors * [crunchyroll] Fix descriptions with double quotes (#12124) +* [dailymotion] Make comment count optional (#12209) ++ [vidzi] Add support for vidzi.cc (#12213) ++ [24video] Add support for 24video.tube (#12217) ++ [crackle] Use geo bypass mechanism ++ [viewster] Use geo verification headers ++ [tfo] Improve geo restriction detection and use geo bypass mechanism ++ [telequebec] Use geo bypass mechanism ++ [limelight] Extract PlaylistService errors and improve geo restriction + detection version 2017.02.21 From 345b24538b24772c6c5917439e62c510437fce04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 23:50:42 +0700 Subject: [PATCH 460/586] release 2017.02.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8b6d14fa2..923f28276 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.21 +[debug] youtube-dl version 2017.02.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 14db5ac13..cff065171 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.22 Extractors * [crunchyroll] Fix descriptions with double quotes (#12124) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a85aebaa3..fce1b8558 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.21' +__version__ = '2017.02.22' From bc61c80c143d8faed3e264c2487204924acd1eb6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:47:06 +0100 Subject: [PATCH 461/586] [leeco] raise GeoRestrictedError and use geo bypass mechanism --- youtube_dl/extractor/leeco.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 4321f90c8..9eda956d2 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -30,7 +30,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html' - + _GEO_COUNTRIES = ['CN'] _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' _TESTS = [{ @@ -126,10 +126,9 @@ class LeIE(InfoExtractor): if playstatus['status'] == 0: flag = playstatus['flag'] if flag == 1: - msg = 'Country %s auth error' % playstatus['country'] + self.raise_geo_restricted() else: - msg = 'Generic error. flag = %d' % flag - raise ExtractorError(msg, expected=True) + raise ExtractorError('Generic error. flag = %d' % flag, expected=True) def _real_extract(self, url): media_id = self._match_id(url) From c59f7036101b5349b3b02a8bd700eff507012e3f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:49:35 +0100 Subject: [PATCH 462/586] [sohu] raise GeoRestrictedError --- youtube_dl/extractor/sohu.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 30760ca06..7da12cef8 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -108,12 +108,11 @@ class SohuIE(InfoExtractor): if vid_data['play'] != 1: if vid_data.get('status') == 12: raise ExtractorError( - 'Sohu said: There\'s something wrong in the video.', + '%s said: There\'s something wrong in the video.' % self.IE_NAME, expected=True) else: - raise ExtractorError( - 'Sohu said: The video is only licensed to users in Mainland China.', - expected=True) + self.raise_geo_restricted( + '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME) formats_json = {} for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): From 30eaa3a7023a3c03c62eb481f6415cb0599e0da5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:50:04 +0100 Subject: [PATCH 463/586] [mgtv] fix extraction --- youtube_dl/extractor/mgtv.py | 50 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 659ede8c2..d53d96aae 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -2,16 +2,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import int_or_none class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' IE_DESC = '芒果TV' _TESTS = [{ 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', - 'md5': '1bdadcf760a0b90946ca68ee9a2db41a', + 'md5': 'b1ffc0fc163152acf6beaa81832c9ee7', 'info_dict': { 'id': '3116640', 'ext': 'mp4', @@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, }, { - # no tbr extracted from stream_url - 'url': 'http://www.mgtv.com/v/1/1/f/3324755.html', + 'url': 'http://www.mgtv.com/b/301817/3826653.html', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) api_data = self._download_json( - 'http://v.api.mgtv.com/player/video', video_id, + 'http://pcweb.api.mgtv.com/player/video', video_id, query={'video_id': video_id}, headers=self.geo_verification_headers())['data'] info = api_data['info'] + title = info['title'].strip() + stream_domain = api_data['stream_domain'][0] formats = [] for idx, stream in enumerate(api_data['stream']): - stream_url = stream.get('url') - if not stream_url: + stream_path = stream.get('url') + if not stream_path: + continue + format_data = self._download_json( + stream_domain + stream_path, video_id, + note='Download video info for format #%d' % idx) + format_url = format_data.get('info') + if not format_url: continue tbr = int_or_none(self._search_regex( - r'(\d+)\.mp4', stream_url, 'tbr', default=None)) - - def extract_format(stream_url, format_id, idx, query={}): - format_info = self._download_json( - stream_url, video_id, - note='Download video info for format %s' % (format_id or '#%d' % idx), - query=query) - return { - 'format_id': format_id, - 'url': format_info['info'], - 'ext': 'mp4', - 'tbr': tbr, - } - - formats.append(extract_format( - stream_url, 'hls-%d' % tbr if tbr else None, idx * 2)) - formats.append(extract_format(stream_url.replace( - '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031})) + r'_(\d+)_mp4/', format_url, 'tbr', default=None)) + formats.append({ + 'format_id': compat_str(tbr or idx), + 'url': format_url, + 'ext': 'mp4', + 'tbr': tbr, + 'protocol': 'm3u8_native', + }) self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'].strip(), + 'title': title, 'formats': formats, 'description': info.get('desc'), 'duration': int_or_none(info.get('duration')), From 9e03aa75c779b79da79353ef1ecc4520ad06d6d0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:54:43 +0100 Subject: [PATCH 464/586] [crunchyroll] extract playlist entries ids --- youtube_dl/extractor/crunchyroll.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d2b87442d..a1fc6a756 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -534,11 +534,11 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>', webpage, 'title') episode_paths = re.findall( - r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"', + r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"', webpage) entries = [ - self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll') - for ep in episode_paths + self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id) + for ep_id, ep in episode_paths ] entries.reverse() From ada77fa544e185a8cd7c3e5d6374e0b6995557a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 18:02:04 +0700 Subject: [PATCH 465/586] [instagram] Add support for multi video posts (closes #12226) --- youtube_dl/extractor/instagram.py | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 98f408c18..c1921cbcf 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( get_element_by_attribute, int_or_none, @@ -50,6 +51,33 @@ class InstagramIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # multi video post + 'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/', + 'playlist': [{ + 'info_dict': { + 'id': 'BQ0dSaohpPW', + 'ext': 'mp4', + 'title': 'Video 1', + }, + }, { + 'info_dict': { + 'id': 'BQ0dTpOhuHT', + 'ext': 'mp4', + 'title': 'Video 2', + }, + }, { + 'info_dict': { + 'id': 'BQ0dT7RBFeF', + 'ext': 'mp4', + 'title': 'Video 3', + }, + }], + 'info_dict': { + 'id': 'BQ0eAlwhDrw', + 'title': 'Post by instagram', + 'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957', + }, }, { 'url': 'https://instagram.com/p/-Cmh1cukG2/', 'only_matching': True, @@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor): 'timestamp': int_or_none(comment.get('created_at')), } for comment in media.get( 'comments', {}).get('nodes', []) if comment.get('text')] + if not video_url: + edges = try_get( + media, lambda x: x['edge_sidecar_to_children']['edges'], + list) or [] + if edges: + entries = [] + for edge_num, edge in enumerate(edges, start=1): + node = try_get(edge, lambda x: x['node'], dict) + if not node: + continue + node_video_url = try_get(node, lambda x: x['video_url'], compat_str) + if not node_video_url: + continue + entries.append({ + 'id': node.get('shortcode') or node['id'], + 'title': 'Video %d' % edge_num, + 'url': node_video_url, + 'thumbnail': node.get('display_url'), + 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])), + 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])), + 'view_count': int_or_none(node.get('video_view_count')), + }) + return self.playlist_result( + entries, video_id, + 'Post by %s' % uploader_id if uploader_id else None, + description) if not video_url: video_url = self._og_search_video_url(webpage, secure=False) From d5fd9a3be305aa8fead8fb70aae64703afe49e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 18:44:04 +0700 Subject: [PATCH 466/586] [skylinewebcams] Add extractor (closes #12221) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/skylinewebcams.py | 42 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/skylinewebcams.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 55b4782d3..83a170fa7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -852,6 +852,7 @@ from .shared import ( from .showroomlive import ShowRoomLiveIE from .sina import SinaIE from .sixplay import SixPlayIE +from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py new file mode 100644 index 000000000..5b4aaac6f --- /dev/null +++ b/youtube_dl/extractor/skylinewebcams.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SkylineWebcamsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html' + _TEST = { + 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html', + 'info_dict': { + 'id': 'scalinata-piazza-di-spagna-barcaccia', + 'ext': 'mp4', + 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + stream_url = self._search_regex( + r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, + 'stream url', group='url') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + 'id': video_id, + 'url': stream_url, + 'ext': 'mp4', + 'title': self._live_title(title), + 'description': description, + 'is_live': True, + } From 0f3d41b44d84869e7f4e809692ce71567b3f7130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 19:48:54 +0700 Subject: [PATCH 467/586] [devscripts/run_tests] Exclude youtube lists tests from core build --- devscripts/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index c60807215..6ba26720d 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -1,6 +1,6 @@ #!/bin/bash -DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists" test_set="" multiprocess_args="" From 28572a1a0b27ba3ccedac5d8d093f925dfb7485f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Mar 2016 23:07:25 +0600 Subject: [PATCH 468/586] [compat] Add compat_numeric_types --- youtube_dl/compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 718902019..b257e2e81 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2760,6 +2760,10 @@ else: compat_kwargs = lambda kwargs: kwargs +compat_numeric_types = ((int, float, long, complex) if sys.version_info[0] < 3 + else (int, float, complex)) + + if sys.version_info < (2, 7): def compat_socket_create_connection(address, timeout, source_address=None): host, port = address @@ -2895,6 +2899,7 @@ __all__ = [ 'compat_input', 'compat_itertools_count', 'compat_kwargs', + 'compat_numeric_types', 'compat_ord', 'compat_os_name', 'compat_parse_qs', From d0d9ade4860fd44a07f5513d13b66233fdca0e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Mar 2016 03:52:42 +0600 Subject: [PATCH 469/586] [YoutubeDL] Add support for string formatting operations in output template --- test/test_YoutubeDL.py | 14 ++++++++++++++ youtube_dl/YoutubeDL.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2cfcf743a..8491a88bd 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -526,6 +526,7 @@ class TestYoutubeDL(unittest.TestCase): 'id': '1234', 'ext': 'mp4', 'width': None, + 'height': 1080, } def fname(templ): @@ -535,6 +536,19 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') # Replace missing fields with 'NA' self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') + self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') + self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') + self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4') + self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4') + self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') + self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') + self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') + self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') def test_format_note(self): ydl = YoutubeDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 68000dea2..bdaf06e62 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -33,6 +33,7 @@ from .compat import ( compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_numeric_types, compat_os_name, compat_str, compat_tokenize_tokenize, @@ -609,12 +610,45 @@ class YoutubeDL(object): compat_str(v), restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) - template_dict = dict((k, sanitize(k, v)) + template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) for k, v in template_dict.items() if v is not None and not isinstance(v, (list, tuple, dict))) template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + + NUMERIC_FIELDS = set(( + 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', + 'upload_year', 'upload_month', 'upload_day', + 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', + 'average_rating', 'comment_count', 'age_limit', + 'start_time', 'end_time', + 'chapter_number', 'season_number', 'episode_number', + )) + + # Missing numeric fields used together with integer presentation types + # in format specification will break the argument substitution since + # string 'NA' is returned for missing fields. We will patch output + # template for missing fields to meet string presentation type. + for numeric_field in NUMERIC_FIELDS: + if numeric_field not in template_dict: + # As of [1] format syntax is: + # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type + # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting + FORMAT_RE = r'''(?x) + (?<!%) + % + \({0}\) # mapping key + (?:[#0\-+ ]+)? # conversion flags (optional) + (?:\d+)? # minimum field width (optional) + (?:\.\d+)? # precision (optional) + [hlL]? # length modifier (optional) + [diouxXeEfFgGcrs%] # conversion type + ''' + outtmpl = re.sub( + FORMAT_RE.format(numeric_field), + r'%({0})s'.format(numeric_field), outtmpl) + tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 From 89db639dfe02b291bbf901973ca00d6e60fc1dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 22:01:09 +0700 Subject: [PATCH 470/586] [YoutubeDL] Rewrite outtmpl for playlist_index and autonumber for backward compatibility --- youtube_dl/YoutubeDL.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bdaf06e62..fc5e67828 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -594,10 +594,7 @@ class YoutubeDL(object): autonumber_size = self.params.get('autonumber_size') if autonumber_size is None: autonumber_size = 5 - autonumber_templ = '%0' + str(autonumber_size) + 'd' - template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads) - if template_dict.get('playlist_index') is not None: - template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) + template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads if template_dict.get('resolution') is None: if template_dict.get('width') and template_dict.get('height'): template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) @@ -617,6 +614,20 @@ class YoutubeDL(object): outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + # For fields playlist_index and autonumber convert all occurrences + # of %(field)s to %(field)0Nd for backward compatibility + field_size_compat_map = { + 'playlist_index': len(str(template_dict['n_entries'])), + 'autonumber': autonumber_size, + } + FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s' + mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl) + if mobj: + outtmpl = re.sub( + FIELD_SIZE_COMPAT_RE, + r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], + outtmpl) + NUMERIC_FIELDS = set(( 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 'upload_year', 'upload_month', 'upload_day', @@ -624,6 +635,7 @@ class YoutubeDL(object): 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', + 'playlist_index', )) # Missing numeric fields used together with integer presentation types From b3175982c31a61ff4184d666b0bdb6dd34213365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 22:01:57 +0700 Subject: [PATCH 471/586] [YoutubeDL] Add more numeric fields for NA substitution in outtmpl --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fc5e67828..56a8691eb 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -635,6 +635,7 @@ class YoutubeDL(object): 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', + 'track_number', 'disc_number', 'release_year', 'playlist_index', )) From fafc2bf5a92b8397148e47e0c9b46fb4d9212075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 22:11:16 +0700 Subject: [PATCH 472/586] [options] Deprecate --autonumber-size --- youtube_dl/options.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2c880d06a..5a11dddf9 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -680,7 +680,9 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', default=5, type=int, - help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)') + help='[deprecated; use output template with %(autonumber)0Nd, where N in the number of digits] ' + 'Specify the number of digits in %(autonumber)s when it is present ' + 'in output filename template or --auto-number option is given (default is %default)') filesystem.add_option( '--autonumber-start', dest='autonumber_start', metavar='NUMBER', default=1, type=int, From 050f143c1286ddafcb6966a0f679c5bbaceecca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 23:00:13 +0700 Subject: [PATCH 473/586] [README.md] Clarify sequence types in output template and document numeric string formatting operations --- README.md | 138 +++++++++++++++++++++++++++--------------------------- 1 file changed, 70 insertions(+), 68 deletions(-) diff --git a/README.md b/README.md index 1eccfd287..b4364ef61 100644 --- a/README.md +++ b/README.md @@ -486,87 +486,89 @@ The `-o` option allows users to indicate a template for the output file names. **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are: +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are: - - `id`: Video identifier - - `title`: Video title - - `url`: Video URL - - `ext`: Video filename extension - - `alt_title`: A secondary title of the video - - `display_id`: An alternative identifier for the video - - `uploader`: Full name of the video uploader - - `license`: License name the video is licensed under - - `creator`: The creator of the video - - `release_date`: The date (YYYYMMDD) when the video was released - - `timestamp`: UNIX timestamp of the moment the video became available - - `upload_date`: Video upload date (YYYYMMDD) - - `uploader_id`: Nickname or id of the video uploader - - `location`: Physical location where the video was filmed - - `duration`: Length of the video in seconds - - `view_count`: How many users have watched the video on the platform - - `like_count`: Number of positive ratings of the video - - `dislike_count`: Number of negative ratings of the video - - `repost_count`: Number of reposts of the video - - `average_rating`: Average rating give by users, the scale used depends on the webpage - - `comment_count`: Number of comments on the video - - `age_limit`: Age restriction for the video (years) - - `format`: A human-readable description of the format - - `format_id`: Format code specified by `--format` - - `format_note`: Additional info about the format - - `width`: Width of the video - - `height`: Height of the video - - `resolution`: Textual description of width and height - - `tbr`: Average bitrate of audio and video in KBit/s - - `abr`: Average audio bitrate in KBit/s - - `acodec`: Name of the audio codec in use - - `asr`: Audio sampling rate in Hertz - - `vbr`: Average video bitrate in KBit/s - - `fps`: Frame rate - - `vcodec`: Name of the video codec in use - - `container`: Name of the container format - - `filesize`: The number of bytes, if known in advance - - `filesize_approx`: An estimate for the number of bytes - - `protocol`: The protocol that will be used for the actual download - - `extractor`: Name of the extractor - - `extractor_key`: Key name of the extractor - - `epoch`: Unix epoch when creating the file - - `autonumber`: Five-digit number that will be increased with each download, starting at zero - - `playlist`: Name or id of the playlist that contains the video - - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist - - `playlist_id`: Playlist identifier - - `playlist_title`: Playlist title + - `id` (string): Video identifier + - `title` (string): Video title + - `url` (string): Video URL + - `ext` (string): Video filename extension + - `alt_title` (string): A secondary title of the video + - `display_id` (string): An alternative identifier for the video + - `uploader` (string): Full name of the video uploader + - `license` (string): License name the video is licensed under + - `creator` (string): The creator of the video + - `release_date` (string): The date (YYYYMMDD) when the video was released + - `timestamp` (numeric): UNIX timestamp of the moment the video became available + - `upload_date` (string): Video upload date (YYYYMMDD) + - `uploader_id` (string): Nickname or id of the video uploader + - `location` (string): Physical location where the video was filmed + - `duration` (numeric): Length of the video in seconds + - `view_count` (numeric): How many users have watched the video on the platform + - `like_count` (numeric): Number of positive ratings of the video + - `dislike_count` (numeric): Number of negative ratings of the video + - `repost_count` (numeric): Number of reposts of the video + - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage + - `comment_count` (numeric): Number of comments on the video + - `age_limit` (numeric): Age restriction for the video (years) + - `format` (string): A human-readable description of the format + - `format_id` (string): Format code specified by `--format` + - `format_note` (string): Additional info about the format + - `width` (numeric): Width of the video + - `height` (numeric): Height of the video + - `resolution` (string): Textual description of width and height + - `tbr` (numeric): Average bitrate of audio and video in KBit/s + - `abr` (numeric): Average audio bitrate in KBit/s + - `acodec` (string): Name of the audio codec in use + - `asr` (numeric): Audio sampling rate in Hertz + - `vbr` (numeric): Average video bitrate in KBit/s + - `fps` (numeric): Frame rate + - `vcodec` (string): Name of the video codec in use + - `container` (string): Name of the container format + - `filesize` (numeric): The number of bytes, if known in advance + - `filesize_approx` (numeric): An estimate for the number of bytes + - `protocol` (string): The protocol that will be used for the actual download + - `extractor` (string): Name of the extractor + - `extractor_key` (string): Key name of the extractor + - `epoch` (numeric): Unix epoch when creating the file + - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero + - `playlist` (string): Name or id of the playlist that contains the video + - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_id` (string): Playlist identifier + - `playlist_title` (string): Playlist title Available for the video that belongs to some logical chapter or section: - - `chapter`: Name or title of the chapter the video belongs to - - `chapter_number`: Number of the chapter the video belongs to - - `chapter_id`: Id of the chapter the video belongs to + - `chapter` (string): Name or title of the chapter the video belongs to + - `chapter_number` (numeric): Number of the chapter the video belongs to + - `chapter_id` (string): Id of the chapter the video belongs to Available for the video that is an episode of some series or programme: - - `series`: Title of the series or programme the video episode belongs to - - `season`: Title of the season the video episode belongs to - - `season_number`: Number of the season the video episode belongs to - - `season_id`: Id of the season the video episode belongs to - - `episode`: Title of the video episode - - `episode_number`: Number of the video episode within a season - - `episode_id`: Id of the video episode + - `series` (string): Title of the series or programme the video episode belongs to + - `season` (string): Title of the season the video episode belongs to + - `season_number` (numeric): Number of the season the video episode belongs to + - `season_id` (string): Id of the season the video episode belongs to + - `episode` (string): Title of the video episode + - `episode_number` (numeric): Number of the video episode within a season + - `episode_id` (string): Id of the video episode Available for the media that is a track or a part of a music album: - - `track`: Title of the track - - `track_number`: Number of the track within an album or a disc - - `track_id`: Id of the track - - `artist`: Artist(s) of the track - - `genre`: Genre(s) of the track - - `album`: Title of the album the track belongs to - - `album_type`: Type of the album - - `album_artist`: List of all artists appeared on the album - - `disc_number`: Number of the disc or other physical medium the track belongs to - - `release_year`: Year (YYYY) when the album was released + - `track` (string): Title of the track + - `track_number` (numeric): Number of the track within an album or a disc + - `track_id` (string): Id of the track + - `artist` (string): Artist(s) of the track + - `genre` (string): Genre(s) of the track + - `album` (string): Title of the album the track belongs to + - `album_type` (string): Type of the album + - `album_artist` (string): List of all artists appeared on the album + - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to + - `release_year` (numeric): Year (YYYY) when the album was released Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`. For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory. +For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. + Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. To use percent literals in an output template use `%%`. To output to stdout use `-o -`. From 8e1409fd805b3b5e3731da66a2101494643a06ea Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 18:42:06 +0100 Subject: [PATCH 474/586] [go] sign all uplynk urls and use geo bypass only for free videos(closes #12087)(closes #12210) --- youtube_dl/extractor/go.py | 81 ++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index b205bfc7c..21ed846b2 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -37,7 +37,6 @@ class GoIE(AdobePassIE): } } _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) - _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -79,44 +78,60 @@ class GoIE(AdobePassIE): ext = determine_ext(asset_url) if ext == 'm3u8': video_type = video_data.get('type') - if video_type == 'lf': - data = { - 'video_id': video_data['id'], - 'video_type': video_type, - 'brand': brand, - 'device': '001', - } - if video_data.get('accesslevel') == '1': - requestor_id = site_info['requestor_id'] - resource = self._get_mvpd_resource( - requestor_id, title, video_id, None) - auth = self._extract_mvpd_auth( - url, video_id, requestor_id, resource) - data.update({ - 'token': auth, - 'token_type': 'ap', - 'adobe_requestor_id': requestor_id, - }) - entitlement = self._download_json( - 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', - video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) - errors = entitlement.get('errors', {}).get('errors', []) - if errors: - for error in errors: - if error.get('code') == 1002: - self.raise_geo_restricted( - error['message'], countries=self._GEO_COUNTRIES) - error_message = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) - asset_url += '?' + entitlement['uplynkData']['sessionKey'] + data = { + 'video_id': video_data['id'], + 'video_type': video_type, + 'brand': brand, + 'device': '001', + } + if video_data.get('accesslevel') == '1': + requestor_id = site_info['requestor_id'] + resource = self._get_mvpd_resource( + requestor_id, title, video_id, None) + auth = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) + data.update({ + 'token': auth, + 'token_type': 'ap', + 'adobe_requestor_id': requestor_id, + }) + else: + self._initialize_geo_bypass(['US']) + entitlement = self._download_json( + 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) + errors = entitlement.get('errors', {}).get('errors', []) + if errors: + for error in errors: + if error.get('code') == 1002: + self.raise_geo_restricted( + error['message'], countries=['US']) + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + asset_url += '?' + entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) else: - formats.append({ + f = { 'format_id': format_id, 'url': asset_url, 'ext': ext, - }) + } + if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url): + f.update({ + 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE', + 'preference': 1, + }) + else: + mobj = re.search(r'/(\d+)x(\d+)/', asset_url) + if mobj: + height = int(mobj.group(2)) + f.update({ + 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height, + 'width': int(mobj.group(1)), + 'height': height, + }) + formats.append(f) self._sort_formats(formats) subtitles = {} From 19f3821821ada01fbf9b466402bc1d0366b3edb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 02:09:13 +0700 Subject: [PATCH 475/586] [devscripts/make_lazy_extractors] Fix making lazy extractors on python 3 under Windows --- devscripts/make_lazy_extractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 19114d30d..0a1762dbc 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals, print_function from inspect import getsource +import io import os from os.path import dirname as dirn import sys @@ -95,5 +96,5 @@ module_contents.append( module_src = '\n'.join(module_contents) + '\n' -with open(lazy_extractors_filename, 'wt') as f: +with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: f.write(module_src) From 0f57447de790fb2434cdd80d819876859ac4fcc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 04:56:58 +0700 Subject: [PATCH 476/586] [postprocessor/ffmpeg] Add mising space (closes #12232) --- youtube_dl/postprocessor/ffmpeg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 1881f4849..96ddb3b36 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -536,8 +536,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): ext = sub['ext'] if ext == new_ext: self._downloader.to_screen( - '[ffmpeg] Subtitle file for %s is already in the requested' - 'format' % new_ext) + '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) continue old_file = subtitles_filename(filename, lang, ext) sub_filenames.append(old_file) From f2980fddeb587f113afe15cc3ecf4bfc3911ca67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 05:01:31 +0700 Subject: [PATCH 477/586] [lynda:course] Add webpage extraction fallback (closes #12238) --- youtube_dl/extractor/lynda.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index da94eab56..d2f75296a 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE): course_path = mobj.group('coursepath') course_id = mobj.group('courseid') + item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path + course = self._download_json( 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, - course_id, 'Downloading course JSON') + course_id, 'Downloading course JSON', fatal=False) + + if not course: + webpage = self._download_webpage(url, course_id) + entries = [ + self.url_result( + item_template % video_id, ie=LyndaIE.ie_key(), + video_id=video_id) + for video_id in re.findall( + r'data-video-id=["\'](\d+)', webpage)] + return self.playlist_result( + entries, course_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) if course.get('Status') == 'NotFound': raise ExtractorError( @@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE): if video_id: entries.append({ '_type': 'url_transparent', - 'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'url': item_template % video_id, 'ie_key': LyndaIE.ie_key(), 'chapter': chapter.get('Title'), 'chapter_number': int_or_none(chapter.get('ChapterIndex')), From be5df5ee311b3ad027f5d59fe077969babd0aa53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 06:04:27 +0700 Subject: [PATCH 478/586] Suppress help for all deprecated options and print warning when used --- youtube_dl/YoutubeDL.py | 14 ++++++++++++-- youtube_dl/__init__.py | 3 +++ youtube_dl/options.py | 12 +++++------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 56a8691eb..f7254560c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -328,11 +328,21 @@ class YoutubeDL(object): self.params.update(params) self.cache = Cache(self) - if self.params.get('cn_verification_proxy') is not None: - self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.') + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning( + '%s is deprecated. Use %s instead.' % (option, suggestion)) + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): if self.params.get('geo_verification_proxy') is None: self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + if params.get('bidi_workaround', False): try: import pty diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f91d29a7b..0c401baa6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -416,6 +416,9 @@ def _real_main(argv=None): 'config_location': opts.config_location, 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, + # just for deprecation check + 'autonumber': opts.autonumber if opts.autonumber is True else None, + 'usetitle': opts.usetitle if opts.usetitle is True else None, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5a11dddf9..8b51d3c6f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -679,10 +679,8 @@ def parseOpts(overrideArguments=None): help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info')) filesystem.add_option( '--autonumber-size', - dest='autonumber_size', metavar='NUMBER', default=5, type=int, - help='[deprecated; use output template with %(autonumber)0Nd, where N in the number of digits] ' - 'Specify the number of digits in %(autonumber)s when it is present ' - 'in output filename template or --auto-number option is given (default is %default)') + dest='autonumber_size', metavar='NUMBER', type=int, + help=optparse.SUPPRESS_HELP) filesystem.add_option( '--autonumber-start', dest='autonumber_start', metavar='NUMBER', default=1, type=int, @@ -694,15 +692,15 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, - help='[deprecated] Use title in file name (default)') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '-l', '--literal', default=False, action='store_true', dest='usetitle', - help='[deprecated] Alias of --title') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '-w', '--no-overwrites', action='store_true', dest='nooverwrites', default=False, From f2f7961820155b6db4a70f083f011cd014c9b51c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 06:07:41 +0700 Subject: [PATCH 479/586] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index cff065171..6f63723fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version <unreleased> + +Core +* [options] Hide deprecated options from --help +* [options] Deprecate --autonumber-size ++ [YoutubeDL] Add support for string formatting operations in output template + (#5185, #5748, #6841, #9929, #9966 #9978, #12189) + +Extractors ++ [lynda:course] Add webpage extraction fallback (#12238) +* [go] Sign all uplynk URLs and use geo bypass only for free videos + (#12087, #12210) ++ [skylinewebcams] Add support for skylinewebcams.com (#12221) ++ [instagram] Add support for multi video posts (#12226) ++ [crunchyroll] Extract playlist entries ids +* [mgtv] Fix extraction ++ [sohu] Raise GeoRestrictedError ++ [leeco] Raise GeoRestrictedError and use geo bypass mechanism + + version 2017.02.22 Extractors From 6b097cff278c93de0665bf681729d75121a98eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 06:09:15 +0700 Subject: [PATCH 480/586] release 2017.02.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 10 ---------- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 6 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 923f28276..743b796ac 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.22 +[debug] youtube-dl version 2017.02.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6f63723fc..6034c9ea4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.24 Core * [options] Hide deprecated options from --help diff --git a/README.md b/README.md index b4364ef61..0fc5984dc 100644 --- a/README.md +++ b/README.md @@ -217,21 +217,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --id Use only video ID in file name -o, --output TEMPLATE Output filename template, see the "OUTPUT TEMPLATE" for all the info - --autonumber-size NUMBER Specify the number of digits in - %(autonumber)s when it is present in output - filename template or --auto-number option - is given (default is 5) --autonumber-start NUMBER Specify the start value for %(autonumber)s (default is 1) --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames - -A, --auto-number [deprecated; use -o - "%(autonumber)s-%(title)s.%(ext)s" ] Number - downloaded files starting from 00000 - -t, --title [deprecated] Use title in file name - (default) - -l, --literal [deprecated] Alias of --title -w, --no-overwrites Do not overwrite files -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1eb9c2cdd..f97397331 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -680,6 +680,7 @@ - **Shared**: shared.sx - **ShowRoomLive** - **Sina** + - **SkylineWebcams** - **skynewsarabia:article** - **skynewsarabia:video** - **SkySports** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fce1b8558..129447e10 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.22' +__version__ = '2017.02.24' From 42dcdbe11cd738e6b196bc9c14b746a71d61de5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 10:52:41 +0700 Subject: [PATCH 481/586] [ivi] Raise GeoRestrictedError --- youtube_dl/extractor/ivi.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 3d3c15024..cb51cef2d 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -16,6 +16,8 @@ class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['RU'] _TESTS = [ # Single movie @@ -91,7 +93,11 @@ class IviIE(InfoExtractor): if 'error' in video_json: error = video_json['error'] - if error['origin'] == 'NoRedisValidData': + origin = error['origin'] + if origin == 'NotAllowedForLocation': + self.raise_geo_restricted( + msg=error['message'], countries=self._GEO_COUNTRIES) + elif origin == 'NoRedisValidData': raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError( 'Unable to download video %s: %s' % (video_id, error['message']), From 39e7277ed16c1647d636c766d57870121f5f2d68 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 24 Feb 2017 11:21:13 +0100 Subject: [PATCH 482/586] [openload] fix extraction(closes #10408) --- youtube_dl/extractor/openload.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 10896c442..fc7ff43a6 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -72,16 +72,21 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>', + '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_char = int(ol_id[0]) urlcode = [] - num = 2 + num = 1 while num < len(ol_id): - key = int(float(ol_id[num + 3:][:2])) - urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) + i = ord(ol_id[num]) + key = 0 + if i <= 90: + key = i - 65 + elif i >= 97: + key = 25 + i - 97 + urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char))) num += 5 video_url = 'https://openload.co/stream/' + ''.join( From 68f17a9c2df07828d364421e59ede26981aa7756 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 24 Feb 2017 12:27:56 +0100 Subject: [PATCH 483/586] [tubitv] use geo bypass mechanism --- youtube_dl/extractor/tubitv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 3a37df2e8..c44018aec 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -16,6 +16,7 @@ class TubiTvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' + _GEO_COUNTRIES = ['US'] _TEST = { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', From 51ed496307a8a98134f25e8128c317663ccd4355 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb <thomaschristlieb@hotmail.com> Date: Fri, 24 Feb 2017 15:08:45 +0100 Subject: [PATCH 484/586] [thescene] Fix extraction (closes #12235) --- youtube_dl/extractor/thescene.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index ce1326c03..6c16d66ed 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -32,7 +32,7 @@ class TheSceneIE(InfoExtractor): player = self._download_webpage(player_url, display_id) info = self._parse_json( self._search_regex( - r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), + r'(?m)video\s*:\s*({.+?}),$', player, 'info json'), display_id) qualities_order = qualities(('low', 'high')) @@ -40,7 +40,7 @@ class TheSceneIE(InfoExtractor): 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), 'url': f['src'], 'quality': qualities_order(f['quality']), - } for f in info['sources'][0]] + } for f in info['sources']] self._sort_formats(formats) return { From 441d7a32e563b3985c58af5ab80d76ad943f0c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:22:29 +0700 Subject: [PATCH 485/586] [thescene] Extract more metadata --- youtube_dl/extractor/thescene.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 6c16d66ed..b8504f0eb 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urlparse -from ..utils import qualities +from ..utils import ( + int_or_none, + qualities, +) class TheSceneIE(InfoExtractor): @@ -16,6 +19,11 @@ class TheSceneIE(InfoExtractor): 'ext': 'mp4', 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', + 'duration': 127, + 'series': 'Style.com Fashion Shows', + 'season': 'Ready To Wear Spring 2013', + 'tags': list, + 'categories': list, }, } @@ -35,6 +43,9 @@ class TheSceneIE(InfoExtractor): r'(?m)video\s*:\s*({.+?}),$', player, 'info json'), display_id) + video_id = info['id'] + title = info['title'] + qualities_order = qualities(('low', 'high')) formats = [{ 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), @@ -44,9 +55,14 @@ class TheSceneIE(InfoExtractor): self._sort_formats(formats) return { - 'id': info['id'], + 'id': video_id, 'display_id': display_id, - 'title': info['title'], + 'title': title, 'formats': formats, 'thumbnail': info.get('poster_frame'), + 'duration': int_or_none(info.get('duration')), + 'series': info.get('series_title'), + 'season': info.get('season_title'), + 'tags': info.get('tags'), + 'categories': info.get('categories'), } From f3bc281239bafa971195eefd30773d152bfdb10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:48:34 +0700 Subject: [PATCH 486/586] [noco] Swtich login URL to https (closes #12246) --- youtube_dl/extractor/noco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 70ff2ab36..fc0624dd0 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -23,7 +23,7 @@ from ..utils import ( class NocoIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' - _LOGIN_URL = 'http://noco.tv/do.php' + _LOGIN_URL = 'https://noco.tv/do.php' _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' _SUB_LANG_TEMPLATE = '&sub_lang=%s' _NETRC_MACHINE = 'noco' From ad3033037ce6b6809c3d06f2074d36691ba10cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:50:13 +0700 Subject: [PATCH 487/586] [noco] Modernize --- youtube_dl/extractor/noco.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index fc0624dd0..8b83e1f76 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -69,16 +69,17 @@ class NocoIE(InfoExtractor): if username is None: return - login_form = { - 'a': 'login', - 'cookie': '1', - 'username': username, - 'password': password, - } - request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') - - login = self._download_json(request, None, 'Logging in as %s' % username) + login = self._download_json( + self._LOGIN_URL, None, 'Logging in as %s' % username, + data=urlencode_postdata({ + 'a': 'login', + 'cookie': '1', + 'username': username, + 'password': password, + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + }) if 'erreur' in login: raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) From 5d3a51e1b939ada71b3ec6ae7e004ccb6e0861c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:57:39 +0700 Subject: [PATCH 488/586] [ChangeLog] Actualize --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6034c9ea4..0225af42a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Extractors +* [noco] Modernize +* [noco] Switch login URL to https (#12246) ++ [thescene] Extract more metadata +* [thescene] Fix extraction (#12235) ++ [tubitv] Use geo bypass mechanism +* [openload] Fix extraction (#10408) ++ [ivi] Raise GeoRestrictedError + + version 2017.02.24 Core From 204efc850905a5b78c86f884b94210644784b9c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:59:39 +0700 Subject: [PATCH 489/586] release 2017.02.24.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 743b796ac..564108122 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.24 +[debug] youtube-dl version 2017.02.24.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0225af42a..add8a6758 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.24.1 Extractors * [noco] Modernize diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 129447e10..fe7462eac 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.24' +__version__ = '2017.02.24.1' From 231bcd0b6b7737e6b0484ce8aaa4a14de442a1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 02:51:53 +0700 Subject: [PATCH 490/586] [amcnetworks] Relax _VALID_URL (#12127) --- youtube_dl/extractor/amcnetworks.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index b71d1a093..3a0ec6776 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -10,7 +10,7 @@ from ..utils import ( class AMCNetworksIE(ThePlatformIE): - _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'md5': '', @@ -44,6 +44,12 @@ class AMCNetworksIE(ThePlatformIE): }, { 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3', + 'only_matching': True, }] def _real_extract(self, url): From eeb0a9568442a4dbbf3478579abe2696fbe890e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Feb 2017 18:40:05 +0800 Subject: [PATCH 491/586] [extractor/common] Add 'preference' to _parse_html5_media_entries Some websites, like NJPWorld, put different qualities on different player pages. --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4252d6825..eb3c091aa 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2010,7 +2010,7 @@ class InfoExtractor(object): }) return formats - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -2032,7 +2032,8 @@ class InfoExtractor(object): is_plain_url = False formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', - entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) + entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, + preference=preference) elif ext == 'mpd': is_plain_url = False formats = self._extract_mpd_formats( From db182c63fb4a58974e425a56d235131fd9efc531 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Feb 2017 18:44:39 +0800 Subject: [PATCH 492/586] [njpwworld] Add new extractor (closes #11561) --- ChangeLog | 6 +++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/njpwworld.py | 83 ++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/njpwworld.py diff --git a/ChangeLog b/ChangeLog index add8a6758..e0e1f52d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [njpwworld] Add new extractor (#11561) + + version 2017.02.24.1 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 83a170fa7..703bc5d34 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -637,6 +637,7 @@ from .ninecninemedia import ( from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE +from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE from .noco import NocoIE from .normalboots import NormalbootsIE diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py new file mode 100644 index 000000000..f5e3f6815 --- /dev/null +++ b/youtube_dl/extractor/njpwworld.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + get_element_by_class, + urlencode_postdata, +) + + +class NJPWWorldIE(InfoExtractor): + _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)' + IE_DESC = '新日本プロレスワールド' + _NETRC_MACHINE = 'njpwworld' + + _TEST = { + 'url': 'http://njpwworld.com/p/s_series_00155_1_9/', + 'info_dict': { + 'id': 's_series_00155_1_9', + 'ext': 'mp4', + 'title': '第9試合 ランディ・サベージ vs リック・スタイナー', + 'tags': list, + }, + 'params': { + 'skip_download': True, # AES-encrypted m3u8 + }, + 'skip': 'Requires login', + } + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + # No authentication to be performed + if not username: + return True + + webpage, urlh = self._download_webpage_handle( + 'https://njpwworld.com/auth/login', None, + note='Logging in', errnote='Unable to login', + data=urlencode_postdata({'login_id': username, 'pw': password})) + # /auth/login will return 302 for successful logins + if urlh.geturl() == 'https://njpwworld.com/auth/login': + self.report_warning('unable to login') + return False + + return True + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = [] + for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage): + player_url = compat_urlparse.urljoin(url, player_url) + + player_page = self._download_webpage( + player_url, video_id, note='Downloading player page') + + entries = self._parse_html5_media_entries( + player_url, player_page, video_id, m3u8_id='hls-%s' % kind, + m3u8_entry_protocol='m3u8_native', + preference=2 if 'hq' in kind else 1) + formats.extend(entries[0]['formats']) + + self._sort_formats(formats) + + post_content = get_element_by_class('post-content', webpage) + tags = re.findall( + r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content + ) if post_content else None + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'tags': tags, + } From 831217291ac05ad75ef16fd6d9985e255489c1e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 19:44:31 +0700 Subject: [PATCH 493/586] [compat] Use try except for compat_numeric_types --- youtube_dl/compat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b257e2e81..0c119e417 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2760,8 +2760,10 @@ else: compat_kwargs = lambda kwargs: kwargs -compat_numeric_types = ((int, float, long, complex) if sys.version_info[0] < 3 - else (int, float, complex)) +try: + compat_numeric_types = (int, float, long, complex) +except NameError: # Python 3 + compat_numeric_types = (int, float, complex) if sys.version_info < (2, 7): From 922ab7840b01bd108887849063572cffa855cdc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 20:16:40 +0700 Subject: [PATCH 494/586] [etonline] Add extractor (closes #12236) --- youtube_dl/extractor/etonline.py | 39 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/etonline.py diff --git a/youtube_dl/extractor/etonline.py b/youtube_dl/extractor/etonline.py new file mode 100644 index 000000000..17d7cfec6 --- /dev/null +++ b/youtube_dl/extractor/etonline.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ETOnlineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/', + 'info_dict': { + 'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale', + 'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6', + 'description': 'md5:8b94484063f463cca709617c79618ccd', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s' + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id) + for video_id in re.findall( + r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)] + + return self.playlist_result( + entries, playlist_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 703bc5d34..9f5aaf1d8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -288,6 +288,7 @@ from .espn import ( ESPNArticleIE, ) from .esri import EsriVideoIE +from .etonline import ETOnlineIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE from .expotv import ExpoTVIE From 103f8c8d36170d5cb489420db0e8fe383b1f93dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 20:38:21 +0700 Subject: [PATCH 495/586] [xhamster] Capture and output videoClosed error (#12263) --- youtube_dl/extractor/xhamster.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 36a8c9840..7b6703714 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( dict_get, + ExtractorError, int_or_none, parse_duration, unified_strdate, @@ -57,6 +58,10 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'only_matching': True, + }, { + # This video is visible for marcoalfa123456's friends only + 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -78,6 +83,12 @@ class XHamsterIE(InfoExtractor): mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo) webpage = self._download_webpage(mrss_url, video_id) + error = self._html_search_regex( + r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + title = self._html_search_regex( [r'<h1[^>]*>([^<]+)</h1>', r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"', From d374d943f3d9eca4b2052cfc830905d299a0688a Mon Sep 17 00:00:00 2001 From: Pratyush Singh <singh.pratyush96@gmail.com> Date: Sat, 24 Dec 2016 21:05:41 +0530 Subject: [PATCH 496/586] [downloader/common] Limit displaying 2 digits after decimal point in sleep interval message --- youtube_dl/downloader/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 3dc144b4e..2c4470a95 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -347,7 +347,10 @@ class FileDownloader(object): if min_sleep_interval: max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) time.sleep(sleep_interval) return self.real_download(filename, info_dict) From 5fc8d893612285ddba082f381006337573b36afa Mon Sep 17 00:00:00 2001 From: Ricardo Constantino <wiiaboo@gmail.com> Date: Sat, 18 Feb 2017 17:42:31 +0000 Subject: [PATCH 497/586] [freshlive] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/freshlive.py | 64 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/freshlive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f5aaf1d8..860b8f422 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -339,6 +339,7 @@ from .francetv import ( ) from .freesound import FreesoundIE from .freespeech import FreespeechIE +from .freshlive import FreshliveIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py new file mode 100644 index 000000000..113f8f4fa --- /dev/null +++ b/youtube_dl/extractor/freshlive.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + parse_iso8601 +) + +class FreshliveIE(InfoExtractor): + _VALID_URL = r'https?://freshlive\.tv/(?P<streamer>[^/]+)/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://freshlive.tv/satotv/74712', + 'md5': '224f50d268b6b9f94e4198deccd55d6d', + 'info_dict': { + 'description': 'テスト', + 'duration': 1511, + 'id': '74712', + 'ext': 'mp4', + 'timestamp': 1483621764, + 'title': 'テスト', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20170105', + 'uploader': 'サトTV', + 'uploader_id': 'satotv', + 'view_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + options = self._parse_json( + self._search_regex( + r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>', + webpage, 'initial context'), + video_id) + + programs = options['context']['dispatcher']['stores']['ProgramStore']['programs'] + info = programs.get(video_id, {}) + + video_url = info.get('liveStreamUrl') or info.get('archiveStreamUrl') + if not video_url: + raise ExtractorError('%s not a valid broadcast ID' % video_id, expected=True) + + formats = self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'formats': formats, + 'title': info.get('title'), + 'description': info.get('description'), + 'duration': int_or_none(info.get('airTime')), + 'is_live': int_or_none(info.get('airTime')) == None, + 'thumbnail': info.get('thumbnailUrl'), + 'uploader': info.get('channel', {}).get('title'), + 'uploader_id': info.get('channel', {}).get('code'), + 'uploader_url': info.get('channel', {}).get('permalink'), + 'timestamp': parse_iso8601(info.get('startAt')), + 'view_count': int_or_none(info.get('viewCount')), + } \ No newline at end of file From e498758b9cfa983d05b10a185ecdef480a93cf30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 22:56:42 +0700 Subject: [PATCH 498/586] [freshlive] Fix issues and improve (closes #12175) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/freshlive.py | 64 ++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 860b8f422..58139ee4e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -339,7 +339,7 @@ from .francetv import ( ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .freshlive import FreshliveIE +from .freshlive import FreshLiveIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py index 113f8f4fa..a90f9156c 100644 --- a/youtube_dl/extractor/freshlive.py +++ b/youtube_dl/extractor/freshlive.py @@ -2,34 +2,40 @@ from __future__ import unicode_literals from .common import InfoExtractor - +from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, - parse_iso8601 + try_get, + unified_timestamp, ) -class FreshliveIE(InfoExtractor): - _VALID_URL = r'https?://freshlive\.tv/(?P<streamer>[^/]+)/(?P<id>[0-9]+)' + +class FreshLiveIE(InfoExtractor): + _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'https://freshlive.tv/satotv/74712', - 'md5': '224f50d268b6b9f94e4198deccd55d6d', + 'md5': '9f0cf5516979c4454ce982df3d97f352', 'info_dict': { - 'description': 'テスト', - 'duration': 1511, 'id': '74712', 'ext': 'mp4', - 'timestamp': 1483621764, 'title': 'テスト', + 'description': 'テスト', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1511, + 'timestamp': 1483619655, 'upload_date': '20170105', 'uploader': 'サトTV', 'uploader_id': 'satotv', 'view_count': int, + 'comment_count': int, + 'is_live': False, } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) options = self._parse_json( @@ -38,27 +44,41 @@ class FreshliveIE(InfoExtractor): webpage, 'initial context'), video_id) - programs = options['context']['dispatcher']['stores']['ProgramStore']['programs'] - info = programs.get(video_id, {}) + info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id] - video_url = info.get('liveStreamUrl') or info.get('archiveStreamUrl') - if not video_url: - raise ExtractorError('%s not a valid broadcast ID' % video_id, expected=True) + title = info['title'] + + if info.get('status') == 'upcoming': + raise ExtractorError('Stream %s is upcoming' % video_id, expected=True) + + stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl'] + + is_live = info.get('liveStreamUrl') is not None formats = self._extract_m3u8_formats( - video_url, video_id, ext='mp4', m3u8_id='hls') + stream_url, video_id, ext='mp4', + entry_protocol='m3u8' if is_live else 'm3u8_native', + m3u8_id='hls') + + if is_live: + title = self._live_title(title) return { 'id': video_id, 'formats': formats, - 'title': info.get('title'), + 'title': title, 'description': info.get('description'), - 'duration': int_or_none(info.get('airTime')), - 'is_live': int_or_none(info.get('airTime')) == None, 'thumbnail': info.get('thumbnailUrl'), - 'uploader': info.get('channel', {}).get('title'), - 'uploader_id': info.get('channel', {}).get('code'), - 'uploader_url': info.get('channel', {}).get('permalink'), - 'timestamp': parse_iso8601(info.get('startAt')), + 'duration': int_or_none(info.get('airTime')), + 'timestamp': unified_timestamp(info.get('createdAt')), + 'uploader': try_get( + info, lambda x: x['channel']['title'], compat_str), + 'uploader_id': try_get( + info, lambda x: x['channel']['code'], compat_str), + 'uploader_url': try_get( + info, lambda x: x['channel']['permalink'], compat_str), 'view_count': int_or_none(info.get('viewCount')), - } \ No newline at end of file + 'comment_count': int_or_none(info.get('commentCount')), + 'tags': info.get('tags', []), + 'is_live': is_live, + } From 9d0c08a02c55136221621e8b8dda5860211af8dd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Feb 2017 01:40:12 +0800 Subject: [PATCH 499/586] [vevo] Fix videos with the new streams/streamsV3 format (closes #11719) --- ChangeLog | 2 ++ youtube_dl/extractor/vevo.py | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index e0e1f52d0..701afd57a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,8 @@ version <unreleased> Extractors +* [vevo] Fix extraction for videos with the new streams/streamsV3 format + (#11719) + [njpwworld] Add new extractor (#11561) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c4e37f694..9aa38bc5a 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -17,12 +17,12 @@ from ..utils import ( class VevoBaseIE(InfoExtractor): - def _extract_json(self, webpage, video_id, item): + def _extract_json(self, webpage, video_id): return self._parse_json( self._search_regex( r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', webpage, 'initial store'), - video_id)['default'][item] + video_id) class VevoIE(VevoBaseIE): @@ -139,6 +139,11 @@ class VevoIE(VevoBaseIE): # no genres available 'url': 'http://www.vevo.com/watch/INS171400764', 'only_matching': True, + }, { + # Another case available only via the webpage; using streams/streamsV3 formats + # Geo-restricted to Netherlands/Germany + 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', + 'only_matching': True, }] _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions @@ -193,7 +198,14 @@ class VevoIE(VevoBaseIE): # https://github.com/rg3/youtube-dl/issues/9366) if not video_versions: webpage = self._download_webpage(url, video_id) - video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] + json_data = self._extract_json(webpage, video_id) + if 'streams' in json_data.get('default', {}): + video_versions = json_data['default']['streams'][video_id][0] + else: + video_versions = [ + value + for key, value in json_data['apollo']['data'].items() + if key.startswith('%s.streams' % video_id)] uploader = None artist = None @@ -207,7 +219,7 @@ class VevoIE(VevoBaseIE): formats = [] for video_version in video_versions: - version = self._VERSIONS.get(video_version['version']) + version = self._VERSIONS.get(video_version.get('version'), 'generic') version_url = video_version.get('url') if not version_url: continue @@ -339,7 +351,7 @@ class VevoPlaylistIE(VevoBaseIE): if video_id: return self.url_result('vevo:%s' % video_id, VevoIE.ie_key()) - playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind) + playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind] playlist = (list(playlists.values())[0] if playlist_kind == 'playlist' else playlists[playlist_id]) From b3aec47665104223578181c71cc90112f5b17fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 23:27:14 +0700 Subject: [PATCH 500/586] [tvigle] Raise GeoRestrictedError --- youtube_dl/extractor/tvigle.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index f3817ab28..3475ef4c3 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -17,6 +17,9 @@ class TvigleIE(InfoExtractor): IE_DESC = 'Интернет-телевидение Tvigle.ru' _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['RU'] + _TESTS = [ { 'url': 'http://www.tvigle.ru/video/sokrat/', @@ -72,8 +75,13 @@ class TvigleIE(InfoExtractor): error_message = item.get('errorMessage') if not videos and error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + if item.get('isGeoBlocked') is True: + self.raise_geo_restricted( + msg=error_message, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), + expected=True) title = item['title'] description = item.get('description') From a5cf17989b04e559fda9a2731a3b33e881c5cc3c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Feb 2017 17:24:54 +0800 Subject: [PATCH 501/586] [MDR] Relax _VALID_URL and playerURL matching and update _TESTS Ref: #12169 --- ChangeLog | 1 + youtube_dl/extractor/mdr.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 701afd57a..4009acf37 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [MDR] Recognize more URL patterns (#12169) * [vevo] Fix extraction for videos with the new streams/streamsV3 format (#11719) + [njpwworld] Add new extractor (#11561) diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 6e4290aad..322e5b45a 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -14,7 +14,7 @@ from ..utils import ( class MDRIE(InfoExtractor): IE_DESC = 'MDR.DE and KiKA' - _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html' + _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' _TESTS = [{ # MDR regularly deletes its videos @@ -31,6 +31,7 @@ class MDRIE(InfoExtractor): 'duration': 250, 'uploader': 'MITTELDEUTSCHER RUNDFUNK', }, + 'skip': '404 not found', }, { 'url': 'http://www.kika.de/baumhaus/videos/video19636.html', 'md5': '4930515e36b06c111213e80d1e4aad0e', @@ -41,6 +42,7 @@ class MDRIE(InfoExtractor): 'duration': 134, 'uploader': 'KIKA', }, + 'skip': '404 not found', }, { 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html', 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', @@ -49,11 +51,21 @@ class MDRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Beutolomäus und der geheime Weihnachtswunsch', 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd', - 'timestamp': 1450950000, - 'upload_date': '20151224', + 'timestamp': 1482541200, + 'upload_date': '20161224', 'duration': 4628, 'uploader': 'KIKA', }, + }, { + # audio with alternative playerURL pattern + 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', + 'info_dict': { + 'id': '100', + 'ext': 'mp4', + 'title': 'Feature: Operation Mindfuck - Robert Anton Wilson', + 'duration': 3239, + 'uploader': 'MITTELDEUTSCHER RUNDFUNK', + }, }, { 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', 'only_matching': True, @@ -71,7 +83,7 @@ class MDRIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_url = self._search_regex( - r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1', + r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1', webpage, 'data url', group='url').replace(r'\/', '/') doc = self._download_xml( From 8878789f1117b59186ecc6bf82f462201166a26a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Feb 2017 16:50:57 +0700 Subject: [PATCH 502/586] [dailymotion] Raise GeoRestrictedError --- youtube_dl/extractor/dailymotion.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index b312401dc..246efde43 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -282,9 +282,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): } def _check_error(self, info): + error = info.get('error') if info.get('error') is not None: + title = error['title'] + # See https://developer.dailymotion.com/api#access-error + if error.get('code') == 'DM007': + self.raise_geo_restricted(msg=title) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True) + '%s said: %s' % (self.IE_NAME, title), expected=True) def _get_subtitles(self, video_id, webpage): try: From fd5c4aab5958a2a086072488913cc190ff028bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Feb 2017 16:51:21 +0700 Subject: [PATCH 503/586] [youtube] Raise GeoRestrictedError --- youtube_dl/extractor/youtube.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dec02804b..b3c3cd5b2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -47,7 +47,6 @@ from ..utils import ( unsmuggle_url, uppercase_escape, urlencode_postdata, - ISO3166Utils, ) @@ -371,6 +370,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('ttml', 'vtt') + _GEO_BYPASS = False + IE_NAME = 'youtube' _TESTS = [ { @@ -917,7 +918,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # itag 212 'url': '1t24XAntNCY', 'only_matching': True, - } + }, + { + # geo restricted to JP + 'url': 'sJL6WA-aGkQ', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -1376,11 +1382,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'token' not in video_info: if 'reason' in video_info: if 'The uploader has not made this video available in your country.' in video_info['reason']: - regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None) - if regions_allowed: - raise ExtractorError('YouTube said: This video is available in %s only' % ( - ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))), - expected=True) + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) raise ExtractorError( 'YouTube said: %s' % video_info['reason'][0], expected=True, video_id=video_id) @@ -2126,6 +2132,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, + }, { + # geo restricted to JP + 'url': 'https://www.youtube.com/user/kananishinoSMEJ', + 'only_matching': True, }] @classmethod From 7fd465525695bb589fa8932e1e36f38ad511735b Mon Sep 17 00:00:00 2001 From: xbe <xbe@users.noreply.github.com> Date: Sun, 26 Feb 2017 03:08:10 -0800 Subject: [PATCH 504/586] [crunchyroll] Extract uploader name that's not a link Provide the Crunchyroll extractor with the ability to extract uploader names that aren't links. Add a test for this new functionality. This fixes #12267. --- youtube_dl/extractor/crunchyroll.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index a1fc6a756..9c6cf00ca 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -207,6 +207,21 @@ class CrunchyrollIE(CrunchyrollBaseIE): # Just test metadata extraction 'skip_download': True, }, + }, { + # make sure we can extract an uploader name that's not a link + 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899', + 'info_dict': { + 'id': '606899', + 'ext': 'mp4', + 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors', + 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"', + 'uploader': 'Geneon Entertainment', + 'upload_date': '20120717', + }, + 'params': { + # just test metadata extraction + 'skip_download': True, + }, }] _FORMAT_IDS = { @@ -388,8 +403,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if video_upload_date: video_upload_date = unified_strdate(video_upload_date) video_uploader = self._html_search_regex( - r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, - 'video_uploader', fatal=False) + # try looking for both an uploader that's a link and one that's not + [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], + webpage, 'video_uploader', fatal=False) available_fmts = [] for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): From fdeea72611b2c2c29a9a34e91ae4bd9b8dfc1f64 Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Sun, 26 Feb 2017 15:05:52 +0100 Subject: [PATCH 505/586] [cda] Decode URL (fixes #12255) --- youtube_dl/extractor/cda.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index ae7af2f0e..1ee35b501 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import codecs import re from .common import InfoExtractor @@ -96,6 +97,10 @@ class CDAIE(InfoExtractor): if not video or 'file' not in video: self.report_warning('Unable to extract %s version information' % version) return + if video['file'].startswith('uggc'): + video['file'] = codecs.decode(video['file'], 'rot_13') + if video['file'].endswith('adc.mp4'): + video['file'] = video['file'].replace('adc.mp4', '.mp4') f = { 'url': video['file'], } From 892b47ab6c9147e785c562229e6dab305fffba61 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 27 Feb 2017 21:34:33 +0800 Subject: [PATCH 506/586] [scivee] Remove extractor (#9315) The Wikipedia page is changed from active to down: https://en.wikipedia.org/w/index.php?title=SciVee&diff=prev&oldid=723161154 Some other interesting bits: $ nslookup www.scivee.tv Server: 8.8.8.8 Address: 8.8.8.8#53 Non-authoritative answer: www.scivee.tv canonical name = scivee.rcsb.org. Name: scivee.rcsb.org Address: 132.249.231.211 $ nslookup rcsb.org Server: 8.8.8.8 Address: 8.8.8.8#53 Non-authoritative answer: Name: rcsb.org Address: 132.249.231.77 Both IPs are from UCSD. I guess it's maintained by a lab and they don't maintain it anymore. --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/scivee.py | 57 ------------------------------ 3 files changed, 1 insertion(+), 58 deletions(-) delete mode 100644 youtube_dl/extractor/scivee.py diff --git a/ChangeLog b/ChangeLog index 4009acf37..4ed9cb4e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +- [scivee] Remove extractor (#9315) + [MDR] Recognize more URL patterns (#12169) * [vevo] Fix extraction for videos with the new streams/streamsV3 format (#11719) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 58139ee4e..d09104096 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -838,7 +838,6 @@ from .safari import ( from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE -from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ScrippsNetworksWatchIE diff --git a/youtube_dl/extractor/scivee.py b/youtube_dl/extractor/scivee.py deleted file mode 100644 index b1ca12fde..000000000 --- a/youtube_dl/extractor/scivee.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class SciVeeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.scivee.tv/node/62352', - 'md5': 'b16699b74c9e6a120f6772a44960304f', - 'info_dict': { - 'id': '62352', - 'ext': 'mp4', - 'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting', - 'description': 'md5:81f1710638e11a481358fab1b11059d7', - }, - 'skip': 'Not accessible from Travis CI server', - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # annotations XML is malformed - annotations = self._download_webpage( - 'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations') - - title = self._html_search_regex(r'<title>([^<]+)', annotations, 'title') - description = self._html_search_regex(r'([^<]+)', annotations, 'abstract', fatal=False) - filesize = int_or_none(self._html_search_regex( - r'([^<]+)', annotations, 'filesize', fatal=False)) - - formats = [ - { - 'url': 'http://www.scivee.tv/assets/audio/%s' % video_id, - 'ext': 'mp3', - 'format_id': 'audio', - }, - { - 'url': 'http://www.scivee.tv/assets/video/%s' % video_id, - 'ext': 'mp4', - 'format_id': 'video', - 'filesize': filesize, - }, - ] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id, - 'formats': formats, - } From 0e879f432afe5b9a04a06cbc697cf28d08ac5518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 22:22:43 +0700 Subject: [PATCH 507/586] [youtube:channel] Remove duplicate test --- youtube_dl/extractor/youtube.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b3c3cd5b2..7053e5512 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2132,10 +2132,6 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, - }, { - # geo restricted to JP - 'url': 'https://www.youtube.com/user/kananishinoSMEJ', - 'only_matching': True, }] @classmethod @@ -2236,7 +2232,7 @@ class YoutubeUserIE(YoutubeChannelIE): 'url': 'https://www.youtube.com/gametrailers', 'only_matching': True, }, { - # This channel is not available. + # This channel is not available, geo restricted to JP 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos', 'only_matching': True, }] From 0dc5a86a329314f551f86c2ef3202342b7506667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 22:43:19 +0700 Subject: [PATCH 508/586] [npo] Add support for hetklokhuis.nl (closes #12293) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/npo.py | 44 ++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d09104096..0910b7b05 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -669,6 +669,7 @@ from .npo import ( NPORadioIE, NPORadioFragmentIE, SchoolTVIE, + HetKlokhuisIE, VPROIE, WNLIE, ) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 962437145..09e8d9987 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -416,7 +416,21 @@ class NPORadioFragmentIE(InfoExtractor): } -class SchoolTVIE(InfoExtractor): +class NPODataMidEmbedIE(InfoExtractor): + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video_id', group='id') + return { + '_type': 'url_transparent', + 'ie_key': 'NPO', + 'url': 'npo:%s' % video_id, + 'display_id': display_id + } + + +class SchoolTVIE(NPODataMidEmbedIE): IE_NAME = 'schooltv' _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P[^/?#&]+)' @@ -435,17 +449,25 @@ class SchoolTVIE(InfoExtractor): } } - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video_id', group='id') - return { - '_type': 'url_transparent', - 'ie_key': 'NPO', - 'url': 'npo:%s' % video_id, - 'display_id': display_id + +class HetKlokhuisIE(NPODataMidEmbedIE): + IE_NAME = 'schooltv' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' + + _TEST = { + 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', + 'info_dict': { + 'id': 'VPWON_1260528', + 'display_id': 'Zwaartekrachtsgolven', + 'ext': 'm4v', + 'title': 'Het Klokhuis: Zwaartekrachtsgolven', + 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48', + 'upload_date': '20170223', + }, + 'params': { + 'skip_download': True } + } class NPOPlaylistBaseIE(NPOIE): From f264c62334fdd31a7620b4fdefb822e1bae6bd77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:10:00 +0700 Subject: [PATCH 509/586] [npo] Add support for zapp.nl --- youtube_dl/extractor/npo.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 09e8d9987..7c2c93f27 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -51,7 +51,8 @@ class NPOIE(NPOBaseIE): (?: npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| - omroepwnl\.nl/video/fragment/[^/]+__ + omroepwnl\.nl/video/fragment/[^/]+__| + zapp\.nl/[^/]+/(?:gemist|filmpjes)/ ) ) (?P[^/?#]+) @@ -140,6 +141,14 @@ class NPOIE(NPOBaseIE): 'upload_date': '20150508', 'duration': 462, }, + }, + { + 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', + 'only_matching': True, + }, + { + 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', + 'only_matching': True, } ] From dbc01fdb6f4b4c58469ffb75d00a179f5af5cdcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:10:29 +0700 Subject: [PATCH 510/586] [hetklokhuis] Fix IE_NAME --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7c2c93f27..b53c29993 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -460,7 +460,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): - IE_NAME = 'schooltv' + IE_NAME = 'hetklokhuis' _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' _TEST = { From 18abb743762ce5b9b2ffd4d9d5e01b62621cc62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:13:51 +0700 Subject: [PATCH 511/586] [npo] Relax _VALID_URL for zapp.nl --- youtube_dl/extractor/npo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index b53c29993..50473d777 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -52,7 +52,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/(?:gemist|filmpjes)/ + zapp\.nl/[^/]+/[^/]+/ ) ) (?P[^/?#]+) @@ -149,6 +149,10 @@ class NPOIE(NPOBaseIE): { 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', 'only_matching': True, + }, + { + 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', + 'only_matching': True, } ] From c6184bcf7b58476b659a92290711e4c58faff277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:24:03 +0700 Subject: [PATCH 512/586] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4ed9cb4e0..4cb897f7f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,27 @@ version +Core +* [downloader/common] Limit displaying 2 digits after decimal point in sleep + interval message (#12183) ++ [extractor/common] Add preference to _parse_html5_media_entries + Extractors ++ [npo] Add support for zapp.nl ++ [npo] Add support for hetklokhuis.nl (#12293) - [scivee] Remove extractor (#9315) -+ [MDR] Recognize more URL patterns (#12169) ++ [cda] Decode download URL (#12255) ++ [crunchyroll] Improve uploader extraction (#12267) ++ [youtube] Raise GeoRestrictedError ++ [dailymotion] Raise GeoRestrictedError ++ [mdr] Recognize more URL patterns (#12169) ++ [tvigle] Raise GeoRestrictedError * [vevo] Fix extraction for videos with the new streams/streamsV3 format (#11719) -+ [njpwworld] Add new extractor (#11561) ++ [freshlive] Add support for freshlive.tv (#12175) ++ [xhamster] Capture and output videoClosed error (#12263) ++ [etonline] Add support for etonline.com (#12236) ++ [njpwworld] Add support for njpwworld.com (#11561) +* [amcnetworks] Relax URL regular expression (#12127) version 2017.02.24.1 From ef48a1175dc4e28b07c55ae7277d8196abec7ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:26:07 +0700 Subject: [PATCH 513/586] release 2017.02.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 564108122..6374f7c25 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.24.1 +[debug] youtube-dl version 2017.02.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4cb897f7f..949ea1810 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.02.27 Core * [downloader/common] Limit displaying 2 digits after decimal point in sleep diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f97397331..1b01c6d9d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -239,6 +239,7 @@ - **ESPN** - **ESPNArticle** - **EsriVideo** + - **ETOnline** - **Europa** - **EveryonesMixtape** - **ExpoTV** @@ -274,6 +275,7 @@ - **francetvinfo.fr** - **Freesound** - **freespeech.org** + - **FreshLive** - **Funimation** - **FunnyOrDie** - **Fusion** @@ -310,6 +312,7 @@ - **HellPorno** - **Helsinki**: helsinki.fi - **HentaiStigma** + - **hetklokhuis** - **hgtv.com:show** - **HistoricFilms** - **history:topic**: History.com Topic @@ -511,6 +514,7 @@ - **Nintendo** - **njoy**: N-JOY - **njoy:embed** + - **NJPWWorld**: 新日本プロレスワールド - **NobelPrize** - **Noco** - **Normalboots** @@ -666,7 +670,6 @@ - **savefrom.net** - **SBS**: sbs.com.au - **schooltv** - - **SciVee** - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fe7462eac..261218b80 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.24.1' +__version__ = '2017.02.27' From f4c68ba372655c8ff4d6b1cfecfa129796159afd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 28 Feb 2017 21:40:22 +0800 Subject: [PATCH 514/586] [douyu] Fix extraction and update _TESTS They've switched from flv to hls Closes #12301 --- ChangeLog | 6 ++++ youtube_dl/extractor/douyutv.py | 53 ++++++--------------------------- 2 files changed, 15 insertions(+), 44 deletions(-) diff --git a/ChangeLog b/ChangeLog index 949ea1810..f9986ae61 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [douyu] Fix extraction (#12301) + + version 2017.02.27 Core diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 911594413..9a83fb31a 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,15 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib -import time -import uuid - from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( ExtractorError, unescapeHTML, @@ -24,8 +16,8 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': 'iseven', - 'ext': 'flv', - 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'ext': 'mp4', + 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': '7师傅', @@ -39,7 +31,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '85982', 'display_id': '85982', - 'ext': 'flv', + 'ext': 'mp4', 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': r're:^https?://.*\.jpg$', @@ -55,8 +47,8 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': '17732', - 'ext': 'flv', - 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'ext': 'mp4', + 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': '7师傅', @@ -96,45 +88,18 @@ class DouyuTVIE(InfoExtractor): if room.get('show_status') == '2': raise ExtractorError('Live stream is offline', expected=True) - tt = compat_str(int(time.time() / 60)) - did = uuid.uuid4().hex.upper() - - sign_content = ''.join((room_id, did, self._API_KEY, tt)) - sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() - - flv_data = compat_urllib_parse_urlencode({ - 'cdn': 'ws', - 'rate': '0', - 'tt': tt, - 'did': did, - 'sign': sign, - }) - - video_info = self._download_json( - 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id, - data=flv_data, note='Downloading video info', - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - error_code = video_info.get('error', 0) - if error_code is not 0: - raise ExtractorError( - '%s reported error %i' % (self.IE_NAME, error_code), - expected=True) - - base_url = video_info['data']['rtmp_url'] - live_path = video_info['data']['rtmp_live'] - - video_url = '%s/%s' % (base_url, live_path) + formats = self._extract_m3u8_formats( + room['hls_url'], video_id, ext='mp4') title = self._live_title(unescapeHTML(room['room_name'])) - description = room.get('notice') + description = room.get('show_details') thumbnail = room.get('room_src') uploader = room.get('nickname') return { 'id': room_id, 'display_id': video_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, From c9619f0a17927086c49e4b443202be296d734a76 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 27 Feb 2017 18:47:47 +0800 Subject: [PATCH 515/586] [aes] Add aes_cbc_encrypt Used in daisuki.net (#4738) --- test/test_aes.py | 9 ++++++++- youtube_dl/aes.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/test/test_aes.py b/test/test_aes.py index 54078a66d..78a28751b 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -8,7 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes import base64 @@ -34,6 +34,13 @@ class TestAES(unittest.TestCase): decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + def test_cbc_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd") + def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode('utf-8') encrypted = base64.b64encode( diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index b8ff45481..c5bb3c4ef 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv): return decrypted_data +def aes_cbc_encrypt(data, key, iv): + """ + Encrypt with aes in CBC mode. Using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + remaining_length = BLOCK_SIZE_BYTES - len(block) + block += [remaining_length] * remaining_length + mixed_block = xor(block, previous_cipher_block) + + encrypted_block = aes_encrypt(mixed_block, expanded_key) + encrypted_data += encrypted_block + + previous_cipher_block = encrypted_block + + return encrypted_data + + def key_expansion(data): """ Generate key schedule From f48409c7ac186fa38bbeb2df2b210e37a18eb04b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 27 Feb 2017 18:50:19 +0800 Subject: [PATCH 516/586] [utils] Add pkcs1pad Used in daisuki.net (#4738) --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3cdb21d40..aefd94518 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -52,6 +52,7 @@ from youtube_dl.utils import ( parse_filesize, parse_count, parse_iso8601, + pkcs1pad, read_batch_urls, sanitize_filename, sanitize_path, @@ -1104,6 +1105,14 @@ The first line ohdave_rsa_encrypt(b'aa111222', e, N), '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') + def test_pkcs1pad(self): + data = [1, 2, 3] + padded_data = pkcs1pad(data, 32) + self.assertEqual(padded_data[:2], [0, 2]) + self.assertEqual(padded_data[28:], [0, 1, 2, 3]) + + self.assertRaises(ValueError, pkcs1pad, data, 8) + def test_encode_base_n(self): self.assertEqual(encode_base_n(0, 30), '0') self.assertEqual(encode_base_n(80, 30), '2k') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 17b83794a..8bd075eaf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3336,6 +3336,21 @@ def ohdave_rsa_encrypt(data, exponent, modulus): return '%x' % encrypted +def pkcs1pad(data, length): + """ + Padding input data with PKCS#1 scheme + + @param {int[]} data input data + @param {int} length target length + @returns {int[]} padded data + """ + if len(data) > length - 11: + raise ValueError('Input data too long for PKCS#1 padding') + + pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)] + return [0, 2] + pseudo_random + [0] + data + + def encode_base_n(num, n, table=None): FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' if not table: From 0a5445ddbeb8d391bbac92e5fe9074c6aa2e1565 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 28 Feb 2017 19:16:55 +0800 Subject: [PATCH 517/586] [utils] Add bytes_to_long() and long_to_bytes() Used in daisuki.net (#4738) Both are adapted from public domain PyCrypto: https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py --- youtube_dl/utils.py | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8bd075eaf..807183f4a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3319,6 +3319,57 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): self, req, proxy, type) +# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is +# released into Public Domain +# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 + +def long_to_bytes(n, blocksize=0): + """long_to_bytes(n:long, blocksize:int) : string + Convert a long integer to a byte string. + + If optional blocksize is given and greater than zero, pad the front of the + byte string with binary zeros so that the length is a multiple of + blocksize. + """ + # after much testing, this algorithm was deemed to be the fastest + s = b'' + n = int(n) + while n > 0: + s = compat_struct_pack('>I', n & 0xffffffff) + s + n = n >> 32 + # strip off leading zeros + for i in range(len(s)): + if s[i] != b'\000'[0]: + break + else: + # only happens when n == 0 + s = b'\000' + i = 0 + s = s[i:] + # add back some pad bytes. this could be done more efficiently w.r.t. the + # de-padding being done above, but sigh... + if blocksize > 0 and len(s) % blocksize: + s = (blocksize - len(s) % blocksize) * b'\000' + s + return s + + +def bytes_to_long(s): + """bytes_to_long(string) : long + Convert a byte string to a long integer. + + This is (essentially) the inverse of long_to_bytes(). + """ + acc = 0 + length = len(s) + if length % 4: + extra = (4 - length % 4) + s = b'\000' * extra + s + length = length + extra + for i in range(0, length, 4): + acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0] + return acc + + def ohdave_rsa_encrypt(data, exponent, modulus): ''' Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/ From 9bd05b5a18c535f5517158d505af5dced498d23e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 27 Feb 2017 18:56:45 +0800 Subject: [PATCH 518/586] [daisuki] Add new extractor (closes #4738) --- ChangeLog | 1 + youtube_dl/extractor/daisuki.py | 144 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 + 3 files changed, 149 insertions(+) create mode 100644 youtube_dl/extractor/daisuki.py diff --git a/ChangeLog b/ChangeLog index f9986ae61..401c5885e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [daisuki] Add new extractor (#2486, #3186, #4738, #6175, #7776, #10060) * [douyu] Fix extraction (#12301) diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py new file mode 100644 index 000000000..6fd09faf1 --- /dev/null +++ b/youtube_dl/extractor/daisuki.py @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import base64 +import json +import random +import re + +from .common import InfoExtractor +from ..aes import ( + aes_cbc_decrypt, + aes_cbc_encrypt, +) +from ..utils import ( + bytes_to_intlist, + bytes_to_long, + clean_html, + ExtractorError, + intlist_to_bytes, + get_element_by_id, + js_to_json, + int_or_none, + long_to_bytes, + pkcs1pad, + remove_end, +) + + +class DaisukiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P\d+)\.html' + + _TEST = { + 'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html', + 'info_dict': { + 'id': '11213', + 'ext': 'mp4', + 'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', + 'creator': 'BANDAI NAMCO Entertainment', + }, + 'params': { + 'skip_download': True, # AES-encrypted HLS stream + }, + } + + # The public key in PEM format can be found in clientlibs_anime_watch.min.js + _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + flashvars = self._parse_json(self._search_regex( + r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), + video_id, transform_source=js_to_json) + + iv = [0] * 16 + + data = {} + for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'): + data[key] = flashvars.get(key, '') + + encrypted_rtn = None + + # Some AES keys are rejected. Try it with different AES keys + for idx in range(5): + aes_key = [random.randint(0, 254) for _ in range(32)] + padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128)) + + n, e = self._RSA_KEY + encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) + init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={ + 's': flashvars.get('s', ''), + 'c': flashvars.get('ss3_prm', ''), + 'e': url, + 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( + bytes_to_intlist(json.dumps(data)), + aes_key, iv))).decode('ascii'), + 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), + }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) + + if 'rtn' in init_data: + encrypted_rtn = init_data['rtn'] + break + + self._sleep(5, video_id) + + if encrypted_rtn is None: + raise ExtractorError('Failed to fetch init data') + + rtn = self._parse_json( + intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( + base64.b64decode(encrypted_rtn)), + aes_key, iv)).decode('utf-8').rstrip('\0'), + video_id) + + formats = self._extract_m3u8_formats( + rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') + + title = remove_end(self._og_search_title(webpage), ' - DAISUKI') + + creator = self._html_search_regex( + r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'creator': creator, + } + + +class DaisukiPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P[a-zA-Z0-9]+)\.html' + + _TEST = { + 'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html', + 'info_dict': { + 'id': 'TheIdolMasterCG', + 'title': 'THE IDOLM@STER CINDERELLA GIRLS', + 'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8', + }, + 'playlist_count': 26, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + episode_pattern = r'''(?sx) + ]+delay="[^"]+/(\d+)/movie\.jpg".+? + ]+class=".*?\bepisodeNumber\b.*?">(?:]+>)?([^<]+)''' + entries = [{ + '_type': 'url_transparent', + 'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'), + 'episode_id': episode_id, + 'episode_number': int_or_none(episode_id), + } for movie_id, episode_id in re.findall(episode_pattern, webpage)] + + playlist_title = remove_end( + self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI') + playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage)) + + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0910b7b05..e251d8478 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -227,6 +227,10 @@ from .dailymotion import ( DailymotionUserIE, DailymotionCloudIE, ) +from .daisuki import ( + DaisukiIE, + DaisukiPlaylistIE, +) from .daum import ( DaumIE, DaumClipIE, From 7c4aa6fd6fd6fadf1cf1942c279cd5c0ff5ae498 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 28 Feb 2017 22:29:01 +0800 Subject: [PATCH 519/586] [daisuki] Add subtitles (#4738) --- youtube_dl/extractor/daisuki.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py index 6fd09faf1..58cc98666 100644 --- a/youtube_dl/extractor/daisuki.py +++ b/youtube_dl/extractor/daisuki.py @@ -34,6 +34,11 @@ class DaisukiIE(InfoExtractor): 'id': '11213', 'ext': 'mp4', 'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', + 'subtitles': { + 'mul': [{ + 'ext': 'ttml', + }], + }, 'creator': 'BANDAI NAMCO Entertainment', }, 'params': { @@ -101,10 +106,20 @@ class DaisukiIE(InfoExtractor): creator = self._html_search_regex( r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) + subtitles = {} + caption_url = rtn.get('caption_url') + if caption_url: + # mul: multiple languages + subtitles['mul'] = [{ + 'url': caption_url, + 'ext': 'ttml', + }] + return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'creator': creator, } From 87dadd456a138c3107ff6254bd03ed832cf2e6d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:06:47 +0700 Subject: [PATCH 520/586] [youtube:playlist] Recognize another playlist pattern (closes #11928, closes #12286) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7053e5512..81c793921 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1851,7 +1851,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (?: youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) \? (?:.*?[&;])*? (?:p|a|list)= | p/ )| @@ -1924,6 +1924,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'JODA15', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', } + }, { + 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + 'playlist_mincount': 485, + 'info_dict': { + 'title': '2017 華語最新單曲 (2/24更新)', + 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + } }, { 'note': 'Embedded SWF player', 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', @@ -2072,7 +2079,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Check if it's a video-specific URL query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) video_id = query_dict.get('v', [None])[0] or self._search_regex( - r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url, + r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url, 'video id', default=None) if video_id: if self._downloader.params.get('noplaylist'): From 948519b35dec420c3b3ca5369bcba1dead31fcc6 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Fri, 17 Feb 2017 04:49:25 +0100 Subject: [PATCH 521/586] [azmedien:showplaylist] Add support for all episodes playlists --- youtube_dl/extractor/azmedien.py | 49 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 50 insertions(+) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index cbc3ed564..817fc6d57 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE from ..utils import ( + get_element_by_class, get_element_by_id, strip_or_none, urljoin, @@ -170,3 +171,51 @@ class AZMedienPlaylistIE(AZMedienBaseIE): 'video-title', webpage)), group='title') return self.playlist_result(entries, show_id, title) + + +class AZMedienShowPlaylistIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien Show playlists' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?P + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + (?: + all-episodes| + alle-episoden + ) + /[^/]+ + ) + ''' + + _TEST = { + 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', + 'info_dict': { + 'id': 'telezueri.ch/all-episodes/astrotalk', + 'title': 'TeleZüri: AstroTalk - alle episoden', + 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', + }, + 'playlist_mincount': 13, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + episodes = get_element_by_class('search-mobile-box', webpage) + entries = [self.url_result( + urljoin(url, m.group('url'))) for m in re.finditer( + r']+href=(["\'])(?P.+?)\1', episodes)] + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + return self.playlist_result( + entries, + playlist_id=playlist_id, + playlist_title=title, + playlist_description=description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e251d8478..b1613a9d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -83,6 +83,7 @@ from .awaan import ( from .azmedien import ( AZMedienIE, AZMedienPlaylistIE, + AZMedienShowPlaylistIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From 43b38424a9ca2ce962036b17462d59b8acbf6dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:37:33 +0700 Subject: [PATCH 522/586] [azmedien:showplaylist] Improve (closes #12160) --- youtube_dl/extractor/azmedien.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 817fc6d57..f4e07d901 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -174,11 +175,10 @@ class AZMedienPlaylistIE(AZMedienBaseIE): class AZMedienShowPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien Show playlists' + IE_DESC = 'AZ Medien show playlists' _VALID_URL = r'''(?x) https?:// (?:www\.)? - (?P (?: telezueri\.ch| telebaern\.tv| @@ -187,22 +187,18 @@ class AZMedienShowPlaylistIE(AZMedienBaseIE): (?: all-episodes| alle-episoden - ) - /[^/]+ - ) + )/ + (?P[^/?#&]+) ''' _TEST = { 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', 'info_dict': { - 'id': 'telezueri.ch/all-episodes/astrotalk', + 'id': 'astrotalk', 'title': 'TeleZüri: AstroTalk - alle episoden', 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', }, 'playlist_mincount': 13, - 'params': { - 'skip_download': True, - } } def _real_extract(self, url): @@ -211,11 +207,7 @@ class AZMedienShowPlaylistIE(AZMedienBaseIE): episodes = get_element_by_class('search-mobile-box', webpage) entries = [self.url_result( urljoin(url, m.group('url'))) for m in re.finditer( - r']+href=(["\'])(?P.+?)\1', episodes)] - title = self._og_search_title(webpage) + r']+href=(["\'])(?P(?:(?!\1).)+)\1', episodes)] + title = self._og_search_title(webpage, fatal=False) description = self._og_search_description(webpage) - return self.playlist_result( - entries, - playlist_id=playlist_id, - playlist_title=title, - playlist_description=description) + return self.playlist_result(entries, playlist_id, title, description) From 11bae9cdde8ed75b355d5e68ef57dae504ad94ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:49:24 +0700 Subject: [PATCH 523/586] [ChangeLog] Actualize --- ChangeLog | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 401c5885e..8d33c055b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,15 @@ version +Core ++ [utils] Add bytes_to_long and long_to_bytes ++ [utils] Add pkcs1pad ++ [aes] Add aes_cbc_encrypt + Extractors -+ [daisuki] Add new extractor (#2486, #3186, #4738, #6175, #7776, #10060) ++ [azmedien:showplaylist] Add support for show playlists (#12160) ++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286) ++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776, + #10060) * [douyu] Fix extraction (#12301) From 1dc24093f81f349c22e6bda13cb05c26ac3266f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:59:22 +0700 Subject: [PATCH 524/586] release 2017.02.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6374f7c25..c907ef931 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.27 +[debug] youtube-dl version 2017.02.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8d33c055b..2a1fd9018 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.02.28 Core + [utils] Add bytes_to_long and long_to_bytes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1b01c6d9d..a08e00fce 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -78,6 +78,7 @@ - **awaan:video** - **AZMedien**: AZ Medien videos - **AZMedienPlaylist**: AZ Medien playlists + - **AZMedienShowPlaylist**: AZ Medien show playlists - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -191,6 +192,8 @@ - **dailymotion:playlist** - **dailymotion:user** - **DailymotionCloud** + - **Daisuki** + - **DaisukiPlaylist** - **daum.net** - **daum.net:clip** - **daum.net:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 261218b80..340e23bf8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.27' +__version__ = '2017.02.28' From aa9cc2ecbfea6c82944b4e07f3e93c904f1ff421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 05:03:14 +0700 Subject: [PATCH 525/586] [npo] Adapt to app.php API (closes #12311) --- youtube_dl/extractor/npo.py | 79 ++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 50473d777..89082c189 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -17,27 +17,9 @@ from ..utils import ( class NPOBaseIE(InfoExtractor): def _get_token(self, video_id): - token_page = self._download_webpage( - 'http://ida.omroep.nl/npoplayer/i.js', - video_id, note='Downloading token') - token = self._search_regex( - r'npoplayer\.token = "(.+?)"', token_page, 'token') - # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js - token_l = list(token) - first = second = None - for i in range(5, len(token_l) - 4): - if token_l[i].isdigit(): - if first is None: - first = i - elif second is None: - second = i - if first is None or second is None: - first = 12 - second = 13 - - token_l[first], token_l[second] = token_l[second], token_l[first] - - return ''.join(token_l) + return self._download_json( + 'http://ida.omroep.nl/app.php/auth', video_id, + note='Downloading token')['token'] class NPOIE(NPOBaseIE): @@ -187,32 +169,41 @@ class NPOIE(NPOBaseIE): pubopties = metadata.get('pubopties') if pubopties: quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) - for format_id in pubopties: - format_info = self._download_json( - 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' - % (video_id, format_id, token), - video_id, 'Downloading %s JSON' % format_id) - if format_info.get('error_code', 0) or format_info.get('errorcode', 0): + items = self._download_json( + 'http://ida.omroep.nl/app.php/%s' % video_id, + 'Downloading formats JSON', query={ + 'adaptive': 'yes', + 'token': token, + })['items'][0] + for num, item in enumerate(items): + item_url = item.get('url') + if not item_url: continue - streams = format_info.get('streams') - if streams: - try: - video_info = self._download_json( - streams[0] + '&type=json', - video_id, 'Downloading %s stream JSON' % format_id) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') - if error: - raise ExtractorError(error, expected=True) - raise - else: - video_info = format_info - video_url = video_info.get('url') + format_id = self._search_regex( + r'video/ida/([^/]+)', item_url, 'format id', + default=None) + try: + stream_info = self._download_json( + item_url + '&type=json', video_id, + 'Downloading %s stream JSON' % item.get('label') or format_id or num) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json( + ee.cause.read().decode(), video_id, + fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise + if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): + continue + video_url = stream_info.get('url') if not video_url: continue - if format_id == 'adaptive': - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + if stream_info.get('family') == 'adaptive': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) else: formats.append({ 'url': video_url, From 83e8fce628e810e2a5639ef9a21be839526512fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 22:14:46 +0700 Subject: [PATCH 526/586] [npo] Improve extraction and update tests --- youtube_dl/extractor/npo.py | 410 ++++++++++++++++++------------------ 1 file changed, 201 insertions(+), 209 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 89082c189..be10fc486 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -3,15 +3,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( + determine_ext, + ExtractorError, fix_xml_ampersands, orderedSet, parse_duration, qualities, strip_jsonp, unified_strdate, - ExtractorError, ) @@ -40,103 +44,101 @@ class NPOIE(NPOBaseIE): (?P[^/?#]+) ''' - _TESTS = [ - { - 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', - 'md5': '4b3f9c429157ec4775f2c9cb7b911016', - 'info_dict': { - 'id': 'VPWON_1220719', - 'ext': 'm4v', - 'title': 'Nieuwsuur', - 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', - 'upload_date': '20140622', - }, + _TESTS = [{ + 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', + 'md5': '4b3f9c429157ec4775f2c9cb7b911016', + 'info_dict': { + 'id': 'VPWON_1220719', + 'ext': 'm4v', + 'title': 'Nieuwsuur', + 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', + 'upload_date': '20140622', }, - { - 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', - 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', - 'info_dict': { - 'id': 'VARA_101191800', - 'ext': 'm4v', - 'title': 'De Mega Mike & Mega Thomas show: The best of.', - 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', - 'upload_date': '20090227', - 'duration': 2400, - }, + }, { + 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', + 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', + 'info_dict': { + 'id': 'VARA_101191800', + 'ext': 'm4v', + 'title': 'De Mega Mike & Mega Thomas show: The best of.', + 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', + 'upload_date': '20090227', + 'duration': 2400, }, - { - 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', - 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', - 'info_dict': { - 'id': 'VPWON_1169289', - 'ext': 'm4v', - 'title': 'Tegenlicht: De toekomst komt uit Afrika', - 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', - 'upload_date': '20130225', - 'duration': 3000, - }, + }, { + 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', + 'upload_date': '20130225', + 'duration': 3000, }, - { - 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', - 'info_dict': { - 'id': 'WO_VPRO_043706', - 'ext': 'wmv', - 'title': 'De nieuwe mens - Deel 1', - 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', - 'duration': 4680, - }, - 'params': { - # mplayer mms download - 'skip_download': True, - } + }, { + 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', + 'info_dict': { + 'id': 'WO_VPRO_043706', + 'ext': 'm4v', + 'title': 'De nieuwe mens - Deel 1', + 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', + 'duration': 4680, }, - # non asf in streams - { - 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', - 'md5': 'b3da13de374cbe2d5332a7e910bef97f', - 'info_dict': { - 'id': 'WO_NOS_762771', - 'ext': 'mp4', - 'title': 'Hoe gaat Europa verder na Parijs?', - }, - }, - { - 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', - 'md5': '01c6a2841675995da1f0cf776f03a9c3', - 'info_dict': { - 'id': 'VPWON_1233944', - 'ext': 'm4v', - 'title': 'Aap, poot, pies', - 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', - 'upload_date': '20150508', - 'duration': 599, - }, - }, - { - 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', - 'md5': 'd30cd8417b8b9bca1fdff27428860d08', - 'info_dict': { - 'id': 'POW_00996502', - 'ext': 'm4v', - 'title': '''"Dit is wel een 'landslide'..."''', - 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', - 'upload_date': '20150508', - 'duration': 462, - }, - }, - { - 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', - 'only_matching': True, - }, - { - 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', - 'only_matching': True, - }, - { - 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', - 'only_matching': True, + 'params': { + 'skip_download': True, } - ] + }, { + # non asf in streams + 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', + 'info_dict': { + 'id': 'WO_NOS_762771', + 'ext': 'mp4', + 'title': 'Hoe gaat Europa verder na Parijs?', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', + 'info_dict': { + 'id': 'VPWON_1233944', + 'ext': 'm4v', + 'title': 'Aap, poot, pies', + 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', + 'upload_date': '20150508', + 'duration': 599, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', + 'info_dict': { + 'id': 'POW_00996502', + 'ext': 'm4v', + 'title': '''"Dit is wel een 'landslide'..."''', + 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', + 'upload_date': '20150508', + 'duration': 462, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', + 'only_matching': True, + }, { + 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', + 'only_matching': True, + }, { + 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', + 'only_matching': True, + }, { + # live stream + 'url': 'npo:LI_NL1_4188102', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -165,79 +167,115 @@ class NPOIE(NPOBaseIE): token = self._get_token(video_id) formats = [] + urls = set() - pubopties = metadata.get('pubopties') - if pubopties: - quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) - items = self._download_json( - 'http://ida.omroep.nl/app.php/%s' % video_id, - 'Downloading formats JSON', query={ - 'adaptive': 'yes', - 'token': token, - })['items'][0] - for num, item in enumerate(items): - item_url = item.get('url') - if not item_url: - continue - format_id = self._search_regex( - r'video/ida/([^/]+)', item_url, 'format id', - default=None) - try: - stream_info = self._download_json( - item_url + '&type=json', video_id, - 'Downloading %s stream JSON' % item.get('label') or format_id or num) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error = (self._parse_json( - ee.cause.read().decode(), video_id, - fatal=False) or {}).get('errorstring') - if error: - raise ExtractorError(error, expected=True) - raise - if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): + quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) + items = self._download_json( + 'http://ida.omroep.nl/app.php/%s' % video_id, video_id, + 'Downloading formats JSON', query={ + 'adaptive': 'yes', + 'token': token, + })['items'][0] + for num, item in enumerate(items): + item_url = item.get('url') + if not item_url or item_url in urls: + continue + urls.add(item_url) + format_id = self._search_regex( + r'video/ida/([^/]+)', item_url, 'format id', + default=None) + + def add_format_url(format_url): + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'quality': quality(format_id), + }) + + # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 + if item.get('contentType') == 'url': + add_format_url(item_url) + continue + + try: + stream_info = self._download_json( + item_url + '&type=json', video_id, + 'Downloading %s stream JSON' + % item.get('label') or format_id or num) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json( + ee.cause.read().decode(), video_id, + fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise + # Stream URL instead of JSON, example: npo:LI_NL1_4188102 + if isinstance(stream_info, compat_str): + if not stream_info.startswith('http'): continue + video_url = stream_info + # JSON + else: video_url = stream_info.get('url') - if not video_url: + if not video_url or video_url in urls: + continue + urls.add(item_url) + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + else: + add_format_url(video_url) + + is_live = metadata.get('medium') == 'live' + + if not is_live: + for num, stream in enumerate(metadata.get('streams', [])): + stream_url = stream.get('url') + if not stream_url or stream_url in urls: continue - if stream_info.get('family') == 'adaptive': + urls.add(stream_url) + # smooth streaming is not supported + stream_type = stream.get('type', '').lower() + if stream_type in ['ss', 'ms']: + continue + if stream_type == 'hds': + f4m_formats = self._extract_f4m_formats( + stream_url, video_id, fatal=False) + # f4m downloader downloads only piece of live stream + for f4m_format in f4m_formats: + f4m_format['preference'] = -1 + formats.extend(f4m_formats) + elif stream_type == 'hls': formats.extend(self._extract_m3u8_formats( - video_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: + stream_url, video_id, ext='mp4', fatal=False)) + # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 + elif '.asf' in stream_url: + asx = self._download_xml( + stream_url, video_id, + 'Downloading stream %d ASX playlist' % num, + transform_source=fix_xml_ampersands, fatal=False) + if not asx: + continue + ref = asx.find('./ENTRY/Ref') + if ref is None: + continue + video_url = ref.get('href') + if not video_url or video_url in urls: + continue + urls.add(video_url) formats.append({ 'url': video_url, - 'format_id': format_id, - 'quality': quality(format_id), + 'ext': stream.get('formaat', 'asf'), + 'quality': stream.get('kwaliteit'), + 'preference': -10, }) - - streams = metadata.get('streams') - if streams: - for i, stream in enumerate(streams): - stream_url = stream.get('url') - if not stream_url: - continue - if '.asf' not in stream_url: + else: formats.append({ 'url': stream_url, 'quality': stream.get('kwaliteit'), }) - continue - asx = self._download_xml( - stream_url, video_id, - 'Downloading stream %d ASX playlist' % i, - transform_source=fix_xml_ampersands) - ref = asx.find('./ENTRY/Ref') - if ref is None: - continue - video_url = ref.get('href') - if not video_url: - continue - formats.append({ - 'url': video_url, - 'ext': stream.get('formaat', 'asf'), - 'quality': stream.get('kwaliteit'), - }) self._sort_formats(formats) @@ -250,28 +288,28 @@ class NPOIE(NPOBaseIE): return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': metadata.get('info'), 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], 'upload_date': unified_strdate(metadata.get('gidsdatum')), 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, } class NPOLiveIE(NPOBaseIE): IE_NAME = 'npo.nl:live' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P[^/?#&]+)' _TEST = { 'url': 'http://www.npo.nl/live/npo-1', 'info_dict': { - 'id': 'LI_NEDERLAND1_136692', + 'id': 'LI_NL1_4188102', 'display_id': 'npo-1', 'ext': 'mp4', - 'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'Livestream', + 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, }, 'params': { @@ -287,58 +325,12 @@ class NPOLiveIE(NPOBaseIE): live_id = self._search_regex( r'data-prid="([^"]+)"', webpage, 'live id') - metadata = self._download_json( - 'http://e.omroep.nl/metadata/%s' % live_id, - display_id, transform_source=strip_jsonp) - - token = self._get_token(display_id) - - formats = [] - - streams = metadata.get('streams') - if streams: - for stream in streams: - stream_type = stream.get('type').lower() - # smooth streaming is not supported - if stream_type in ['ss', 'ms']: - continue - stream_info = self._download_json( - 'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp' - % (stream.get('url'), token), - display_id, 'Downloading %s JSON' % stream_type) - if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): - continue - stream_url = self._download_json( - stream_info['stream'], display_id, - 'Downloading %s URL' % stream_type, - 'Unable to download %s URL' % stream_type, - transform_source=strip_jsonp, fatal=False) - if not stream_url: - continue - if stream_type == 'hds': - f4m_formats = self._extract_f4m_formats(stream_url, display_id) - # f4m downloader downloads only piece of live stream - for f4m_format in f4m_formats: - f4m_format['preference'] = -1 - formats.extend(f4m_formats) - elif stream_type == 'hls': - formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4')) - else: - formats.append({ - 'url': stream_url, - 'preference': -10, - }) - - self._sort_formats(formats) - return { + '_type': 'url_transparent', + 'url': 'npo:%s' % live_id, + 'ie_key': NPOIE.ie_key(), 'id': live_id, 'display_id': display_id, - 'title': self._live_title(metadata['titel']), - 'description': metadata['info'], - 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], - 'formats': formats, - 'is_live': True, } From 4b8a984c67cdc1b2bfde77398d74096406db9644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 22:21:13 +0700 Subject: [PATCH 527/586] [npo] Add support for audio --- youtube_dl/extractor/npo.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index be10fc486..38fefe492 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -125,6 +125,18 @@ class NPOIE(NPOBaseIE): 'params': { 'skip_download': True, } + }, { + # audio + 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437', + 'info_dict': { + 'id': 'RBX_FUNX_6683215', + 'ext': 'mp3', + 'title': 'Jouw Stad Rotterdam', + 'description': 'md5:db251505244f097717ec59fabc372d9f', + }, + 'params': { + 'skip_download': True, + } }, { 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', 'only_matching': True, @@ -193,7 +205,7 @@ class NPOIE(NPOBaseIE): }) # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 - if item.get('contentType') == 'url': + if item.get('contentType') in ('url', 'audio'): add_format_url(item_url) continue @@ -201,7 +213,7 @@ class NPOIE(NPOBaseIE): stream_info = self._download_json( item_url + '&type=json', video_id, 'Downloading %s stream JSON' - % item.get('label') or format_id or num) + % item.get('label') or item.get('format') or format_id or num) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: error = (self._parse_json( From 40df485f554ec3fff81ca988b5bf961d54d8e41b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 23:03:36 +0700 Subject: [PATCH 528/586] [YoutubeDL] Don't sanitize identifiers (closes #12317) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f7254560c..13a3a909e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -616,7 +616,7 @@ class YoutubeDL(object): sanitize = lambda k, v: sanitize_filename( compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k == 'id')) + is_id=(k == 'id' or k.endswith('_id'))) template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) for k, v in template_dict.items() if v is not None and not isinstance(v, (list, tuple, dict))) From 158af5242e983312c0c1e7590faa9844136e338f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 23:04:02 +0700 Subject: [PATCH 529/586] [utils] Carry long doc string --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 807183f4a..8738aa249 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -473,7 +473,8 @@ def timeconvert(timestr): def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. - Set is_id if this is not an arbitrary string, but an ID that should be kept if possible + Set is_id if this is not an arbitrary string, but an ID that should be kept + if possible. """ def replace_insane(char): if restricted and char in ACCENT_CHARS: From af5049f128655cfec8978f17b04e5d88bd91d37f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 2 Mar 2017 02:14:42 +0800 Subject: [PATCH 530/586] [adobepass] Add Charter Spectrum (#11465) Thanks @tv21 for the fix! --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 4d655bd5e..d4816abf5 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -36,6 +36,11 @@ MSO_INFO = { 'username_field': 'Ecom_User_ID', 'password_field': 'Ecom_Password', }, + 'Charter_Direct': { + 'name': 'Charter Spectrum', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, 'thr030': { 'name': '3 Rivers Communications' }, From c9612c04872656125108980dc61d71ba6b3a4f89 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 2 Mar 2017 16:59:12 +0800 Subject: [PATCH 531/586] [youtube] Mark errors about rental videos as expected Closes #12324 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 81c793921..caa048249 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1454,7 +1454,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError('"rental" videos not supported') + raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True) # Start extracting information self.report_information_extraction(video_id) From 11bb6ad1a5ae3767535720e7dfebb823e8167088 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 2 Mar 2017 20:49:39 +0800 Subject: [PATCH 532/586] [facebook] Fix extraction (closes #12323) Almost all videos now use the pagelet type 'permalink_video_pagelet' --- ChangeLog | 6 ++++++ youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 2a1fd9018..448d7f521 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [facebook] Fix extraction (#12323) + + version 2017.02.28 Core diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 70b8c95c5..6315d40c5 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -303,7 +303,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) if server_js_data: From 28d15b73f8704c6d00efa14948da29843fdb76e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Mar 2017 22:29:56 +0700 Subject: [PATCH 533/586] [ChangeLog] Actualize --- ChangeLog | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 448d7f521..e3e0f4544 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,14 @@ version +Core ++ [adobepass] Add support for Charter Spectrum (#11465) +* [YoutubeDL] Don't sanitize identifiers in output template (#12317) + Extractors -* [facebook] Fix extraction (#12323) +* [facebook] Fix extraction (#12323, #12330) +* [youtube] Mark errors about rental videos as expected (#12324) ++ [npo] Add support for audio +* [npo] Adapt to app.php API (#12311, #12320) version 2017.02.28 From 250eea6821a5715e2ee7cade8539fcd42177603d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Mar 2017 22:33:22 +0700 Subject: [PATCH 534/586] release 2017.03.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c907ef931..fec4152e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.28 +[debug] youtube-dl version 2017.03.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e3e0f4544..e53fb7767 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.02 Core + [adobepass] Add support for Charter Spectrum (#11465) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 340e23bf8..f4c8d3d5f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.28' +__version__ = '2017.03.02' From 4d345bf17b3040ebfedd079e656e1ca658749187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Mar 2017 23:53:46 +0700 Subject: [PATCH 535/586] [ruutu] Disable DASH formats (closes #12322) Due to causing out of sync issue --- youtube_dl/extractor/ruutu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 20d01754a..6c09df25a 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -82,6 +82,9 @@ class RuutuIE(InfoExtractor): formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) elif ext == 'mpd': + # video-only and audio-only streams are of different + # duration resulting in out of sync issue + continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) else: From 9bae185ba6be0b8ffe1d2168937c9d3274c5b60f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 22:16:00 +0700 Subject: [PATCH 536/586] [24video] Use original host for requests (closes #12339) --- youtube_dl/extractor/twentyfourvideo.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index f3541b654..7af11659f 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, @@ -12,7 +14,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' + _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -43,10 +45,12 @@ class TwentyFourVideoIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') webpage = self._download_webpage( - 'http://www.24video.sex/video/view/%s' % video_id, video_id) + 'http://%s/video/view/%s' % (host, video_id), video_id) title = self._og_search_title(webpage) description = self._html_search_regex( @@ -72,11 +76,11 @@ class TwentyFourVideoIE(InfoExtractor): # Sets some cookies self._download_xml( - r'http://www.24video.sex/video/xml/%s?mode=init' % video_id, + r'http://%s/video/xml/%s?mode=init' % (host, video_id), video_id, 'Downloading init XML') video_xml = self._download_xml( - 'http://www.24video.sex/video/xml/%s?mode=play' % video_id, + 'http://%s/video/xml/%s?mode=play' % (host, video_id), video_id, 'Downloading video XML') video = xpath_element(video_xml, './/video', 'video', fatal=True) From 692fa200cae38a7e37f646118a268ad408c8ab95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 22:28:34 +0700 Subject: [PATCH 537/586] [go] Relax _VALID_URL (closes #12341) --- youtube_dl/extractor/go.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 21ed846b2..4c9be47b4 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,7 @@ class GoIE(AdobePassIE): 'requestor_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P\w+)|season-\d+/\d+-(?P[^/?#]+))' % '|'.join(_SITE_INFO.keys()) + _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P\w+)|(?:[^/]+/)*(?P[^/?#]+))' % '|'.join(_SITE_INFO.keys()) _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -52,6 +52,12 @@ class GoIE(AdobePassIE): }, { 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', 'only_matching': True, + }, { + 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', + 'only_matching': True, + }, { + 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland', + 'only_matching': True, }] def _real_extract(self, url): From d02d4fa0a90f3182d65504508105e8d86886c6ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 22:49:48 +0700 Subject: [PATCH 538/586] [brightcove:new] Raise GeoRestrictedError --- youtube_dl/extractor/brightcove.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 27685eed0..f8605be82 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -544,8 +544,10 @@ class BrightcoveNewIE(InfoExtractor): except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - raise ExtractorError( - json_data.get('message') or json_data['error_code'], expected=True) + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + raise ExtractorError(message, expected=True) raise title = json_data['name'].strip() From cbb127568a6182df2c5a2d65426de523f1f7b43f Mon Sep 17 00:00:00 2001 From: Olivier Bilodeau Date: Thu, 15 Dec 2016 20:14:04 -0500 Subject: [PATCH 539/586] [vrak] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vrak.py | 68 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/vrak.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1613a9d3..0ac42138a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE +from .vrak import VrakIE from .vube import VubeIE from .vuclip import VuClipIE from .vvvvid import VVVVIDIE diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py new file mode 100644 index 000000000..692e2fcfc --- /dev/null +++ b/youtube_dl/extractor/vrak.py @@ -0,0 +1,68 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from .brightcove import BrightcoveNewIE + + +class VrakIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P[0-9\.]+).*' + _TEST = { + 'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721', + 'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948', + 'info_dict': { + 'id': '5231040869001', + 'ext': 'mp4', + 'title': 'Référendums américains, animés japonais et hooligans russes', + 'upload_date': '20161201', + 'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.', + 'timestamp': 1480628425, + 'uploader_id': '2890187628001', + } + } + + def _real_extract(self, url): + url_id = self._match_id(url) + webpage = self._download_webpage(url, url_id) + + result = {} + result['title'] = self._html_search_regex( + r'

    (.+?)

    ', webpage, 'title') + + # Inspired from BrightcoveNewIE._extract_url() + entries = [] + for account_id, player_id, _, video_id in re.findall( + # account_id, player_id and embed from: + #
    ]+ + data-publisher-id=["\'](\d+)["\'] + [^>]* + data-player-id=["\']([^"\']+)["\'] + [^>]* + refId":"([^&]+)" + [^>]* + >.*? +
    .*? + RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\' + [^>]* + RW\ java\.lang\.String\ value\ =\ \'(\d+)\' + ''', webpage): + + entries.append( + 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' + % (account_id, player_id, 'default', video_id)) + + if entries: + result = self.url_result(entries[0], BrightcoveNewIE.ie_key()) + + return result From d16f27ca272cb10f4de87814665402b9737175ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 23:57:01 +0700 Subject: [PATCH 540/586] [brightcove:new] Add ability to smuggle geo_countries into URL --- youtube_dl/extractor/brightcove.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f8605be82..66c8cb219 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -515,6 +515,9 @@ class BrightcoveNewIE(InfoExtractor): return entries def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( From 4d058c9862ebcb1fb7ebd988cf053fde200913cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 23:58:16 +0700 Subject: [PATCH 541/586] [vrak] Improve and update test (closes #11452) --- youtube_dl/extractor/vrak.py | 104 +++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py index 692e2fcfc..daa247cce 100644 --- a/youtube_dl/extractor/vrak.py +++ b/youtube_dl/extractor/vrak.py @@ -4,65 +4,77 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from .brightcove import BrightcoveNewIE +from ..utils import ( + int_or_none, + parse_age_limit, + smuggle_url, + unescapeHTML, +) class VrakIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P[0-9\.]+).*' + _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P[\d.]+)' _TEST = { - 'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721', - 'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948', + 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721', 'info_dict': { - 'id': '5231040869001', + 'id': '5345661243001', 'ext': 'mp4', - 'title': 'Référendums américains, animés japonais et hooligans russes', - 'upload_date': '20161201', - 'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.', - 'timestamp': 1480628425, + 'title': 'Obésité, film de hockey et Roseline Filion', + 'timestamp': 1488492126, + 'upload_date': '20170302', 'uploader_id': '2890187628001', - } + 'creator': 'VRAK.TV', + 'age_limit': 8, + 'series': 'ALT (Actualité Légèrement Tordue)', + 'episode': 'Obésité, film de hockey et Roseline Filion', + 'tags': list, + }, + 'params': { + 'skip_download': True, + }, } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s' def _real_extract(self, url): - url_id = self._match_id(url) - webpage = self._download_webpage(url, url_id) + video_id = self._match_id(url) - result = {} - result['title'] = self._html_search_regex( - r'

    (.+?)

    ', webpage, 'title') + webpage = self._download_webpage(url, video_id) - # Inspired from BrightcoveNewIE._extract_url() - entries = [] - for account_id, player_id, _, video_id in re.findall( - # account_id, player_id and embed from: - #
    ]+ - data-publisher-id=["\'](\d+)["\'] - [^>]* - data-player-id=["\']([^"\']+)["\'] - [^>]* - refId":"([^&]+)" - [^>]* - >.*? -
    .*? - RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\' - [^>]* - RW\ java\.lang\.String\ value\ =\ \'(\d+)\' - ''', webpage): + title = self._html_search_regex( + r']+\bclass=["\']videoTitle["\'][^>]*>([^<]+)', + webpage, 'title', default=None) or self._og_search_title(webpage) - entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, 'default', video_id)) + content = self._parse_json( + self._search_regex( + r'data-player-options-content=(["\'])(?P{.+?})\1', + webpage, 'content', default='{}', group='content'), + video_id, transform_source=unescapeHTML) - if entries: - result = self.url_result(entries[0], BrightcoveNewIE.ie_key()) + ref_id = content.get('refId') or self._search_regex( + r'refId":"([^&]+)"', webpage, 'ref id') - return result + brightcove_id = self._search_regex( + r'''(?x) + java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s + [^>]* + java\.lang\.String\s+value\s*=\s*["'](\d+) + ''' % re.escape(ref_id), webpage, 'brightcove id') + + return { + '_type': 'url_transparent', + 'ie_key': BrightcoveNewIE.ie_key(), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'id': brightcove_id, + 'description': content.get('description'), + 'creator': content.get('brand'), + 'age_limit': parse_age_limit(content.get('rating')), + 'series': content.get('showName') or content.get( + 'episodeName'), # this is intentional + 'season_number': int_or_none(content.get('seasonNumber')), + 'episode': title, + 'episode_number': int_or_none(content.get('episodeNumber')), + 'tags': content.get('tags', []), + } From bc82f22879e222a1ade35fd8ebd7bb535f9166dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 00:42:51 +0700 Subject: [PATCH 542/586] [rutube] Relax _VALID_URL --- youtube_dl/extractor/rutube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index fd1df925b..663b75583 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -17,7 +17,7 @@ from ..utils import ( class RutubeIE(InfoExtractor): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P[\da-z]{32})' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P[\da-z]{32})' _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', @@ -39,6 +39,9 @@ class RutubeIE(InfoExtractor): }, { 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, + }, { + 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', + 'only_matching': True, }] def _real_extract(self, url): From eb3079b6ce54b63b4cc609198382b6db2cbb6f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 00:46:33 +0700 Subject: [PATCH 543/586] [generic] Add support for rutube embeds --- youtube_dl/extractor/generic.py | 24 ++++++++++++++++++++++++ youtube_dl/extractor/rutube.py | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9868ca6d0..ebab9509d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -84,6 +84,7 @@ from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE from .videopress import VideoPressIE +from .rutube import RutubeIE class GenericIE(InfoExtractor): @@ -1502,6 +1503,23 @@ class GenericIE(InfoExtractor): }, 'add_ie': [VideoPressIE.ie_key()], }, + { + # Rutube embed + 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2', + 'info_dict': { + 'id': '9b3d5bee0a8740bf70dfd29d3ea43541', + 'ext': 'flv', + 'title': 'Магаззино: Казань 2', + 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a', + 'uploader': 'Магаззино', + 'upload_date': '20170228', + 'uploader_id': '996642', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [RutubeIE.ie_key()], + }, { # ThePlatform embedded with whitespaces in URLs 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', @@ -2480,6 +2498,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( videopress_urls, ie=VideoPressIE.ie_key()) + # Look for Rutube embeds + rutube_urls = RutubeIE._extract_urls(webpage) + if rutube_urls: + return _playlist_from_matches( + rutube_urls, ie=RutubeIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 663b75583..889fa7628 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -44,6 +44,12 @@ class RutubeIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [mobj.group('url') for mobj in re.finditer( + r']+?src=(["\'])(?P(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( From 6f4e4132d8ef835635059d08206ca9bc6fd5dd98 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 4 Mar 2017 23:23:18 +0800 Subject: [PATCH 544/586] [douyutv] Switch to the PC API to escape the 5-min limitation Thanks @spacemeowx2 for the algo. Ref: https://gist.github.com/spacemeowx2/629b1d131bd7e240a7d28742048e80fc Closes #12316 --- ChangeLog | 6 ++++++ youtube_dl/extractor/douyutv.py | 31 +++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index e53fb7767..13ccb0f8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316) + + version 2017.03.02 Core diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 9a83fb31a..82d8a042f 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,6 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import time +import hashlib + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -16,7 +19,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': 'iseven', - 'ext': 'mp4', + 'ext': 'flv', 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', @@ -31,7 +34,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '85982', 'display_id': '85982', - 'ext': 'mp4', + 'ext': 'flv', 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': r're:^https?://.*\.jpg$', @@ -47,7 +50,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': '17732', - 'ext': 'mp4', + 'ext': 'flv', 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', @@ -66,10 +69,6 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] - # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf - # is encrypted originally, but ffdec can dump memory to get the decrypted one. - _API_KEY = 'A12Svb&%1UUmf@hC' - def _real_extract(self, url): video_id = self._match_id(url) @@ -80,6 +79,7 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') + # Grab metadata from mobile API room = self._download_json( 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, note='Downloading room info')['data'] @@ -88,8 +88,19 @@ class DouyuTVIE(InfoExtractor): if room.get('show_status') == '2': raise ExtractorError('Live stream is offline', expected=True) - formats = self._extract_m3u8_formats( - room['hls_url'], video_id, ext='mp4') + # Grab the URL from PC client API + # The m3u8 url from mobile API requires re-authentication every 5 minutes + tt = int(time.time()) + signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt) + sign = hashlib.md5(signContent.encode('ascii')).hexdigest() + video_url = self._download_json( + 'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id, + video_id, note='Downloading video URL info', + query={'rate': 0}, headers={ + 'auth': sign, + 'time': str(tt), + 'aid': 'pcclient' + })['data']['live_url'] title = self._live_title(unescapeHTML(room['room_name'])) description = room.get('show_details') @@ -99,7 +110,7 @@ class DouyuTVIE(InfoExtractor): return { 'id': room_id, 'display_id': video_id, - 'formats': formats, + 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, From 64b7ccef3e3144a50f2cc01772a5ea5e81d4494d Mon Sep 17 00:00:00 2001 From: Juanjo Benages Date: Thu, 2 Feb 2017 17:33:09 +0100 Subject: [PATCH 545/586] [redbulltv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/redbull.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/redbull.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0ac42138a..e42a069b5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -793,6 +793,7 @@ from .rai import ( ) from .rbmaradio import RBMARadioIE from .rds import RDSIE +from .redbull import RedBullIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/youtube_dl/extractor/redbull.py b/youtube_dl/extractor/redbull.py new file mode 100644 index 000000000..e3d978a53 --- /dev/null +++ b/youtube_dl/extractor/redbull.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re + +from .common import InfoExtractor + + +class RedBullIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?PAP-\w+)' + _TEST = { + 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', + 'md5': '78e860f631d7a846e712fab8c5fe2c38', + 'info_dict': { + 'id': 'AP-1Q756YYX51W11', + 'ext': 'mp4', + 'title': 'ABC of...WRC', + 'description': 'Buckle up for a crash course in the terminology, rules, drivers, and courses of the World Rally Championship.' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + access_token = self._download_json( + 'http://api-v2.redbull.tv/start?build=4.0.9&category=smartphone&os_version=23&os_family=android', + video_id, note='Downloading access token', + )['auth']['access_token'] + + info = self._download_json( + 'https://api-v2.redbull.tv/views/%s' % video_id, + video_id, note='Downloading video information', + headers={'Authorization': 'Bearer ' + access_token} + )['blocks'][0]['top'][0] + + m3u8_url = info['video_product']['url'] + title = info['title'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': info.get('short_description'), + 'genre': info.get('genre'), + 'duration': info.get('duration') + } From 054a587de8fe2860bcb93a19f8c628b4ddd9ad56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:25:09 +0700 Subject: [PATCH 546/586] [redbulltv] Improve extraction (closes #11948, closes #3919) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/redbull.py | 50 -------------- youtube_dl/extractor/redbulltv.py | 106 +++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 51 deletions(-) delete mode 100644 youtube_dl/extractor/redbull.py create mode 100644 youtube_dl/extractor/redbulltv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e42a069b5..caf1dc766 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -793,7 +793,7 @@ from .rai import ( ) from .rbmaradio import RBMARadioIE from .rds import RDSIE -from .redbull import RedBullIE +from .redbulltv import RedBullTVIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/youtube_dl/extractor/redbull.py b/youtube_dl/extractor/redbull.py deleted file mode 100644 index e3d978a53..000000000 --- a/youtube_dl/extractor/redbull.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals -import re - -from .common import InfoExtractor - - -class RedBullIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?PAP-\w+)' - _TEST = { - 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', - 'md5': '78e860f631d7a846e712fab8c5fe2c38', - 'info_dict': { - 'id': 'AP-1Q756YYX51W11', - 'ext': 'mp4', - 'title': 'ABC of...WRC', - 'description': 'Buckle up for a crash course in the terminology, rules, drivers, and courses of the World Rally Championship.' - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - access_token = self._download_json( - 'http://api-v2.redbull.tv/start?build=4.0.9&category=smartphone&os_version=23&os_family=android', - video_id, note='Downloading access token', - )['auth']['access_token'] - - info = self._download_json( - 'https://api-v2.redbull.tv/views/%s' % video_id, - video_id, note='Downloading video information', - headers={'Authorization': 'Bearer ' + access_token} - )['blocks'][0]['top'][0] - - m3u8_url = info['video_product']['url'] - title = info['title'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': info.get('short_description'), - 'genre': info.get('genre'), - 'duration': info.get('duration') - } diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py new file mode 100644 index 000000000..5c73d5bca --- /dev/null +++ b/youtube_dl/extractor/redbulltv.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + try_get, + unified_timestamp, +) + + +class RedBullTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?PAP-\w+)' + _TESTS = [{ + # film + 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', + 'md5': '78e860f631d7a846e712fab8c5fe2c38', + 'info_dict': { + 'id': 'AP-1Q756YYX51W11', + 'ext': 'mp4', + 'title': 'ABC of...WRC', + 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', + 'duration': 1582.04, + 'timestamp': 1488405786, + 'upload_date': '20170301', + }, + }, { + # episode + 'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web', + 'info_dict': { + 'id': 'AP-1PMT5JCWH1W11', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2 E4', + 'description': 'md5:334b741c8c1ce65be057eab6773c1cf5', + 'duration': 904.6, + 'timestamp': 1487290093, + 'upload_date': '20170217', + 'series': 'Hashtags', + 'season_number': 2, + 'episode_number': 4, + }, + }, { + 'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + access_token = self._download_json( + 'https://api-v2.redbull.tv/start', video_id, + note='Downloading access token', query={ + 'build': '4.0.9', + 'category': 'smartphone', + 'os_version': 23, + 'os_family': 'android', + })['auth']['access_token'] + + info = self._download_json( + 'https://api-v2.redbull.tv/views/%s' % video_id, + video_id, note='Downloading video information', + headers={'Authorization': 'Bearer ' + access_token} + )['blocks'][0]['top'][0] + + video = info['video_product'] + + title = info['title'].strip() + m3u8_url = video['url'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + subtitles = {} + for _, captions in (try_get( + video, lambda x: x['attachments']['captions'], + dict) or {}).items(): + if not captions or not isinstance(captions, list): + continue + for caption in captions: + caption_url = caption.get('url') + if not caption_url: + continue + subtitles.setdefault(caption.get('lang') or 'en', []).append({ + 'url': caption_url, + 'ext': caption.get('format'), + }) + + subheading = info.get('subheading') + if subheading: + title += ' - %s' % subheading + + return { + 'id': video_id, + 'title': title, + 'description': info.get('long_description') or info.get( + 'short_description'), + 'duration': float_or_none(video.get('duration'), scale=1000), + 'timestamp': unified_timestamp(info.get('published')), + 'series': info.get('show_title'), + 'season_number': int_or_none(info.get('season_number')), + 'episode_number': int_or_none(info.get('episode_number')), + 'formats': formats, + 'subtitles': subtitles, + } From a3ba8a7acfa2db3a8c90000d377c25d14bdad290 Mon Sep 17 00:00:00 2001 From: Lars Vierbergen Date: Sat, 4 Mar 2017 17:47:19 +0100 Subject: [PATCH 547/586] [vier] Add support for vijf.be vier.be and vijf.be run on the same CMS and are property of the same company, so the same extractor can be used for both of them. --- youtube_dl/extractor/vier.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index d26fb49b3..5086f591e 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class VierIE(InfoExtractor): IE_NAME = 'vier' - _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P[^/]+)(?:/(?P\d+))?|video/v3/embed/(?P\d+))' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?:[^/]+/videos/(?P[^/]+)(?:/(?P\d+))?|video/v3/embed/(?P\d+))' _TESTS = [{ 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 'info_dict': { @@ -23,6 +23,19 @@ class VierIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614', + 'info_dict': { + 'id': '2561614', + 'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas', + 'ext': 'mp4', + 'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s', + 'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 'only_matching': True, @@ -35,6 +48,7 @@ class VierIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) embed_id = mobj.group('embed_id') display_id = mobj.group('display_id') or embed_id + site = mobj.group('site') webpage = self._download_webpage(url, display_id) @@ -43,7 +57,7 @@ class VierIE(InfoExtractor): webpage, 'video id') application = self._search_regex( [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], - webpage, 'application', default='vier_vod') + webpage, 'application', default=site + '_vod') filename = self._search_regex( [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], webpage, 'filename') @@ -68,13 +82,19 @@ class VierIE(InfoExtractor): class VierVideosIE(InfoExtractor): IE_NAME = 'vier:videos' - _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P[^/]+)/videos(?:\?.*\bpage=(?P\d+)|$)' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?P[^/]+)/videos(?:\?.*\bpage=(?P\d+)|$)' _TESTS = [{ 'url': 'http://www.vier.be/demoestuin/videos', 'info_dict': { 'id': 'demoestuin', }, 'playlist_mincount': 153, + }, { + 'url': 'http://www.vijf.be/temptationisland/videos', + 'info_dict': { + 'id': 'temptationisland', + }, + 'playlist_mincount': 159, }, { 'url': 'http://www.vier.be/demoestuin/videos?page=6', 'info_dict': { @@ -92,6 +112,7 @@ class VierVideosIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) program = mobj.group('program') + site = mobj.group('site') page_id = mobj.group('page') if page_id: @@ -105,13 +126,13 @@ class VierVideosIE(InfoExtractor): entries = [] for current_page_id in itertools.count(start_page): current_page = self._download_webpage( - 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id), + 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id), program, 'Downloading page %d' % (current_page_id + 1)) page_entries = [ - self.url_result('http://www.vier.be' + video_url, 'Vier') + self.url_result('http://www.' + site + '.be' + video_url, 'Vier') for video_url in re.findall( - r'

    ', current_page)] + r'', current_page)] entries.extend(page_entries) if page_id or '>Meer<' not in current_page: break From 30f8f142d4563df9cf47b0164adbb2c9e0130c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:57:03 +0700 Subject: [PATCH 548/586] Credit @ThomasChr for #12015 and #12245 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 247c0ea13..3d8db1ee2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -202,3 +202,4 @@ Fabian Stahl Bagira Odd Stråbø Philip Herzog +Thomas Christlieb From 466274fe9a8fae0a5f5e0358f48e54f569c10c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:58:12 +0700 Subject: [PATCH 549/586] Credit @p2004a for vodpl (#12122) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3d8db1ee2..2a0938f3b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -203,3 +203,4 @@ Bagira Odd Stråbø Philip Herzog Thomas Christlieb +Marek Rusinowski From f24c1e558456021d9a8704c9964c6a704e8b73ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:59:49 +0700 Subject: [PATCH 550/586] Credit @TobiX for #9725 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 2a0938f3b..0716d2ad6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -204,3 +204,4 @@ Odd Stråbø Philip Herzog Thomas Christlieb Marek Rusinowski +Tobias Gruetzmacher From 6f211dc936dcd1f8ab6f178f8696f5edf2f385a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 00:01:17 +0700 Subject: [PATCH 551/586] Credit @obilodeau for vrak (#11452) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 0716d2ad6..4f383e214 100644 --- a/AUTHORS +++ b/AUTHORS @@ -205,3 +205,4 @@ Philip Herzog Thomas Christlieb Marek Rusinowski Tobias Gruetzmacher +Olivier Bilodeau From bcefc59279d9d4d7034e0f25a3fcf561a456766d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 00:02:30 +0700 Subject: [PATCH 552/586] Credit @vierbergenlars for vijf.be (#12304) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4f383e214..fd1b8c9ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -206,3 +206,4 @@ Thomas Christlieb Marek Rusinowski Tobias Gruetzmacher Olivier Bilodeau +Lars Vierbergen From c64c03be35adae05740058e449c205120f89910d Mon Sep 17 00:00:00 2001 From: Xiao Di Guan Date: Thu, 16 Feb 2017 17:46:54 +1100 Subject: [PATCH 553/586] [twitch] Add basic support for two-factor authentication --- youtube_dl/extractor/twitch.py | 79 ++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bbba394b0..096a2ac9d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -12,7 +12,6 @@ from ..compat import ( compat_str, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urlparse, ) from ..utils import ( clean_html, @@ -24,6 +23,7 @@ from ..utils import ( parse_iso8601, update_url_query, urlencode_postdata, + urljoin, ) @@ -32,7 +32,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' - _LOGIN_URL = 'http://www.twitch.tv/login' + _LOGIN_URL = 'https://www.twitch.tv/login' _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _NETRC_MACHINE = 'twitch' @@ -64,6 +64,36 @@ class TwitchBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login. Twitch said: %s' % message, expected=True) + def post_login_form(page, urlh, note, data): + form = self._hidden_inputs(page) + form.update(data) + + page_url = urlh.geturl() + post_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', page, + 'post url', default=page_url, group='url') + post_url = urljoin(page_url, post_url) + + headers = {'Referer': page_url} + + try: + response = self._download_json( + post_url, None, note, + data=urlencode_postdata(form), + headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + response = self._parse_json( + e.cause.read().decode('utf-8'), None) + fail(response['message']) + raise + + if response.get('redirect'): + redirect_url = urljoin(post_url, response['redirect']) + return self._download_webpage_handle( + redirect_url, None, 'Downloading login redirect page', + headers=headers) + login_page, handle = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login page') @@ -71,40 +101,25 @@ class TwitchBaseIE(InfoExtractor): if 'blacklist_message' in login_page: fail(clean_html(login_page)) - login_form = self._hidden_inputs(login_page) - - login_form.update({ + login_data = { 'username': username, 'password': password, - }) + } + redirect_res = post_login_form( + login_page, handle, 'Logging in as %s' % username, login_data) - redirect_url = handle.geturl() + if not redirect_res: + return + redirect_page, handle = redirect_res - post_url = self._search_regex( - r']+action=(["\'])(?P.+?)\1', login_page, - 'post url', default=redirect_url, group='url') - - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(redirect_url, post_url) - - headers = {'Referer': redirect_url} - - try: - response = self._download_json( - post_url, None, 'Logging in as %s' % username, - data=urlencode_postdata(login_form), - headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - response = self._parse_json( - e.cause.read().decode('utf-8'), None) - fail(response['message']) - raise - - if response.get('redirect'): - self._download_webpage( - response['redirect'], None, 'Downloading login redirect page', - headers=headers) + if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: + # TODO: Add mechanism to request an SMS or phone call + tfa_token = self._get_tfa_info('two-factor authentication token') + tfa_data = { + 'authy_token': tfa_token, + 'remember_2fa': 'true', + } + post_login_form(redirect_page, handle, 'Submitting TFA token', tfa_data) def _prefer_source(self, formats): try: From 5316566edcbb1a2ac2e0559a1863b2204242b7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 02:06:33 +0700 Subject: [PATCH 554/586] [twitch] Use better naming and simplify (closes #11974) --- youtube_dl/extractor/twitch.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 096a2ac9d..ed36336bd 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -64,7 +64,7 @@ class TwitchBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login. Twitch said: %s' % message, expected=True) - def post_login_form(page, urlh, note, data): + def login_step(page, urlh, note, data): form = self._hidden_inputs(page) form.update(data) @@ -88,11 +88,10 @@ class TwitchBaseIE(InfoExtractor): fail(response['message']) raise - if response.get('redirect'): - redirect_url = urljoin(post_url, response['redirect']) - return self._download_webpage_handle( - redirect_url, None, 'Downloading login redirect page', - headers=headers) + redirect_url = urljoin(post_url, response['redirect']) + return self._download_webpage_handle( + redirect_url, None, 'Downloading login redirect page', + headers=headers) login_page, handle = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login page') @@ -101,25 +100,19 @@ class TwitchBaseIE(InfoExtractor): if 'blacklist_message' in login_page: fail(clean_html(login_page)) - login_data = { - 'username': username, - 'password': password, - } - redirect_res = post_login_form( - login_page, handle, 'Logging in as %s' % username, login_data) - - if not redirect_res: - return - redirect_page, handle = redirect_res + redirect_page, handle = login_step( + login_page, handle, 'Logging in as %s' % username, { + 'username': username, + 'password': password, + }) if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: # TODO: Add mechanism to request an SMS or phone call tfa_token = self._get_tfa_info('two-factor authentication token') - tfa_data = { + login_step(redirect_page, handle, 'Submitting TFA token', { 'authy_token': tfa_token, 'remember_2fa': 'true', - } - post_login_form(redirect_page, handle, 'Submitting TFA token', tfa_data) + }) def _prefer_source(self, formats): try: From 75027364ba35d8852f393f67860be817fff05541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 02:22:02 +0700 Subject: [PATCH 555/586] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 13ccb0f8f..7e22db53e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,18 @@ version Extractors ++ [twitch] Add basic support for two-factor authentication (#11974) ++ [vier] Add support for vijf.be (#12304) ++ [redbulltv] Add support for redbull.tv (#3919, #11948) * [douyutv] Switch to the PC API to escape the 5-min limitation (#12316) ++ [generic] Add support for rutube embeds ++ [rutube] Relax URL regular expression ++ [vrak] Add support for vrak.tv (#11452) ++ [brightcove:new] Add ability to smuggle geo_countries into URL ++ [brightcove:new] Raise GeoRestrictedError +* [go] Relax URL regular expression (#12341) +* [24video] Use original host for requests (#12339) +* [ruutu] Disable DASH formats (#12322) version 2017.03.02 From afa4597618462df01b47febcd64c531f8ffdd63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 02:23:08 +0700 Subject: [PATCH 556/586] release 2017.03.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fec4152e3..988d0d81b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.02 +[debug] youtube-dl version 2017.03.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e22db53e..2f3ec1b47 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.05 Extractors + [twitch] Add basic support for two-factor authentication (#11974) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a08e00fce..f02c4dea0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -626,6 +626,7 @@ - **RaiTV** - **RBMARadio** - **RDS**: RDS.ca + - **RedBullTV** - **RedTube** - **RegioTV** - **RENTV** @@ -916,6 +917,7 @@ - **VoxMedia** - **Vporn** - **vpro**: npo.nl and ntr.nl + - **Vrak** - **VRT** - **vube**: Vube.com - **VuClip** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f4c8d3d5f..215f9a3c1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.02' +__version__ = '2017.03.05' From 6d0fe752bfeaa9a758099315f006bc15acd1ae76 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sat, 4 Mar 2017 22:19:44 -0500 Subject: [PATCH 557/586] [external:ffmpeg] In test harness, limit to 10k download size Otherwise, if you screw up a playlist test by including a playlist dictionary key, you'll be there for eons while it downloads all the files before erroring out. --- youtube_dl/downloader/external.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index bdd3545a2..127a92d20 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -270,6 +270,10 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_live', 'live'] args += ['-i', url, '-c', 'copy'] + + if self.params.get('test', False): + args += ['-fs', compat_str(self._TEST_FILE_SIZE)] # -fs limit_size (output), expressed in bytes + if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] From a50862b7355544d3fd8473bf3ff35e9c6643d789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 10:24:29 +0700 Subject: [PATCH 558/586] [downloader/external] Add missing import and PEP8 --- youtube_dl/downloader/external.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 127a92d20..e13cf547d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,7 +6,10 @@ import sys import re from .common import FileDownloader -from ..compat import compat_setenv +from ..compat import ( + compat_setenv, + compat_str, +) from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, @@ -272,7 +275,7 @@ class FFmpegFD(ExternalFD): args += ['-i', url, '-c', 'copy'] if self.params.get('test', False): - args += ['-fs', compat_str(self._TEST_FILE_SIZE)] # -fs limit_size (output), expressed in bytes + args += ['-fs', compat_str(self._TEST_FILE_SIZE)] if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': From ed0cf9b38394b28bae5f05fd6b00c85a9c0e6755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:22:27 +0700 Subject: [PATCH 559/586] [extractor/common] Move jwplayer formats extraction in separate method --- youtube_dl/extractor/common.py | 106 +++++++++++++++++---------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index eb3c091aa..9b73a948c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2198,56 +2198,7 @@ class InfoExtractor(object): this_video_id = video_id or video_data['mediaid'] - formats = [] - for source in video_data['sources']: - source_url = self._proto_relative_url(source['file']) - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - source_url, this_video_id, mpd_id=mpd_id, fatal=False)) - # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): - formats.append({ - 'url': source_url, - 'vcodec': 'none', - 'ext': ext, - }) - else: - height = int_or_none(source.get('height')) - if height is None: - # Often no height is provided but there is a label in - # format like 1080p. - height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', - 'height', default=None)) - a_format = { - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': height, - 'ext': ext, - } - if source_url.startswith('rtmp'): - a_format['ext'] = 'flv' - - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as - # of jwplayer.flash.swf - rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) - if len(rtmp_url_parts) == 3: - rtmp_url, prefix, play_path = rtmp_url_parts - a_format.update({ - 'url': rtmp_url, - 'play_path': prefix + play_path, - }) - if rtmp_params: - a_format.update(rtmp_params) - formats.append(a_format) + formats = self._parse_jwplayer_formats(video_data['sources'], this_video_id) self._sort_formats(formats) subtitles = {} @@ -2278,6 +2229,61 @@ class InfoExtractor(object): else: return self.playlist_result(entries) + def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + formats = [] + for source in jwplayer_sources_data : + source_url = self._proto_relative_url(source['file']) + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + source_type = source.get('type') or '' + ext = mimetype2ext(source_type) or determine_ext(source_url) + if source_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, video_id, mpd_id=mpd_id, fatal=False)) + # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 + elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + formats.append({ + 'url': source_url, + 'vcodec': 'none', + 'ext': ext, + }) + else: + height = int_or_none(source.get('height')) + if height is None: + # Often no height is provided but there is a label in + # format like 1080p. + height = int_or_none(self._search_regex( + r'^(\d{3,})[pP]$', source.get('label') or '', + 'height', default=None)) + a_format = { + 'url': source_url, + 'width': int_or_none(source.get('width')), + 'height': height, + 'ext': ext, + } + if source_url.startswith('rtmp'): + a_format['ext'] = 'flv' + + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url_parts = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) + return formats + + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From 0236cd0dfde1dda540c1067a9c5982d482005c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:25:03 +0700 Subject: [PATCH 560/586] [extractor/common] Improve height extraction and extract bitrate --- youtube_dl/extractor/common.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b73a948c..2887db0c3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2240,12 +2240,14 @@ class InfoExtractor(object): ext = mimetype2ext(source_type) or determine_ext(source_url) if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + source_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=m3u8_id, fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + elif source_type.startswith('audio') or ext in ( + 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ 'url': source_url, 'vcodec': 'none', @@ -2255,19 +2257,19 @@ class InfoExtractor(object): height = int_or_none(source.get('height')) if height is None: # Often no height is provided but there is a label in - # format like 1080p. + # format like "1080p", "720p SD", or 1080. height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', + r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''), 'height', default=None)) a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), 'height': height, + 'tbr': int_or_none(source.get('bitrate')), 'ext': ext, } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf rtmp_url_parts = re.split( From 1a2192cb904ff42a309ab2c2477fc226f8651f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:28:32 +0700 Subject: [PATCH 561/586] [extractor/common] Pass arguments to _parse_jwplayer_formats and PEP8 --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2887db0c3..78dc5be24 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2198,7 +2198,9 @@ class InfoExtractor(object): this_video_id = video_id or video_data['mediaid'] - formats = self._parse_jwplayer_formats(video_data['sources'], this_video_id) + formats = self._parse_jwplayer_formats( + video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id, + mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) self._sort_formats(formats) subtitles = {} @@ -2232,7 +2234,7 @@ class InfoExtractor(object): def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): formats = [] - for source in jwplayer_sources_data : + for source in jwplayer_sources_data: source_url = self._proto_relative_url(source['file']) if base_url: source_url = compat_urlparse.urljoin(base_url, source_url) @@ -2285,7 +2287,6 @@ class InfoExtractor(object): formats.append(a_format) return formats - def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From 5dd376345b8c87e8c2130b80e73e690c5e721a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:31:38 +0700 Subject: [PATCH 562/586] [tunepk] Add extractor (closes #12197, closes #12243) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tunepk.py | 90 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/tunepk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index caf1dc766..b056dff53 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1000,6 +1000,7 @@ from .tunein import ( TuneInTopicIE, TuneInShortenerIE, ) +from .tunepk import TunePkIE from .turbo import TurboIE from .tutv import TutvIE from .tv2 import ( diff --git a/youtube_dl/extractor/tunepk.py b/youtube_dl/extractor/tunepk.py new file mode 100644 index 000000000..9d42651ce --- /dev/null +++ b/youtube_dl/extractor/tunepk.py @@ -0,0 +1,90 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) + + +class TunePkIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?tune\.pk/(?:video/|player/embed_player.php?.*?\bvid=)| + embed\.tune\.pk/play/ + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins', + 'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55', + 'info_dict': { + 'id': '6919541', + 'ext': 'mp4', + 'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins', + 'description': 'md5:eb5a04114fafef5cec90799a93a2d09c', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1487327564, + 'upload_date': '20170217', + 'uploader': 'Movie Trailers', + 'duration': 107, + 'view_count': int, + } + }, { + 'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no', + 'only_matching': True, + }, { + 'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://tune.pk/video/%s' % video_id, video_id) + + details = self._parse_json( + self._search_regex( + r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'), + video_id)['details'] + + video = details['video'] + title = video.get('title') or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'title', webpage, 'title', fatal=True) + + formats = self._parse_jwplayer_formats( + details['player']['sources'], video_id) + self._sort_formats(formats) + + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description') + + thumbnail = video.get('thumb') or self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'thumbnail', webpage, 'thumbnail') + + timestamp = unified_timestamp(video.get('date_added')) + uploader = try_get( + video, lambda x: x['uploader']['name'], + compat_str) or self._html_search_meta('author', webpage, 'author') + + duration = int_or_none(video.get('duration')) + view_count = int_or_none(video.get('views')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + } From d2b64e04b475b6ef7e5fb74d92ae9a35284fc35a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 6 Mar 2017 00:32:53 +0800 Subject: [PATCH 563/586] [addanime] Skip an invalid test --- youtube_dl/extractor/addanime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 55a9322a7..9f8a71262 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor): 'ext': 'mp4', 'description': 'One Piece 606', 'title': 'One Piece 606', - } + }, + 'skip': 'Video is gone', }, { 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', 'only_matching': True, From fc11ad383311d721179483eea2e289e3a236e457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 03:21:03 +0700 Subject: [PATCH 564/586] [drtv:live] Bypass geo restriction --- youtube_dl/extractor/drtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e966d7483..b879f2c2b 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -156,6 +156,7 @@ class DRTVIE(InfoExtractor): class DRTVLiveIE(InfoExtractor): IE_NAME = 'drtv:live' _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P[\da-z-]+)' + _GEO_COUNTRIES = ['DK'] _TEST = { 'url': 'https://www.dr.dk/tv/live/dr1', 'info_dict': { From 96182695e4e37795a30ab143129c91dab18a9865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 03:23:01 +0700 Subject: [PATCH 565/586] [drtv] Add geo countries to GeoRestrictedError --- youtube_dl/extractor/drtv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index b879f2c2b..e4917014a 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -15,6 +15,8 @@ from ..utils import ( class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', @@ -137,7 +139,7 @@ class DRTVIE(InfoExtractor): if not formats and restricted_to_denmark: self.raise_geo_restricted( 'Unfortunately, DR is not allowed to show this program outside Denmark.', - expected=True) + countries=self._GEO_COUNTRIES) self._sort_formats(formats) From 4b5de77bdb7765df4797bf068592926285ba709a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 03:57:46 +0700 Subject: [PATCH 566/586] [utils] Process bytestrings in urljoin (closes #12369) --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index aefd94518..173c49514 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -455,6 +455,9 @@ class TestUtil(unittest.TestCase): def test_urljoin(self): self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8738aa249..d293c7498 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1748,11 +1748,16 @@ def base_url(url): def urljoin(base, path): + if isinstance(path, bytes): + path = path.decode('utf-8') if not isinstance(path, compat_str) or not path: return None if re.match(r'^(?:https?:)?//', path): return path - if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base): + if isinstance(base, bytes): + base = base.decode('utf-8') + if not isinstance(base, compat_str) or not re.match( + r'^(?:https?:)?//', base): return None return compat_urlparse.urljoin(base, path) From 3f116b189bb990529a1a18ba7a3829b1592cfecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 04:01:21 +0700 Subject: [PATCH 567/586] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2f3ec1b47..c1f43a625 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version + +Core ++ [utils] Process bytestrings in urljoin (#12369) +* [extractor/common] Improve height extraction and extract bitrate +* [extractor/common] Move jwplayer formats extraction in separate method ++ [external:ffmpeg] Limit test download size to 10KiB (#12362) + +Extractors ++ [drtv] Add geo countries to GeoRestrictedError ++ [drtv:live] Bypass geo restriction ++ [tunepk] Add extractor (#12197, #12243) + + version 2017.03.05 Extractors From 16647026264b35a40ecd56f5d9392d0643a2066c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 04:04:39 +0700 Subject: [PATCH 568/586] release 2017.03.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 988d0d81b..a36d7322d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.05 +[debug] youtube-dl version 2017.03.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c1f43a625..648e04856 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.06 Core + [utils] Process bytestrings in urljoin (#12369) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f02c4dea0..85c59ca81 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -798,6 +798,7 @@ - **tunein:program** - **tunein:station** - **tunein:topic** + - **TunePk** - **Turbo** - **Tutv** - **tv.dfb.de** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 215f9a3c1..71a984913 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.05' +__version__ = '2017.03.06' From da92da4b886a0e44fe28591ddf5b746fba1c9ade Mon Sep 17 00:00:00 2001 From: denneboomyo Date: Mon, 6 Mar 2017 11:00:17 +0100 Subject: [PATCH 569/586] Openload fix extraction (#12357) * Fix extraction --- youtube_dl/extractor/openload.py | 43 +++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index fc7ff43a6..25f6a9aca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,22 +75,37 @@ class OpenloadIE(InfoExtractor): ']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', webpage, 'openload ID') - first_char = int(ol_id[0]) - urlcode = [] - num = 1 + video_url_chars = [] - while num < len(ol_id): - i = ord(ol_id[num]) - key = 0 - if i <= 90: - key = i - 65 - elif i >= 97: - key = 25 + i - 97 - urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char))) - num += 5 + first_char = ord(ol_id[0]) + key = first_char - 55 + maxKey = max(2, key) + key = min(maxKey, len(ol_id) - 14) + t = ol_id[key:key + 12] - video_url = 'https://openload.co/stream/' + ''.join( - [value for _, value in sorted(urlcode, key=lambda x: x[0])]) + hashMap = {} + v = ol_id.replace(t, "") + h = 0 + + while h < len(t): + f = t[h:h + 2] + i = int(f, 16) + hashMap[h / 2] = i + h += 2 + + h = 0 + + while h < len(v): + B = v[h:h + 2] + i = int(B, 16) + index = (h / 2) % 6 + A = hashMap[index] + i = i ^ A + video_url_chars.append(compat_chr(i)) + h += 2 + + video_url = 'https://openload.co/stream/%s?mime=true' + video_url = video_url % (''.join(video_url_chars)) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, From 92cb5763f440d0ca1627f8120a7ce29598eb9484 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 6 Mar 2017 18:04:19 +0800 Subject: [PATCH 570/586] [ChangeLog] Update after #12357 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 648e04856..5fb4c20af 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#10408, #12357) + + version 2017.03.06 Core From 54a3a8827baf71c553723b6766e676369e9c743c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 9 Jan 2017 02:36:39 +0800 Subject: [PATCH 571/586] [__init__] Metadata should be added after conversion Fixes #5594 --- ChangeLog | 4 ++++ youtube_dl/__init__.py | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5fb4c20af..ad65505c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core +* [__init__] Metadata are now added after conversion (#5594) + + Extractors * [openload] Fix extraction (#10408, #12357) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0c401baa6..ad5f13d2b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -242,14 +242,11 @@ def _real_main(argv=None): # PostProcessors postprocessors = [] - # Add the metadata pp first, the other pps will copy it if opts.metafromtitle: postprocessors.append({ 'key': 'MetadataFromTitle', 'titleformat': opts.metafromtitle }) - if opts.addmetadata: - postprocessors.append({'key': 'FFmpegMetadata'}) if opts.extractaudio: postprocessors.append({ 'key': 'FFmpegExtractAudio', @@ -279,6 +276,11 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + if opts.addmetadata: + postprocessors.append({'key': 'FFmpegMetadata'}) # XAttrMetadataPP should be run after post-processors that may change file # contents if opts.xattrs: From e30ccf7047eb3b8dff8b778790f9b084e6d7f42e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 23:05:38 +0700 Subject: [PATCH 572/586] [soundcloud] Update client id (closes #12376) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index b3aa4ce26..0ee4a8ff8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA' + _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod From 80146dcc6c27b46fb8340d3285d95f2f7674fb0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 03:57:54 +0700 Subject: [PATCH 573/586] [ChangeLog] Actualize --- ChangeLog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index ad65505c5..622086440 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,10 @@ version Core -* [__init__] Metadata are now added after conversion (#5594) - +* Metadata are now added after conversion (#5594) Extractors +* [soundcloud] Update client id (#12376) * [openload] Fix extraction (#10408, #12357) From dccd0ab35d1acc45e36241c505b5325d96ca501d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 03:59:22 +0700 Subject: [PATCH 574/586] release 2017.03.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a36d7322d..eb69696c8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.06 +[debug] youtube-dl version 2017.03.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 622086440..601aad902 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.07 Core * Metadata are now added after conversion (#5594) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 71a984913..bd451bf81 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.06' +__version__ = '2017.03.07' From d7d7f84c9565d682119d081324c26eb361e05aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 04:03:52 +0700 Subject: [PATCH 575/586] Credit @benages for redbull.tv (#11948) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index fd1b8c9ea..74abda016 100644 --- a/AUTHORS +++ b/AUTHORS @@ -207,3 +207,4 @@ Marek Rusinowski Tobias Gruetzmacher Olivier Bilodeau Lars Vierbergen +Juanjo Benages From 9df53ea36ec84c6ab5a4a672d120f7850e2363ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 04:04:49 +0700 Subject: [PATCH 576/586] Credit @puxlit for twitch 2fa (#11974) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 74abda016..273a6a034 100644 --- a/AUTHORS +++ b/AUTHORS @@ -208,3 +208,4 @@ Tobias Gruetzmacher Olivier Bilodeau Lars Vierbergen Juanjo Benages +Xiao Di Guan From fe646a2f106fef0651158c460dd766c236e2f5db Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 7 Mar 2017 15:34:06 +0800 Subject: [PATCH 577/586] [twitch] PEP8 --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index ed36336bd..2daf9dfac 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -104,7 +104,7 @@ class TwitchBaseIE(InfoExtractor): login_page, handle, 'Logging in as %s' % username, { 'username': username, 'password': password, - }) + }) if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: # TODO: Add mechanism to request an SMS or phone call From 2e76bdc850ed5d5ffe95578b576b8fb66dcea8f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 22:59:33 +0700 Subject: [PATCH 578/586] [brightcove:legacy] Relax videoPlayer validation check (closes #12381) --- youtube_dl/extractor/brightcove.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 66c8cb219..46ef8e605 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -193,7 +193,13 @@ class BrightcoveLegacyIE(InfoExtractor): if videoPlayer is not None: if isinstance(videoPlayer, list): videoPlayer = videoPlayer[0] - if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')): + videoPlayer = videoPlayer.strip() + # UUID is also possible for videoPlayer (e.g. + # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd + # or http://www8.hp.com/cn/zh/home.html) + if not (re.match( + r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', + videoPlayer) or videoPlayer.startswith('ref:')): return None params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') From b68a812ea839e44148516a34a15193189e58ba77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 23:00:21 +0700 Subject: [PATCH 579/586] [extractor/generic] Add test for brigthcove UUID-like videoPlayer --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ebab9509d..bc22421ae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -449,6 +449,23 @@ class GenericIE(InfoExtractor): }, }], }, + { + # Brightcove with UUID in videoPlayer + 'url': 'http://www8.hp.com/cn/zh/home.html', + 'info_dict': { + 'id': '5255815316001', + 'ext': 'mp4', + 'title': 'Sprocket Video - China', + 'description': 'Sprocket Video - China', + 'uploader': 'HP-Video Gallery', + 'timestamp': 1482263210, + 'upload_date': '20161220', + 'uploader_id': '1107601872001', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From b08cc749d6e1a3078d807580c424437bca143b73 Mon Sep 17 00:00:00 2001 From: denneboomyo Date: Tue, 7 Mar 2017 23:01:27 +0100 Subject: [PATCH 580/586] [openload] Fix extraction --- youtube_dl/extractor/openload.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 25f6a9aca..5a5607357 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -78,10 +78,10 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] first_char = ord(ol_id[0]) - key = first_char - 55 + key = first_char - 50 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 14) - t = ol_id[key:key + 12] + key = min(maxKey, len(ol_id) - 22) + t = ol_id[key:key + 20] hashMap = {} v = ol_id.replace(t, "") @@ -98,8 +98,9 @@ class OpenloadIE(InfoExtractor): while h < len(v): B = v[h:h + 2] i = int(B, 16) - index = (h / 2) % 6 + index = (h / 2) % 10 A = hashMap[index] + i = i ^ 137 i = i ^ A video_url_chars.append(compat_chr(i)) h += 2 From d7344d33b11cc10d1a668c1ab9a65f2a34b4000f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Mar 2017 18:25:04 +0700 Subject: [PATCH 581/586] [telequebec] Fix description extraction and update test (closes #12399) --- youtube_dl/extractor/telequebec.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 82d73c31d..fafaa826f 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -2,15 +2,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, smuggle_url, + try_get, ) class TeleQuebecIE(InfoExtractor): _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york', 'md5': 'fe95a0957e5707b1b01f5013e725c90f', 'info_dict': { @@ -18,10 +20,14 @@ class TeleQuebecIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le couronnement de New York', 'description': 'md5:f5b3d27a689ec6c1486132b2d687d432', - 'upload_date': '20160220', - 'timestamp': 1455965438, + 'upload_date': '20170201', + 'timestamp': 1485972222, } - } + }, { + # no description + 'url': 'http://zonevideo.telequebec.tv/media/30261', + 'only_matching': True, + }] def _real_extract(self, url): media_id = self._match_id(url) @@ -31,9 +37,13 @@ class TeleQuebecIE(InfoExtractor): return { '_type': 'url_transparent', 'id': media_id, - 'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}), + 'url': smuggle_url( + 'limelight:media:' + media_data['streamInfo']['sourceId'], + {'geo_countries': ['CA']}), 'title': media_data['title'], - 'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), - 'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), + 'description': try_get( + media_data, lambda x: x['descriptions'][0]['text'], compat_str), + 'duration': int_or_none( + media_data.get('durationInMilliseconds'), 1000), 'ie_key': 'LimelightMedia', } From 0f6b87d067f33b5e9b8acf2f8a1f2afa9546439e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Mar 2017 19:46:58 +0800 Subject: [PATCH 582/586] [miomio] Fix extraction Closes #12291 Closes #12388 Closes #12402 --- ChangeLog | 6 ++++++ youtube_dl/extractor/miomio.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 601aad902..b000e2e94 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [miomio] Fix extraction (#12291, #12388, #12402) + + version 2017.03.07 Core diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index ec1b4c4fe..40f72d66f 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -51,6 +51,7 @@ class MioMioIE(InfoExtractor): 'ext': 'mp4', 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31', }, + 'skip': 'Unable to load videos', }] def _extract_mioplayer(self, webpage, video_id, title, http_headers): @@ -94,9 +95,18 @@ class MioMioIE(InfoExtractor): return entries + def _download_chinese_webpage(self, *args, **kwargs): + # Requests with English locales return garbage + headers = { + 'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3', + } + kwargs.setdefault('headers', {}).update(headers) + return self._download_webpage(*args, **kwargs) + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_chinese_webpage( + url, video_id) title = self._html_search_meta( 'description', webpage, 'title', fatal=True) @@ -106,7 +116,7 @@ class MioMioIE(InfoExtractor): if '_h5' in mioplayer_path: player_url = compat_urlparse.urljoin(url, mioplayer_path) - player_webpage = self._download_webpage( + player_webpage = self._download_chinese_webpage( player_url, video_id, note='Downloading player webpage', headers={'Referer': url}) entries = self._parse_html5_media_entries(player_url, player_webpage, video_id) From 0cf2352e858982ed811122cf867fb5e25694d97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Mar 2017 21:19:08 +0700 Subject: [PATCH 583/586] [dplayit] Separate and rewrite extractor and bypass geo restriction (closes #12393) --- youtube_dl/extractor/dplay.py | 117 +++++++++++++++++++++++------ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 100 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 32028bc3b..62e676389 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -6,37 +6,24 @@ import re import time from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_HTTPError, +) from ..utils import ( USER_AGENTS, + ExtractorError, int_or_none, + unified_strdate, + remove_end, update_url_query, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?Pit\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P[^/?#]+)' + _VALID_URL = r'https?://(?Pwww\.dplay\.(?:dk|se|no))/[^/]+/(?P[^/?#]+)' _TESTS = [{ - # geo restricted, via direct unsigned hls URL - 'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', - 'info_dict': { - 'id': '1255600', - 'display_id': 'stagione-1-episodio-25', - 'ext': 'mp4', - 'title': 'Episodio 25', - 'description': 'md5:cae5f40ad988811b197d2d27a53227eb', - 'duration': 2761, - 'timestamp': 1454701800, - 'upload_date': '20160205', - 'creator': 'RTIT', - 'series': 'Take me out', - 'season_number': 1, - 'episode_number': 25, - 'age_limit': 0, - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { # non geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { @@ -168,3 +155,91 @@ class DPlayIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DPlayItIE(InfoExtractor): + _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P[^/?#]+)' + _GEO_COUNTRIES = ['IT'] + _TEST = { + 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', + 'md5': '2b808ffb00fc47b884a172ca5d13053c', + 'info_dict': { + 'id': '6918', + 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', + 'ext': 'mp4', + 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', + 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'upload_date': '20160524', + 'series': 'Biografie imbarazzanti', + 'season_number': 1, + 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', + 'episode_number': 1, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'url\s*:\s*["\']https://dplay-south-prod\.disco-api\.com/playback/videoPlaybackInfo/(\d+)', + webpage, 'video id') + + title = remove_end(self._og_search_title(webpage), ' | Dplay') + + try: + info = self._download_json( + 'https://dplay-south-prod.disco-api.com/playback/videoPlaybackInfo/%s' % video_id, + display_id, headers={ + 'Authorization': 'Bearer %s' % self._get_cookies(url).get( + 'dplayit_token').value, + 'Referer': url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): + info = self._parse_json(e.cause.read().decode('utf-8'), display_id) + error = info['errors'][0] + if error.get('code') == 'access.denied.geoblocked': + self.raise_geo_restricted( + msg=error.get('detail'), countries=self._GEO_COUNTRIES) + raise ExtractorError(info['errors'][0]['detail'], expected=True) + raise + + hls_url = info['data']['attributes']['streaming']['hls']['url'] + + formats = self._extract_m3u8_formats( + hls_url, display_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + series = self._html_search_regex( + r'(?s)]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)

    ', + webpage, 'series', fatal=False) + episode = self._search_regex( + r']+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*
    \s*([^<]+)', + webpage, 'episode', fatal=False) + + mobj = re.search( + r'(?s)]+class=["\']dates["\'][^>]*>.+?\bS\.(?P\d+)\s+E\.(?P\d+)\s*-\s*(?P\d{2}/\d{2}/\d{4})', + webpage) + if mobj: + season_number = int(mobj.group('season_number')) + episode_number = int(mobj.group('episode_number')) + upload_date = unified_strdate(mobj.group('upload_date')) + else: + season_number = episode_number = upload_date = None + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + 'upload_date': upload_date, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b056dff53..bd2762e47 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -246,7 +246,10 @@ from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE -from .dplay import DPlayIE +from .dplay import ( + DPlayIE, + DPlayItIE, +) from .dramafever import ( DramaFeverIE, DramaFeverSeriesIE, From 0e7f9a9b48700efd40c4068b00364a7963dc9265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Mar 2017 21:30:30 +0700 Subject: [PATCH 584/586] [dplayit] Relax playback info URL extraction --- youtube_dl/extractor/dplay.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 62e676389..87c5dd63e 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -183,16 +183,15 @@ class DPlayItIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'url\s*:\s*["\']https://dplay-south-prod\.disco-api\.com/playback/videoPlaybackInfo/(\d+)', + info_url = self._search_regex( + r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', webpage, 'video id') title = remove_end(self._og_search_title(webpage), ' | Dplay') try: info = self._download_json( - 'https://dplay-south-prod.disco-api.com/playback/videoPlaybackInfo/%s' % video_id, - display_id, headers={ + info_url, display_id, headers={ 'Authorization': 'Bearer %s' % self._get_cookies(url).get( 'dplayit_token').value, 'Referer': url, @@ -231,7 +230,7 @@ class DPlayItIE(InfoExtractor): season_number = episode_number = upload_date = None return { - 'id': video_id, + 'id': info_url.rpartition('/')[-1], 'display_id': display_id, 'title': title, 'description': self._og_search_description(webpage), From 2913821723c826a0d2bfc16427592bf2b9d6d31c Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Thu, 9 Mar 2017 17:18:37 +0100 Subject: [PATCH 585/586] [prosiebensat1] Improve title extraction (closes #12318) --- youtube_dl/extractor/prosiebensat1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1245309a7..0cda992b4 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -369,7 +369,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') + title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title', default=None) + if title is None: + title = self._og_search_title(webpage) info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', default=None) From 76bee08fe72f154ad3754428eb80413ad0a3aa19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 9 Mar 2017 23:42:07 +0700 Subject: [PATCH 586/586] [prosiebensat1] Improve title extraction and add test --- youtube_dl/extractor/prosiebensat1.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 0cda992b4..d8a4bd244 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -300,6 +300,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'skip_download': True, }, }, + { + # title in

    + 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', + 'info_dict': { + 'id': '4895826', + 'ext': 'mp4', + 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', + 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', + 'upload_date': '20170302', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'geo restricted to Germany', + }, { # geo restricted to Germany 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', @@ -338,6 +353,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'
    \s*

    ([^<]+)

    \s*
    ', r'

    \s*(.+?)

    ', r']+id="veeseoTitle"[^>]*>(.+?)', + r']+class="subtitle"[^>]*>([^<]+)

    ', ] _DESCRIPTION_REGEXES = [ r'

    \s*(.+?)

    ', @@ -369,9 +385,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title', default=None) - if title is None: - title = self._og_search_title(webpage) + title = self._html_search_regex( + self._TITLE_REGEXES, webpage, 'title', + default=None) or self._og_search_title(webpage) info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', default=None)