From 30e6161799dfdf9f53d3c8eaa9e10afe615bc5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Oct 2017 23:16:16 +0700 Subject: [PATCH 01/38] [soundgasm] Improve extraction (closes #14588) --- youtube_dl/extractor/soundgasm.py | 35 +++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index e004e2c5a..3d78a9d76 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -8,36 +8,49 @@ from .common import InfoExtractor class SoundgasmIE(InfoExtractor): IE_NAME = 'soundgasm' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' _TEST = { 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', 'md5': '010082a2c802c5275bb00030743e75ad', 'info_dict': { 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', 'ext': 'm4a', - 'title': 'ytdl_Piano-sample', - 'description': 'Royalty Free Sample Music' + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('title') - audio_title = mobj.group('user') + '_' + mobj.group('title') + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) + audio_url = self._html_search_regex( - r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') - audio_id = re.split(r'\/|\.', audio_url)[-2] + r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'audio URL', group='url') + + title = self._search_regex( + r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)', + webpage, 'title', default=display_id) + description = self._html_search_regex( - r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', - fatal=False) + (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>', + 
r'(?s)<li>Description:\s(.*?)<\/li>'), + webpage, 'description', fatal=False) + + audio_id = self._search_regex( + r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id) return { 'id': audio_id, 'display_id': display_id, 'url': audio_url, - 'title': audio_title, - 'description': description + 'vcodec': 'none', + 'title': title, + 'description': description, + 'uploader': mobj.group('user'), } From 7c1f419341ac2dec123eaa0075212edc6af3302b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 Oct 2017 22:21:47 +0700 Subject: [PATCH 02/38] [vimeo] Restrict iframe embed regex (closes #14600) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c3f71b45e..cedb54876 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -412,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = [] # Look for embedded (iframe) Vimeo player for mobj in re.finditer( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', + r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1', webpage): urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) PLAIN_EMBED_RE = ( From f2332f18e66fc5255d11a2762bfaff02f8221251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 Oct 2017 22:26:43 +0700 Subject: [PATCH 03/38] [youtube] Restrict embed regex (#14600) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5aef555fb..9943dddc1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1391,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) (["\']) (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v|p)/.+?) 
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) \1''', webpage)] # lazyYT YouTube embed From 8e01f3ca811e15aae04c7f2c5345c5eca38f99d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 28 Oct 2017 22:58:01 +0700 Subject: [PATCH 04/38] [dctptv] Fix extraction (closes #14599) --- youtube_dl/extractor/dctp.py | 68 ++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 00fbbff2f..3a6d0560e 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -2,53 +2,85 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate +from ..compat import compat_str +from ..utils import ( + float_or_none, + unified_strdate, +) class DctpTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$' + _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', - 'md5': '174dd4a8a6225cf5655952f969cfbe24', 'info_dict': { 'id': '95eaa4f33dad413aa17b4ee613cccc6c', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Videoinstallation für eine Kaufhausfassade', 'description': 'Kurzfilm', 'upload_date': '20110407', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 71.24, + }, + 'params': { + # rtmp download + 'skip_download': True, }, } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) - object_id = self._html_search_meta('DC.identifier', webpage) + webpage = self._download_webpage(url, display_id) - servers_json = self._download_json( - 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/', - video_id, note='Downloading server list') - server = servers_json[0]['server'] - m3u8_path = 
self._search_regex( - r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path') - formats = self._extract_m3u8_formats( - 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4', - entry_protocol='m3u8_native') + video_id = self._html_search_meta( + 'DC.identifier', webpage, 'video id', + default=None) or self._search_regex( + r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id') title = self._og_search_title(webpage) + + servers = self._download_json( + 'http://www.dctp.tv/streaming_servers/', display_id, + note='Downloading server list', fatal=False) + + if servers: + endpoint = next( + server['endpoint'] + for server in servers + if isinstance(server.get('endpoint'), compat_str) and + 'cloudfront' in server['endpoint']) + else: + endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/' + + app = self._search_regex( + r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app') + + formats = [{ + 'url': endpoint, + 'app': app, + 'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id, + 'page_url': url, + 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf', + 'ext': 'flv', + }] + description = self._html_search_meta('DC.description', webpage) upload_date = unified_strdate( self._html_search_meta('DC.date.created', webpage)) thumbnail = self._og_search_thumbnail(webpage) + duration = float_or_none(self._search_regex( + r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration', + default=None), scale=1000) return { - 'id': object_id, + 'id': video_id, 'title': title, 'formats': formats, - 'display_id': video_id, + 'display_id': display_id, 'description': description, 'upload_date': upload_date, 'thumbnail': thumbnail, + 'duration': duration, } From 47a8587915668ef82632a7a75f8bc9862679623a Mon Sep 17 00:00:00 2001 From: Andrew Udvare <audvare@gmail.com> Date: Sun, 12 Mar 2017 18:19:32 -0400 Subject: [PATCH 05/38] [younow] Add extractor --- youtube_dl/extractor/extractors.py | 5 + youtube_dl/extractor/younow.py | 197 +++++++++++++++++++++++++++++ 2 
files changed, 202 insertions(+) create mode 100644 youtube_dl/extractor/younow.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 18350810b..b6ad50ec7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1335,6 +1335,11 @@ from .youku import ( YoukuIE, YoukuShowIE, ) +from .younow import ( + YouNowIE, + YouNowChannelIE, + YouNowMomentIE, +) from .youporn import YouPornIE from .yourupload import YourUploadIE from .youtube import ( diff --git a/youtube_dl/extractor/younow.py b/youtube_dl/extractor/younow.py new file mode 100644 index 000000000..99abd66a8 --- /dev/null +++ b/youtube_dl/extractor/younow.py @@ -0,0 +1,197 @@ +# coding: utf-8 +from __future__ import unicode_literals +from datetime import date, datetime + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none, UnsupportedError + +MOMENT_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/fetch/id=%s' +STREAM_URL_FORMAT = 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' + + +class YouNowIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)' + _TEST = { + 'url': 'https://www.younow.com/AmandaPadeezy', + 'info_dict': { + 'id': 'AmandaPadeezy', + 'ext': 'mp4', + 'is_live': True, + 'title': 'March 26, 2017', + 'description': 'YouNow is the best way to broadcast live and get an audience to watch you.', + 'thumbnail': 'https://ynassets.s3.amazonaws.com/broadcast/live/157869188/157869188.jpg', + 'tags': ['girls'], + 'categories': ['girls'], + 'uploader': 'AmandaPadeezy', + 'uploader_id': '6716501', + 'uploader_url': 'https://www.younow.com/AmandaPadeezy', + 'creator': 'AmandaPadeezy', + 'formats': [{ + 'url': 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=157869188/channelId=6716501', + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + } + } + + def _real_extract(self, url): + username = self._match_id(url) + data = 
self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username) + + if data.get('media'): + stream_url = 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' % ( + data.get('broadcastId'), + data.get('userId'), + ) + else: + raise UnsupportedError('Unsupported stream or user is not streaming at this time') + + webpage = self._download_webpage(url, username) + try: + uploader = data['user']['profileUrlString'] + except KeyError: + uploader = username + try: + title = data['title'] + except KeyError: + title = date.today().strftime('%B %d, %Y') + + return { + 'id': uploader, + 'is_live': True, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': data.get('awsUrl'), + 'tags': data.get('tags'), + 'categories': data.get('tags'), + 'uploader': uploader, + 'uploader_id': data.get('userId'), + 'uploader_url': 'https://www.younow.com/%s' % (data['user']['profileUrlString'],), + 'creator': uploader, + 'view_count': int_or_none(data.get('viewers')), + 'like_count': int_or_none(data.get('likes')), + 'formats': [{ + 'url': stream_url, + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + } + + +def _moment_to_entry(item): + title = item.get('text') + title_type = item.get('titleType') + if not title: + if title_type: + title = 'YouNow %s' % item.get('titleType') + else: + title = 'YouNow moment' + + entry = { + 'id': compat_str(item['momentId']), + 'title': title, + 'view_count': int_or_none(item.get('views')), + 'like_count': int_or_none(item.get('likes')), + 'timestamp': int_or_none(item.get('created')), + 'formats': [{ + 'url': STREAM_URL_FORMAT % (item['momentId'], item['momentId']), + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + } + + try: + entry['uploader'] = entry['creator'] = item['owner']['name'] + entry['uploader_url'] = 'https://www.younow.com/%s' % (item['owner']['name'],) + entry['uploader_id'] = item['owner']['userId'] + except KeyError: + pass + + return entry 
+ + +class YouNowChannelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel' + _TEST = { + 'url': 'https://www.younow.com/Kate_Swiz/channel', + 'info_dict': { + 'title': 'Kate_Swiz moments' + }, + 'playlist_count': 6, + } + + MOMENTS_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/profile/channelId=%s/createdBefore=%d/records=20' + + def _real_extract(self, url): + entries = [] + username = self._match_id(url) + user_info = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username, note='Downloading user information') + channel_id = user_info['userId'] + created_before = 0 + moment_ids = [] + moment_ids_processed = [] + err = False + + while True: + if created_before: + cb = datetime.fromtimestamp(created_before) + else: + cb = datetime.now() + info = self._download_json(self.MOMENTS_URL_FORMAT % (channel_id, created_before), username, note='Downloading moments data (created before %s)' % (cb)) + + for item in info['items']: + if item['type'] == 'moment': + entry = _moment_to_entry(item) + moment_ids_processed.append(entry['id']) + entries.append(entry) + elif item['type'] == 'collection': + moment_ids += [compat_str(x) for x in item['momentsIds']] + + try: + created_before = int_or_none(item['created']) + except KeyError: + err = True + break + + if (err or + not info['hasMore'] or + 'items' not in info or + not info['items']): + break + + for mid in set(moment_ids): + if mid in moment_ids_processed: + continue + item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) + entries.append(_moment_to_entry(item['item'])) + + return self.playlist_result(entries, playlist_title='%s moments' % (username)) + + +class YouNowMomentIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/]+)/[^/]+' + _TEST = { + 'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m', + 'info_dict': { + 'id': '20712117', + 'ext': 'mp4', + 'title': 'YouNow 
capture', + 'view_count': 19, + 'like_count': 0, + 'timestamp': 1490432040, + 'formats': [{ + 'url': 'https://hls.younow.com/momentsplaylists/live/20712117/20712117.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + 'upload_date': '20170325', + 'uploader': 'GABO...', + 'uploader_id': 35917228, + }, + } + + def _real_extract(self, url): + mid = self._match_id(url) + item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) + return _moment_to_entry(item['item']) From eb4b5818e2a297bd001eb1b4962d709b1245fd24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Oct 2017 04:16:07 +0700 Subject: [PATCH 06/38] [younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/younow.py | 221 +++++++++++++++-------------- 2 files changed, 114 insertions(+), 109 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b6ad50ec7..2eed706f9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1336,7 +1336,7 @@ from .youku import ( YoukuShowIE, ) from .younow import ( - YouNowIE, + YouNowLiveIE, YouNowChannelIE, YouNowMomentIE, ) diff --git a/youtube_dl/extractor/younow.py b/youtube_dl/extractor/younow.py index 99abd66a8..04dbc87fc 100644 --- a/youtube_dl/extractor/younow.py +++ b/youtube_dl/extractor/younow.py @@ -1,17 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals -from datetime import date, datetime + +import itertools from .common import InfoExtractor from ..compat import compat_str -from ..utils import int_or_none, UnsupportedError +from ..utils import ( + ExtractorError, + int_or_none, + try_get, +) -MOMENT_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/fetch/id=%s' -STREAM_URL_FORMAT = 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' +CDN_API_BASE = 'https://cdn.younow.com/php/api' +MOMENT_URL_FORMAT = 
'%s/moment/fetch/id=%%s' % CDN_API_BASE -class YouNowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)' +class YouNowLiveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://www.younow.com/AmandaPadeezy', 'info_dict': { @@ -19,179 +24,179 @@ class YouNowIE(InfoExtractor): 'ext': 'mp4', 'is_live': True, 'title': 'March 26, 2017', - 'description': 'YouNow is the best way to broadcast live and get an audience to watch you.', - 'thumbnail': 'https://ynassets.s3.amazonaws.com/broadcast/live/157869188/157869188.jpg', + 'thumbnail': r're:^https?://.*\.jpg$', 'tags': ['girls'], 'categories': ['girls'], 'uploader': 'AmandaPadeezy', 'uploader_id': '6716501', 'uploader_url': 'https://www.younow.com/AmandaPadeezy', 'creator': 'AmandaPadeezy', - 'formats': [{ - 'url': 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=157869188/channelId=6716501', - 'ext': 'mp4', - 'protocol': 'm3u8', - }], - } + }, + 'skip': True, } + @classmethod + def suitable(cls, url): + return (False + if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url) + else super(YouNowLiveIE, cls).suitable(url)) + def _real_extract(self, url): username = self._match_id(url) - data = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username) - if data.get('media'): - stream_url = 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' % ( - data.get('broadcastId'), - data.get('userId'), - ) - else: - raise UnsupportedError('Unsupported stream or user is not streaming at this time') + data = self._download_json( + 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' + % username, username) - webpage = self._download_webpage(url, username) - try: - uploader = data['user']['profileUrlString'] - except KeyError: - uploader = username - try: - title = data['title'] - except KeyError: - title = 
date.today().strftime('%B %d, %Y') + if data.get('errorCode') != 0: + raise ExtractorError(data['errorMsg'], expected=True) + + uploader = try_get( + data, lambda x: x['user']['profileUrlString'], + compat_str) or username return { 'id': uploader, 'is_live': True, - 'title': title, - 'description': self._og_search_description(webpage), + 'title': self._live_title(uploader), 'thumbnail': data.get('awsUrl'), 'tags': data.get('tags'), 'categories': data.get('tags'), 'uploader': uploader, 'uploader_id': data.get('userId'), - 'uploader_url': 'https://www.younow.com/%s' % (data['user']['profileUrlString'],), + 'uploader_url': 'https://www.younow.com/%s' % username, 'creator': uploader, 'view_count': int_or_none(data.get('viewers')), 'like_count': int_or_none(data.get('likes')), 'formats': [{ - 'url': stream_url, + 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' + % (CDN_API_BASE, data['broadcastId'], data['userId']), 'ext': 'mp4', 'protocol': 'm3u8', }], } -def _moment_to_entry(item): +def _extract_moment(item, fatal=True): + moment_id = item.get('momentId') + if not moment_id: + if not fatal: + return + raise ExtractorError('Unable to extract moment id') + + moment_id = compat_str(moment_id) + title = item.get('text') - title_type = item.get('titleType') if not title: - if title_type: - title = 'YouNow %s' % item.get('titleType') - else: - title = 'YouNow moment' + title = 'YouNow %s' % ( + item.get('momentType') or item.get('titleType') or 'moment') + + uploader = try_get(item, lambda x: x['owner']['name'], compat_str) + uploader_id = try_get(item, lambda x: x['owner']['userId']) + uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None entry = { - 'id': compat_str(item['momentId']), + 'extractor_key': 'YouNowMoment', + 'id': moment_id, 'title': title, 'view_count': int_or_none(item.get('views')), 'like_count': int_or_none(item.get('likes')), 'timestamp': int_or_none(item.get('created')), + 'creator': uploader, + 'uploader': 
uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, 'formats': [{ - 'url': STREAM_URL_FORMAT % (item['momentId'], item['momentId']), + 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' + % (moment_id, moment_id), 'ext': 'mp4', - 'protocol': 'm3u8', + 'protocol': 'm3u8_native', }], } - try: - entry['uploader'] = entry['creator'] = item['owner']['name'] - entry['uploader_url'] = 'https://www.younow.com/%s' % (item['owner']['name'],) - entry['uploader_id'] = item['owner']['userId'] - except KeyError: - pass - return entry class YouNowChannelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel' _TEST = { - 'url': 'https://www.younow.com/Kate_Swiz/channel', + 'url': 'https://www.younow.com/its_Kateee_/channel', 'info_dict': { - 'title': 'Kate_Swiz moments' + 'id': '14629760', + 'title': 'its_Kateee_ moments' }, - 'playlist_count': 6, + 'playlist_mincount': 8, } - MOMENTS_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/profile/channelId=%s/createdBefore=%d/records=20' + def _entries(self, username, channel_id): + created_before = 0 + for page_num in itertools.count(1): + if created_before is None: + break + info = self._download_json( + '%s/moment/profile/channelId=%s/createdBefore=%d/records=20' + % (CDN_API_BASE, channel_id, created_before), username, + note='Downloading moments page %d' % page_num) + items = info.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + item_type = item.get('type') + if item_type == 'moment': + entry = _extract_moment(item, fatal=False) + if entry: + yield entry + elif item_type == 'collection': + moments = item.get('momentsIds') + if isinstance(moments, list): + for moment_id in moments: + m = self._download_json( + MOMENT_URL_FORMAT % moment_id, username, + note='Downloading %s moment JSON' % moment_id, + fatal=False) + if m and isinstance(m, dict) and m.get('item'): + entry = 
_extract_moment(m['item']) + if entry: + yield entry + created_before = int_or_none(item.get('created')) def _real_extract(self, url): - entries = [] username = self._match_id(url) - user_info = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username, note='Downloading user information') - channel_id = user_info['userId'] - created_before = 0 - moment_ids = [] - moment_ids_processed = [] - err = False - - while True: - if created_before: - cb = datetime.fromtimestamp(created_before) - else: - cb = datetime.now() - info = self._download_json(self.MOMENTS_URL_FORMAT % (channel_id, created_before), username, note='Downloading moments data (created before %s)' % (cb)) - - for item in info['items']: - if item['type'] == 'moment': - entry = _moment_to_entry(item) - moment_ids_processed.append(entry['id']) - entries.append(entry) - elif item['type'] == 'collection': - moment_ids += [compat_str(x) for x in item['momentsIds']] - - try: - created_before = int_or_none(item['created']) - except KeyError: - err = True - break - - if (err or - not info['hasMore'] or - 'items' not in info or - not info['items']): - break - - for mid in set(moment_ids): - if mid in moment_ids_processed: - continue - item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) - entries.append(_moment_to_entry(item['item'])) - - return self.playlist_result(entries, playlist_title='%s moments' % (username)) + channel_id = compat_str(self._download_json( + 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' + % username, username, note='Downloading user information')['userId']) + return self.playlist_result( + self._entries(username, channel_id), channel_id, + '%s moments' % username) class YouNowMomentIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/]+)/[^/]+' + _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)' _TEST = { 'url': 
'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m', + 'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807', 'info_dict': { 'id': '20712117', 'ext': 'mp4', 'title': 'YouNow capture', - 'view_count': 19, - 'like_count': 0, + 'view_count': int, + 'like_count': int, 'timestamp': 1490432040, - 'formats': [{ - 'url': 'https://hls.younow.com/momentsplaylists/live/20712117/20712117.m3u8', - 'ext': 'mp4', - 'protocol': 'm3u8', - }], 'upload_date': '20170325', 'uploader': 'GABO...', 'uploader_id': 35917228, }, } + @classmethod + def suitable(cls, url): + return (False + if YouNowChannelIE.suitable(url) + else super(YouNowMomentIE, cls).suitable(url)) + def _real_extract(self, url): - mid = self._match_id(url) - item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) - return _moment_to_entry(item['item']) + video_id = self._match_id(url) + item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id) + return _extract_moment(item['item']) From c3206d02e94ce98c6467762a228a9e58616c6d8f Mon Sep 17 00:00:00 2001 From: enigmaquip <enigmaquip@users.noreply.github.com> Date: Sat, 28 Oct 2017 16:20:18 -0600 Subject: [PATCH 07/38] [fxnetworks] Extract series metadata --- youtube_dl/extractor/fxnetworks.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 629897317..37549fb01 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -3,27 +3,31 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE from ..utils import ( - update_url_query, extract_attributes, + int_or_none, parse_age_limit, smuggle_url, + update_url_query, ) class FXNetworksIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)' _TESTS = [{ - 'url': 'http://www.fxnetworks.com/video/719841347694', - 'md5': '1447d4722e42ebca19e5232ab93abb22', + 'url': 
'http://www.fxnetworks.com/video/1032565827847', + 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703', 'info_dict': { - 'id': '719841347694', + 'id': 'dRzwHC_MMqIv', 'ext': 'mp4', - 'title': 'Vanpage', - 'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.', + 'title': 'First Look: Better Things - Season 2', + 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.', 'age_limit': 14, 'uploader': 'NEWA-FNG-FX', - 'upload_date': '20160706', - 'timestamp': 1467844741, + 'upload_date': '20170825', + 'timestamp': 1503686274, + 'episode_number': 0, + 'season_number': 2, + 'series': 'Better Things', }, 'add_ie': ['ThePlatform'], }, { @@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE): 'id': video_id, 'title': title, 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), + 'series': video_data.get('data-show-title'), + 'episode_number': int_or_none(video_data.get('data-episode')), + 'season_number': int_or_none(video_data.get('data-season')), 'thumbnail': video_data.get('data-large-thumb'), 'age_limit': parse_age_limit(rating), 'ie_key': 'ThePlatform', From 056653bbb1b94ba04f331ed4c27a1c0d24fe1e30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Oct 2017 07:04:48 +0700 Subject: [PATCH 08/38] [utils] Add support for zero years and months in parse_duration --- test/test_utils.py | 1 + youtube_dl/utils.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index efa73d0f4..cc13f795c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT00H03M30SZ'), 210) + self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) def 
test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59fb33435..34866a54b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1835,10 +1835,20 @@ def parse_duration(s): days, hours, mins, secs, ms = m.groups() else: m = re.match( - r'''(?ix)(?:P?T)? + r'''(?ix)(?:P? + (?: + [0-9]+\s*y(?:ears?)?\s* + )? + (?: + [0-9]+\s*m(?:onths?)?\s* + )? + (?: + [0-9]+\s*w(?:eeks?)?\s* + )? (?: (?P<days>[0-9]+)\s*d(?:ays?)?\s* )? + T)? (?: (?P<hours>[0-9]+)\s*h(?:ours?)?\s* )? From 9211e3319e6006373d8b5055f7a3d0bbd734c57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Oct 2017 07:05:55 +0700 Subject: [PATCH 09/38] [extractor/common] Prefix format id for audio only HLS formats --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a69240693..52f2055b5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1401,7 +1401,7 @@ class InfoExtractor(object): media_url = media.get('URI') if media_url: format_id = [] - for v in (group_id, name): + for v in (m3u8_id, group_id, name): if v: format_id.append(v) f = { From 514e8aefd488b385122fa989e937e5f9ae62d136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Oct 2017 07:11:37 +0700 Subject: [PATCH 10/38] [egghead] Fix extraction (closes #14388) --- youtube_dl/extractor/egghead.py | 81 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index e4a3046af..edabaafe6 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -2,7 +2,9 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + determine_ext, int_or_none, try_get, 
unified_timestamp, @@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor): 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'playlist_count': 29, 'info_dict': { - 'id': 'professor-frisby-introduces-composable-functional-javascript', + 'id': '72', 'title': 'Professor Frisby Introduces Composable Functional JavaScript', 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', }, @@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - course = self._download_json( - 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id) + lessons = self._download_json( + 'https://egghead.io/api/v1/series/%s/lessons' % playlist_id, + playlist_id, 'Downloading course lessons JSON') - entries = [ - self.url_result( - 'wistia:%s' % lesson['wistia_id'], ie='Wistia', - video_id=lesson['wistia_id'], video_title=lesson.get('title')) - for lesson in course['lessons'] if lesson.get('wistia_id')] + entries = [] + for lesson in lessons: + lesson_url = lesson.get('http_url') + if not lesson_url or not isinstance(lesson_url, compat_str): + continue + lesson_id = lesson.get('id') + if lesson_id: + lesson_id = compat_str(lesson_id) + entries.append(self.url_result( + lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id)) + + course = self._download_json( + 'https://egghead.io/api/v1/series/%s' % playlist_id, + playlist_id, 'Downloading course JSON', fatal=False) or {} + + playlist_id = course.get('id') + if playlist_id: + playlist_id = compat_str(playlist_id) return self.playlist_result( entries, playlist_id, course.get('title'), @@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor): class EggheadLessonIE(InfoExtractor): IE_DESC = 'egghead.io lesson' IE_NAME = 'egghead:lesson' - _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)' - _TEST = { + _VALID_URL = 
r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', 'info_dict': { - 'id': 'fv5yotjxcg', + 'id': '1196', + 'display_id': 'javascript-linear-data-flow-with-container-style-types-box', 'ext': 'mp4', 'title': 'Create linear data flow with container style types (Box)', 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e', @@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor): }, 'params': { 'skip_download': True, + 'format': 'bestvideo', }, - } + }, { + 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application', + 'only_matching': True, + }] def _real_extract(self, url): - lesson_id = self._match_id(url) + display_id = self._match_id(url) lesson = self._download_json( - 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id) + 'https://egghead.io/api/v1/lessons/%s' % display_id, display_id) + + lesson_id = compat_str(lesson['id']) + title = lesson['title'] + + formats = [] + for _, format_url in lesson['media_urls'].items(): + if not format_url or not isinstance(format_url, compat_str): + continue + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, lesson_id, 'mp4', entry_protocol='m3u8', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, lesson_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': format_url, + }) + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'ie_key': 'Wistia', - 'url': 'wistia:%s' % lesson['wistia_id'], - 'id': lesson['wistia_id'], - 'title': lesson.get('title'), + 'id': lesson_id, + 'display_id': display_id, + 'title': title, 'description': lesson.get('summary'), 'thumbnail': lesson.get('thumb_nail'), 'timestamp': unified_timestamp(lesson.get('published_at')), 'duration': int_or_none(lesson.get('duration')), 'view_count': 
int_or_none(lesson.get('plays_count')), 'tags': try_get(lesson, lambda x: x['tag_list'], list), + 'series': try_get( + lesson, lambda x: x['series']['title'], compat_str), + 'formats': formats, } From 518d357b46cb840224d69cc543667be3a87b9b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Oct 2017 07:21:33 +0700 Subject: [PATCH 11/38] [ChangeLog] Actualize --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 547b55981..795491d34 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version <unreleased> + +Core +* [extractor/common] Prefix format id for audio only HLS formats ++ [utils] Add support for zero years and months in parse_duration + +Extractors +* [egghead] Fix extraction (#14388) ++ [fxnetworks] Extract series metadata (#14603) ++ [younow] Add support for younow.com (#9255, #9432, #12436) +* [dctptv] Fix extraction (#14599) +* [youtube] Restrict embed regex (#14600) +* [vimeo] Restrict iframe embed regex (#14600) +* [soundgasm] Improve extraction (#14588) +- [myvideo] Remove extractor (#8557) ++ [nbc] Add support for classic-tv videos (#14575) ++ [vrtnu] Add support for cookies authentication and simplify (#11873) ++ [canvas] Add support for vrt.be/vrtnu (#11873) +* [twitch:clips] Fix title extraction (#14566) ++ [ndtv] Add support for sub-sites (#14534) +* [dramafever] Fix login error message extraction ++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt, + ro, hu) (#14553) + + version 2017.10.20 Core From 6d0630d8801fd3278a05fa7e55a73bd454403e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Oct 2017 07:22:53 +0700 Subject: [PATCH 12/38] release 2017.10.29 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md 
b/.github/ISSUE_TEMPLATE.md index 81fe10d54..881475878 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.20** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.29** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.20 +[debug] youtube-dl version 2017.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 795491d34..d33a710fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.10.29 Core * [extractor/common] Prefix format id for audio only HLS formats diff --git a/docs/supportedsites.md 
b/docs/supportedsites.md index be5de22df..7b8e7403a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -498,7 +498,6 @@ - **MySpace:album** - **MySpass** - **Myvi** - - **myvideo** (Currently broken) - **MyVidster** - **n-tv.de** - **natgeo** @@ -977,6 +976,7 @@ - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be + - **VrtNU**: VrtNU.be - **vrv** - **vrv:series** - **VShare** @@ -1035,6 +1035,9 @@ - **YouJizz** - **youku**: 优酷 - **youku:show** + - **YouNowChannel** + - **YouNowLive** + - **YouNowMoment** - **YouPorn** - **YourUpload** - **youtube**: YouTube.com diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4d1686670..43f080bc3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.20' +__version__ = '2017.10.29' From 8fe767e07261abb8013b18ca2ed31ebb8d95c7d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Oct 2017 23:05:25 +0700 Subject: [PATCH 13/38] [spankbang] Detect unavailable videos (closes #14644) --- youtube_dl/extractor/spankbang.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 3394c7e6b..2863e53b5 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class SpankBangIE(InfoExtractor): @@ -33,6 +34,10 @@ class SpankBangIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if re.search(r'<[^>]+\bid=["\']video_removed', webpage): + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) + stream_key = self._html_search_regex( r'''var\s+stream_key\s*=\s*['"](.+?)['"]''', webpage, 'stream 
key') From 044eeb145556cb41485b6b6644f40b2161a4e0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Nov 2017 23:39:26 +0700 Subject: [PATCH 14/38] [extractor/common] Respect URL query in _extract_wowza_formats (closes #14645) --- youtube_dl/extractor/common.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 52f2055b5..a67ac4411 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2233,27 +2233,35 @@ class InfoExtractor(object): return formats def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): + query = compat_urlparse.urlparse(url).query url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) url_base = self._search_regex( r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url') http_base_url = '%s:%s' % ('http', url_base) formats = [] + + def manifest_url(manifest): + m_url = '%s/%s' % (http_base_url, manifest) + if query: + m_url += '?%s' % query + return m_url + if 'm3u8' not in skip_protocols: formats.extend(self._extract_m3u8_formats( - http_base_url + '/playlist.m3u8', video_id, 'mp4', + manifest_url('playlist.m3u8'), video_id, 'mp4', m3u8_entry_protocol, m3u8_id='hls', fatal=False)) if 'f4m' not in skip_protocols: formats.extend(self._extract_f4m_formats( - http_base_url + '/manifest.f4m', + manifest_url('manifest.f4m'), video_id, f4m_id='hds', fatal=False)) if 'dash' not in skip_protocols: formats.extend(self._extract_mpd_formats( - http_base_url + '/manifest.mpd', + manifest_url('manifest.mpd'), video_id, mpd_id='dash', fatal=False)) if re.search(r'(?:/smil:|\.smil)', url_base): if 'smil' not in skip_protocols: rtmp_formats = self._extract_smil_formats( - http_base_url + '/jwplayer.smil', + manifest_url('jwplayer.smil'), video_id, fatal=False) for rtmp_format in rtmp_formats: rtsp_format = 
rtmp_format.copy() From b0f4331002798522621aff55deaf18406d17e081 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 2 Nov 2017 13:30:01 +0100 Subject: [PATCH 15/38] [gamespot] extract formats referenced with new data fields(#14652) --- youtube_dl/extractor/gamespot.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 02804d297..6d177cbaf 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' + _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', @@ -35,6 +35,9 @@ class GameSpotIE(OnceIE): 'params': { 'skip_download': True, # m3u8 downloads }, + }, { + 'url': 'https://www.gamespot.com/videos/embed/6439218/', + 'only_matching': True, }] def _real_extract(self, url): @@ -52,7 +55,7 @@ class GameSpotIE(OnceIE): manifest_url = f4m_url formats.extend(self._extract_f4m_formats( f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) - m3u8_url = streams.get('m3u8_stream') + m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream')) if m3u8_url: manifest_url = m3u8_url m3u8_formats = self._extract_m3u8_formats( @@ -60,7 +63,7 @@ class GameSpotIE(OnceIE): m3u8_id='hls', fatal=False) formats.extend(m3u8_formats) progressive_url = dict_get( - streams, ('progressive_hd', 'progressive_high', 'progressive_low')) + streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr')) if progressive_url and manifest_url: qualities_basename = self._search_regex( r'/([^/]+)\.csmil/', From 44cca168cc36a71ea77d8635a44f6ee9d2c33a99 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: 
Thu, 2 Nov 2017 14:16:15 +0100 Subject: [PATCH 16/38] [skysport] add support ooyala embed_token protected videos(fixes #14641) --- youtube_dl/extractor/skysports.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/skysports.py index 4ca9f6b3c..efcbb36a9 100644 --- a/youtube_dl/extractor/skysports.py +++ b/youtube_dl/extractor/skysports.py @@ -2,7 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import strip_or_none +from ..utils import ( + extract_attributes, + smuggle_url, + strip_or_none, + urljoin, +) class SkySportsIE(InfoExtractor): @@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + video_data = extract_attributes(self._search_regex( + r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data')) + + video_url = 'ooyala:%s' % video_data['data-video-id'] + if video_data.get('data-token-required') == 'true': + token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {} + token_fetch_url = token_fetch_options.get('url') + if token_fetch_url: + embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False) + if embed_token: + video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')}) return { '_type': 'url_transparent', 'id': video_id, - 'url': 'ooyala:%s' % self._search_regex( - r'data-video-id="([^"]+)"', webpage, 'ooyala id'), + 'url': video_url, 'title': self._og_search_title(webpage), 'description': strip_or_none(self._og_search_description(webpage)), 'ie_key': 'Ooyala', From cd670befc4c823a38a88fffbaa6c493e539dd79d Mon Sep 17 00:00:00 2001 From: Jimbolino <Jimbolino@users.noreply.github.com> Date: Thu, 2 Nov 2017 17:48:43 +0100 Subject: [PATCH 17/38] [22tracks] Remove extractor 
(closes #11024) --- youtube_dl/extractor/extractors.py | 4 -- youtube_dl/extractor/twentytwotracks.py | 86 ------------------------- 2 files changed, 90 deletions(-) delete mode 100644 youtube_dl/extractor/twentytwotracks.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2eed706f9..92f7e9027 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1110,10 +1110,6 @@ from .tvplayer import TVPlayerIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE -from .twentytwotracks import ( - TwentyTwoTracksIE, - TwentyTwoTracksGenreIE -) from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py deleted file mode 100644 index d6c0ab184..000000000 --- a/youtube_dl/extractor/twentytwotracks.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - -# 22Tracks regularly replace the audio tracks that can be streamed on their -# site. The tracks usually expire after 1 months, so we can't add tests. 
- - -class TwentyTwoTracksIE(InfoExtractor): - _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)' - IE_NAME = '22tracks:track' - - _API_BASE = 'http://22tracks.com/api' - - def _extract_info(self, city, genre_name, track_id=None): - item_id = track_id if track_id else genre_name - - cities = self._download_json( - '%s/cities' % self._API_BASE, item_id, - 'Downloading cities info', - 'Unable to download cities info') - city_id = [x['id'] for x in cities if x['slug'] == city][0] - - genres = self._download_json( - '%s/genres/%s' % (self._API_BASE, city_id), item_id, - 'Downloading %s genres info' % city, - 'Unable to download %s genres info' % city) - genre = [x for x in genres if x['slug'] == genre_name][0] - genre_id = genre['id'] - - tracks = self._download_json( - '%s/tracks/%s' % (self._API_BASE, genre_id), item_id, - 'Downloading %s genre tracks info' % genre_name, - 'Unable to download track info') - - return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks] - - def _get_track_url(self, filename, track_id): - token = self._download_json( - 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename, - track_id, 'Downloading token', 'Unable to download token') - return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e']) - - def _extract_track_info(self, track_info, track_id): - download_url = self._get_track_url(track_info['filename'], track_id) - title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip()) - return { - 'id': track_id, - 'url': download_url, - 'ext': 'mp3', - 'title': title, - 'duration': int_or_none(track_info.get('duration')), - 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created')) - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - city = mobj.group('city') - genre = mobj.group('genre') - track_id = mobj.group('id') - - track_info = 
self._extract_info(city, genre, track_id) - return self._extract_track_info(track_info, track_id) - - -class TwentyTwoTracksGenreIE(TwentyTwoTracksIE): - _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$' - IE_NAME = '22tracks:genre' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - city = mobj.group('city') - genre = mobj.group('genre') - - genre_title, tracks = self._extract_info(city, genre) - - entries = [ - self._extract_track_info(track_info, track_info['id']) - for track_info in tracks] - - return self.playlist_result(entries, genre, genre_title) From 48107c198bd76e611e3d4c2486cdc5403829a05a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Nov 2017 22:10:55 +0700 Subject: [PATCH 18/38] [f4m] Prefer baseURL for relative URLs (closes #14660) --- youtube_dl/downloader/f4m.py | 25 +++++++++++++++++-------- youtube_dl/extractor/common.py | 14 +++++++------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index c8fde9a89..fdb80f42a 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -243,8 +243,17 @@ def remove_encrypted_media(media): media)) -def _add_ns(prop): - return '{http://ns.adobe.com/f4m/1.0}%s' % prop +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url class F4mFD(FragmentFD): @@ -330,13 +339,13 @@ class F4mFD(FragmentFD): rate, media = list(filter( lambda f: int(f[0]) == requested_bitrate, formats))[0] - base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. 
+ man_base_url = get_base_url(doc) or man_url + + base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - # From Adobe F4M 3.0 spec: - # The <baseURL> element SHALL be the base URL for all relative - # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said - # URLs should be relative to the location of the containing document. - boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a67ac4411..64fb869aa 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -29,7 +29,10 @@ from ..compat import ( compat_urlparse, compat_xml_parse_error, ) -from ..downloader.f4m import remove_encrypted_media +from ..downloader.f4m import ( + get_base_url, + remove_encrypted_media, +) from ..utils import ( NO_DEFAULT, age_restricted, @@ -1239,11 +1242,8 @@ class InfoExtractor(object): media_nodes = remove_encrypted_media(media_nodes) if not media_nodes: return formats - base_url = xpath_text( - manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], - 'base URL', default=None) - if base_url: - base_url = base_url.strip() + + manifest_base_url = get_base_url(manifest) bootstrap_info = xpath_element( manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], @@ -1275,7 +1275,7 @@ class InfoExtractor(object): continue manifest_url = ( media_url if media_url.startswith('http://') or media_url.startswith('https://') - else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url)) + else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url)) # If media_url is itself 
a f4m manifest do the recursive extraction # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested From 187ee66c941d9c397a46ffa490375e2c405500e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Nov 2017 22:11:39 +0700 Subject: [PATCH 19/38] [extractor/common] Add protocol for f4m formats --- youtube_dl/extractor/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 64fb869aa..e2d9f52b0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1310,6 +1310,7 @@ class InfoExtractor(object): 'url': manifest_url, 'manifest_url': manifest_url, 'ext': 'flv' if bootstrap_info is not None else None, + 'protocol': 'f4m', 'tbr': tbr, 'width': width, 'height': height, From 181e381fda4ddb9083f3834a8bd1bab72c937545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Nov 2017 22:12:20 +0700 Subject: [PATCH 20/38] [test_InfoExtractor] Add test for #14660 --- test/test_InfoExtractor.py | 26 ++++++++++++++++++++++++++ test/testdata/f4m/custom_base_url.f4m | 10 ++++++++++ 2 files changed, 36 insertions(+) create mode 100644 test/testdata/f4m/custom_base_url.f4m diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index f18a823fc..686c63efa 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_f4m_formats(self): + _TEST_CASES = [ + ( + # https://github.com/rg3/youtube-dl/issues/14660 + 'custom_base_url', + 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + [{ + 'manifest_url': 
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + 'ext': 'flv', + 'format_id': '2148', + 'protocol': 'f4m', + 'tbr': 2148, + 'width': 1280, + 'height': 720, + }] + ), + ] + + for f4m_file, f4m_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_f4m_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + f4m_url, None) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) if __name__ == '__main__': unittest.main() diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m new file mode 100644 index 000000000..74e1539e8 --- /dev/null +++ b/test/testdata/f4m/custom_base_url.f4m @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<manifest xmlns="http://ns.adobe.com/f4m/1.0"> + <streamType>recorded</streamType> + <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL> + <duration>269.293</duration> + <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo> + <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2"> + <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata> + </media> +</manifest> From 6e71bbf4abc729cae3b0e428c3bb321690c9e485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Nov 2017 16:12:56 +0700 Subject: [PATCH 21/38] [hotstar] Bypass 
geo restriction (closes #14672) --- youtube_dl/extractor/hotstar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 3a7a66a34..9be958be6 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -11,6 +11,7 @@ from ..utils import ( class HotStarIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' + _GEO_COUNTRIES = ['IN'] _TESTS = [{ 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', 'info_dict': { From 477c97f86b5451f384a84a7a8d8237cfd1bec1d2 Mon Sep 17 00:00:00 2001 From: Alpesh Valia <alpeshvalia727@gmail.com> Date: Thu, 16 Mar 2017 22:00:11 +0530 Subject: [PATCH 22/38] [hotstar:playlist] Add extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/hotstar.py | 58 +++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 92f7e9027..d084707ee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -432,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE -from .hotstar import HotStarIE +from .hotstar import ( + HotStarIE, + HotStarPlaylistIE, +) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .hrti import ( diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 9be958be6..8d8a80a82 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -7,6 +7,7 @@ from ..utils import ( determine_ext, int_or_none, ) +import re class HotStarIE(InfoExtractor): @@ -17,7 +18,7 @@ class HotStarIE(InfoExtractor): 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB - English', + 'title': 'On Air With AIB', 'description': 
'md5:c957d8868e9bc793ccb813691cc4c434', 'timestamp': 1447227000, 'upload_date': '20151111', @@ -100,3 +101,58 @@ class HotStarIE(InfoExtractor): 'episode_number': int_or_none(video_data.get('episodeNumber')), 'series': video_data.get('contentTitle'), } + + +class HotStarPlaylistIE(InfoExtractor): + IE_NAME = 'hotstar:playlist' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})' + + _TESTS = [{ + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'info_dict': { + 'id': '10856', + 'title': 'pow-bandi-yuddh-ke', + }, + 'playlist_mincount': 0, + }, { + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'only_matching': True, + }] + + def _extract_episode_info(self, series_id, playlist_title, video): + + picture_url = video.get('urlPictures') + thumbnail = '' + if picture_url: + thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % (picture_url[-2:], picture_url, picture_url) + + episode_title = video.get('episodeTitle', '') + episode_title = episode_title.lower().replace(' ', '-') + url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) + + info_dict = { + 'id': video.get('contentId'), + 'title': video.get('episodeTitle'), + 'description': video.get('longDescription'), + 'thumbnail': thumbnail, + 'url': url, + '_type': 'url', + } + return info_dict + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + series_id = mobj.group('series_id') + playlist_id = mobj.group('playlist_id') + playlist_title = mobj.group('playlist_title') + + collection = self._download_json( + "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, + playlist_id + ) + + videos = collection.get('resultObj', 
{}).get('response', {}).get('docs', []) + entries = [ + self._extract_episode_info(series_id, playlist_title, video) + for video in videos if video.get('contentId')] + return self.playlist_result(entries, playlist_id, playlist_title) From 909191de9154bf289b333cfe01b8e88e3ac1fefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Nov 2017 19:14:48 +0700 Subject: [PATCH 23/38] [hotstar:playlist] Fix issues and improve (closes #12465) --- youtube_dl/extractor/hotstar.py | 128 ++++++++++++++++---------------- 1 file changed, 66 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 8d8a80a82..d28af36ec 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -1,18 +1,41 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, - int_or_none, -) import re +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, +) -class HotStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' + +class HotStarBaseIE(InfoExtractor): _GEO_COUNTRIES = ['IN'] + + def _download_json(self, *args, **kwargs): + response = super(HotStarBaseIE, self)._download_json(*args, **kwargs) + if response['resultCode'] != 'OK': + if kwargs.get('fatal'): + raise ExtractorError( + response['errorDescription'], expected=True) + return None + return response['resultObj'] + + def _download_content_info(self, content_id): + return self._download_json( + 'https://account.hotstar.com/AVS/besc', content_id, query={ + 'action': 'GetAggregatedContentDetails', + 'appVersion': '5.0.40', + 'channel': 'PCTV', + 'contentId': content_id, + })['contentInfo'][0] + + +class HotStarIE(HotStarBaseIE): + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' 
_TESTS = [{ 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', 'info_dict': { @@ -36,23 +59,11 @@ class HotStarIE(InfoExtractor): 'only_matching': True, }] - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None): - json_data = super(HotStarIE, self)._download_json( - url_or_request, video_id, note, fatal=fatal, query=query) - if json_data['resultCode'] != 'OK': - if fatal: - raise ExtractorError(json_data['errorDescription']) - return None - return json_data['resultObj'] - def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - 'http://account.hotstar.com/AVS/besc', video_id, query={ - 'action': 'GetAggregatedContentDetails', - 'channel': 'PCTV', - 'contentId': video_id, - })['contentInfo'][0] + + video_data = self._download_content_info(video_id) + title = video_data['episodeTitle'] if video_data.get('encrypted') == 'Y': @@ -103,56 +114,49 @@ class HotStarIE(InfoExtractor): } -class HotStarPlaylistIE(InfoExtractor): +class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})' - + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)' _TESTS = [{ - 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993', 'info_dict': { - 'id': '10856', - 'title': 'pow-bandi-yuddh-ke', + 'id': '14812', }, - 'playlist_mincount': 0, + 'playlist_mincount': 75, }, { - 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998', 'only_matching': True, }] - - def _extract_episode_info(self, series_id, playlist_title, video): - - picture_url = video.get('urlPictures') - thumbnail = '' - if 
picture_url: - thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % (picture_url[-2:], picture_url, picture_url) - - episode_title = video.get('episodeTitle', '') - episode_title = episode_title.lower().replace(' ', '-') - url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) - - info_dict = { - 'id': video.get('contentId'), - 'title': video.get('episodeTitle'), - 'description': video.get('longDescription'), - 'thumbnail': thumbnail, - 'url': url, - '_type': 'url', - } - return info_dict + _ITEM_TYPES = { + 'episodes': 'EPISODE', + 'popular-clips': 'CLIPS', + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - series_id = mobj.group('series_id') - playlist_id = mobj.group('playlist_id') - playlist_title = mobj.group('playlist_title') + base_url = mobj.group('url') + content_id = mobj.group('content_id') + playlist_type = mobj.group('type') + + content_info = self._download_content_info(content_id) + playlist_id = compat_str(content_info['categoryId']) collection = self._download_json( - "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, - playlist_id - ) + 'https://search.hotstar.com/AVS/besc', playlist_id, query={ + 'action': 'SearchContents', + 'appVersion': '5.0.40', + 'channel': 'PCTV', + 'moreFilters': 'series:%s;' % playlist_id, + 'query': '*', + 'searchOrder': 'last_broadcast_date desc,year desc,title asc', + 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'), + }) - videos = collection.get('resultObj', {}).get('response', {}).get('docs', []) entries = [ - self._extract_episode_info(series_id, playlist_title, video) - for video in videos if video.get('contentId')] - return self.playlist_result(entries, playlist_id, playlist_title) + self.url_result( + '%s/_/%s' % (base_url, video['contentId']), 
+ ie=HotStarIE.ie_key(), video_id=video['contentId']) + for video in collection['response']['docs'] + if video.get('contentId')] + + return self.playlist_result(entries, playlist_id) From e0998333fac2238eff8880992c11f76402c4007c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Nov 2017 22:36:46 +0700 Subject: [PATCH 24/38] [ChangeLog] Actualize --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index d33a710fb..3cbbdda97 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version <unreleased> + +Core ++ [extractor/common] Add protocol for f4m formats +* [f4m] Prefer baseURL for relative URLs (#14660) +* [extractor/common] Respect URL query in _extract_wowza_formats (#14645) + +Extractors ++ [hotstar:playlist] Add support for playlists (#12465) +* [hotstar] Bypass geo restriction (#14672) +- [22tracks] Remove extractor (#11024, #14628) ++ [skysport] Add support for ooyala videos protected with embed_token (#14641) +* [gamespot] Extract formats referenced with new data fields (#14652) +* [spankbang] Detect unavailable videos (#14644) + + version 2017.10.29 Core From f34b841b51be6872914ffe17b210c54b0d823c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Nov 2017 22:39:24 +0700 Subject: [PATCH 25/38] release 2017.11.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 881475878..be6e6ddab 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.29 +[debug] youtube-dl version 2017.11.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 3cbbdda97..8af368274 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.11.06 Core + [extractor/common] Add protocol for f4m formats diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7b8e7403a..6009df571 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -3,8 +3,6 @@ - **1up.com** - **20min** - **220.ro** - - **22tracks:genre** - - **22tracks:track** - **24video** - **3qsdn**: 3Q SDN - **3sat** @@ -342,6 +340,7 @@ - **HornBunny** - **HotNewHipHop** - **HotStar** + - **hotstar:playlist** - **Howcast** - **HowStuffWorks** - **HRTi** diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index 43f080bc3..8b67d23fe 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.29' +__version__ = '2017.11.06' From cc6a960e134614f8af2a42dcd8bf146d63638a3c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 8 Nov 2017 20:30:05 +0100 Subject: [PATCH 26/38] use older login method(closes #11572) --- youtube_dl/extractor/crunchyroll.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8bdaf0c2c..18ef3da10 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -43,6 +43,17 @@ class CrunchyrollBaseIE(InfoExtractor): if username is None: return + self._download_webpage( + 'https://www.crunchyroll.com/?a=formhandler', + None, 'Logging in', 'Wrong login info', + data=urlencode_postdata({ + 'formname': 'RpcApiUser_Login', + 'next_url': 'https://www.crunchyroll.com/acct/membership', + 'name': username, + 'password': password, + })) + + ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -86,6 +97,7 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') + ''' def _real_initialize(self): self._login() From 4222346fb2f42af10ac902cd46469d23923cf114 Mon Sep 17 00:00:00 2001 From: hcwhan <hcwhan@gmail.com> Date: Tue, 7 Nov 2017 17:59:09 +0800 Subject: [PATCH 27/38] [pandatv] Update API URL and sign format URLs --- youtube_dl/extractor/pandatv.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py index c86d70771..c99a1bb1f 100644 --- a/youtube_dl/extractor/pandatv.py +++ b/youtube_dl/extractor/pandatv.py @@ -6,6 +6,7 @@ from ..utils import ( ExtractorError, 
qualities, ) +import json class PandaTVIE(InfoExtractor): @@ -33,7 +34,7 @@ class PandaTVIE(InfoExtractor): video_id = self._match_id(url) config = self._download_json( - 'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id) + 'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id) error_code = config.get('errno', 0) if error_code is not 0: @@ -66,6 +67,11 @@ class PandaTVIE(InfoExtractor): plflag1 = '4' live_panda = 'live_panda' if plflag0 < 1 else '' + plflag_auth = json.loads(video_info["plflag_list"]) + sign = plflag_auth["auth"]["sign"] + ts = plflag_auth["auth"]["time"] + rid = plflag_auth["auth"]["rid"] + quality_key = qualities(['OD', 'HD', 'SD']) suffix = ['_small', '_mid', ''] formats = [] @@ -77,8 +83,8 @@ class PandaTVIE(InfoExtractor): continue for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))): formats.append({ - 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s' - % (pl, plflag1, room_key, live_panda, suffix[quality], ext), + 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s' + % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid), 'format_id': '%s-%s' % (k, ext), 'quality': quality, 'source_preference': pref, From 61fb07e156671159353ae19a152926cab277ac87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Nov 2017 23:25:43 +0700 Subject: [PATCH 28/38] [pandatv] Modernize (closes #14693) --- youtube_dl/extractor/pandatv.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py index c99a1bb1f..13a2e7efc 100644 --- a/youtube_dl/extractor/pandatv.py +++ b/youtube_dl/extractor/pandatv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, qualities, ) -import json class PandaTVIE(InfoExtractor): @@ -67,10 +66,10 @@ class PandaTVIE(InfoExtractor): plflag1 = '4' live_panda = 'live_panda' if plflag0 < 1 else '' - plflag_auth = 
json.loads(video_info["plflag_list"]) - sign = plflag_auth["auth"]["sign"] - ts = plflag_auth["auth"]["time"] - rid = plflag_auth["auth"]["rid"] + plflag_auth = self._parse_json(video_info['plflag_list'], video_id) + sign = plflag_auth['auth']['sign'] + ts = plflag_auth['auth']['time'] + rid = plflag_auth['auth']['rid'] quality_key = qualities(['OD', 'HD', 'SD']) suffix = ['_small', '_mid', ''] From a9543e37c8e460e69a8556c8e5004ebd8e9b4da4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Nov 2017 00:29:08 +0800 Subject: [PATCH 29/38] [wsj] Recognize another URL pattern (closes #14704) --- ChangeLog | 6 ++++++ youtube_dl/extractor/wsj.py | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 8af368274..cedab4723 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [wsj] Recognize another URL pattern (#14704) + + version 2017.11.06 Core diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index 9b5487710..67236f377 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -13,7 +13,7 @@ class WSJIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=| - https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/| + https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+| wsj: ) (?P<id>[a-fA-F0-9-]{36}) @@ -38,6 +38,9 @@ class WSJIE(InfoExtractor): }, { 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html', 'only_matching': True, + }, { + 'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9', + 'only_matching': True, }] def _real_extract(self, url): From 59d2e6d04f621f41a72a232b8c93250991b4ae5c Mon Sep 17 00:00:00 2001 From: gkoelln <gkoelln7@gmail.com> Date: Fri, 10 Nov 2017 15:59:48 -0600 Subject: 
[PATCH 30/38] [cartoonnetwork] Update tokenizer_src (closes #14666) --- youtube_dl/extractor/cartoonnetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 086ec90c9..6aeebd7b3 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE): 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { 'secure': { 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', - 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', + 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile', }, }, { 'url': url, From a5203935d6cb753bafaf67164553027b62c01781 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 11 Nov 2017 12:41:15 +0100 Subject: [PATCH 31/38] [gamespot] skip Brightcove Once http formats(#14652) --- youtube_dl/extractor/gamespot.py | 3 ++- youtube_dl/extractor/once.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 6d177cbaf..be1ed8b4a 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -108,7 +108,8 @@ class GameSpotIE(OnceIE): onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') if onceux_url: formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url))) + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url), + skip_http_formats=True)) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index a637c8ecf..6ba6fe5d3 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -11,7 +11,7 @@ class OnceIE(InfoExtractor): 
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' - def _extract_once_formats(self, url): + def _extract_once_formats(self, url, skip_http_formats=False): domain_id, application_id, media_item_id = re.match( OnceIE._VALID_URL, url).groups() formats = self._extract_m3u8_formats( @@ -27,7 +27,7 @@ class OnceIE(InfoExtractor): rendition_id = self._search_regex( r'/now/media/playlist/[^/]+/[^/]+/([^/]+)', adaptive_format['url'], 'redition id', default=None) - if rendition_id: + if rendition_id and not skip_http_formats: progressive_format = adaptive_format.copy() progressive_format.update({ 'url': self.PROGRESSIVE_URL_TEMPLATE % ( From 79d1f8ed6803b6097f0f3cd57f72e0378bdc1f34 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 11 Nov 2017 13:02:39 +0100 Subject: [PATCH 32/38] [gamespot] add support for article URLS(closes #14652) --- youtube_dl/extractor/gamespot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index be1ed8b4a..e6d6d9b1d 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', From e4d9586562d24cbbea6ee07162290ec602399f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Nov 2017 20:49:03 +0700 Subject: [PATCH 33/38] Remove sensitive data from logging in messages --- youtube_dl/extractor/animeondemand.py | 2 +- 
youtube_dl/extractor/atresplayer.py | 2 +- youtube_dl/extractor/bambuser.py | 2 +- youtube_dl/extractor/dramafever.py | 2 +- youtube_dl/extractor/funimation.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/pluralsight.py | 2 +- youtube_dl/extractor/roosterteeth.py | 2 +- youtube_dl/extractor/safari.py | 2 +- youtube_dl/extractor/twitch.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/viki.py | 2 +- youtube_dl/extractor/vk.py | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 69d363311..34c2b363e 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -78,7 +78,7 @@ class AnimeOnDemandIE(InfoExtractor): post_url = urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, + post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._LOGIN_URL, }) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 01fa308ff..1a31ebe08 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') error = self._html_search_regex( r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>', diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 0eb1930c2..633c57553 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) 
response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') login_error = self._html_search_regex( r'(?s)<div class="messages error">(.+?)</div>', diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 95883a037..6b60e542b 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -54,7 +54,7 @@ class DramaFeverBaseIE(AMPIE): request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form)) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') if all(logout_pattern not in response for logout_pattern in ['href="/accounts/logout/"', '>Log out<']): diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 8c37509ec..107f658ba 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor): try: data = self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in as %s' % username, data=urlencode_postdata({ + None, 'Logging in', data=urlencode_postdata({ 'username': username, 'password': password, })) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 8b83e1f76..a9f9b10c4 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -70,7 +70,7 @@ class NocoIE(InfoExtractor): return login = self._download_json( - self._LOGIN_URL, None, 'Logging in as %s' % username, + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata({ 'a': 'login', 'cookie': '1', diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index a6a2c273f..d4b1d34ca 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor): 'https://www.patreon.com/processLogin', 
compat_urllib_parse_urlencode(login_form).encode('utf-8') ) - login_page = self._download_webpage(request, None, note='Logging in as %s' % username) + login_page = self._download_webpage(request, None, note='Logging in') if re.search(r'onLoginFailed', login_page): raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index f6a9131b1..4bf0aa786 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, + post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={'Content-Type': 'application/x-www-form-urlencoded'}) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 46dfc78f5..8b703800e 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor): login_request = self._download_webpage( self._LOGIN_URL, None, - note='Logging in as %s' % username, + note='Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._LOGIN_URL, diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 909a6ba97..cc6698f88 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor): request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) login_page = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') if not is_logged(login_page): raise ExtractorError( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index fefcd2807..bf57eac01 100644 --- 
a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor): fail(clean_html(login_page)) redirect_page, handle = login_step( - login_page, handle, 'Logging in as %s' % username, { + login_page, handle, 'Logging in', { 'username': username, 'password': password, }) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 207c4a6a7..c248ea727 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor): }) response = self._download_webpage( - self._LOGIN_URL, None, 'Logging in as %s' % username, + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._ORIGIN_URL, diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 853e5c75f..ad2a2a4b7 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor): login = self._call_api( 'sessions.json', None, - 'Logging in as %s' % username, post_data=login_form) + 'Logging in', post_data=login_form) self._token = login.get('token') if not self._token: diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 105e172d5..0d8376522 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor): login_page = self._download_webpage( 'https://login.vk.com/?act=login', None, - note='Logging in as %s' % username, + note='Logging in', data=urlencode_postdata(login_form)) if re.search(r'onLoginFailed', login_page): From af85ce29c61749676ab934a2b297505ab33bf4c7 Mon Sep 17 00:00:00 2001 From: Bob Poekert <bob@poekert.com> Date: Sat, 11 Nov 2017 22:25:21 -0800 Subject: [PATCH 34/38] [ccma] Fix typo --- youtube_dl/extractor/ccma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 39938c9ac..bec0a825a 
100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor): 'description': clean_html(informacio.get('descripcio')), 'duration': duration, 'timestamp': timestamp, - 'thumnails': thumbnails, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From 5fc12b954971f5f63d1e87b05e8b01a9ae0e3b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Nov 2017 18:35:17 +0700 Subject: [PATCH 35/38] [instagram:user] Fix extraction (closes #14699) --- youtube_dl/extractor/instagram.py | 121 +++++++++++++++--------------- 1 file changed, 59 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 4667335e0..20db31f86 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -7,7 +8,6 @@ from ..compat import compat_str from ..utils import ( get_element_by_attribute, int_or_none, - limit_length, lowercase_escape, try_get, ) @@ -212,7 +212,7 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { @@ -221,82 +221,79 @@ class InstagramUserIE(InfoExtractor): 'id': 'porsche', 'title': 'porsche', }, - 'playlist_mincount': 2, - 'playlist': [{ - 'info_dict': { - 'id': '614605558512799803_462752227', - 'ext': 'mp4', - 'title': '#Porsche Intelligent Performance.', - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'Porsche', - 'uploader_id': 'porsche', - 'timestamp': 1387486713, - 'upload_date': '20131219', - }, - }], + 'playlist_count': 5, 'params': { 'extract_flat': True, 'skip_download': True, + 
'playlistend': 5, } } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader_id = mobj.group('username') + def _entries(self, uploader_id): + query = { + '__a': 1, + } - entries = [] - page_count = 0 - media_url = 'http://instagram.com/%s/media' % uploader_id - while True: + def get_count(kind): + return int_or_none(try_get( + node, lambda x: x['%ss' % kind]['count'])) + + for page_num in itertools.count(1): page = self._download_json( - media_url, uploader_id, - note='Downloading page %d ' % (page_count + 1), - ) - page_count += 1 + 'https://instagram.com/%s/' % uploader_id, uploader_id, + note='Downloading page %d' % page_num, + fatal=False, query=query) + if not page: + break - for it in page['items']: - if it.get('type') != 'video': + nodes = try_get(page, lambda x: x['user']['media']['nodes'], list) + if not nodes: + break + + max_id = None + + for node in nodes: + node_id = node.get('id') + if node_id: + max_id = node_id + + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + video_id = node.get('code') + if not video_id: continue - like_count = int_or_none(it.get('likes', {}).get('count')) - user = it.get('user', {}) - formats = [{ - 'format_id': k, - 'height': v.get('height'), - 'width': v.get('width'), - 'url': v['url'], - } for k, v in it['videos'].items()] - self._sort_formats(formats) + info = self.url_result( + 'https://instagram.com/p/%s/' % video_id, + ie=InstagramIE.ie_key(), video_id=video_id) - thumbnails_el = it.get('images', {}) - thumbnail = thumbnails_el.get('thumbnail', {}).get('url') + description = try_get( + node, [lambda x: x['caption'], lambda x: x['text']['id']], + compat_str) + thumbnail = node.get('thumbnail_src') or node.get('display_src') + timestamp = int_or_none(node.get('date')) - # In some cases caption is null, which corresponds to None - # in python. 
As a result, it.get('caption', {}) gives None - title = (it.get('caption') or {}).get('text', it['id']) + comment_count = get_count('comment') + like_count = get_count('like') + view_count = int_or_none(node.get('video_views')) - entries.append({ - 'id': it['id'], - 'title': limit_length(title, 80), - 'formats': formats, + info.update({ + 'description': description, 'thumbnail': thumbnail, - 'webpage_url': it.get('link'), - 'uploader': user.get('full_name'), - 'uploader_id': user.get('username'), + 'timestamp': timestamp, + 'comment_count': comment_count, 'like_count': like_count, - 'timestamp': int_or_none(it.get('created_time')), + 'view_count': view_count, }) - if not page['items']: - break - max_id = page['items'][-1]['id'].split('_')[0] - media_url = ( - 'http://instagram.com/%s/media?max_id=%s' % ( - uploader_id, max_id)) + yield info - return { - '_type': 'playlist', - 'entries': entries, - 'id': uploader_id, - 'title': uploader_id, - } + if not max_id: + break + + query['max_id'] = max_id + + def _real_extract(self, url): + uploader_id = self._match_id(url) + return self.playlist_result( + self._entries(uploader_id), uploader_id, uploader_id) From d4e31b72b971172ffdee7fbe3070d20e4454259c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 13 Nov 2017 10:24:35 +0100 Subject: [PATCH 36/38] [gamespot] lower the preference of http formats(#14652) --- youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/once.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index e6d6d9b1d..a9606a02c 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -109,7 +109,7 @@ class GameSpotIE(OnceIE): if onceux_url: formats.extend(self._extract_once_formats(re.sub( r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url), - skip_http_formats=True)) + http_formats_preference=-1)) if not formats: for quality in ['sd', 
'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 6ba6fe5d3..8ae5fadd8 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -11,7 +11,7 @@ class OnceIE(InfoExtractor): ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' - def _extract_once_formats(self, url, skip_http_formats=False): + def _extract_once_formats(self, url, http_formats_preference=None): domain_id, application_id, media_item_id = re.match( OnceIE._VALID_URL, url).groups() formats = self._extract_m3u8_formats( @@ -27,7 +27,7 @@ class OnceIE(InfoExtractor): rendition_id = self._search_regex( r'/now/media/playlist/[^/]+/[^/]+/([^/]+)', adaptive_format['url'], 'redition id', default=None) - if rendition_id and not skip_http_formats: + if rendition_id: progressive_format = adaptive_format.copy() progressive_format.update({ 'url': self.PROGRESSIVE_URL_TEMPLATE % ( @@ -35,6 +35,7 @@ class OnceIE(InfoExtractor): 'format_id': adaptive_format['format_id'].replace( 'hls', 'http'), 'protocol': 'http', + 'preference': http_formats_preference, }) progressive_formats.append(progressive_format) self._check_formats(progressive_formats, media_item_id) From 388beb86e0c8e3f76958aa8a258bd396b8b1e0fe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 13 Nov 2017 10:30:12 +0100 Subject: [PATCH 37/38] [gamespot] add test for #14652 --- youtube_dl/extractor/gamespot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index a9606a02c..ab647dd41 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -38,6 +38,9 @@ class GameSpotIE(OnceIE): }, { 'url': 'https://www.gamespot.com/videos/embed/6439218/', 'only_matching': True, + }, { + 'url': 
'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/', + 'only_matching': True, }] def _real_extract(self, url): From 27adc9ec65be412e07f6e55d9d9b56c1c224d1db Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 13 Nov 2017 11:24:15 +0100 Subject: [PATCH 38/38] [tva] fix extraction(closes #14736) --- youtube_dl/extractor/tva.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py index b57abeaa4..0b863df2f 100644 --- a/youtube_dl/extractor/tva.py +++ b/youtube_dl/extractor/tva.py @@ -32,6 +32,8 @@ class TVAIE(InfoExtractor): video_data = self._download_json( 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ 'Accept': 'application/json', + }, query={ + 'appId': '5955fc5f23eec60006c951f1', }) def get_attribute(key):