From 6f5c598a28b2b400ffb4d2bc2d228c2802ab7fd7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 25 Nov 2017 15:49:49 +0100 Subject: [PATCH 01/41] [france2.fr:generation-what] fix extraction --- youtube_dl/extractor/francetv.py | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 037e538cc..5a3abeaff 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..compat import compat_urlparse @@ -308,31 +307,32 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): return self._extract_video(video_id, catalogue) -class GenerationQuoiIE(InfoExtractor): - IE_NAME = 'france2.fr:generation-quoi' - _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P[^/?#]+)' +class GenerationWhatIE(InfoExtractor): + IE_NAME = 'france2.fr:generation-what' + _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P[^/?#]+)' - _TEST = { - 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', + _TESTS = [{ + 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', 'info_dict': { - 'id': 'k7FJX8VBcvvLmX4wA5Q', + 'id': 'wtvKYUG45iw', 'ext': 'mp4', - 'title': 'Génération Quoi - Garde à Vous', - 'uploader': 'Génération Quoi', + 'title': 'Generation What - Garde à vous - FRA', + 'uploader': 'Generation What', + 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', + 'upload_date': '20160411', }, - 'params': { - # It uses Dailymotion - 'skip_download': True, - }, - } + }, { + 'url': 'http://generation-what.francetv.fr/europe/video/present-arms', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id) - info_json = self._download_webpage(info_url, display_id) - info = 
json.loads(info_json) - return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], - ie='Dailymotion') + webpage = self._download_webpage(url, display_id) + youtube_id = self._search_regex( + r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", + webpage, 'youtube id') + return self.url_result(youtube_id, 'Youtube', youtube_id) class CultureboxIE(FranceTVBaseInfoExtractor): From fe4bfe36e148c0e1d3180a2992503536b4b4f84b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 25 Nov 2017 15:58:28 +0100 Subject: [PATCH 02/41] [gamersyde] remove extractor --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/gamersyde.py | 70 ------------------------------ 2 files changed, 1 insertion(+), 72 deletions(-) delete mode 100644 youtube_dl/extractor/gamersyde.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f1a464970..bc1ca0f0a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -374,7 +374,7 @@ from .francetv import ( FranceTVIE, FranceTVEmbedIE, FranceTVInfoIE, - GenerationQuoiIE, + GenerationWhatIE, CultureboxIE, ) from .freesound import FreesoundIE @@ -390,7 +390,6 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) -from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py deleted file mode 100644 index a218a6944..000000000 --- a/youtube_dl/extractor/gamersyde.py +++ /dev/null @@ -1,70 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - parse_duration, - remove_start, -) - - -class GamersydeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P[\da-z_]+)-(?P\d+)_[a-z]{2}\.html' - _TEST = { - 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', - 'md5': 
'f38d400d32f19724570040d5ce3a505f', - 'info_dict': { - 'id': '34371', - 'ext': 'mp4', - 'duration': 372, - 'title': 'Bloodborne - Birth of a hero', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - playlist = self._parse_json( - self._search_regex( - r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'), - display_id, transform_source=js_to_json) - - formats = [] - for source in playlist['sources']: - video_url = source.get('file') - if not video_url: - continue - format_id = source.get('label') - f = { - 'url': video_url, - 'format_id': format_id, - } - m = re.search(r'^(?P\d+)[pP](?P\d+)fps', format_id) - if m: - f.update({ - 'height': int(m.group('height')), - 'fps': int(m.group('fps')), - }) - formats.append(f) - self._sort_formats(formats) - - title = remove_start(playlist['title'], '%s - ' % video_id) - thumbnail = playlist.get('image') - duration = parse_duration(self._search_regex( - r'Length:([^<]+)<', webpage, 'duration', fatal=False)) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } From dbb25af657166108e7afcd4e1ff197e2a5644724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 25 Nov 2017 23:08:46 +0700 Subject: [PATCH 03/41] [tnaflix] Don't modify download URLs (closes #14811) --- youtube_dl/extractor/tnaflix.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 7e6ec3430..9f47ce2aa 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -91,7 +91,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): formats = [] def extract_video_url(vl): - return re.sub(r'speed=\d+', 'speed=', unescapeHTML(vl.text)) + # Any URL 
modification now results in HTTP Error 403: Forbidden + return unescapeHTML(vl.text) video_link = cfg_xml.find('./videoLink') if video_link is not None: From 9105523818270cadb7ad90a3b1ea7e6f18d2dc54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 25 Nov 2017 23:13:17 +0700 Subject: [PATCH 04/41] [empflix] Fix extractrion --- youtube_dl/extractor/tnaflix.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 9f47ce2aa..c4ead1fcd 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -21,6 +21,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): r'flashvars\.config\s*=\s*escape\("([^"]+)"', r']+name="config\d?" value="([^"]+)"', ] + _HOST = 'tna' + _VKEY_SUFFIX = '' _TITLE_REGEX = r']+name="title" value="([^"]+)"' _DESCRIPTION_REGEX = r']+name="description" value="([^"]+)"' _UPLOADER_REGEX = r']+name="username" value="([^"]+)"' @@ -81,8 +83,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): if not cfg_url: inputs = self._hidden_inputs(webpage) - cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha' - % (inputs['vkey'], inputs['nkey'], video_id)) + cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha' + % (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id)) cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', @@ -240,6 +242,8 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): class EMPFlixIE(TNAFlixNetworkBaseIE): _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P.+?)-(?P[0-9]+)\.html' + _HOST = 'emp' + _VKEY_SUFFIX = '-1' _UPLOADER_REGEX = r']+class="infoTitle"[^>]*>Uploaded By:(.+?)' _TESTS = [{ From b7785cf15685f7dfbe96cc6d2dd8089ff1363be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 25 Nov 2017 23:22:54 +0700 Subject: [PATCH 05/41] [empflix] Relax _VALID_URL --- 
youtube_dl/extractor/tnaflix.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index c4ead1fcd..e57b8e318 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -74,7 +74,13 @@ class TNAFlixNetworkBaseIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id + for display_id_key in ('display_id', 'display_id_2'): + if display_id_key in mobj.groupdict(): + display_id = mobj.group(display_id_key) + if display_id: + break + else: + display_id = video_id webpage = self._download_webpage(url, display_id) @@ -240,7 +246,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): class EMPFlixIE(TNAFlixNetworkBaseIE): - _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P.+?)-(?P[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P.+?)-|[^/]+/(?P[^/]+)/video)(?P[0-9]+)' _HOST = 'emp' _VKEY_SUFFIX = '-1' @@ -264,6 +270,9 @@ class EMPFlixIE(TNAFlixNetworkBaseIE): }, { 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', 'only_matching': True, + }, { + 'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051', + 'only_matching': True, }] From 8cfbcfab9a94d363dbc8e1b026648c3ccf0d399e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 25 Nov 2017 23:37:50 +0700 Subject: [PATCH 06/41] [tnaflix] Extract common parts of tnaflix and empflix --- youtube_dl/extractor/tnaflix.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index e57b8e318..0c2f8f119 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -201,18 +201,21 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE): webpage)] 
-class TNAFlixIE(TNAFlixNetworkBaseIE): +class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE): + _DESCRIPTION_REGEX = r'(?s)>Description:]+>(.+?)<' + _UPLOADER_REGEX = r'by\s*]+\bhref=["\']/profile/[^>]+>([^<]+)<' + _CATEGORIES_REGEX = r'(?s)]*>Categories:(.+?)' + + +class TNAFlixIE(TNAEMPFlixBaseIE): _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P[^/]+)/video(?P\d+)' _TITLE_REGEX = r'(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)' - _DESCRIPTION_REGEX = r'(?s)>Description:]+>(.+?)<' - _UPLOADER_REGEX = r'\s*Verified Member\s*\s*(.+?)<' - _CATEGORIES_REGEX = r'(?s)]*>Categories:(.+?)' _TESTS = [{ # anonymous uploader, no categories 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': 'ecf3498417d09216374fc5907f9c6ec0', + 'md5': '7e569419fe6d69543d01e6be22f5f7c4', 'info_dict': { 'id': '553878', 'display_id': 'Carmella-Decesare-striptease', @@ -237,7 +240,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): 'duration': 164, 'age_limit': 18, 'uploader': 'bobwhite39', - 'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'], + 'categories': list, } }, { 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', @@ -245,16 +248,15 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): }] -class EMPFlixIE(TNAFlixNetworkBaseIE): +class EMPFlixIE(TNAEMPFlixBaseIE): _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P.+?)-|[^/]+/(?P[^/]+)/video)(?P[0-9]+)' _HOST = 'emp' _VKEY_SUFFIX = '-1' - _UPLOADER_REGEX = r']+class="infoTitle"[^>]*>Uploaded By:(.+?)' _TESTS = [{ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', - 'md5': 'b1bc15b6412d33902d6e5952035fcabc', + 'md5': 'bc30d48b91a7179448a0bda465114676', 'info_dict': { 'id': '33051', 'display_id': 'Amateur-Finger-Fuck', From 9ef909f2b2bf81e16c4fd2e428a740a523ad61cf Mon Sep 17 00:00:00 2001 From: jahudka Date: Sat, 25 Nov 2017 18:04:13 +0100 Subject: [PATCH 07/41] [openload] Add support for openload.link --- youtube_dl/extractor/openload.py | 5 ++++- 
1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b50d6c77b..efa5d4601 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -242,7 +242,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -286,6 +286,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 'only_matching': True, + }, { + 'url': 'http://www.openload.link/f/KnG-kKZdcfY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 6ff27b8d5a11f0960f53b93a1ea2423d3384af21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 26 Nov 2017 00:05:28 +0700 Subject: [PATCH 08/41] [openload] Don't use bare except when removing temp files --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index efa5d4601..a99af12a4 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -140,7 +140,7 @@ class PhantomJSwrapper(object): for name in self._TMP_FILE_NAMES: try: os.remove(self._TMP_FILES[name].name) - except: + except (IOError, OSError): pass def _save_cookies(self, url): From c0f647a179b8d75a5f23a06f208ff8c1c0088a54 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 25 Nov 2017 18:13:17 +0100 Subject: [PATCH 09/41] [nexx] extract more formats --- youtube_dl/extractor/nexx.py | 69 ++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/nexx.py 
b/youtube_dl/extractor/nexx.py index 071879ba4..d79a1ce28 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -28,7 +28,7 @@ class NexxIE(InfoExtractor): _TESTS = [{ # movie 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', - 'md5': '16746bfc28c42049492385c989b26c4a', + 'md5': '828cea195be04e66057b846288295ba1', 'info_dict': { 'id': '128907', 'ext': 'mp4', @@ -42,9 +42,6 @@ class NexxIE(InfoExtractor): 'timestamp': 1384264416, 'upload_date': '20131112', }, - 'params': { - 'format': 'bestvideo', - }, }, { # episode 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858', @@ -62,7 +59,6 @@ class NexxIE(InfoExtractor): 'season_number': 2, }, 'params': { - 'format': 'bestvideo', 'skip_download': True, }, }, { @@ -193,35 +189,70 @@ class NexxIE(InfoExtractor): stream_data = video['streamdata'] language = general.get('language_raw') or '' - # TODO: reverse more cdns and formats + # TODO: reverse more cdns cdn = stream_data['cdnType'] assert cdn == 'azure' azure_locator = stream_data['azureLocator'] - AZURE_URL = 'http://nx-p%02d.akamaized.net/' + AZURE_URL = 'http://nx%s%02d.akamaized.net/' - for secure in ('s', ''): - cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper()) - if cdn_shield: - azure_base = 'http%s://%s' % (secure, cdn_shield) - break - else: - azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', '')) + def get_cdn_base(cdn, prefix='-p'): + azure_base = None + for secure in ('', 's'): + cdn_shield = stream_data.get('cdn%sHTTP%s' % (cdn, secure.upper())) + if cdn_shield: + azure_base = 'http%s://%s' % (secure, cdn_shield) + break + else: + azure_base = AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', ''))) + return azure_base + azure_stream_base = get_cdn_base('Shield') is_ml = ',' in language - azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % ( - azure_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + azure_manifest_url = 
'%s%s/%s_src%s.ism/Manifest' % ( + azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s' protection_token = try_get( video, lambda x: x['protectiondata']['token'], compat_str) if protection_token: - azure_m3u8_url += '?hdnts=%s' % protection_token + azure_manifest_url += '?hdnts=%s' % protection_token formats = self._extract_m3u8_formats( - azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='%s-hls' % cdn) + azure_manifest_url % '(format=m3u8-aapl)', + video_id, 'mp4', 'm3u8_native', + m3u8_id='%s-hls' % cdn, fatal=False) + formats.extend(self._extract_mpd_formats( + azure_manifest_url % '(format=mpd-time-csf)', + video_id, mpd_id='%s-dash' % cdn, fatal=False)) + formats.extend(self._extract_ism_formats( + azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) + + azure_progressive_base = get_cdn_base('ShieldProg', '-d') + azure_file_distribution = stream_data.get('azureFileDistribution') + if azure_file_distribution: + fds = azure_file_distribution.split(',') + if fds: + for fd in fds: + ss = fd.split(':') + if len(ss) == 2: + tbr = int_or_none(ss[0]) + if tbr: + f = { + 'url': '%s%s/%s_src_%s_%d.mp4' % ( + azure_progressive_base, azure_locator, video_id, ss[1], tbr), + 'format_id': 'http-%d' % tbr, + 'tbr': tbr, + } + width_height = ss[1].split('x') + if len(width_height) == 2: + f.update({ + 'width': int_or_none(width_height[0]), + 'height': int_or_none(width_height[1]), + }) + formats.append(f) + self._sort_formats(formats) return { From a238a868ba33b4683cea57a05893b339a302ff9f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 25 Nov 2017 18:25:00 +0100 Subject: [PATCH 10/41] [freespeech] fix extraction --- youtube_dl/extractor/freespeech.py | 31 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 7fa271b51..486a49c05 100644 --- a/youtube_dl/extractor/freespeech.py +++ 
b/youtube_dl/extractor/freespeech.py @@ -1,37 +1,34 @@ from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor class FreespeechIE(InfoExtractor): IE_NAME = 'freespeech.org' - _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P<id>.+)' _TEST = { 'add_ie': ['Youtube'], - 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', + 'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/', 'info_dict': { - 'id': 'poKsVCZ64uU', - 'ext': 'webm', - 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', - 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', - 'uploader': 'freespeechtv', + 'id': 'waRk6IPqyWM', + 'ext': 'mp4', + 'title': 'What\'s At Stake - Net Neutrality Special', + 'description': 'Presented by MNN and FSTV', + 'upload_date': '20170728', 'uploader_id': 'freespeechtv', - 'upload_date': '20121002', + 'uploader': 'freespeechtv', }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - title = mobj.group('title') - webpage = self._download_webpage(url, title) - info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info') - info = json.loads(info_json) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + youtube_url = self._search_regex( + r'data-video-url="([^"]+)"', + webpage, 'youtube url') return { '_type': 'url', - 'url': info['jw_player']['basic_video_node_player']['file'], + 'url': youtube_url, 'ie_key': 'Youtube', } From b485d5d6bf2052edf6d7218a0e4afa89884f13c6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Nov 2017 18:36:31 +0100 Subject: [PATCH 11/41] [nexx] make http format ids more consistent --- youtube_dl/extractor/nexx.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git 
a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index d79a1ce28..9203c0477 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -198,18 +198,15 @@ class NexxIE(InfoExtractor): AZURE_URL = 'http://nx%s%02d.akamaized.net/' - def get_cdn_base(cdn, prefix='-p'): - azure_base = None + def get_cdn_shield_base(shield_type='', prefix='-p'): for secure in ('', 's'): - cdn_shield = stream_data.get('cdn%sHTTP%s' % (cdn, secure.upper())) + cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) if cdn_shield: - azure_base = 'http%s://%s' % (secure, cdn_shield) - break + return 'http%s://%s' % (secure, cdn_shield) else: - azure_base = AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', ''))) - return azure_base + return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', ''))) - azure_stream_base = get_cdn_base('Shield') + azure_stream_base = get_cdn_shield_base() is_ml = ',' in language azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % ( azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s' @@ -229,7 +226,7 @@ class NexxIE(InfoExtractor): formats.extend(self._extract_ism_formats( azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) - azure_progressive_base = get_cdn_base('ShieldProg', '-d') + azure_progressive_base = get_cdn_shield_base('Prog', '-d') azure_file_distribution = stream_data.get('azureFileDistribution') if azure_file_distribution: fds = azure_file_distribution.split(',') @@ -242,7 +239,7 @@ class NexxIE(InfoExtractor): f = { 'url': '%s%s/%s_src_%s_%d.mp4' % ( azure_progressive_base, azure_locator, video_id, ss[1], tbr), - 'format_id': 'http-%d' % tbr, + 'format_id': '%s-http-%d' % (cdn, tbr), 'tbr': tbr, } width_height = ss[1].split('x') From 87dac57cf69f7a4528c511dc7c1b9a690199cf1e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Nov 2017 18:50:15 +0100 Subject: 
[PATCH 12/41] [firstpost] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/firstpost.py | 50 ------------------------------ 2 files changed, 51 deletions(-) delete mode 100644 youtube_dl/extractor/firstpost.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bc1ca0f0a..d3104f393 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -344,7 +344,6 @@ from .filmon import ( FilmOnIE, FilmOnChannelIE, ) -from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE from .fivetv import FiveTVIE diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py deleted file mode 100644 index e8936cb24..000000000 --- a/youtube_dl/extractor/firstpost.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class FirstpostIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' - - _TEST = { - 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', - 'md5': 'ee9114957692f01fb1263ed87039112a', - 'info_dict': { - 'id': '1025403', - 'ext': 'mp4', - 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', - 'description': 'md5:feef3041cb09724e0bdc02843348f5f4', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - title = self._html_search_meta('twitter:title', page, 'title', fatal=True) - description = self._html_search_meta('twitter:description', page, 'title') - - data = self._download_xml( - 'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id, - 'Downloading video XML') - - item = data.find('./playlist/item') - thumbnail = item.find('./image').text - - formats = [ - { - 'url': details.find('./file').text, - 'format_id': details.find('./label').text.strip(), - 
'width': int(details.find('./width').text.strip()), - 'height': int(details.find('./height').text.strip()), - } for details in item.findall('./source/file_details') if details.find('./file').text - ] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats, - } From 93f3f10cdcf40bdeb6c61b36bbbc0d0ab45ba66c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Nov 2017 19:28:26 +0100 Subject: [PATCH 13/41] [fczenit] fix extraction --- youtube_dl/extractor/fczenit.py | 40 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py index 8d1010b88..8db7c5963 100644 --- a/youtube_dl/extractor/fczenit.py +++ b/youtube_dl/extractor/fczenit.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + float_or_none, +) class FczenitIE(InfoExtractor): @@ -14,6 +17,8 @@ class FczenitIE(InfoExtractor): 'id': '41044', 'ext': 'mp4', 'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»', + 'timestamp': 1462283735, + 'upload_date': '20160503', }, } @@ -21,28 +26,31 @@ class FczenitIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_title = self._html_search_regex( - r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title') + msi_id = self._search_regex( + r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id') - video_items = self._parse_json(self._search_regex( - r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'), - video_id) - - def merge_dicts(*dicts): - ret = {} - for a_dict in dicts: - ret.update(a_dict) - return ret + msi_data = self._download_json( + 'http://player.fc-zenit.ru/msi/video', msi_id, query={ + 'video': msi_id, + })['data'] + 
title = msi_data['name'] formats = [{ - 'url': compat_urlparse.urljoin(url, video_url), - 'tbr': int(tbr), - } for tbr, video_url in merge_dicts(*video_items).items()] + 'format_id': q.get('label'), + 'url': q['url'], + 'height': int_or_none(q.get('label')), + } for q in msi_data['qualities'] if q.get('url')] self._sort_formats(formats) + tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')] + return { 'id': video_id, - 'title': video_title, + 'title': title, + 'thumbnail': msi_data.get('preview'), 'formats': formats, + 'duration': float_or_none(msi_data.get('duration')), + 'timestamp': int_or_none(msi_data.get('date')), + 'tags': tags, } From 7512aa986f517c301fb3272584d21c8d5cec720f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Nov 2017 20:53:10 +0700 Subject: [PATCH 14/41] Fix some only matching tests (closes #14855) --- youtube_dl/extractor/orf.py | 6 +++--- youtube_dl/extractor/rozhlas.py | 2 +- youtube_dl/extractor/vidzi.py | 4 ++-- youtube_dl/extractor/youku.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 74fe8017e..c1fb580ca 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -49,13 +49,13 @@ class ORFTVthekIE(InfoExtractor): 'params': { 'skip_download': True, # rtsp downloads }, - '_skip': 'Blocked outside of Austria / Germany', + 'skip': 'Blocked outside of Austria / Germany', }, { 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', - 'skip_download': True, + 'only_matching': True, }, { 'url': 'http://tvthek.orf.at/profile/Universum/35429', - 'skip_download': True, + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dl/extractor/rozhlas.py index f8eda8dea..fccf69401 100644 --- a/youtube_dl/extractor/rozhlas.py +++ 
b/youtube_dl/extractor/rozhlas.py @@ -21,7 +21,7 @@ class RozhlasIE(InfoExtractor): } }, { 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', - 'skip_download': True, + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index e64873bce..ac35d55a9 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -28,10 +28,10 @@ class VidziIE(InfoExtractor): }, }, { 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', - 'skip_download': True, + 'only_matching': True, }, { 'url': 'http://vidzi.cc/cghql9yq6emu.html', - 'skip_download': True, + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 6822a30bc..a2b3b4daf 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -240,7 +240,7 @@ class YoukuShowIE(InfoExtractor): }, { # Ongoing playlist. The initial page is the last one 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html', - 'only_matchine': True, + 'only_matching': True, }] def _extract_entries(self, playlist_data_url, show_id, note, query): From d08dcd2dbd4c0d63ff2f4dd504aceceabfa212cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Nov 2017 21:06:14 +0700 Subject: [PATCH 15/41] [test_YoutubeDL] Fix typo (closes #14856) --- test/test_YoutubeDL.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 4af92fbd4..f0f5a8470 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -466,11 +466,11 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') - ydl = YDL({'is_live': True}) - self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + ydl = YDL({}) + 
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') - ydl = YDL({'simulate': True, 'is_live': True}) - self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best') ydl = YDL({'outtmpl': '-'}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') From e94d1adc360d0fbfadced82c72a7f20375a1ec87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Nov 2017 21:10:32 +0700 Subject: [PATCH 16/41] Add testdata to youtube-dl.tar.gz (closes #14854) --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index c74eea792..141593efd 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*~' \ --exclude '__pycache__' \ --exclude '.git' \ - --exclude 'testdata' \ --exclude 'docs/_build' \ -- \ bin devscripts test youtube_dl docs \ From 6c07f0b288a674c385308cccef31d034b6bd3788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Nov 2017 21:37:27 +0700 Subject: [PATCH 17/41] [ChangeLog] Actualize --- ChangeLog | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c4a84c597..1f1b704cc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,31 @@ version <unreleased> +Core +* [extractor/common] Use final URL when dumping request (#14769) + Extractors +* [fczenit] Fix extraction +- [firstpost] Remove extractor +* [freespeech] Fix extraction +* [nexx] Extract more formats ++ [openload] Add support for openload.link (#14763) +* [empflix] Relax URL regular expression +* [empflix] Fix extractrion +* [tnaflix] Don't modify download URLs (#14811) +- [gamersyde] Remove extractor +* [francetv:generationwhat] Fix extraction ++ 
[massengeschmacktv] Add support for Massengeschmack TV +* [fox9] Fix extraction +* [faz] Fix extraction and add support for Perform Group embeds (#14714) ++ [performgroup] Add support for performgroup.com ++ [jwplatform] Add support for iframes (#14828) +* [culturebox] Fix extraction (#14827) * [youku] Fix extraction; update ccode (#14815) -+ [JWPlatform] support iframes (#14828) +* [livestream] Make SMIL extraction non fatal (#14792) ++ [drtuber] Add support for mobile URLs (#14772) ++ [spankbang] Add support for mobile URLs (#14771) +* [instagram] Fix description, timestamp and counters extraction (#14755) + version 2017.11.15 From 5ddeb7702a2c14d51a2c76178c7abb30a4ea06e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Nov 2017 21:49:22 +0700 Subject: [PATCH 18/41] release 2017.11.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 7 +++---- youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4dd1a6e59..15211e5d9 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.11.15 +[debug] youtube-dl version 2017.11.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1f1b704cc..a568fa233 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.11.26 Core * [extractor/common] Use final URL when dumping request (#14769) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6009df571..b44c9e7a3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -266,10 +266,8 @@ - **fc2** - **fc2:embed** - **Fczenit** - - **fernsehkritik.tv** - **filmon** - **filmon:channel** - - **Firstpost** - **FiveTV** - **Flickr** - **Flipagram** @@ -283,7 +281,7 @@ - **foxnews:article** - **foxnews:insider** - **FoxSports** - - **france2.fr:generation-quoi** + - **france2.fr:generation-what** - **FranceCulture** - **FranceInter** - **FranceTV** @@ -301,7 +299,6 @@ - **GameInformer** - **GameOne** - **gameone:playlist** - - **Gamersyde** - **GameSpot** - **GameStar** - **Gaskrank** @@ -441,6 +438,7 @@ - **mangomolo:live** - **mangomolo:video** - **ManyVids** + - **massengeschmack.tv** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** @@ -608,6 +606,7 @@ - **pcmag** - 
**PearVideo** - **People** + - **PerformGroup** - **periscope**: Periscope - **periscope:user**: Periscope user videos - **PhilharmonieDeParis**: Philharmonie de Paris diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1c3cbefeb..81e3f792d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.11.15' +__version__ = '2017.11.26' From f58a5060443229e017a04e79cb37d499b6347fe3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 27 Nov 2017 21:30:47 +0800 Subject: [PATCH 19/41] [test_InfoExtractor] Fix flake8 --- test/test_InfoExtractor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 686c63efa..7b95f883f 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -601,5 +601,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + if __name__ == '__main__': unittest.main() From 82a62de192401157a5eb44cacba16296795b4ee5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 27 Nov 2017 21:32:06 +0800 Subject: [PATCH 20/41] [Makefile,devscripts/run_tests.sh] Actually exclude network tests Closes #14858 --- Makefile | 11 ++++++++++- devscripts/run_tests.sh | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 141593efd..0c3936811 100644 --- a/Makefile +++ b/Makefile @@ -36,8 +36,17 @@ test: ot: offlinetest +# Keep this list in sync with devscripts/run_tests.sh offlinetest: codetest - $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py + $(PYTHON) -m nose --verbose test \ + --exclude 
test_age_restriction.py \ + --exclude test_download.py \ + --exclude test_iqiyi_sdk_interpreter.py \ + --exclude test_socks.py \ + --exclude test_subtitles.py \ + --exclude test_write_annotations.py \ + --exclude test_youtube_lists.py \ + --exclude test_youtube_signature.py tar: youtube-dl.tar.gz diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index 6ba26720d..dd37a80f5 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -1,6 +1,7 @@ #!/bin/bash -DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists" +# Keep this list in sync with the `offlinetest` target in Makefile +DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature" test_set="" multiprocess_args="" From dafb4c66476754a46cc83fa750e9237da8fbf595 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 27 Nov 2017 22:49:35 +0800 Subject: [PATCH 21/41] [Makefile] Include setup.cfg in the tarball (closes #14857) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0c3936811..1c760bef8 100644 --- a/Makefile +++ b/Makefile @@ -124,5 +124,5 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- bin devscripts test youtube_dl docs \ ChangeLog LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ - youtube-dl.zsh youtube-dl.fish setup.py \ + youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \ youtube-dl From ffe6979ef925cb8b2e9324c2ad4635cb01d45ab7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 27 Nov 2017 16:53:42 +0100 Subject: [PATCH 22/41] [utils] add hvc1 codec code to parse_codecs --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 34866a54b..febc9d26f 100644 --- a/youtube_dl/utils.py +++ 
b/youtube_dl/utils.py @@ -2372,7 +2372,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): From 53f024e7c5bcea57b2446b3b5550801c011736d1 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 27 Nov 2017 16:55:08 +0100 Subject: [PATCH 23/41] [pbs] add another media id regex --- youtube_dl/extractor/pbs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index b51dcbe10..f11d5da52 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -421,6 +421,7 @@ class PBSIE(InfoExtractor): r'class="coveplayerid">([^<]+)<', # coveplayer r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/ r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer + r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", ] media_id = self._search_regex( From 115afb77ec2d31764307c06866b63ba859d3aafb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 27 Nov 2017 21:59:27 +0100 Subject: [PATCH 24/41] [itv] update hls formats extraction --- youtube_dl/extractor/itv.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 26c48e4b8..413a219dc 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import uuid import xml.etree.ElementTree as etree import json +import re from .common import 
InfoExtractor from ..compat import ( @@ -142,9 +143,9 @@ class ITVIE(InfoExtractor): f['url'] = rtmp_url formats.append(f) - ios_playlist_url = params.get('data-video-playlist') + ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') hmac = params.get('data-video-hmac') - if ios_playlist_url and hmac: + if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url): headers = self.geo_verification_headers() headers.update({ 'Accept': 'application/vnd.itv.vod.playlist.v2+json', @@ -159,12 +160,12 @@ class ITVIE(InfoExtractor): 'token': '' }, 'device': { - 'manufacturer': 'Apple', - 'model': 'iPad', + 'manufacturer': 'Safari', + 'model': '5', 'os': { - 'name': 'iPhone OS', - 'version': '9.3', - 'type': 'ios' + 'name': 'Windows NT', + 'version': '6.1', + 'type': 'desktop' } }, 'client': { @@ -173,10 +174,10 @@ class ITVIE(InfoExtractor): }, 'variantAvailability': { 'featureset': { - 'min': ['hls', 'aes'], - 'max': ['hls', 'aes'] + 'min': ['hls', 'aes', 'outband-webvtt'], + 'max': ['hls', 'aes', 'outband-webvtt'] }, - 'platformTag': 'mobile' + 'platformTag': 'dotcom' } }).encode(), headers=headers, fatal=False) if ios_playlist: From a3474aa59e44b15471f46fcac8680d79e82d5116 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 28 Nov 2017 09:04:51 +0100 Subject: [PATCH 25/41] [Odnoklassniki] fix api metadata request(fixes #14862) --- youtube_dl/extractor/odnoklassniki.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 854b6800c..8e13bcf1f 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -14,6 +14,7 @@ from ..utils import ( int_or_none, qualities, unescapeHTML, + urlencode_postdata, ) @@ -56,7 +57,7 @@ class OdnoklassnikiIE(InfoExtractor): 'url': 'http://ok.ru/video/64211978996595-1', 'md5': '2f206894ffb5dbfcce2c5a14b909eea5', 'info_dict': { - 
'id': '64211978996595-1', + 'id': 'V_VztHT5BzY', 'ext': 'mp4', 'title': 'Космическая среда от 26 августа 2015', 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0', @@ -127,9 +128,14 @@ class OdnoklassnikiIE(InfoExtractor): if metadata: metadata = self._parse_json(metadata, video_id) else: + data = {} + st_location = flashvars.get('location') + if st_location: + data['st.location'] = st_location metadata = self._download_json( compat_urllib_parse_unquote(flashvars['metadataUrl']), - video_id, 'Downloading metadata JSON') + video_id, 'Downloading metadata JSON', + data=urlencode_postdata(data)) movie = metadata['movie'] From 5f699251e9421035cc89baf0b9dc78f3b6eacb77 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 28 Nov 2017 10:55:32 +0100 Subject: [PATCH 26/41] [daisuki] add support for motto.daisuki.com(fixes #14681) --- youtube_dl/extractor/daisuki.py | 84 ++++++++++++++---------------- youtube_dl/extractor/extractors.py | 4 +- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py index 58cc98666..5c9ac68a0 100644 --- a/youtube_dl/extractor/daisuki.py +++ b/youtube_dl/extractor/daisuki.py @@ -13,33 +13,30 @@ from ..aes import ( from ..utils import ( bytes_to_intlist, bytes_to_long, - clean_html, + extract_attributes, ExtractorError, intlist_to_bytes, - get_element_by_id, js_to_json, int_or_none, long_to_bytes, pkcs1pad, - remove_end, ) -class DaisukiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html' +class DaisukiMottoIE(InfoExtractor): + _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})' _TEST = { - 'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html', + 'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428', 'info_dict': { - 'id': '11213', + 'id': 'V2e', 'ext': 'mp4', - 'title': 
'#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', + 'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!', 'subtitles': { 'mul': [{ 'ext': 'ttml', }], }, - 'creator': 'BANDAI NAMCO Entertainment', }, 'params': { 'skip_download': True, # AES-encrypted HLS stream @@ -73,15 +70,17 @@ class DaisukiIE(InfoExtractor): n, e = self._RSA_KEY encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) - init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={ - 's': flashvars.get('s', ''), - 'c': flashvars.get('ss3_prm', ''), - 'e': url, - 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( - bytes_to_intlist(json.dumps(data)), - aes_key, iv))).decode('ascii'), - 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), - }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) + init_data = self._download_json( + 'http://motto.daisuki.net/fastAPI/bgn/init/', + video_id, query={ + 's': flashvars.get('s', ''), + 'c': flashvars.get('ss3_prm', ''), + 'e': url, + 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( + bytes_to_intlist(json.dumps(data)), + aes_key, iv))).decode('ascii'), + 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), + }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) if 'rtn' in init_data: encrypted_rtn = init_data['rtn'] @@ -98,14 +97,11 @@ class DaisukiIE(InfoExtractor): aes_key, iv)).decode('utf-8').rstrip('\0'), video_id) + title = rtn['title_str'] + formats = self._extract_m3u8_formats( rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') - title = remove_end(self._og_search_title(webpage), ' - DAISUKI') - - creator = self._html_search_regex( - r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) - subtitles = {} caption_url = rtn.get('caption_url') if caption_url: @@ -120,21 +116,18 @@ class DaisukiIE(InfoExtractor): 'title': title, 'formats': formats, 'subtitles': subtitles, - 
'creator': creator, } -class DaisukiPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html' +class DaisukiMottoPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/' _TEST = { - 'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html', + 'url': 'http://motto.daisuki.net/information/', 'info_dict': { - 'id': 'TheIdolMasterCG', - 'title': 'THE IDOLM@STER CINDERELLA GIRLS', - 'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8', + 'title': 'DRAGON BALL SUPER', }, - 'playlist_count': 26, + 'playlist_mincount': 117, } def _real_extract(self, url): @@ -142,18 +135,19 @@ class DaisukiPlaylistIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - episode_pattern = r'''(?sx) - <img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+? - <p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)''' - entries = [{ - '_type': 'url_transparent', - 'url': url.replace('detail', 'watch').replace('.html', '.' 
+ movie_id + '.html'), - 'episode_id': episode_id, - 'episode_number': int_or_none(episode_id), - } for movie_id, episode_id in re.findall(episode_pattern, webpage)] + entries = [] + for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage): + attr = extract_attributes(li) + ad_id = attr.get('data-ad_id') + product_id = attr.get('data-product_id') + if ad_id and product_id: + episode_id = attr.get('data-chapter') + entries.append({ + '_type': 'url_transparent', + 'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id), + 'episode_id': episode_id, + 'episode_number': int_or_none(episode_id), + 'ie_key': 'DaisukiMotto', + }) - playlist_title = remove_end( - self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI') - playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage)) - - return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d3104f393..6db1297f1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -246,8 +246,8 @@ from .dailymotion import ( DailymotionCloudIE, ) from .daisuki import ( - DaisukiIE, - DaisukiPlaylistIE, + DaisukiMottoIE, + DaisukiMottoPlaylistIE, ) from .daum import ( DaumIE, From 07cf18b9c56bf8ce08404b084672f33e860a805e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 29 Nov 2017 14:21:38 +0100 Subject: [PATCH 27/41] [br] add support for BR Mediathek videos(fixes #14560)(fixes #14788) --- youtube_dl/extractor/br.py | 146 ++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 147 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 2c32b6ae2..9bde7f2d8 100644 --- a/youtube_dl/extractor/br.py +++ 
b/youtube_dl/extractor/br.py @@ -1,20 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, parse_duration, + parse_iso8601, xpath_element, xpath_text, ) class BRIE(InfoExtractor): - IE_DESC = 'Bayerischer Rundfunk Mediathek' + IE_DESC = 'Bayerischer Rundfunk' _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' _TESTS = [ @@ -123,10 +126,10 @@ class BRIE(InfoExtractor): for asset in assets.findall('asset'): format_url = xpath_text(asset, ['downloadUrl', 'url']) asset_type = asset.get('type') - if asset_type == 'HDS': + if asset_type.startswith('HDS'): formats.extend(self._extract_f4m_formats( format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) - elif asset_type == 'HLS': + elif asset_type.startswith('HLS'): formats.extend(self._extract_m3u8_formats( format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) else: @@ -169,3 +172,140 @@ class BRIE(InfoExtractor): } for variant in variants.findall('variant') if xpath_text(variant, 'url')] thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) return thumbnails + + +class BRMediathekIE(InfoExtractor): + IE_DESC = 'Bayerischer Rundfunk Mediathek' + _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})' + + _TESTS = [{ + 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', + 'md5': 'fdc3d485835966d1622587d08ba632ec', + 'info_dict': { + 'id': 'av:5a1e6a6e8fce6d001871cc8e', + 'ext': 'mp4', + 'title': 'Die Sendung vom 28.11.2017', + 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc', + 'timestamp': 1511942766, + 'upload_date': '20171129', + } + }] + + def _real_extract(self, url): + clip_id = self._match_id(url) + + clip = self._download_json( + 
'https://proxy-base.master.mango.express/graphql', + clip_id, data=json.dumps({ + "query": """{ + viewer { + clip(id: "%s") { + title + description + duration + createdAt + ageRestriction + videoFiles { + edges { + node { + publicLocation + fileSize + videoProfile { + width + height + bitrate + encoding + } + } + } + } + captionFiles { + edges { + node { + publicLocation + } + } + } + teaserImages { + edges { + node { + imageFiles { + edges { + node { + publicLocation + width + height + } + } + } + } + } + } + } + } +}""" % clip_id}).encode(), headers={ + 'Content-Type': 'application/json', + })['data']['viewer']['clip'] + title = clip['title'] + + formats = [] + for edge in clip.get('videoFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + ext = determine_ext(n_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + n_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + video_profile = node.get('videoProfile', {}) + tbr = int_or_none(video_profile.get('bitrate')) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': n_url, + 'width': int_or_none(video_profile.get('width')), + 'height': int_or_none(video_profile.get('height')), + 'tbr': tbr, + 'filesize': int_or_none(node.get('fileSize')), + }) + self._sort_formats(formats) + + subtitles = {} + for edge in clip.get('captionFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + subtitles.setdefault('de', []).append({ + 'url': n_url, + }) + + thumbnails = [] + for edge in clip.get('teaserImages', {}).get('edges', []): + for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []): + node = image_edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + thumbnails.append({ + 'url': n_url, + 'width': int_or_none(node.get('width')), + 
'height': int_or_none(node.get('height')), + }) + + return { + 'id': clip_id, + 'title': title, + 'description': clip.get('description'), + 'duration': int_or_none(clip.get('duration')), + 'timestamp': parse_iso8601(clip.get('createdAt')), + 'age_limit': int_or_none(clip.get('ageRestriction')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6db1297f1..2cc3bc463 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -127,7 +127,10 @@ from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bostonglobe import BostonGlobeIE from .bpb import BpbIE -from .br import BRIE +from .br import ( + BRIE, + BRMediathekIE, +) from .bravotv import BravoTVIE from .breakcom import BreakIE from .brightcove import ( From fb61b57d0f4c6422b2722f56f5740ea1b19adfcf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 29 Nov 2017 16:09:45 +0100 Subject: [PATCH 28/41] [vvvvid] fix extraction for kenc videos(fixes #13406) --- youtube_dl/extractor/vvvvid.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index 656a4b9e5..3d0dc403b 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -22,6 +22,9 @@ class VVVVIDIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ping Pong', }, + 'params': { + 'skip_download': True, + }, }, { # video_type == 'video/rcs' 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01', @@ -31,6 +34,9 @@ class VVVVIDIE(InfoExtractor): 'ext': 'mp4', 'title': 'Episodio 01', }, + 'params': { + 'skip_download': True, + }, }] _conn_id = None @@ -116,8 +122,20 @@ class VVVVIDIE(InfoExtractor): embed_code = ds(embed_code) video_type = video_data.get('video_type') if video_type in ('video/rcs', 'video/kenc'): - 
formats.extend(self._extract_akamai_formats( - embed_code, video_id)) + embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8') + if video_type == 'video/kenc': + kenc = self._download_json( + 'https://www.vvvvid.it/kenc', video_id, query={ + 'action': 'kt', + 'conn_id': self._conn_id, + 'url': embed_code, + }, fatal=False) or {} + kenc_message = kenc.get('message') + if kenc_message: + embed_code += '?' + ds(kenc_message) + formats.extend(self._extract_m3u8_formats( + embed_code, video_id, 'mp4', + m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_wowza_formats( 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) From 5ea765fb720523efb2702358e188e5157d08775b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 29 Nov 2017 17:50:38 +0100 Subject: [PATCH 29/41] [mtv] correct scc extention in extracted subtitles(closes #13730) --- youtube_dl/extractor/mtv.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 1154a3536..7a3b57abd 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -115,10 +115,17 @@ class MTVServicesInfoExtractor(InfoExtractor): if transcript.get('kind') != 'captions': continue lang = transcript.get('srclang') - subtitles[lang] = [{ - 'url': compat_str(typographic.get('src')), - 'ext': typographic.get('format') - } for typographic in transcript.findall('./typographic')] + for typographic in transcript.findall('./typographic'): + sub_src = typographic.get('src') + if not sub_src: + continue + ext = typographic.get('format') + if ext == 'cea-608': + ext = 'scc' + subtitles.setdefault(lang, []).append({ + 'url': compat_str(sub_src), + 'ext': ext + }) return subtitles def _get_video_info(self, itemdoc, use_hls=True): From 1663b329460b8b83b4eb0381ccb4f201374647db Mon Sep 17 00:00:00 2001 From: Remita Amine 
<remitamine@gmail.com> Date: Wed, 29 Nov 2017 20:36:55 +0100 Subject: [PATCH 30/41] [xiami] add Referer header to api request --- youtube_dl/extractor/xiami.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index d017e03de..7f871c8ec 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -40,9 +40,12 @@ class XiamiBaseIE(InfoExtractor): 'subtitles': subtitles, } - def _extract_tracks(self, item_id, typ=None): + def _extract_tracks(self, item_id, referer, typ=None): playlist = self._download_json( - '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id) + '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), + item_id, headers={ + 'Referer': referer, + }) return [ self._extract_track(track, item_id) for track in playlist['data']['trackList']] @@ -135,13 +138,13 @@ class XiamiSongIE(XiamiBaseIE): }] def _real_extract(self, url): - return self._extract_tracks(self._match_id(url))[0] + return self._extract_tracks(self._match_id(url), url)[0] class XiamiPlaylistBaseIE(XiamiBaseIE): def _real_extract(self, url): item_id = self._match_id(url) - return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id) + return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id) class XiamiAlbumIE(XiamiPlaylistBaseIE): From f5ac68d88f3f11b88213ccba5e079d7d5221e243 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 30 Nov 2017 23:37:05 +0100 Subject: [PATCH 31/41] [mnet] fix format extraction(fixes #14883) --- youtube_dl/extractor/mnet.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py index 6a85dcbd5..0e26ca1b3 100644 --- a/youtube_dl/extractor/mnet.py +++ b/youtube_dl/extractor/mnet.py @@ -40,21 +40,29 @@ class MnetIE(InfoExtractor): def 
_real_extract(self, url): video_id = self._match_id(url) + # TODO: extract rtmp formats + # no stype -> rtmp url + # stype=H -> m3u8 url + # stype=M -> mpd url info = self._download_json( - 'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id, - video_id, 'Downloading vod config JSON')['data']['info'] + 'http://content.api.mnet.com/player/vodConfig', + video_id, 'Downloading vod config JSON', query={ + 'id': video_id, + 'ctype': 'CLIP', + 'stype': 'H', + })['data']['info'] title = info['title'] - rtmp_info = self._download_json( - info['cdn'], video_id, 'Downloading vod cdn JSON') - - formats = [{ - 'url': rtmp_info['serverurl'] + rtmp_info['fileurl'], - 'ext': 'flv', - 'page_url': url, - 'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318', - }] + cdn_data = self._download_json( + info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0] + m3u8_url = cdn_data['url'] + token = cdn_data.get('token') + if token and token != '-': + m3u8_url += '?' 
+ token + formats = self._extract_wowza_formats( + m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m']) + self._sort_formats(formats) description = info.get('ment') duration = parse_duration(info.get('time')) From 0981585befc7af01331b72304ed11b9865b40401 Mon Sep 17 00:00:00 2001 From: zcanfly <zcanfly@126.com> Date: Sat, 2 Dec 2017 19:16:22 +0800 Subject: [PATCH 32/41] [youku] Update ccode (closes #14872) --- youtube_dl/extractor/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index a2b3b4daf..f0ba01197 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0502', + 'ccode': '0501', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, From fea92aa65dbc28e3a5cd109a5b9de5048c9cdfc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Dec 2017 19:03:24 +0700 Subject: [PATCH 33/41] [xhamster] Fix extraction (closes #14884) --- youtube_dl/extractor/xhamster.py | 87 ++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index be3624ef2..52f8ded2f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -6,10 +6,12 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( clean_html, + determine_ext, dict_get, ExtractorError, int_or_none, parse_duration, + try_get, unified_strdate, ) @@ -32,6 +34,7 @@ class XHamsterIE(InfoExtractor): 'display_id': 'femaleagent_shy_beauty_takes_the_bait', 'ext': 'mp4', 'title': 'FemaleAgent Shy beauty takes the bait', + 'timestamp': 1350194821, 'upload_date': '20121014', 'uploader': 'Ruseful2011', 'duration': 893, @@ -45,6 +48,7 @@ class XHamsterIE(InfoExtractor): 
'display_id': 'britney_spears_sexy_booty', 'ext': 'mp4', 'title': 'Britney Spears Sexy Booty', + 'timestamp': 1379123460, 'upload_date': '20130914', 'uploader': 'jojo747400', 'duration': 200, @@ -61,6 +65,7 @@ class XHamsterIE(InfoExtractor): 'id': '5667973', 'ext': 'mp4', 'title': '....', + 'timestamp': 1454948101, 'upload_date': '20160208', 'uploader': 'parejafree', 'duration': 72, @@ -96,6 +101,83 @@ class XHamsterIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) + age_limit = self._rta_search(webpage) + + def get_height(s): + return int_or_none(self._search_regex( + r'^(\d+)[pP]', s, 'height', default=None)) + + initials = self._parse_json( + self._search_regex( + r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials', + default='{}'), + video_id, fatal=False) + if initials: + video = initials['videoModel'] + title = video['title'] + formats = [] + for format_id, formats_dict in video['sources'].items(): + if not isinstance(formats_dict, dict): + continue + for quality, format_item in formats_dict.items(): + if format_id == 'download': + # Download link takes some time to be generated, + # skipping for now + continue + if not isinstance(format_item, dict): + continue + format_url = format_item.get('link') + filesize = int_or_none( + format_item.get('size'), invscale=1000000) + else: + format_url = format_item + filesize = None + if not isinstance(format_url, compat_str): + continue + formats.append({ + 'format_id': '%s-%s' % (format_id, quality), + 'url': format_url, + 'ext': determine_ext(format_url, 'mp4'), + 'height': get_height(quality), + 'filesize': filesize, + }) + self._sort_formats(formats) + + categories_list = video.get('categories') + if isinstance(categories_list, list): + categories = [] + for c in categories_list: + if not isinstance(c, dict): + continue + c_name = c.get('name') + if isinstance(c_name, compat_str): + categories.append(c_name) + else: + categories = None + + return { + 'id': video_id, + 'display_id': 
display_id, + 'title': title, + 'description': video.get('description'), + 'timestamp': int_or_none(video.get('created')), + 'uploader': try_get( + video, lambda x: x['author']['name'], compat_str), + 'thumbnail': video.get('thumbURL'), + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('views')), + 'like_count': int_or_none(try_get( + video, lambda x: x['rating']['likes'], int)), + 'dislike_count': int_or_none(try_get( + video, lambda x: x['rating']['dislikes'], int)), + 'comment_count': int_or_none(video.get('views')), + 'age_limit': age_limit, + 'categories': categories, + 'formats': formats, + } + + # Old layout fallback + title = self._html_search_regex( [r'<h1[^>]*>([^<]+)</h1>', r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"', @@ -119,8 +201,7 @@ class XHamsterIE(InfoExtractor): formats.append({ 'format_id': format_id, 'url': format_url, - 'height': int_or_none(self._search_regex( - r'^(\d+)[pP]', format_id, 'height', default=None)) + 'height': get_height(format_id), }) video_url = self._search_regex( @@ -167,8 +248,6 @@ class XHamsterIE(InfoExtractor): mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage) comment_count = mobj.group('commentcount') if mobj else 0 - age_limit = self._rta_search(webpage) - categories_html = self._search_regex( r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage, 'categories', default=None) From 41bf647e895faca42cdc2565ea034ed341593f8e Mon Sep 17 00:00:00 2001 From: Petr Novak <petr.novak@cosmoboy.cz> Date: Sat, 25 Nov 2017 02:13:23 +0100 Subject: [PATCH 34/41] [extractor/common] Add support for DASH manifests with SegmentLists with bare SegmentURLs --- test/test_InfoExtractor.py | 83 +++++++++++- test/testdata/mpd/urls_only.mpd | 218 ++++++++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 9 ++ 3 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 test/testdata/mpd/urls_only.mpd diff --git a/test/test_InfoExtractor.py 
b/test/test_InfoExtractor.py index 7b95f883f..e58452ab5 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -562,7 +562,88 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1920, 'height': 1080, }] - ), + ), ( + 'urls_only', + 'http://unknown/manifest.mpd', + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_144p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 200, + 'width': 256, + 'height': 144, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_240p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 400, + 'width': 424, + 'height': 240, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_360p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 800, + 'width': 640, + 'height': 360, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_480p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1200, + 'width': 856, + 'height': 480, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_576p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1600, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_720p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 2400, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 
'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_1080p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 4400, + 'width': 1920, + 'height': 1080, + }] + ) ] for mpd_file, mpd_url, expected_formats in _TEST_CASES: diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd new file mode 100644 index 000000000..2b9d595d3 --- /dev/null +++ b/test/testdata/mpd/urls_only.mpd @@ -0,0 +1,218 @@ +<?xml version="1.0" ?> +<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011"> + <Period duration="PT0H4M1.728S"> + <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true"> + <ContentComponent contentType="video" id="1"/> + <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="400000" 
codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/> + 
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/> + 
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856"> + <SegmentList duration="10000" timescale="1000"> + 
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/> + 
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/> + 
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/> + 
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + </AdaptationSet> + </Period> +</MPD> diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8e4ee0deb..15999411b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1975,6 +1975,15 @@ class InfoExtractor(object): }) segment_index += 1 representation_ms_info['fragments'] = fragments + elif 'segment_urls' in representation_ms_info: + # Segment URLs with no SegmentTimeline + # Example: 
https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 + fragments = [] + for segment_url in representation_ms_info['segment_urls']: + fragments.append({ + location_key(segment_url): segment_url, + }) + representation_ms_info['fragments'] = fragments # NB: MPD manifest may contain direct URLs to unfragmented media. # No fragments key is present in this case. if 'fragments' in representation_ms_info: From 603fc4e0ea472c7c2a78ff201d69686a9e3fe1f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Dec 2017 21:10:35 +0700 Subject: [PATCH 35/41] [extractor/common] Add durations for DASH fragments with bare SegmentURLs --- youtube_dl/extractor/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 15999411b..3baf683d8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1979,10 +1979,16 @@ class InfoExtractor(object): # Segment URLs with no SegmentTimeline # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 fragments = [] + segment_duration = float_or_none( + representation_ms_info['segment_duration'], + representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None for segment_url in representation_ms_info['segment_urls']: - fragments.append({ + fragment = { location_key(segment_url): segment_url, - }) + } + if segment_duration: + fragment['duration'] = segment_duration + fragments.append(fragment) representation_ms_info['fragments'] = fragments # NB: MPD manifest may contain direct URLs to unfragmented media. # No fragments key is present in this case. 
From 593f2f798922d54be8d3f20f4a2048493095016d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Dec 2017 21:15:45 +0700 Subject: [PATCH 36/41] [downloader/fragment] Commit part file after each fragment In order to obtain correct resume_len on next iteration --- youtube_dl/downloader/fragment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 93002e45a..7bb61a541 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -107,6 +107,7 @@ class FragmentFD(FileDownloader): def _append_fragment(self, ctx, frag_content): try: ctx['dest_stream'].write(frag_content) + ctx['dest_stream'].flush() finally: if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) From 78593e294cc4623108d17b1b0b2d26d507953006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Dec 2017 21:22:43 +0700 Subject: [PATCH 37/41] Add references for #14844 --- test/test_InfoExtractor.py | 1 + youtube_dl/extractor/common.py | 1 + 2 files changed, 2 insertions(+) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index e58452ab5..8a372d2c9 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -563,6 +563,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'height': 1080, }] ), ( + # https://github.com/rg3/youtube-dl/pull/14844 'urls_only', 'http://unknown/manifest.mpd', [{ diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3baf683d8..80a9c982f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1978,6 +1978,7 @@ class InfoExtractor(object): elif 'segment_urls' in representation_ms_info: # Segment URLs with no SegmentTimeline # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 + # 
https://github.com/rg3/youtube-dl/pull/14844 fragments = [] segment_duration = float_or_none( representation_ms_info['segment_duration'], From e25ee72657c3e27a24f62ac54d4b100c558ea662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Dec 2017 21:29:06 +0700 Subject: [PATCH 38/41] [ChangeLog] Actualize --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index a568fa233..81eda954c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version <unreleased> + +Core ++ [downloader/fragment] Commit part file after each fragment ++ [extractor/common] Add durations for DASH fragments with bare SegmentURLs ++ [extractor/common] Add support for DASH manifests with SegmentLists with + bare SegmentURLs (#14844) ++ [utils] Add hvc1 codec code to parse_codecs + +Extractors +* [xhamster] Fix extraction (#14884) +* [youku] Update ccode (#14872) +* [mnet] Fix format extraction (#14883) ++ [xiami] Add Referer header to API request +* [mtv] Correct scc extension in extracted subtitles (#13730) +* [vvvvid] Fix extraction for kenc videos (#13406) ++ [br] Add support for BR Mediathek videos (#14560, #14788) ++ [daisuki] Add support for motto.daisuki.com (#14681) +* [odnoklassniki] Fix API metadata request (#14862) +* [itv] Fix HLS formats extraction ++ [pbs] Add another media id regular expression + + version 2017.11.26 Core From 0d56eddc5970a9e55334add371f555227b957589 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Dec 2017 21:34:34 +0700 Subject: [PATCH 39/41] release 2017.12.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 7 ++++--- youtube_dl/version.py | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15211e5d9..513823b9b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -###
Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.11.26 +[debug] youtube-dl version 2017.12.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 81eda954c..63837d62b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.12.02 Core + [downloader/fragment] Commit part file after each fragment diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b44c9e7a3..0287a4011 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -112,11 +112,12 @@ - **BokeCC** - **BostonGlobe** - 
**Bpb**: Bundeszentrale für politische Bildung - - **BR**: Bayerischer Rundfunk Mediathek + - **BR**: Bayerischer Rundfunk - **BravoTV** - **Break** - **brightcove:legacy** - **brightcove:new** + - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** @@ -198,8 +199,8 @@ - **dailymotion:playlist** - **dailymotion:user** - **DailymotionCloud** - - **Daisuki** - - **DaisukiPlaylist** + - **DaisukiMotto** + - **DaisukiMottoPlaylist** - **daum.net** - **daum.net:clip** - **daum.net:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 81e3f792d..88bf1d652 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.11.26' +__version__ = '2017.12.02' From d7df30898115c4762514868140cfa856618cdfd5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Dec 2017 20:22:17 +0100 Subject: [PATCH 40/41] [toutv] fix login(closes 14614) --- youtube_dl/extractor/toutv.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index e59ed2661..17c0adc15 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -41,7 +43,7 @@ class TouTvIE(InfoExtractor): email, password = self._get_login_info() if email is None: return - state = 'http://ici.tou.tv//' + state = 'http://ici.tou.tv/' webpage = self._download_webpage(state, None, 'Downloading homepage') toutvlogin = self._parse_json(self._search_regex( r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) @@ -54,16 +56,30 @@ class TouTvIE(InfoExtractor): 'scope': 'media-drmt openid profile email id.write 
media-validation.read.privileged', 'state': state, }) - login_form = self._search_regex( - r'(?s)(<form[^>]+(?:id|name)="Form-login".+?</form>)', login_webpage, 'login form') - form_data = self._hidden_inputs(login_form) + + def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): + form, form_elem = re.search( + r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups() + form_data = self._hidden_inputs(form) + form_url = extract_attributes(form_elem).get('action') or default_form_url + return form_url, form_data + + post_url, form_data = extract_form_url_and_data( + login_webpage, + 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login', + r'(?:id|name)="Form-login"') form_data.update({ 'login-email': email, 'login-password': password, }) - post_url = extract_attributes(login_form).get('action') or authorize_url - _, urlh = self._download_webpage_handle( + consent_webpage = self._download_webpage( post_url, None, 'Logging in', data=urlencode_postdata(form_data)) + post_url, form_data = extract_form_url_and_data( + consent_webpage, + 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent') + _, urlh = self._download_webpage_handle( + post_url, None, 'Following Redirection', + data=urlencode_postdata(form_data)) self._access_token = self._search_regex( r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', urlh.geturl(), 'access token') From d4f05d473134d7bd61b054468e6ba297cef3c88f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Dec 2017 00:04:43 +0100 Subject: [PATCH 41/41] [utils] add sami mimetype to mimetype2ext --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index febc9d26f..eccbc0b1f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2350,6 +2350,7 @@ def mimetype2ext(mt): 'ttml+xml': 'ttml', 'x-flv': 'flv', 'x-mp4-fragmented': 'mp4', + 'x-ms-sami': 'sami', 'x-ms-wmv': 'wmv', 
'mpegurl': 'm3u8', 'x-mpegurl': 'm3u8',