From 64348907bdf8ece4fc79c8097f1b478f51ab946f Mon Sep 17 00:00:00 2001 From: Ni Ndogo Date: Sun, 28 Oct 2018 15:58:17 +0300 Subject: [PATCH 001/132] Added Porntrex --- youtube_dl/extractor/extractors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6012d0f5f..7a996e16c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -868,6 +868,10 @@ from .pornhub import ( from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE +from .porntrex import ( + PornTrexIE, + PornTrexPlayListIE, +) from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, From 1ad6e81451de4313adc0a07101169b7ebf50b6fb Mon Sep 17 00:00:00 2001 From: Ni Ndogo Date: Sun, 28 Oct 2018 15:59:33 +0300 Subject: [PATCH 002/132] Adding Porntrex --- youtube_dl/extractor/porntrex.py | 151 +++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 youtube_dl/extractor/porntrex.py diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py new file mode 100644 index 000000000..a7701e33e --- /dev/null +++ b/youtube_dl/extractor/porntrex.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + urlencode_postdata, + ExtractorError, +) + + +class PornTrexIE(InfoExtractor): + '''Class for downloading Porntrex video.''' + _NETRC_MACHINE = 'porntrex' + _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/.*' + _TEST = { + 'url': 'https://www.porntrex.com/video/519351/be-ariyana-adin-breaking-and-entering-this-pussy', + # 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '519351', + 'ext': 'mp4', + 'title': 'BE - Ariyana Adin - Breaking And Entering This Pussy', + 'uploader': 'brand95', + 'description': 'BE - Ariyana Adin - Breaking And Entering This 
Pussy', + } + } + + def get_resolution(self, url): + try: + resolution = ((url.split('.')[2])).split('_')[-1] + except: + resolution = '480p' + return resolution + + def get_protocol(self, url): + return url.split('/')[0] + + def get_thumbnails(self, html): + thumbnails_regex = re.compile('href="(http.*?/screenshots/\d+.jpg/)"') + thumbnails_list = re.findall(thumbnails_regex, html) + thumbnails = [] + for thumbs in thumbnails_list: + thumbnails.append({'url': thumbs}) + return thumbnails + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + 'https://www.porntrex.com/login/', None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'username': username.encode('utf-8'), + 'pass': password.encode('utf-8'), + }) + + login_page = self._download_webpage( + 'https://www.porntrex.com/ajax-login/', None, + note='Logging in', + data=urlencode_postdata(login_form)) + + if re.search(r'generic-error hidden', login_page): + raise ExtractorError( + 'Unable to login, incorrect username and/or password', expected=True) + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + is_video_private_regex = re.compile('Only active members can watch private videos.') + if (re.findall(is_video_private_regex, webpage)): + self.raise_login_required() + + title = self._html_search_regex(r'(.+?)', webpage, 'title',) + url2_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'") + url2 = re.findall(url2_regex, webpage) + uploader_regex = re.compile(r'(.+?)', re.DOTALL) + uploader = re.findall(uploader_regex, webpage)[0].strip() + formats = [] + for x, _ in enumerate(url2): + formats.append({'url': url2[x], + 'ext': url2[x].split('.')[-1], + 'resolution': self.get_resolution(url2[x]), + 'protocol': self.get_protocol(url2[x]), + }) + # 
self.get_thumbnails(webpage) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': uploader, + 'thumbnails': self.get_thumbnails(webpage), + 'formats': formats, + } + + +class PornTrexPlayListIE(InfoExtractor): + '''Class for downloading Porntrex video playlists.''' + _NETRC_MACHINE = 'porntrex' + _VALID_URL = r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/.*' + _TEST = { + 'url': 'https://www.porntrex.com/playlists/60671/les45/', + # 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '477697', + 'ext': 'mp4', + 'uploader': 'tarpi', + 'title': '4. Kelly Divine, Tiffany Minx (1080p)', + 'description': '4. Kelly Divine, Tiffany Minx (1080p)' + # 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def playlist_title(self, html): + return self._html_search_regex(r'(.+?)', html, 'title',) + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + get_all_urls_regex = re.compile('data-playlist-item="(.*?)"') + all_urls = re.findall(get_all_urls_regex, webpage) + + entries = [] + for this_url in all_urls: + entries.append({'_type': 'url', + 'id': 'PornTrex', + 'url': this_url, + }) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': self.playlist_title(webpage), + 'entries': entries, + } From 26eaa4e762bb993baded902a1a8fa0658fa7cc4c Mon Sep 17 00:00:00 2001 From: Ni Ndogo Date: Sun, 28 Oct 2018 19:56:22 +0300 Subject: [PATCH 003/132] Updating porntrex --- youtube_dl/extractor/porntrex.py | 50 +++++++++++++++++--------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py 
b/youtube_dl/extractor/porntrex.py index a7701e33e..c30deb671 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -15,29 +15,33 @@ class PornTrexIE(InfoExtractor): _NETRC_MACHINE = 'porntrex' _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/.*' _TEST = { - 'url': 'https://www.porntrex.com/video/519351/be-ariyana-adin-breaking-and-entering-this-pussy', - # 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'url': 'https://www.porntrex.com/video/519351/\ + be-ariyana-adin-breaking-and-entering-this-pussy', 'info_dict': { 'id': '519351', 'ext': 'mp4', 'title': 'BE - Ariyana Adin - Breaking And Entering This Pussy', 'uploader': 'brand95', - 'description': 'BE - Ariyana Adin - Breaking And Entering This Pussy', + 'description': 'BE - Ariyana Adin - Breaking And Entering This \ +Pussy', } } def get_resolution(self, url): + '''Video resolution extraction from url''' try: - resolution = ((url.split('.')[2])).split('_')[-1] - except: + resolution = ((url.split('.')[2])).split('_')[2] + except IndexError: resolution = '480p' return resolution - + def get_protocol(self, url): - return url.split('/')[0] + '''Video protocol extraction from url''' + return url.split(':')[0] def get_thumbnails(self, html): - thumbnails_regex = re.compile('href="(http.*?/screenshots/\d+.jpg/)"') + '''Each video has 10 thumbnails - extracted here.''' + thumbnails_regex = re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"') thumbnails_list = re.findall(thumbnails_regex, html) thumbnails = [] for thumbs in thumbnails_list: @@ -57,7 +61,7 @@ class PornTrexIE(InfoExtractor): login_form.update({ 'username': username.encode('utf-8'), 'pass': password.encode('utf-8'), - }) + }) login_page = self._download_webpage( 'https://www.porntrex.com/ajax-login/', None, @@ -66,8 +70,9 @@ class PornTrexIE(InfoExtractor): if re.search(r'generic-error hidden', login_page): raise ExtractorError( - 'Unable to login, incorrect username 
and/or password', expected=True) - + 'Unable to login, incorrect username and/or password', + expected=True) + def _real_initialize(self): self._login() @@ -75,14 +80,18 @@ class PornTrexIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - is_video_private_regex = re.compile('Only active members can watch private videos.') + private_string = 'Only active members can watch private videos.' + is_video_private_regex = re.compile(private_string) if (re.findall(is_video_private_regex, webpage)): self.raise_login_required() - title = self._html_search_regex(r'(.+?)', webpage, 'title',) + title = self._html_search_regex( + r'(.+?)', webpage, 'title',) url2_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'") url2 = re.findall(url2_regex, webpage) - uploader_regex = re.compile(r'(.+?)', re.DOTALL) + uploader_regex = re.compile( + r'(.+?)', + re.DOTALL) uploader = re.findall(uploader_regex, webpage)[0].strip() formats = [] for x, _ in enumerate(url2): @@ -93,6 +102,7 @@ class PornTrexIE(InfoExtractor): }) # self.get_thumbnails(webpage) self._sort_formats(formats) + print(formats) return { 'id': video_id, @@ -105,24 +115,18 @@ class PornTrexIE(InfoExtractor): class PornTrexPlayListIE(InfoExtractor): - '''Class for downloading Porntrex video playlists.''' + '''Class for downloading Porntrex video playlists.''' _NETRC_MACHINE = 'porntrex' - _VALID_URL = r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/.*' + _VALID_URL = \ + r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/.*' _TEST = { 'url': 'https://www.porntrex.com/playlists/60671/les45/', - # 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '477697', 'ext': 'mp4', 'uploader': 'tarpi', 'title': '4. Kelly Divine, Tiffany Minx (1080p)', 'description': '4. 
Kelly Divine, Tiffany Minx (1080p)' - # 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } } From 778000a1df24f907c58242300a5c4c7f4b6300ae Mon Sep 17 00:00:00 2001 From: Ni Ndogo Date: Tue, 30 Oct 2018 15:28:29 +0300 Subject: [PATCH 004/132] Clean ups as requested --- youtube_dl/extractor/porntrex.py | 56 ++++++++++---------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index c30deb671..139e6acd6 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -11,43 +11,19 @@ from ..utils import ( class PornTrexIE(InfoExtractor): - '''Class for downloading Porntrex video.''' _NETRC_MACHINE = 'porntrex' - _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/.*' + _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' _TEST = { - 'url': 'https://www.porntrex.com/video/519351/\ - be-ariyana-adin-breaking-and-entering-this-pussy', + 'url': 'https://www.porntrex.com/video/519351/be-ariyana-adin-breaking-and-entering-this-pussy', 'info_dict': { 'id': '519351', 'ext': 'mp4', 'title': 'BE - Ariyana Adin - Breaking And Entering This Pussy', 'uploader': 'brand95', - 'description': 'BE - Ariyana Adin - Breaking And Entering This \ -Pussy', + 'description': 'BE - Ariyana Adin - Breaking And Entering This Pussy', } } - def get_resolution(self, url): - '''Video resolution extraction from url''' - try: - resolution = ((url.split('.')[2])).split('_')[2] - except IndexError: - resolution = '480p' - return resolution - - def get_protocol(self, url): - '''Video protocol extraction from url''' - return url.split(':')[0] - - def get_thumbnails(self, html): - '''Each video has 10 thumbnails - extracted here.''' - thumbnails_regex = 
re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"') - thumbnails_list = re.findall(thumbnails_regex, html) - thumbnails = [] - for thumbs in thumbnails_list: - thumbnails.append({'url': thumbs}) - return thumbnails - def _login(self): username, password = self._get_login_info() if username is None: @@ -82,7 +58,7 @@ Pussy', private_string = 'Only active members can watch private videos.' is_video_private_regex = re.compile(private_string) - if (re.findall(is_video_private_regex, webpage)): + if re.findall(is_video_private_regex, webpage): self.raise_login_required() title = self._html_search_regex( @@ -93,32 +69,34 @@ Pussy', r'(.+?)', re.DOTALL) uploader = re.findall(uploader_regex, webpage)[0].strip() + thumbnails_regex = re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"') + thumbnails_list = re.findall(thumbnails_regex, webpage) + thumbnails = [] + for thumbs in thumbnails_list: + thumbnails.append({'url': thumbs}) formats = [] for x, _ in enumerate(url2): formats.append({'url': url2[x], 'ext': url2[x].split('.')[-1], - 'resolution': self.get_resolution(url2[x]), - 'protocol': self.get_protocol(url2[x]), + 'protocol': url2[x].split(':')[0], }) - # self.get_thumbnails(webpage) + self._sort_formats(formats) - print(formats) return { 'id': video_id, 'title': title, 'description': self._og_search_description(webpage), 'uploader': uploader, - 'thumbnails': self.get_thumbnails(webpage), + 'thumbnails': thumbnails, 'formats': formats, } class PornTrexPlayListIE(InfoExtractor): - '''Class for downloading Porntrex video playlists.''' _NETRC_MACHINE = 'porntrex' _VALID_URL = \ - r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/.*' + r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/' _TEST = { 'url': 'https://www.porntrex.com/playlists/60671/les45/', 'info_dict': { @@ -130,9 +108,6 @@ class PornTrexPlayListIE(InfoExtractor): } } - def playlist_title(self, html): - return self._html_search_regex(r'(.+?)', html, 'title',) - def _real_extract(self, url): 
playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) @@ -150,6 +125,9 @@ class PornTrexPlayListIE(InfoExtractor): return { '_type': 'playlist', 'id': playlist_id, - 'title': self.playlist_title(webpage), + 'title': self._html_search_regex( + r'(.+?)', + webpage, + 'title',), 'entries': entries, } From d3947f3051db3cc58d84674873ca84d8d800a40a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 1 Sep 2018 00:18:17 +0200 Subject: [PATCH 005/132] [ard] Add support for Beta ARD Mediathek Thanks to https://blog.fefe.de/?ts=a577685d for pointing out support is missing. --- youtube_dl/extractor/ard.py | 62 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 63 insertions(+) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 23f574d36..9c6be2dd9 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -282,3 +282,65 @@ class ARDIE(InfoExtractor): 'upload_date': upload_date, 'thumbnail': thumbnail, } + + +class ARDBetaMediathekIE(InfoExtractor): + _VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P[a-zA-Z0-9]+)/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita', + 'md5': '2d02d996156ea3c397cfc5036b5d7f8f', + 'info_dict': { + 'display_id': 'die-robuste-roswita', + 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', + 'title': 'Tatort: Die robuste Roswita', + 'description': r're:^Der Mord.*trüber ist als die Ilm.', + 'duration': 5316, + 'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard', + 'upload_date': '20180826', + 'ext': 'mp4', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + display_id = mobj.group('display_id') + + webpage = 
self._download_webpage(url, display_id) + data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);', webpage, 'json') + data = self._parse_json(data_json, display_id) + + res = { + 'id': video_id, + 'display_id': display_id, + } + formats = [] + for widget in data.values(): + if widget.get('_geoblocked'): + raise ExtractorError('This video is not available due to geoblocking', expected=True) + + if '_duration' in widget: + res['duration'] = widget['_duration'] + if 'clipTitle' in widget: + res['title'] = widget['clipTitle'] + if '_previewImage' in widget: + res['thumbnail'] = widget['_previewImage'] + if 'broadcastedOn' in widget: + res['upload_date'] = unified_strdate(widget['broadcastedOn']) + if 'synopsis' in widget: + res['description'] = widget['synopsis'] + if '_subtitleUrl' in widget: + res['subtitles'] = {'de': [{ + 'ext': 'ttml', + 'url': widget['_subtitleUrl'], + }]} + if '_quality' in widget: + formats.append({ + 'format_id': widget['_quality'], + 'url': widget['_stream']['json'][0], + }) + + self._sort_formats(formats) + res['formats'] = formats + + return res diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7a996e16c..a17a6eaf6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -54,6 +54,7 @@ from .appletrailers import ( from .archiveorg import ArchiveOrgIE from .arkena import ArkenaIE from .ard import ( + ARDBetaMediathekIE, ARDIE, ARDMediathekIE, ) From e8bd053211a733931fe01f80916b8b112d33ef7e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 1 Sep 2018 00:45:36 +0200 Subject: [PATCH 006/132] [ard] Better format handling Skip f4m, doesn't work (yet); correctly extract m3u8, and prefer plain HTTP files. 
--- youtube_dl/extractor/ard.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 9c6be2dd9..cff8ca4a5 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -335,10 +335,24 @@ class ARDBetaMediathekIE(InfoExtractor): 'url': widget['_subtitleUrl'], }]} if '_quality' in widget: - formats.append({ - 'format_id': widget['_quality'], - 'url': widget['_stream']['json'][0], - }) + format_url = widget['_stream']['json'][0] + + if format_url.endswith('.f4m'): + # Skip f4m - these URLs just return a 403 + formats.append({ + 'format_id': 'f4m-' + widget['_quality'], + 'url': format_url, + 'preference': -1001, + }) + elif format_url.endswith('m3u8'): + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': 'http-' + widget['_quality'], + 'url': format_url, + 'preference': 10, # Plain HTTP, that's nice + }) self._sort_formats(formats) res['formats'] = formats From 5c293329196ecc17217a2f516b2500dacd8e9887 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 1 Sep 2018 01:59:13 +0200 Subject: [PATCH 007/132] [ard] beta mediathek: make regexp for JSON more robust --- youtube_dl/extractor/ard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index cff8ca4a5..dcb347849 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -307,7 +307,7 @@ class ARDBetaMediathekIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);', webpage, 'json') + data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json') data = self._parse_json(data_json, display_id) res = { From 484190f277ec62fedcc34bb5e31830fe3db83fb2 Mon Sep 17 
00:00:00 2001 From: Remita Amine Date: Sat, 1 Sep 2018 08:16:28 +0100 Subject: [PATCH 008/132] [crunchyroll] parse vilos media data(closes #17343) --- youtube_dl/extractor/crunchyroll.py | 205 ++++++++++++++++------------ youtube_dl/extractor/vrv.py | 48 ++++--- 2 files changed, 141 insertions(+), 112 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 463f995c7..4ed458372 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -7,7 +7,7 @@ import zlib from hashlib import sha1 from math import pow, sqrt, floor -from .common import InfoExtractor +from .vrv import VRVIE from ..compat import ( compat_b64decode, compat_etree_fromstring, @@ -18,6 +18,8 @@ from ..compat import ( from ..utils import ( ExtractorError, bytes_to_intlist, + extract_attributes, + float_or_none, intlist_to_bytes, int_or_none, lowercase_escape, @@ -26,14 +28,13 @@ from ..utils import ( unified_strdate, urlencode_postdata, xpath_text, - extract_attributes, ) from ..aes import ( aes_cbc_decrypt, ) -class CrunchyrollBaseIE(InfoExtractor): +class CrunchyrollBaseIE(VRVIE): _LOGIN_URL = 'https://www.crunchyroll.com/login' _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' @@ -148,7 +149,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'ext': 'mp4', 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', 'description': 'md5:2d17137920c64f2f49981a7797d275ef', - 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Yomiuri Telecasting Corporation (YTV)', 'upload_date': '20131013', 'url': 're:(?!.*&)', @@ -221,7 +222,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'info_dict': { 'id': '535080', 'ext': 'mp4', - 'title': '11eyes Episode 1 – Piros éjszaka - Red Night', + 'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka', 'description': 'Kakeru and Yuka are thrown into an 
alternate nightmarish world they call "Red Night".', 'uploader': 'Marvelous AQL Inc.', 'upload_date': '20091021', @@ -437,13 +438,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if 'To view this, please log in to verify you are 18 or older.' in webpage: self.raise_login_required() + media = self._parse_json(self._search_regex( + r'vilos\.config\.media\s*=\s*({.+?});', + webpage, 'vilos media', default='{}'), video_id) + media_metadata = media.get('metadata') or {} + video_title = self._html_search_regex( r'(?s)]*>((?:(?!]+itemprop=["\']title["\'][^>]*>(?:(?!', webpage, 'video_title') video_title = re.sub(r' {2,}', ' ', video_title) - video_description = self._parse_json(self._html_search_regex( + video_description = (self._parse_json(self._html_search_regex( r']*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, - webpage, 'description', default='{}'), video_id).get('description') + webpage, 'description', default='{}'), video_id) or media_metadata).get('description') if video_description: video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) video_upload_date = self._html_search_regex( @@ -456,91 +462,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text [r']+href="/publisher/[^"]+"[^>]*>([^<]+)', r'
\s*Publisher:\s*\s*(.+?)\s*\s*
'], webpage, 'video_uploader', fatal=False) - available_fmts = [] - for a, fmt in re.findall(r'(]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): - attrs = extract_attributes(a) - href = attrs.get('href') - if href and '/freetrial' in href: - continue - available_fmts.append(fmt) - if not available_fmts: - for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): - available_fmts = re.findall(p, webpage) - if available_fmts: - break - video_encode_ids = [] formats = [] - for fmt in available_fmts: - stream_quality, stream_format = self._FORMAT_IDS[fmt] - video_format = fmt + 'p' - stream_infos = [] - streamdata = self._call_rpc_api( - 'VideoPlayer_GetStandardConfig', video_id, - 'Downloading media info for %s' % video_format, data={ - 'media_id': video_id, - 'video_format': stream_format, - 'video_quality': stream_quality, - 'current_page': url, - }) - if streamdata is not None: - stream_info = streamdata.find('./{default}preload/stream_info') + for stream in media.get('streams', []): + formats.extend(self._extract_vrv_formats( + stream.get('url'), video_id, stream.get('format'), + stream.get('audio_lang'), stream.get('hardsub_lang'))) + if not formats: + available_fmts = [] + for a, fmt in re.findall(r'(]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): + attrs = extract_attributes(a) + href = attrs.get('href') + if href and '/freetrial' in href: + continue + available_fmts.append(fmt) + if not available_fmts: + for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): + available_fmts = re.findall(p, webpage) + if available_fmts: + break + if not available_fmts: + available_fmts = self._FORMAT_IDS.keys() + video_encode_ids = [] + + for fmt in available_fmts: + stream_quality, stream_format = self._FORMAT_IDS[fmt] + video_format = fmt + 'p' + stream_infos = [] + streamdata = self._call_rpc_api( + 'VideoPlayer_GetStandardConfig', video_id, + 'Downloading media info for %s' % video_format, data={ + 
'media_id': video_id, + 'video_format': stream_format, + 'video_quality': stream_quality, + 'current_page': url, + }) + if streamdata is not None: + stream_info = streamdata.find('./{default}preload/stream_info') + if stream_info is not None: + stream_infos.append(stream_info) + stream_info = self._call_rpc_api( + 'VideoEncode_GetStreamInfo', video_id, + 'Downloading stream info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_encode_quality': stream_quality, + }) if stream_info is not None: stream_infos.append(stream_info) - stream_info = self._call_rpc_api( - 'VideoEncode_GetStreamInfo', video_id, - 'Downloading stream info for %s' % video_format, data={ - 'media_id': video_id, - 'video_format': stream_format, - 'video_encode_quality': stream_quality, - }) - if stream_info is not None: - stream_infos.append(stream_info) - for stream_info in stream_infos: - video_encode_id = xpath_text(stream_info, './video_encode_id') - if video_encode_id in video_encode_ids: - continue - video_encode_ids.append(video_encode_id) + for stream_info in stream_infos: + video_encode_id = xpath_text(stream_info, './video_encode_id') + if video_encode_id in video_encode_ids: + continue + video_encode_ids.append(video_encode_id) - video_file = xpath_text(stream_info, './file') - if not video_file: - continue - if video_file.startswith('http'): - formats.extend(self._extract_m3u8_formats( - video_file, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - - video_url = xpath_text(stream_info, './host') - if not video_url: - continue - metadata = stream_info.find('./metadata') - format_info = { - 'format': video_format, - 'height': int_or_none(xpath_text(metadata, './height')), - 'width': int_or_none(xpath_text(metadata, './width')), - } - - if '.fplive.net/' in video_url: - video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) - parsed_video_url = compat_urlparse.urlparse(video_url) - 
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( - netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) - if self._is_valid_url(direct_video_url, video_id, video_format): - format_info.update({ - 'format_id': 'http-' + video_format, - 'url': direct_video_url, - }) - formats.append(format_info) + video_file = xpath_text(stream_info, './file') + if not video_file: + continue + if video_file.startswith('http'): + formats.extend(self._extract_m3u8_formats( + video_file, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) continue - format_info.update({ - 'format_id': 'rtmp-' + video_format, - 'url': video_url, - 'play_path': video_file, - 'ext': 'flv', - }) - formats.append(format_info) + video_url = xpath_text(stream_info, './host') + if not video_url: + continue + metadata = stream_info.find('./metadata') + format_info = { + 'format': video_format, + 'height': int_or_none(xpath_text(metadata, './height')), + 'width': int_or_none(xpath_text(metadata, './width')), + } + + if '.fplive.net/' in video_url: + video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) + parsed_video_url = compat_urlparse.urlparse(video_url) + direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( + netloc='v.lvlt.crcdn.net', + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) + if self._is_valid_url(direct_video_url, video_id, video_format): + format_info.update({ + 'format_id': 'http-' + video_format, + 'url': direct_video_url, + }) + formats.append(format_info) + continue + + format_info.update({ + 'format_id': 'rtmp-' + video_format, + 'url': video_url, + 'play_path': video_file, + 'ext': 'flv', + }) + formats.append(format_info) self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) metadata = self._call_rpc_api( @@ -549,7 +563,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 
'media_id': video_id, }) - subtitles = self.extract_subtitles(video_id, webpage) + subtitles = {} + for subtitle in media.get('subtitles', []): + subtitle_url = subtitle.get('url') + if not subtitle_url: + continue + subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({ + 'url': subtitle_url, + 'ext': subtitle.get('format', 'ass'), + }) + if not subtitles: + subtitles = self.extract_subtitles(video_id, webpage) # webpage provide more accurate data than series_title from XML series = self._html_search_regex( @@ -557,8 +581,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage, 'series', fatal=False) season = xpath_text(metadata, 'series_title') - episode = xpath_text(metadata, 'episode_title') - episode_number = int_or_none(xpath_text(metadata, 'episode_number')) + episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title') + episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number')) season_number = int_or_none(self._search_regex( r'(?s)]+id=["\']showmedia_about_episode_num[^>]+>.+?\s*

\s*Season (\d+)', @@ -568,7 +592,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'id': video_id, 'title': video_title, 'description': video_description, - 'thumbnail': xpath_text(metadata, 'episode_image_url'), + 'duration': float_or_none(media_metadata.get('duration'), 1000), + 'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), 'uploader': video_uploader, 'upload_date': video_upload_date, 'series': series, diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 64b13f0ed..921e9e172 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor): class VRVIE(VRVBaseIE): IE_NAME = 'vrv' _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P[A-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', 'info_dict': { 'id': 'GR9PNZ396', @@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE): # m3u8 download 'skip_download': True, }, - } + }] + + def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): + if not url or stream_format not in ('hls', 'dash'): + return [] + stream_id = hardsub_lang or audio_lang + format_id = '%s-%s' % (stream_format, stream_id) + if stream_format == 'hls': + adaptive_formats = self._extract_m3u8_formats( + url, video_id, 'mp4', m3u8_id=format_id, + note='Downloading %s m3u8 information' % stream_id, + fatal=False) + elif stream_format == 'dash': + adaptive_formats = self._extract_mpd_formats( + url, video_id, mpd_id=format_id, + note='Downloading %s MPD information' % stream_id, + fatal=False) + if audio_lang: + for f in adaptive_formats: + if f.get('acodec') != 'none': + f['language'] = audio_lang + return adaptive_formats def _real_extract(self, url): video_id = self._match_id(url) @@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE): for stream_type, streams in 
streams_json.get('streams', {}).items(): if stream_type in ('adaptive_hls', 'adaptive_dash'): for stream in streams.values(): - stream_url = stream.get('url') - if not stream_url: - continue - stream_id = stream.get('hardsub_locale') or audio_locale - format_id = '%s-%s' % (stream_type.split('_')[1], stream_id) - if stream_type == 'adaptive_hls': - adaptive_formats = self._extract_m3u8_formats( - stream_url, video_id, 'mp4', m3u8_id=format_id, - note='Downloading %s m3u8 information' % stream_id, - fatal=False) - else: - adaptive_formats = self._extract_mpd_formats( - stream_url, video_id, mpd_id=format_id, - note='Downloading %s MPD information' % stream_id, - fatal=False) - if audio_locale: - for f in adaptive_formats: - if f.get('acodec') != 'none': - f['language'] = audio_locale - formats.extend(adaptive_formats) + formats.extend(self._extract_vrv_formats( + stream.get('url'), video_id, stream_type.split('_')[1], + audio_locale, stream.get('hardsub_locale'))) self._sort_formats(formats) subtitles = {} From 305e20af268a6a5359f7ec4696eaf4c1d9598fed Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Sep 2018 08:40:38 +0100 Subject: [PATCH 009/132] [ard] extract f4m formats --- youtube_dl/extractor/ard.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index dcb347849..194a369c0 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -338,12 +338,9 @@ class ARDBetaMediathekIE(InfoExtractor): format_url = widget['_stream']['json'][0] if format_url.endswith('.f4m'): - # Skip f4m - these URLs just return a 403 - formats.append({ - 'format_id': 'f4m-' + widget['_quality'], - 'url': format_url, - 'preference': -1001, - }) + formats.extend(self._extract_f4m_formats( + format_url + '?hdcore=3.11.0', + video_id, f4m_id='hds', fatal=False)) elif format_url.endswith('m3u8'): formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', m3u8_id='hls', 
fatal=False)) From 6d85d1a404a43ebc088aaf2b5e6a39ea63ad7f74 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Sep 2018 10:04:10 +0100 Subject: [PATCH 010/132] [crunchyroll] limit VRVIE inheritance to CrunchyrollIE --- youtube_dl/extractor/crunchyroll.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4ed458372..ba8b9fa7e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -7,6 +7,7 @@ import zlib from hashlib import sha1 from math import pow, sqrt, floor +from .common import InfoExtractor from .vrv import VRVIE from ..compat import ( compat_b64decode, @@ -34,7 +35,7 @@ from ..aes import ( ) -class CrunchyrollBaseIE(VRVIE): +class CrunchyrollBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.crunchyroll.com/login' _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' @@ -140,7 +141,8 @@ class CrunchyrollBaseIE(VRVIE): parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) -class CrunchyrollIE(CrunchyrollBaseIE): +class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): + IE_NAME = 'crunchyroll' _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', From 7ee90713aea4dee2c9e75a9c2dbcad7b686d3e13 Mon Sep 17 00:00:00 2001 From: Gorfiend Date: Sat, 1 Sep 2018 05:04:45 -0400 Subject: [PATCH 011/132] [niconico] Fix extraction on python3 (closes #17393) --- youtube_dl/extractor/niconico.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index dbe871f16..76b412ff1 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor): }, 'timing_constraint': 'unlimited' } - 
})) + }).encode()) resolution = video_quality.get('resolution', {}) From 22b6819f64e90e2011f52203337a31c536c3ba94 Mon Sep 17 00:00:00 2001 From: LangerJan Date: Sat, 1 Sep 2018 11:42:30 +0200 Subject: [PATCH 012/132] [ard] Add support for one.ard.de --- youtube_dl/extractor/ard.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 194a369c0..6bf8f61eb 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -21,7 +21,7 @@ from ..compat import compat_etree_fromstring class ARDMediathekIE(InfoExtractor): IE_NAME = 'ARD:mediathek' - _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' + _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' _TESTS = [{ # available till 26.07.2022 @@ -37,6 +37,9 @@ class ARDMediathekIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872', + 'only_matching': True, }, { # audio 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', From 8181421912f0cee734ddccbff3d5fa84fbd58eb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Sep 2018 18:36:18 +0700 Subject: [PATCH 013/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 49f44a6e6..7585f06e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version + +Core +* [utils] Skip remote IP addresses non matching to source address' IP version + when creating a connection (#13422, #17362) + +Extractors ++ [ard] Add support for one.ard.de 
(#17397) +* [niconico] Fix extraction on python3 (#17393, #17407) +* [ard] Extract f4m formats +* [crunchyroll] Parse vilos media data (#17343) ++ [ard] Add support for Beta ARD Mediathek ++ [bandcamp] Extract more metadata (#13197) +* [internazionale] Fix extraction of non-available-abroad videos (#17386) + + version 2018.08.28 Extractors From 92442c04abfb7934d333738ce8e1a2ab7f269f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Sep 2018 18:40:23 +0700 Subject: [PATCH 014/132] release 2018.09.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0816c4f5f..16d8f36d7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.08.28*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.08.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.08.28 +[debug] youtube-dl version 2018.09.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7585f06e9..4632133a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.01 Core * [utils] Skip remote IP addresses non matching to source address' IP version diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5beb9bc17..f0d6be901 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -56,6 +56,7 @@ - **archive.org**: archive.org videos - **ARD** - **ARD:mediathek** + - **ARDBetaMediathek** - **Arkena** - **arte.tv** - **arte.tv:+7** @@ -191,7 +192,7 @@ - **Crackle** - **Criterion** - **CrooksAndLiars** - - **Crunchyroll** + - **crunchyroll** - **crunchyroll:playlist** - **CSNNE** - **CSpan**: C-SPAN diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3e3fe1375..e655e0050 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.08.28' +__version__ = '2018.09.01' From c7bfe7f1a1d2fd674cd7f0c468625a1ce1169a17 Mon Sep 17 00:00:00 2001 From: Mohammed Yaseen Mowzer Date: Thu, 14 Jun 2018 17:12:33 +0200 
Subject: [PATCH 015/132] [generic] Skip unsuccessful jwplayer extraction (closes #16735) --- youtube_dl/extractor/generic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 229dfda1b..1db154c4f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3150,9 +3150,13 @@ class GenericIE(InfoExtractor): jwplayer_data = self._find_jwplayer_data( webpage, video_id, transform_source=js_to_json) if jwplayer_data: - info = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, base_url=url) - return merge_dicts(info, info_dict) + try: + info = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=url) + return merge_dicts(info, info_dict) + except ExtractorError: + # See https://github.com/rg3/youtube-dl/pull/16735 + pass # Video.js embed mobj = re.search( From 41c4d559bbab2b808270af696f5d00a8dd1b9c72 Mon Sep 17 00:00:00 2001 From: Hormoz K Date: Sat, 4 Aug 2018 09:47:58 -0400 Subject: [PATCH 016/132] [radiojavan] Fix extraction --- youtube_dl/extractor/radiojavan.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index a53ad97a5..4124bcd45 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -6,11 +6,13 @@ from .common import InfoExtractor from ..utils import ( unified_strdate, str_to_int, + urlencode_postdata, ) class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P[^/]+)/?' 
+ _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host' _TEST = { 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', @@ -31,8 +33,18 @@ class RadioJavanIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + download_host = self._download_json( + self._HOST_TRACKER_URL, + video_id, + data=urlencode_postdata({'id': video_id}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + } + )['host'] + formats = [{ - 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, + 'url': '%s/%s' % (download_host, video_path), 'format_id': '%sp' % height, 'height': int(height), } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] From 10c535b79fc48435a27f194456e364cb66141e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 3 Sep 2018 02:53:26 +0700 Subject: [PATCH 017/132] [radiojavan] Improve extraction (closes #17151) --- youtube_dl/extractor/radiojavan.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index 4124bcd45..3f74f0c01 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -4,15 +4,16 @@ import re from .common import InfoExtractor from ..utils import ( - unified_strdate, + parse_resolution, str_to_int, + unified_strdate, urlencode_postdata, + urljoin, ) class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P[^/]+)/?' 
- _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host' _TEST = { 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', @@ -31,23 +32,26 @@ class RadioJavanIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - download_host = self._download_json( - self._HOST_TRACKER_URL, - video_id, + 'https://www.radiojavan.com/videos/video_host', video_id, data=urlencode_postdata({'id': video_id}), headers={ 'Content-Type': 'application/x-www-form-urlencoded', 'Referer': url, - } - )['host'] + }).get('host', 'https://host1.rjmusicmedia.com') - formats = [{ - 'url': '%s/%s' % (download_host, video_path), - 'format_id': '%sp' % height, - 'height': int(height), - } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id, _, video_path in re.findall( + r'RJ\.video(?P\d+[pPkK])\s*=\s*(["\'])(?P(?:(?!\2).)+)\2', + webpage): + f = parse_resolution(format_id) + f.update({ + 'url': urljoin(download_host, video_path), + 'format_id': format_id, + }) + formats.append(f) self._sort_formats(formats) title = self._og_search_title(webpage) From 7f06451413fa1dbe43767bd67e8e6f738d682fef Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 4 Sep 2018 10:37:11 +0100 Subject: [PATCH 018/132] [slideslive] make the check for video_service_name case-insensitive(closes #17429) --- youtube_dl/extractor/slideslive.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index 104576033..ed84322c5 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -8,6 +8,7 @@ from ..utils import ExtractorError class SlidesLiveIE(InfoExtractor): _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' _TESTS = [{ + # video_service_name = 
YOUTUBE 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', 'info_dict': { @@ -19,14 +20,18 @@ class SlidesLiveIE(InfoExtractor): 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'upload_date': '20170925', } + }, { + # video_service_name = youtube + 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( url, video_id, headers={'Accept': 'application/json'}) - service_name = video_data['video_service_name'] - if service_name == 'YOUTUBE': + service_name = video_data['video_service_name'].lower() + if service_name == 'youtube': yt_video_id = video_data['video_service_id'] return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) else: From 0385b9e749b88722c7782606e3fe184cf5360def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Sep 2018 00:22:30 +0700 Subject: [PATCH 019/132] [iprima] Confirm adult check (closes #17437) --- youtube_dl/extractor/iprima.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index a29e6a5ba..3c4b7e48b 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -38,6 +38,8 @@ class IPrimaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1') + webpage = self._download_webpage(url, video_id) video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') From 5aba2034143dc38b5caf5455177b6296ac44c6c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Sep 2018 00:51:20 +0700 Subject: [PATCH 020/132] [pornhub:uservideos] Add support for new URLs (closes #17388) --- youtube_dl/extractor/pornhub.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ffc4405a8..6782848d9 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -254,7 +254,7 @@ class PornHubIE(InfoExtractor): self._sort_formats(formats) video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) view_count = self._extract_count( @@ -346,7 +346,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubUserVideosIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P[^/]+)/videos' + _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' _TESTS = [{ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { @@ -378,6 +378,12 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'only_matching': True, }] def _real_extract(self, url): From 8708e6291dc6aa166e730774122161e9d833a9e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 03:36:10 +0700 Subject: [PATCH 021/132] [youtube] Fix extraction (closes #17457, closes #17464) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0442906df..27047425d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1178,7 +1178,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, 
jscode): funcname = self._search_regex( (r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\.sig\|\|(?P[a-zA-Z0-9$]+)\('), + r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From 4f19dd4da1d4eb28eea689942207a2c4eec6f5ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 03:40:06 +0700 Subject: [PATCH 022/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4632133a3..b378f0a0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version + +Extractors +* [youtube] Fix extraction (#17457, #17464) ++ [pornhub:uservideos] Add support for new URLs (#17388) +* [iprima] Confirm adult check (#17437) +* [slideslive] Make check for video service name case-insensitive (#17429) +* [radiojavan] Fix extraction (#17151) +* [generic] Skip unsuccessful jwplayer extraction (#16735) + + version 2018.09.01 Core From 298c50fb1a8483105d8299061cf263ee18e3519f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 03:42:28 +0700 Subject: [PATCH 023/132] release 2018.09.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 16d8f36d7..2d67247e6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.01 +[debug] youtube-dl version 2018.09.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b378f0a0e..ac95c9b71 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.08 Extractors * [youtube] Fix extraction (#17457, #17464) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e655e0050..716d9ffe0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.01' +__version__ = '2018.09.08' From d3d937f37981e873d17106bee4c4e3813e4fca4a Mon Sep 17 00:00:00 2001 From: Timendum Date: Sat, 8 Sep 2018 09:44:06 +0200 Subject: [PATCH 024/132] [nbc] Fix extraction of percent encoded URLs (closes #17374) --- youtube_dl/extractor/nbc.py | 8 +++++++- 1 file 
changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index c843f8649..765c46fd2 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -7,6 +7,7 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE from .adobepass import AdobePassIE +from ..compat import compat_urllib_parse_unquote from ..utils import ( find_xpath_attr, smuggle_url, @@ -75,11 +76,16 @@ class NBCIE(AdobePassIE): 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', 'only_matching': True, }, + { + # Percent escaped url + 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189', + 'only_matching': True, + } ] def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() - permalink = 'http' + permalink + permalink = 'http' + compat_urllib_parse_unquote(permalink) response = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ 'filter[permalink]': permalink, From d9d5ba48ebb82db1cba2c50a57c1b6f86f78bfd4 Mon Sep 17 00:00:00 2001 From: Jens Rutschmann Date: Sat, 1 Sep 2018 19:43:34 +0200 Subject: [PATCH 025/132] [tele5] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tele5.py | 39 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/tele5.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a17a6eaf6..6e95b8bce 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1090,6 +1090,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tele5 import Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE diff --git a/youtube_dl/extractor/tele5.py 
b/youtube_dl/extractor/tele5.py new file mode 100644 index 000000000..cfa8d2475 --- /dev/null +++ b/youtube_dl/extractor/tele5.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .nexx import NexxIE + + +class Tele5IE(InfoExtractor): + _VALID_URL = r'https://www\.tele5\.de/(?:mediathek/filme-online/videos\?vid=|tv/)(?P[\w-]+)' + + _TESTS = [{ + 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', + 'info_dict': { + 'id': '1550589', + 'ext': 'mp4', + 'upload_date': '20180822', + 'timestamp': 1534927316, + 'title': 'SchleFaZ: Atomic Shark', + } + }, { + 'url': 'https://www.tele5.de/tv/dark-matter/videos', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'id\s*=\s*["\']video-player["\']\s*data-id\s*=\s*["\']([0-9]+)["\']', + webpage, 'video_id') + + return self.url_result( + 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, + ie=NexxIE.ie_key(), video_id=video_id) From b4a9c66b8b20345919908d5a104ef5e64ff68fed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 16:04:39 +0700 Subject: [PATCH 026/132] [tele5] Improve extraction (closes #7805, closes #7922, closes #17331, closes #17414) --- youtube_dl/extractor/tele5.py | 39 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index cfa8d2475..25573e49f 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -1,38 +1,43 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from .nexx import NexxIE +from ..compat import compat_urlparse class Tele5IE(InfoExtractor): - _VALID_URL = 
r'https://www\.tele5\.de/(?:mediathek/filme-online/videos\?vid=|tv/)(?P[\w-]+)' - + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P[^?#&]+)' _TESTS = [{ - 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', + 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', 'info_dict': { - 'id': '1550589', + 'id': '1549416', 'ext': 'mp4', - 'upload_date': '20180822', - 'timestamp': 1534927316, - 'title': 'SchleFaZ: Atomic Shark', - } + 'upload_date': '20180814', + 'timestamp': 1534290623, + 'title': 'Pandorum', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', + 'only_matching': True, }, { 'url': 'https://www.tele5.de/tv/dark-matter/videos', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] - webpage = self._download_webpage(url, display_id) - - video_id = self._html_search_regex( - r'id\s*=\s*["\']video-player["\']\s*data-id\s*=\s*["\']([0-9]+)["\']', - webpage, 'video_id') + if not video_id: + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex( + r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)', + webpage, 'video id') return self.url_result( 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, From 5ea082446e1d8986553a814fb3c629d769cc0912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:09 +0700 Subject: [PATCH 027/132] [dtube] PEP 8 (#17455) --- youtube_dl/extractor/dtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 4ca97f860..5887887e1 100644 --- 
a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor): try: self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) self._downloader._opener.open(video_url, timeout=5).close() - except timeout as e: + except timeout: self.to_screen( '%s: %s URL is invalid, skipping' % (video_id, format_id)) continue From ed18ba2c0f7384093c615588510a4ffff6da5a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:34 +0700 Subject: [PATCH 028/132] [motherless] PEP 8 (#17455) --- youtube_dl/extractor/motherless.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index bed5645f2..d4bd273b6 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -167,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor): if not entries: entries = [ self.url_result( - compat_urlparse.urljoin(base, '/' + video_id), - ie=MotherlessIE.ie_key(), video_id=video_id) - for video_id in orderedSet(re.findall( + compat_urlparse.urljoin(base, '/' + entry_id), + ie=MotherlessIE.ie_key(), video_id=entry_id) + for entry_id in orderedSet(re.findall( r'data-codename=["\']([A-Z0-9]+)', webpage))] return entries From 08485512968ae9a2af24b233cb5d774e4a7eeb9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:48 +0700 Subject: [PATCH 029/132] [seznamzpravy] PEP 8 (#17455) --- youtube_dl/extractor/seznamzpravy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py index 6d4e3b76d..7a1c7e38b 100644 --- a/youtube_dl/extractor/seznamzpravy.py +++ b/youtube_dl/extractor/seznamzpravy.py @@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor): description = info.get('description') or self._og_search_description(webpage) return self.playlist_result([ 
- self.url_result(url, ie=SeznamZpravyIE.ie_key()) - for url in SeznamZpravyIE._extract_urls(webpage)], + self.url_result(entry_url, ie=SeznamZpravyIE.ie_key()) + for entry_url in SeznamZpravyIE._extract_urls(webpage)], article_id, title, description) From e57f4c62ca4d1d83a87fceaae97d5861f10397c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:59 +0700 Subject: [PATCH 030/132] [generic] PEP 8 (#17455) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1db154c4f..76ef01332 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3112,7 +3112,7 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key()) - sharevideos_urls = [mobj.group('url') for mobj in re.finditer( + sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer( r']+?\bsrc\s*=\s*(["\'])(?P(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', webpage)] if sharevideos_urls: From f89df2d37a019f8eb353191a613cb61385dd8dbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 01:34:49 +0700 Subject: [PATCH 031/132] [iprima] Add support for prima.iprima.cz (closes #17514) --- youtube_dl/extractor/iprima.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 3c4b7e48b..1d58d6e85 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -12,7 +12,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P[^?#]+)' + _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P[^?#]+)' _GEO_BYPASS = False _TESTS = [{ @@ -33,6 +33,14 @@ class IPrimaIE(InfoExtractor): # geo restricted 'url': 
'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', 'only_matching': True, + }, { + # iframe api.play-backend.iprima.cz + 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2', + 'only_matching': True, + }, { + # iframe prima.iprima.cz + 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha', + 'only_matching': True, }] def _real_extract(self, url): @@ -42,7 +50,10 @@ class IPrimaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') + video_id = self._search_regex( + (r']+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', + r'data-product="([^"]+)">'), + webpage, 'real id') playerpage = self._download_webpage( 'http://play.iprima.cz/prehravac/init', From 8f0e36d4190ca2f2f2ff7cf5c53a6e264b07b067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 02:37:22 +0700 Subject: [PATCH 032/132] [utils] Properly recognize AV1 codec (closes #17506) --- test/test_utils.py | 4 ++++ youtube_dl/utils.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8da5ccc56..9e28e008f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -785,6 +785,10 @@ class TestUtil(unittest.TestCase): 'vcodec': 'h264', 'acodec': 'aac', }) + self.assertEqual(parse_codecs('av01.0.05M.08'), { + 'vcodec': 'av01.0.05M.08', + 'acodec': 'none', + }) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bcfb72d43..e84d35d4d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2477,7 +2477,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 
'mp4v', 'hvc1'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): From 71b5a8615ddc485850c532c89062ff8dd125c16c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 02:45:44 +0700 Subject: [PATCH 033/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index ac95c9b71..c02450bf4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version + +Core ++ [utils] Properly recognize AV1 codec (#17506) + +Extractors ++ [iprima] Add support for prima.iprima.cz (#17514) ++ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414) +* [nbc] Fix extraction of percent encoded URLs (#17374) + + version 2018.09.08 Extractors From 6109c1de7eb4610c9776ce2226e43a124e20d7de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 02:48:37 +0700 Subject: [PATCH 034/132] release 2018.09.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2d67247e6..f41266f32 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.08 +[debug] youtube-dl version 2018.09.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c02450bf4..d184f69ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.10 Core + [utils] Properly recognize AV1 codec (#17506) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f0d6be901..9b8601751 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -847,6 +847,7 @@ - **techtv.mit.edu** - **ted** - **Tele13** + - **Tele5** - **TeleBruxelles** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 716d9ffe0..b078c4993 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals 
-__version__ = '2018.09.08' +__version__ = '2018.09.10' From e4d797854c426ff09cb3609db74c25db7113a45d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Sep 2018 02:24:32 +0700 Subject: [PATCH 035/132] [eporner] Extract JSON-LD (closes #17519) --- youtube_dl/extractor/eporner.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 6d03d7095..c050bf9df 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -9,6 +9,7 @@ from ..utils import ( encode_base_n, ExtractorError, int_or_none, + merge_dicts, parse_duration, str_to_int, url_or_none, @@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor): 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', 'ext': 'mp4', 'title': 'Infamous Tiffany Teen Strip Tease Video', + 'description': 'md5:764f39abf932daafa37485eb46efa152', + 'timestamp': 1232520922, + 'upload_date': '20090121', 'duration': 1838, 'view_count': int, 'age_limit': 18, }, + 'params': { + 'proxy': '127.0.0.1:8118' + } }, { # New (May 2016) URL layout 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', @@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor): }) self._sort_formats(formats) - duration = parse_duration(self._html_search_meta('duration', webpage)) + json_ld = self._search_json_ld(webpage, display_id, default={}) + + duration = parse_duration(self._html_search_meta( + 'duration', webpage, default=None)) view_count = str_to_int(self._search_regex( r'id="cinemaviews">\s*([0-9,]+)\s*views', webpage, 'view count', fatal=False)) - return { + return merge_dicts(json_ld, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, - } + }) From d4b90184b921968475ea6b42955e8faf31381cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Sep 2018 
02:29:45 +0700 Subject: [PATCH 036/132] [tube8] Fix metadata extraction (closes #17520) --- youtube_dl/extractor/tube8.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 368c45729..db93b0182 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE): r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( - r'>Description:\s*(.+?)\s*<', webpage, 'description', fatal=False) + r'(?s)Description:\s*
(.+?)
', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'\s*(.+?)\s*<', webpage, 'uploader', fatal=False) @@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE): dislike_count = int_or_none(self._search_regex( r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) view_count = str_to_int(self._search_regex( - r'Views: ([\d,\.]+)\s*', + r'Views:\s*\s*
([\d,\.]+)', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._search_regex( r'(\d+)', webpage, 'comment count', fatal=False)) category = self._search_regex( - r'Category:\s*\s*]+href=[^>]+>([^<]+)', + r'Category:\s*\s*
\s*]+href=[^>]+>([^<]+)', webpage, 'category', fatal=False) categories = [category] if category else None tags_str = self._search_regex( - r'(?s)Tags:\s*(.+?)\s*
(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None From 907340258799b04680977fdb58daec577cbefb4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Sep 2018 02:41:05 +0700 Subject: [PATCH 037/132] [vzaar] Add support for HLS --- youtube_dl/extractor/vzaar.py | 60 +++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 02fcd52c7..6000671c3 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -4,15 +4,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, float_or_none, + unified_timestamp, + url_or_none, ) class VzaarIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P\d+)' _TESTS = [{ + # HTTP and HLS 'url': 'https://vzaar.com/videos/1152805', 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', 'info_dict': { @@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor): video_id = self._match_id(url) video_data = self._download_json( 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - source_url = video_data['sourceUrl'] - info = { + title = video_data['videoTitle'] + + formats = [] + + source_url = url_or_none(video_data.get('sourceUrl')) + if source_url: + f = { + 'url': source_url, + 'format_id': 'http', + } + if 'audio' in source_url: + f.update({ + 'vcodec': 'none', + 'ext': 'mp3', + }) + else: + f.update({ + 'width': int_or_none(video_data.get('width')), + 'height': int_or_none(video_data.get('height')), + 'ext': 'mp4', + 'fps': float_or_none(video_data.get('fps')), + }) + formats.append(f) + + video_guid = video_data.get('guid') + usp = video_data.get('usp') + if isinstance(video_guid, compat_str) and isinstance(usp, dict): + m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?' 
+ % (video_guid, video_id)) + '&'.join( + '%s=%s' % (k, v) for k, v in usp.items()) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + + return { 'id': video_id, - 'title': video_data['videoTitle'], - 'url': source_url, + 'title': title, 'thumbnail': self._proto_relative_url(video_data.get('poster')), 'duration': float_or_none(video_data.get('videoDuration')), + 'timestamp': unified_timestamp(video_data.get('ts')), + 'formats': formats, } - if 'audio' in source_url: - info.update({ - 'vcodec': 'none', - 'ext': 'mp3', - }) - else: - info.update({ - 'width': int_or_none(video_data.get('width')), - 'height': int_or_none(video_data.get('height')), - 'ext': 'mp4', - }) - return info From f3f25e95ad83ebe238a120a9625bf77645502708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Sep 2018 23:26:38 +0700 Subject: [PATCH 038/132] [twitch:clips] Extend _VALID_URL (closes #17559) --- youtube_dl/extractor/twitch.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index b39972b1e..26661fdf6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -559,7 +559,8 @@ class TwitchStreamIE(TwitchBaseIE): TwitchAllVideosIE, TwitchUploadsIE, TwitchPastBroadcastsIE, - TwitchHighlightsIE)) + TwitchHighlightsIE, + TwitchClipsIE)) else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): @@ -633,7 +634,7 @@ class TwitchStreamIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE): IE_NAME = 'twitch:clips' - _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', @@ -653,6 +654,9 @@ class TwitchClipsIE(TwitchBaseIE): # multiple formats 
'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', + 'only_matching': True, }] def _real_extract(self, url): From b09914218a73084ca01ad5c83a3b08856b74d240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Sep 2018 23:56:03 +0700 Subject: [PATCH 039/132] [asiancrush] Fix extraction (closes #15630) --- youtube_dl/extractor/asiancrush.py | 42 ++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 594c88c9c..6d71c5ad5 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -8,7 +8,6 @@ from .kaltura import KalturaIE from ..utils import ( extract_attributes, remove_end, - urlencode_postdata, ) @@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id, - data=urlencode_postdata({ - 'postid': video_id, - 'action': 'get_channel_kaltura_vars', - })) + webpage = self._download_webpage(url, video_id) - entry_id = data['entry_id'] + entry_id, partner_id, title = [None] * 3 + + vars = self._parse_json( + self._search_regex( + r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', + default='{}'), video_id, fatal=False) + if vars: + entry_id = vars.get('entry_id') + partner_id = vars.get('partner_id') + title = vars.get('vid_label') + + if not entry_id: + entry_id = self._search_regex( + r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') + + player = self._download_webpage( + 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + query={'id': entry_id}) + + kaltura_id = self._search_regex( + r'entry_id["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', player, + 'kaltura id', group='id') + + if not partner_id: + partner_id = 
self._search_regex( + r'/p(?:artner_id)?/(\d+)', player, 'partner id', + default='513551') return self.url_result( - 'kaltura:%s:%s' % (data['partner_id'], entry_id), - ie=KalturaIE.ie_key(), video_id=entry_id, - video_title=data.get('vid_label')) + 'kaltura:%s:%s' % (partner_id, kaltura_id), + ie=KalturaIE.ie_key(), video_id=kaltura_id, + video_title=title) class AsianCrushPlaylistIE(InfoExtractor): From 8570904d1fd644f615ae4b26bbb83830c2e7801c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 00:42:27 +0700 Subject: [PATCH 040/132] [porntube] Fix extraction (closes #17541) --- youtube_dl/extractor/fourtube.py | 94 ++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index ad273a0e7..20391b2bb 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,15 +3,44 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_b64decode, + compat_str, + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import ( + int_or_none, parse_duration, parse_iso8601, str_to_int, + try_get, + unified_timestamp, + url_or_none, ) class FourTubeBaseIE(InfoExtractor): + _TKN_HOST = 'tkn.kodicdn.com' + + def _extract_formats(self, url, video_id, media_id, sources): + token_url = 'https://%s/%s/desktop/%s' % ( + self._TKN_HOST, media_id, '+'.join(sources)) + + parsed_url = compat_urlparse.urlparse(url) + tokens = self._download_json(token_url, video_id, data=b'', headers={ + 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Referer': url, + }) + formats = [{ + 'url': tokens[format]['token'], + 'format_id': format + 'p', + 'resolution': format + 'p', + 'quality': int(format), + } for format in sources] + self._sort_formats(formats) + return formats + def _real_extract(self, url): mobj = 
re.match(self._VALID_URL, url) kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') @@ -68,21 +97,7 @@ class FourTubeBaseIE(InfoExtractor): media_id = params[0] sources = ['%s' % p for p in params[2]] - token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format( - media_id, '+'.join(sources)) - - parsed_url = compat_urlparse.urlparse(url) - tokens = self._download_json(token_url, video_id, data=b'', headers={ - 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), - 'Referer': url, - }) - formats = [{ - 'url': tokens[format]['token'], - 'format_id': format + 'p', - 'resolution': format + 'p', - 'quality': int(format), - } for format in sources] - self._sort_formats(formats) + formats = self._extract_formats(url, video_id, media_id, sources) return { 'id': video_id, @@ -164,6 +179,7 @@ class FuxIE(FourTubeBaseIE): class PornTubeIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?porntube\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' + _TKN_HOST = 'tkn.porntube.com' _TESTS = [{ 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', 'info_dict': { @@ -171,13 +187,12 @@ class PornTubeIE(FourTubeBaseIE): 'ext': 'mp4', 'title': 'Teen couple doing anal', 'uploader': 'Alexy', - 'uploader_id': 'Alexy', + 'uploader_id': '91488', 'upload_date': '20150606', 'timestamp': 1433595647, 'duration': 5052, 'view_count': int, 'like_count': int, - 'categories': list, 'age_limit': 18, }, 'params': { @@ -191,6 +206,49 @@ class PornTubeIE(FourTubeBaseIE): 'only_matching': True, }] + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video = self._parse_json( + self._search_regex( + r'INITIALSTATE\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data', group='value'), video_id, + transform_source=lambda x: compat_urllib_parse_unquote( + 
compat_b64decode(x).decode('utf-8')))['page']['video'] + + title = video['title'] + media_id = video['mediaId'] + sources = [compat_str(e['height']) + for e in video['encodings'] if e.get('height')] + formats = self._extract_formats(url, video_id, media_id, sources) + + thumbnail = url_or_none(video.get('masterThumb')) + uploader = try_get(video, lambda x: x['user']['username'], compat_str) + uploader_id = compat_str(try_get(video, lambda x: x['user']['id'], int)) + like_count = int_or_none(video.get('likes')) + dislike_count = int_or_none(video.get('dislikes')) + view_count = int_or_none(video.get('playsQty')) + duration = int_or_none(video.get('durationInSeconds')) + timestamp = unified_timestamp(video.get('publishedAt')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'timestamp': timestamp, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'view_count': view_count, + 'duration': duration, + 'age_limit': 18, + } + class PornerBrosIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' From 44fd0a01c4e45ea12d12c826d57097d2e41c61e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:23:36 +0700 Subject: [PATCH 041/132] [extractor/common] Introduce channel meta fields --- youtube_dl/extractor/common.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b8bbaf81a..8eab5947f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -211,6 +211,11 @@ class InfoExtractor(object): If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader. + channel: Full name of the channel the video is uploaded on. 
+ Note that channel fields may or may noy repeat uploader + fields. This depends on a particular extractor. + channel_id: Id of the channel. + channel_url: Full URL to a channel webpage. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format {tag: subformats}. "tag" is usually a language code, and From 0e11d01214f2431e7a72145425fa4c3ee632e154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:24:26 +0700 Subject: [PATCH 042/132] [youtube] Extract channel meta fields (closes #9676, closes #12939) --- youtube_dl/extractor/youtube.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 27047425d..2fe074cb4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -490,6 +490,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', + 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', + 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -1907,6 +1909,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self._downloader.report_warning('unable to extract uploader nickname') + channel_id = self._html_search_meta( + 'channelId', video_webpage, 'channel id') + channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None + # thumbnail image # We try first to get a high quality image: m_thumb = re.search(r'', @@ -2078,6 +2084,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': video_uploader, 'uploader_id': video_uploader_id, 'uploader_url': 
video_uploader_url, + 'channel_id': channel_id, + 'channel_url': channel_url, 'upload_date': upload_date, 'license': video_license, 'creator': video_creator or artist, From 0da3ecd61f983e718eb9dd29f8fe08ba8391b205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:24:48 +0700 Subject: [PATCH 043/132] [vimeo] Extract channel meta fields --- youtube_dl/extractor/vimeo.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index e49b233f2..95d368cc1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', + 'channel_id': 'keypeele', + 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele', 'timestamp': 1380339469, 'upload_date': '20130928', 'duration': 187, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://vimeo.com/76979871', @@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/channels/tributes/6213729', 'info_dict': { 'id': '6213729', - 'ext': 'mov', + 'ext': 'mp4', 'title': 'Vimeo Tribute: The Shining', 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', + 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes', + 'channel_id': 'tributes', 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', @@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor): if 'Referer' not in headers: headers['Referer'] = url + channel_id = self._search_regex( + r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) + # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -652,6 +660,8 @@ class 
VimeoIE(VimeoBaseInfoExtractor): r']+rel=["\']license["\'][^>]+href=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'license', default=None, group='license') + channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None + info_dict = { 'id': video_id, 'formats': formats, @@ -662,6 +672,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'like_count': like_count, 'comment_count': comment_count, 'license': cc_license, + 'channel_id': channel_id, + 'channel_url': channel_url, } info_dict = merge_dicts(info_dict, info_dict_config, json_ld) From 876eaf5f7df95543b3d37e1bc4b720e0dfc81dee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:25:32 +0700 Subject: [PATCH 044/132] [porntube] Extract channel meta fields --- youtube_dl/extractor/fourtube.py | 33 +++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index 20391b2bb..a9a1f911e 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, parse_duration, parse_iso8601, + str_or_none, str_to_int, try_get, unified_timestamp, @@ -198,6 +199,26 @@ class PornTubeIE(FourTubeBaseIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406', + 'info_dict': { + 'id': '1331406', + 'ext': 'mp4', + 'title': 'Squirting Teen Ballerina on ECG', + 'uploader': 'Exploited College Girls', + 'uploader_id': '665', + 'channel': 'Exploited College Girls', + 'channel_id': '665', + 'upload_date': '20130920', + 'timestamp': 1379685485, + 'duration': 851, + 'view_count': int, + 'like_count': int, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.porntube.com/embed/7089759', 'only_matching': True, @@ -227,7 +248,11 @@ class PornTubeIE(FourTubeBaseIE): thumbnail = url_or_none(video.get('masterThumb')) uploader = 
try_get(video, lambda x: x['user']['username'], compat_str) - uploader_id = compat_str(try_get(video, lambda x: x['user']['id'], int)) + uploader_id = str_or_none(try_get( + video, lambda x: x['user']['id'], int)) + channel = try_get(video, lambda x: x['channel']['name'], compat_str) + channel_id = str_or_none(try_get( + video, lambda x: x['channel']['id'], int)) like_count = int_or_none(video.get('likes')) dislike_count = int_or_none(video.get('dislikes')) view_count = int_or_none(video.get('playsQty')) @@ -239,8 +264,10 @@ class PornTubeIE(FourTubeBaseIE): 'title': title, 'formats': formats, 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, + 'uploader': uploader or channel, + 'uploader_id': uploader_id or channel_id, + 'channel': channel, + 'channel_id': channel_id, 'timestamp': timestamp, 'like_count': like_count, 'dislike_count': dislike_count, From d8a8214fc200ee9697dfe57513cf759117ad88e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:53:01 +0700 Subject: [PATCH 045/132] [extractor/common] Fix typos --- youtube_dl/extractor/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8eab5947f..2dbf81e6e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -212,7 +212,7 @@ class InfoExtractor(object): uploader_id: Nickname or id of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader. channel: Full name of the channel the video is uploaded on. - Note that channel fields may or may noy repeat uploader + Note that channel fields may or may not repeat uploader fields. This depends on a particular extractor. channel_id: Id of the channel. channel_url: Full URL to a channel webpage. 
@@ -1706,9 +1706,9 @@ class InfoExtractor(object): # However, this is not always respected, for example, [2] # contains EXT-X-STREAM-INF tag which references AUDIO # rendition group but does not have CODECS and despite - # referencing audio group an audio group, it represents - # a complete (with audio and video) format. So, for such cases - # we will ignore references to rendition groups and treat them + # referencing an audio group it represents a complete + # (with audio and video) format. So, for such cases we will + # ignore references to rendition groups and treat them # as complete formats. if audio_group_id and codecs and f.get('vcodec') != 'none': audio_group = groups.get(audio_group_id) From c631db7e046e9b33e872088a232f793b624b5006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 02:04:43 +0700 Subject: [PATCH 046/132] [pornhub] Extract upload date (closes #17574) --- youtube_dl/extractor/pornhub.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 6782848d9..19eaf389f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor): 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', + 'upload_date': '20130628', 'duration': 361, 'view_count': int, 'like_count': int, @@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor): 'ext': 'mp4', 'title': '重庆婷婷女王足交', 'uploader': 'Unknown', + 'upload_date': '20150213', 'duration': 1753, 'view_count': int, 'like_count': int, @@ -237,8 +239,14 @@ class PornHubIE(InfoExtractor): video_urls.append((video_url, None)) video_urls_set.add(video_url) + upload_date = None formats = [] for video_url, height in video_urls: + if not upload_date: + upload_date = self._search_regex( + r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) + if upload_date: + upload_date = 
upload_date.replace('/', '') tbr = None mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', video_url) if mobj: @@ -278,6 +286,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, 'uploader': video_uploader, + 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, 'duration': duration, From 2fadfc60a735e537100e59f4c3ac2ecc3ccdd522 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 15 Sep 2018 06:30:57 +0100 Subject: [PATCH 047/132] [vimeo] redirect to feature url only in the case of a trailer(closes #14591) --- youtube_dl/extractor/vimeo.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 95d368cc1..0a9239b62 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -571,19 +571,23 @@ class VimeoIE(VimeoBaseInfoExtractor): if config.get('view') == 4: config = self._verify_player_video_password(redirect_url, video_id) + vod = config.get('video', {}).get('vod', {}) + def is_rented(): if '>You rented this title.<' in webpage: return True if config.get('user', {}).get('purchased'): return True - label = try_get( - config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) - if label and label.startswith('You rented this'): - return True + for purchase_option in vod.get('purchase_options', []): + if purchase_option.get('purchased'): + return True + label = purchase_option.get('label_string') + if label and (label.startswith('You rented this') or label.endswith(' remaining')): + return True return False - if is_rented(): - feature_id = config.get('video', {}).get('vod', {}).get('feature_id') + if is_rented() and vod.get('is_trailer'): + feature_id = vod.get('feature_id') if feature_id and not data.get('force_feature_id', False): return self.url_result(smuggle_url( 'https://player.vimeo.com/player/%s' % feature_id, From f9982fec6e4e9c710bd43b3eee175d4cba492be6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 16 Sep 2018 23:52:27 +0700 Subject: [PATCH 048/132] [vrv] Make format ids deterministic --- youtube_dl/extractor/vrv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 921e9e172..ac0819c7c 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -90,7 +90,13 @@ class VRVIE(VRVBaseIE): def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): if not url or stream_format not in ('hls', 'dash'): return [] - stream_id = hardsub_lang or audio_lang + assert audio_lang or hardsub_lang + stream_id_list = [] + if audio_lang: + stream_id_list.append('audio-%s' % audio_lang) + if hardsub_lang: + stream_id_list.append('hardsub-%s' % hardsub_lang) + stream_id = '-'.join(stream_id_list) format_id = '%s-%s' % (stream_format, stream_id) if stream_format == 'hls': adaptive_formats = self._extract_m3u8_formats( From 62df9886d6772a7b5c413c596f8614c72f4aeded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 16 Sep 2018 23:54:25 +0700 Subject: [PATCH 049/132] [crunchyroll] Prefer hardsubless formats and formats in locale language --- youtube_dl/extractor/crunchyroll.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ba8b9fa7e..af786d096 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -445,6 +445,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage, 'vilos media', default='{}'), video_id) media_metadata = media.get('metadata') or {} + language = self._search_regex( + r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1', + webpage, 'language', default=None, group='lang') + video_title = self._html_search_regex( 
r'(?s)]*>((?:(?!]+itemprop=["\']title["\'][^>]*>(?:(?!', webpage, 'video_title') @@ -466,9 +470,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text formats = [] for stream in media.get('streams', []): - formats.extend(self._extract_vrv_formats( + audio_lang = stream.get('audio_lang') + hardsub_lang = stream.get('hardsub_lang') + vrv_formats = self._extract_vrv_formats( stream.get('url'), video_id, stream.get('format'), - stream.get('audio_lang'), stream.get('hardsub_lang'))) + audio_lang, hardsub_lang) + for f in vrv_formats: + if not hardsub_lang: + f['preference'] = 1 + language_preference = 0 + if audio_lang == language: + language_preference += 1 + if hardsub_lang == language: + language_preference += 1 + if language_preference: + f['language_preference'] = language_preference + formats.extend(vrv_formats) if not formats: available_fmts = [] for a, fmt in re.findall(r'(]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): @@ -557,7 +574,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'ext': 'flv', }) formats.append(format_info) - self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) + self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps')) metadata = self._call_rpc_api( 'VideoPlayer_GetMediaMetadata', video_id, From 9c60f146a02673e65700bc025bc4fa460c6f0d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:13:39 +0700 Subject: [PATCH 050/132] [youtube] Don't pollute default query dict (closes #17593) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2fe074cb4..e80e36f98 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -259,7 +259,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return True def _download_webpage_handle(self, *args, 
**kwargs): - kwargs.setdefault('query', {})['disable_polymer'] = 'true' + query = kwargs.get('query', {}).copy() + query['disable_polymer'] = 'true' + kwargs['query'] = query return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) From ab497bcf571d3848422bdf0f297e77a2823f3812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:14:28 +0700 Subject: [PATCH 051/132] [twitch] Don't pollute default headers dict --- youtube_dl/extractor/twitch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 26661fdf6..401615683 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -51,7 +51,9 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _call_api(self, path, item_id, *args, **kwargs): - kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID + headers = kwargs.get('headers', {}).copy() + headers['Client-ID'] = self._CLIENT_ID + kwargs['headers'] = headers response = self._download_json( '%s/%s' % (self._API_BASE, path), item_id, *args, **compat_kwargs(kwargs)) From 908c97796b7aad7b1165580fcb5a2fcdd76ae3a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:14:53 +0700 Subject: [PATCH 052/132] [udemy] Don't pollute default headers dict --- youtube_dl/extractor/udemy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 79c45f80e..105826e9b 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -122,7 +122,9 @@ class UdemyIE(InfoExtractor): raise ExtractorError(error_str, expected=True) def _download_webpage_handle(self, *args, **kwargs): - kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + headers = 
kwargs.get('headers', {}).copy() + headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + kwargs['headers'] = headers return super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) From ecba80c87cfe6605df8b792ba9cb9985ee08407e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:15:27 +0700 Subject: [PATCH 053/132] [adobepass] Don't pollute default headers dict --- youtube_dl/extractor/adobepass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index b83b51efb..1cf2dcbf3 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1325,8 +1325,8 @@ class AdobePassIE(InfoExtractor): _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' def _download_webpage_handle(self, *args, **kwargs): - headers = kwargs.get('headers', {}) - headers.update(self.geo_verification_headers()) + headers = self.geo_verification_headers() + headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers return super(AdobePassIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) From 575a2bdd70bf4f18f71188e4d0a30b1713d26a1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 18 Sep 2018 01:44:55 +0700 Subject: [PATCH 054/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index d184f69ee..b2b2d90a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version + +Core ++ [extractor/common] Introduce channel meta fields + +Extractors +* [adobepass] Don't pollute default headers dict +* [udemy] Don't pollute default headers dict +* [twitch] Don't pollute default headers dict +* [youtube] Don't pollute default query dict (#17593) +* [crunchyroll] Prefer hardsubless formats and 
formats in locale language +* [vrv] Make format ids deterministic +* [vimeo] Fix ondemand playlist extraction (#14591) ++ [pornhub] Extract upload date (#17574) ++ [porntube] Extract channel meta fields ++ [vimeo] Extract channel meta fields ++ [youtube] Extract channel meta fields (#9676, #12939) +* [porntube] Fix extraction (#17541) +* [asiancrush] Fix extraction (#15630) ++ [twitch:clips] Extend URL regular expression (closes #17559) ++ [vzaar] Add support for HLS +* [tube8] Fix metadata extraction (#17520) +* [eporner] Extract JSON-LD (#17519) + + version 2018.09.10 Core From 68fc843e4b064ce8e1eb40732a62d5240217b056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 18 Sep 2018 01:46:36 +0700 Subject: [PATCH 055/132] release 2018.09.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f41266f32..a4602287a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.10 +[debug] youtube-dl version 2018.09.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b2b2d90a1..800ece790 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.18 Core + [extractor/common] Introduce channel meta fields diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b078c4993..2b3b584a4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.10' +__version__ = '2018.09.18' From d5844654f0014a7f94232dd0dbcc41cfe1949450 Mon Sep 17 00:00:00 2001 From: Leonardo Taccari Date: Wed, 19 Sep 2018 04:48:39 +0200 Subject: [PATCH 056/132] [raiplay:playlist] Remove a debug leftover print() --- youtube_dl/extractor/rai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index f916b2619..548a6553b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -274,7 +274,6 @@ class RaiPlayPlaylistIE(InfoExtractor): ('programma', 'nomeProgramma'), webpage, 'title') description = unescapeHTML(self._html_search_meta( ('description', 'og:description'), 
webpage, 'description')) - print(description) entries = [] for mobj in re.finditer( From 0cb7051de6b2168f3f01da56738591304cce8d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 19 Sep 2018 22:16:43 +0700 Subject: [PATCH 057/132] [popcorntv] Remove debug output --- youtube_dl/extractor/popcorntv.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/popcorntv.py b/youtube_dl/extractor/popcorntv.py index ac901f426..9f834fb6c 100644 --- a/youtube_dl/extractor/popcorntv.py +++ b/youtube_dl/extractor/popcorntv.py @@ -58,8 +58,6 @@ class PopcornTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) timestamp = unified_timestamp(self._html_search_meta( 'uploadDate', webpage, 'timestamp')) - print(self._html_search_meta( - 'duration', webpage)) duration = int_or_none(self._html_search_meta( 'duration', webpage), invscale=60) view_count = int_or_none(self._html_search_meta( From cc03ecd07be5b5a82f1566b05b52dac67faa2944 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Sun, 23 Sep 2018 16:34:47 +0200 Subject: [PATCH 058/132] [zattoo] Fix extraction (closes #17175) --- youtube_dl/extractor/zattoo.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index fb167c198..9c9024799 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -93,28 +93,30 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( - '%s/zapi/program/details' % self._HOST_URL, + '%s/zapi/v2/cached/program/power_details/%s' % ( + self._HOST_URL, self._power_guide_hash), video_id, 'Downloading video information', query={ - 'program_id': video_id, - 'complete': True + 'program_ids': video_id, + 'complete': True, }) - p = data['program'] + p = data['programs'][0] cid = p['cid'] info_dict = { 'id': video_id, - 'title': p.get('title') or p['episode_title'], - 
'description': p.get('description'), - 'thumbnail': p.get('image_url'), + 'title': p.get('t') or p['et'], + 'description': p.get('d'), + 'thumbnail': p.get('i_url'), 'creator': p.get('channel_name'), - 'episode': p.get('episode_title'), - 'episode_number': int_or_none(p.get('episode_number')), - 'season_number': int_or_none(p.get('season_number')), + 'episode': p.get('et'), + 'episode_number': int_or_none(p.get('e_no')), + 'season_number': int_or_none(p.get('s_no')), 'release_year': int_or_none(p.get('year')), - 'categories': try_get(p, lambda x: x['categories'], list), + 'categories': try_get(p, lambda x: x['c'], list), + 'tags': try_get(p, lambda x: x['g'], list) } return cid, info_dict From e961b9456522483e0e3b88e92361dacd0e9c5393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Sep 2018 23:30:18 +0700 Subject: [PATCH 059/132] [zattoo] Add support for more zattoo platform sites --- youtube_dl/extractor/extractors.py | 12 ++ youtube_dl/extractor/zattoo.py | 178 ++++++++++++++++++++++++++--- 2 files changed, 172 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6e95b8bce..f69e40e1d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1459,8 +1459,20 @@ from .youtube import ( from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zattoo import ( + BBVTVIE, + EinsUndEinsTVIE, + EWETVIE, + GlattvisionTVIE, + MNetTVIE, + MyVisionTVIE, + NetPlusIE, + OsnatelTVIE, + QuantumTVIE, QuicklineIE, QuicklineLiveIE, + SAKTVIE, + VTXTVIE, + WalyTVIE, ZattooIE, ZattooLiveIE, ) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 9c9024799..bbe0aecb6 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -18,12 +18,12 @@ from ..utils import ( ) -class ZattooBaseIE(InfoExtractor): - _NETRC_MACHINE = 'zattoo' - _HOST_URL = 'https://zattoo.com' - +class 
ZattooPlatformBaseIE(InfoExtractor): _power_guide_hash = None + def _host_url(self): + return 'https://%s' % self._HOST + def _login(self): username, password = self._get_login_info() if not username or not password: @@ -33,13 +33,13 @@ class ZattooBaseIE(InfoExtractor): try: data = self._download_json( - '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in', + '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in', data=urlencode_postdata({ 'login': username, 'password': password, 'remember': 'true', }), headers={ - 'Referer': '%s/login' % self._HOST_URL, + 'Referer': '%s/login' % self._host_url(), 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', }) except ExtractorError as e: @@ -53,7 +53,7 @@ class ZattooBaseIE(InfoExtractor): def _real_initialize(self): webpage = self._download_webpage( - self._HOST_URL, None, 'Downloading app token') + self._host_url(), None, 'Downloading app token') app_token = self._html_search_regex( r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1', webpage, 'app token', group='token') @@ -62,7 +62,7 @@ class ZattooBaseIE(InfoExtractor): # Will setup appropriate cookies self._request_webpage( - '%s/zapi/v2/session/hello' % self._HOST_URL, None, + '%s/zapi/v2/session/hello' % self._host_url(), None, 'Opening session', data=urlencode_postdata({ 'client_app_token': app_token, 'uuid': compat_str(uuid4()), @@ -75,7 +75,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid(self, video_id, channel_name): channel_groups = self._download_json( - '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL, + '%s/zapi/v2/cached/channels/%s' % (self._host_url(), self._power_guide_hash), video_id, 'Downloading channel list', query={'details': False})['channel_groups'] @@ -94,7 +94,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( '%s/zapi/v2/cached/program/power_details/%s' % ( - self._HOST_URL, self._power_guide_hash), + self._host_url(), 
self._power_guide_hash), video_id, 'Downloading video information', query={ @@ -128,11 +128,11 @@ class ZattooBaseIE(InfoExtractor): if is_live: postdata_common.update({'timeshift': 10800}) - url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid) + url = '%s/zapi/watch/live/%s' % (self._host_url(), cid) elif record_id: - url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id) + url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id) else: - url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) + url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id) formats = [] for stream_type in ('dash', 'hls', 'hls5', 'hds'): @@ -203,13 +203,13 @@ class ZattooBaseIE(InfoExtractor): return info_dict -class QuicklineBaseIE(ZattooBaseIE): +class QuicklineBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'quickline' - _HOST_URL = 'https://mobiltv.quickline.com' + _HOST = 'mobiltv.quickline.com' class QuicklineIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P[^/]+)/(?P[0-9]+)' % re.escape(QuicklineBaseIE._HOST) _TEST = { 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', @@ -222,7 +222,7 @@ class QuicklineIE(QuicklineBaseIE): class QuicklineLiveIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P[^/]+)' % re.escape(QuicklineBaseIE._HOST) _TEST = { 'url': 'https://mobiltv.quickline.com/watch/srf1', @@ -238,8 +238,18 @@ class QuicklineLiveIE(QuicklineBaseIE): return self._extract_video(channel_name, video_id, is_live=True) +class ZattooBaseIE(ZattooPlatformBaseIE): + _NETRC_MACHINE = 'zattoo' + _HOST = 'zattoo.com' + + +def _make_valid_url(tmpl, host): + return tmpl % re.escape(host) + + class ZattooIE(ZattooBaseIE): - _VALID_URL = 
r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST) # Since regular videos are only available for 7 days and recorded videos # are only available for a specific user, we cannot have detailed tests. @@ -271,3 +281,135 @@ class ZattooLiveIE(ZattooBaseIE): def _real_extract(self, url): channel_name = video_id = self._match_id(url) return self._extract_video(channel_name, video_id, is_live=True) + + +class NetPlusIE(ZattooIE): + _NETRC_MACHINE = 'netplus' + _HOST = 'netplus.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.netplus.tv/watch/abc/123-abc', + 'only_matching': True, + }] + + +class MNetTVIE(ZattooIE): + _NETRC_MACHINE = 'mnettv' + _HOST = 'tvplus.m-net.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class WalyTVIE(ZattooIE): + _NETRC_MACHINE = 'walytv' + _HOST = 'player.waly.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.player.waly.tv/watch/abc/123-abc', + 'only_matching': True, + }] + + +class BBVTVIE(ZattooIE): + _NETRC_MACHINE = 'bbvtv' + _HOST = 'bbv-tv.net' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.bbv-tv.net/watch/abc/123-abc', + 'only_matching': True, + }] + + +class VTXTVIE(ZattooIE): + _NETRC_MACHINE = 'vtxtv' + _HOST = 'vtxtv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.vtxtv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class MyVisionTVIE(ZattooIE): + _NETRC_MACHINE = 'myvisiontv' + _HOST = 'myvisiontv.ch' + _VALID_URL = 
_make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.myvisiontv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class GlattvisionTVIE(ZattooIE): + _NETRC_MACHINE = 'glattvisiontv' + _HOST = 'iptv.glattvision.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class SAKTVIE(ZattooIE): + _NETRC_MACHINE = 'saktv' + _HOST = 'saktv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.saktv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class EWETVIE(ZattooIE): + _NETRC_MACHINE = 'ewetv' + _HOST = 'tvonline.ewe.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class QuantumTVIE(ZattooIE): + _NETRC_MACHINE = 'quantumtv' + _HOST = 'quantum-tv.com' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.quantum-tv.com/watch/abc/123-abc', + 'only_matching': True, + }] + + +class OsnatelTVIE(ZattooIE): + _NETRC_MACHINE = 'osnateltv' + _HOST = 'onlinetv.osnatel.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class EinsUndEinsTVIE(ZattooIE): + _NETRC_MACHINE = '1und1tv' + _HOST = '1und1.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.1und1.tv/watch/abc/123-abc', + 'only_matching': True, + }] From ca05dfd666db5424b9e52e546d70a1536b2498b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Sep 2018 00:14:49 +0700 Subject: [PATCH 060/132] [youtube] Add support for invidio.us (closes #17613) --- youtube_dl/extractor/youtube.py | 10 
+++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e80e36f98..78203ef84 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -349,6 +349,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| + (?:www\.)?invidio\.us/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1068,6 +1069,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 'only_matching': True, }, + { + 'url': 'https://invidio.us/watch?v=BaW_jenozKc', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -2419,7 +2424,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com channels' - _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P[0-9A-Za-z_-]+)' + _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P[0-9A-Za-z_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' _VIDEO_RE = r'(?:title="(?P[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?' 
IE_NAME = 'youtube:channel' @@ -2440,6 +2445,9 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, + }, { + 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', + 'only_matching': True, }] @classmethod From 27c584fe72150209fe681c57525258083dda9549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 25 Sep 2018 23:43:41 +0700 Subject: [PATCH 061/132] [README.md] Document channel meta fields for output template --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index dd068a462..fdd115c9b 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f - `timestamp` (numeric): UNIX timestamp of the moment the video became available - `upload_date` (string): Video upload date (YYYYMMDD) - `uploader_id` (string): Nickname or id of the video uploader + - `channel` (string): Full name of the channel the video is uploaded on + - `channel_id` (string): Id of the channel - `location` (string): Physical location where the video was filmed - `duration` (numeric): Length of the video in seconds - `view_count` (numeric): How many users have watched the video on the platform From f6e883b77b58b337f998ddeaa607f38b3348bef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 05:38:41 +0700 Subject: [PATCH 062/132] [mediaset] Improve embed support (closes #17668) --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/mediaset.py | 38 +++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 76ef01332..2a48667f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor): wapo_urls, video_id, video_title, 
ie=WashingtonPostIE.ie_key()) # Look for Mediaset embeds - mediaset_urls = MediasetIE._extract_urls(webpage) + mediaset_urls = MediasetIE._extract_urls(self, webpage) if mediaset_urls: return self.playlist_from_matches( mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 57f97409d..df3748798 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -4,6 +4,11 @@ from __future__ import unicode_literals import re from .theplatform import ThePlatformBaseIE +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE): }] @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1', - webpage)] + def _extract_urls(ie, webpage): + def _qs(url): + return compat_parse_qs(compat_urllib_parse_urlparse(url).query) + + def _program_guid(qs): + return qs.get('programGuid', [None])[0] + + entries = [] + for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1', + webpage): + embed_url = mobj.group('url') + embed_qs = _qs(embed_url) + program_guid = _program_guid(embed_qs) + if program_guid: + entries.append(embed_url) + continue + video_id = embed_qs.get('id', [None])[0] + if not video_id: + continue + urlh = ie._request_webpage( + embed_url, video_id, note='Following embed URL redirect') + embed_url = compat_str(urlh.geturl()) + program_guid = _program_guid(_qs(embed_url)) + if program_guid: + entries.append(embed_url) + return entries def _real_extract(self, url): guid = self._match_id(url) From 9fe38aa0f939acc9ca35ac5c1e7ee73b269fc23b Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 09:27:40 +0700 Subject: [PATCH 063/132] [pluralsight] Fix subtitles extraction (closes #17671) --- youtube_dl/extractor/pluralsight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 1257841e4..ec67381bb 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -213,7 +213,7 @@ query viewClip { def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id): captions_post = { 'a': author, - 'cn': clip_idx, + 'cn': int(clip_idx), 'lc': lang, 'm': name, } From bd124be32e41c96f7e8bdd7938d9763f1fc7110c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 11:56:15 +0700 Subject: [PATCH 064/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 800ece790..15b875297 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version <unreleased> + +Extractors +* [pluralsight] Fix subtitles extraction (#17671) +* [mediaset] Improve embed support (#17668) ++ [youtube] Add support for invidio.us (#17613) ++ [zattoo] Add support for more zattoo platform sites +* [zattoo] Fix extraction (#17175, #17542) + + version 2018.09.18 Core From 3fab5ca85a9582b4af78b5d3d59ea4d2ad9a7c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 11:58:25 +0700 Subject: [PATCH 065/132] release 2018.09.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 12 ++++++++++++ youtube_dl/version.py | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a4602287a..ed3e0a157 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure 
you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.18 +[debug] youtube-dl version 2018.09.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 15b875297..241712037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.09.26 Extractors * [pluralsight] Fix subtitles extraction (#17671) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9b8601751..736ab6da7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,6 +98,7 @@ - **bbc.co.uk:article**: BBC articles - 
**bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **BBVTV** - **Beatport** - **Beeg** - **BehindKink** @@ -251,6 +252,7 @@ - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson - **eHow** + - **EinsUndEinsTV** - **Einthusan** - **eitb.tv** - **EllenTube** @@ -268,6 +270,7 @@ - **EsriVideo** - **Europa** - **EveryonesMixtape** + - **EWETV** - **ExpoTV** - **Expressen** - **ExtremeTube** @@ -327,6 +330,7 @@ - **Gfycat** - **GiantBomb** - **Giga** + - **GlattvisionTV** - **Glide**: Glide mobile video messages (glide.me) - **Globo** - **GloboArticle** @@ -494,6 +498,7 @@ - **Mixer:vod** - **MLB** - **Mnet** + - **MNetTV** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **Mojvideo** @@ -525,6 +530,7 @@ - **Myvi** - **MyVidster** - **MyviEmbed** + - **MyVisionTV** - **n-tv.de** - **natgeo** - **natgeo:episodeguide** @@ -550,6 +556,7 @@ - **netease:program**: 网易云音乐 - 电台节目 - **netease:singer**: 网易云音乐 - 歌手 - **netease:song**: 网易云音乐 + - **NetPlus** - **Netzkino** - **Newgrounds** - **NewgroundsPlaylist** @@ -626,6 +633,7 @@ - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek + - **OsnatelTV** - **PacktPub** - **PacktPubCourse** - **PandaTV**: 熊猫TV @@ -686,6 +694,7 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **QuantumTV** - **Quickline** - **QuicklineLive** - **R7** @@ -753,6 +762,7 @@ - **safari**: safaribooksonline.com online video - **safari:api** - **safari:course**: safaribooksonline.com online courses + - **SAKTV** - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au @@ -1035,12 +1045,14 @@ - **vrv** - **vrv:series** - **VShare** + - **VTXTV** - **vube**: Vube.com - **VuClip** - **VVVVID** - **VyboryMos** - **Vzaar** - **Walla** + - **WalyTV** - **washingtonpost** - **washingtonpost:article** - **wat.tv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py 
index 2b3b584a4..6f2cc31df 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.18' +__version__ = '2018.09.26' From c0ff60442088ec1ef4f7dddc3bc5aa72c96479f1 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 26 Sep 2018 08:13:16 +0100 Subject: [PATCH 066/132] [hotstar] fix extraction(closes #14694)(closes #14931)(closes #17637) --- youtube_dl/extractor/hotstar.py | 159 +++++++++++++++----------------- 1 file changed, 73 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index d28af36ec..354ac00dc 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -1,10 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import hashlib +import hmac +import time from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, @@ -13,37 +15,40 @@ from ..utils import ( class HotStarBaseIE(InfoExtractor): - _GEO_COUNTRIES = ['IN'] + _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' - def _download_json(self, *args, **kwargs): - response = super(HotStarBaseIE, self)._download_json(*args, **kwargs) - if response['resultCode'] != 'OK': - if kwargs.get('fatal'): - raise ExtractorError( - response['errorDescription'], expected=True) - return None - return response['resultObj'] - - def _download_content_info(self, content_id): - return self._download_json( - 'https://account.hotstar.com/AVS/besc', content_id, query={ - 'action': 'GetAggregatedContentDetails', - 'appVersion': '5.0.40', - 'channel': 'PCTV', - 'contentId': content_id, - })['contentInfo'][0] + def _call_api(self, path, video_id, query_name='contentId'): + st = int(time.time()) + exp = st + 6000 + auth = 'st=%d~exp=%d~acl=/*' % (st, exp) + auth += '~hmac=' + 
hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() + response = self._download_json( + 'https://api.hotstar.com/' + path, + video_id, headers={ + 'hotstarauth': auth, + 'x-country-code': 'IN', + 'x-platform-code': 'JIO', + }, query={ + query_name: video_id, + 'tas': 10000, + }) + if response['statusCode'] != 'OK': + raise ExtractorError( + response['body']['message'], expected=True) + return response['body']['results'] class HotStarIE(HotStarBaseIE): + IE_NAME = 'hotstar' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' _TESTS = [{ - 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', + 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB', + 'title': 'Can You Not Spread Rumours?', 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', - 'timestamp': 1447227000, + 'timestamp': 1447248600, 'upload_date': '20151111', 'duration': 381, }, @@ -58,47 +63,43 @@ class HotStarIE(HotStarBaseIE): 'url': 'http://www.hotstar.com/1000000515', 'only_matching': True, }] + _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_content_info(video_id) + webpage = self._download_webpage(url, video_id) + app_state = self._parse_json(self._search_regex( + r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', + webpage, 'app state'), video_id) + video_data = list(app_state.values())[0]['initialState']['contentData']['content'] - title = video_data['episodeTitle'] + title = video_data['title'] - if video_data.get('encrypted') == 'Y': + if video_data.get('drmProtected'): raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - for f in ('JIO',): - format_data = self._download_json( - 'http://getcdn.hotstar.com/AVS/besc', - video_id, 'Downloading %s JSON metadata' % f, - fatal=False, query={ - 'action': 'GetCDN', - 'asJson': 'Y', - 'channel': f, 
- 'id': video_id, - 'type': 'VOD', - }) - if format_data: - format_url = format_data.get('src') - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - # produce broken files - continue - else: - formats.append({ - 'url': format_url, - 'width': int_or_none(format_data.get('width')), - 'height': int_or_none(format_data.get('height')), - }) + format_data = self._call_api('h/v1/play', video_id)['item'] + format_url = format_data['playbackUrl'] + ext = determine_ext(format_url) + if ext == 'm3u8': + try: + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls')) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted(countries=['IN']) + raise + elif ext == 'f4m': + # produce broken files + pass + else: + formats.append({ + 'url': format_url, + 'width': int_or_none(format_data.get('width')), + 'height': int_or_none(format_data.get('height')), + }) self._sort_formats(formats) return { @@ -106,57 +107,43 @@ class HotStarIE(HotStarBaseIE): 'title': title, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(video_data.get('broadcastDate')), + 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 'formats': formats, + 'channel': video_data.get('channelName'), + 'channel_id': video_data.get('channelId'), + 'series': video_data.get('showName'), + 'season': video_data.get('seasonName'), + 'season_number': int_or_none(video_data.get('seasonNo')), + 'season_id': video_data.get('seasonId'), 'episode': title, - 'episode_number': int_or_none(video_data.get('episodeNumber')), - 'series': video_data.get('contentTitle'), + 'episode_number': int_or_none(video_data.get('episodeNo')), } class 
HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' _TESTS = [{ - 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993', + 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'info_dict': { - 'id': '14812', + 'id': '3_2_26', }, - 'playlist_mincount': 75, + 'playlist_mincount': 20, }, { - 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998', + 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'only_matching': True, }] - _ITEM_TYPES = { - 'episodes': 'EPISODE', - 'popular-clips': 'CLIPS', - } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - base_url = mobj.group('url') - content_id = mobj.group('content_id') - playlist_type = mobj.group('type') + playlist_id = self._match_id(url) - content_info = self._download_content_info(content_id) - playlist_id = compat_str(content_info['categoryId']) - - collection = self._download_json( - 'https://search.hotstar.com/AVS/besc', playlist_id, query={ - 'action': 'SearchContents', - 'appVersion': '5.0.40', - 'channel': 'PCTV', - 'moreFilters': 'series:%s;' % playlist_id, - 'query': '*', - 'searchOrder': 'last_broadcast_date desc,year desc,title asc', - 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'), - }) + collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId') entries = [ self.url_result( - '%s/_/%s' % (base_url, video['contentId']), + 'https://www.hotstar.com/%s' % video['contentId'], ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in collection['response']['docs'] + for video in collection['assets']['items'] if video.get('contentId')] return self.playlist_result(entries, playlist_id) From f96c37c1866bb00bb3aecd4a2518b4b9ffff6f2f Mon Sep 17 00:00:00 2001 From: 
Remita Amine <remitamine@gmail.com> Date: Fri, 28 Sep 2018 15:13:25 +0100 Subject: [PATCH 067/132] [spike] fix Paramount Network extraction(closes #17677) --- youtube_dl/extractor/spike.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index e76522b45..6090e0066 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -44,3 +44,10 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] + + def _extract_mgid(self, webpage): + cs = self._parse_json(self._search_regex( + r'window\.__DATA__\s*=\s*({.+})', + webpage, 'data'), None)['children'] + c = next(c for c in cs if c.get('type') == 'VideoPlayer') + return c['props']['media']['video']['config']['uri'] From e855c8c28dcc9fbdd4639ae9dfe1eb2d4e66b418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 21:13:43 +0700 Subject: [PATCH 068/132] [vimeo] Add another config regex (closes #17690) --- youtube_dl/extractor/vimeo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 0a9239b62..88f4d9979 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -551,6 +551,7 @@ class VimeoIE(VimeoBaseInfoExtractor): else: config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;') + config_re.append(r'\bconfig\s*=\s*({.+?})\s*;') config = self._search_regex(config_re, webpage, 'info section', flags=re.DOTALL) config = json.loads(config) From 57fc258ddbbb247c4f4d0757a92b8cd3bc9bbe5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 21:45:24 +0700 Subject: [PATCH 069/132] [pluralsight] Fix subtitles extraction (closes #17726, closes #17728) --- youtube_dl/extractor/pluralsight.py | 34 
++++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index ec67381bb..daf172570 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -210,18 +210,26 @@ query viewClip { raise ExtractorError('Unable to log in') - def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id): - captions_post = { - 'a': author, - 'cn': int(clip_idx), - 'lc': lang, - 'm': name, - } - captions = self._download_json( - '%s/player/retrieve-captions' % self._API_BASE, video_id, - 'Downloading captions JSON', 'Unable to download captions JSON', - fatal=False, data=json.dumps(captions_post).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=utf-8'}) + def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): + captions = None + if clip_id: + captions = self._download_json( + '%s/transcript/api/v1/caption/json/%s/%s' + % (self._API_BASE, clip_id, lang), video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False) + if not captions: + captions_post = { + 'a': author, + 'cn': int(clip_idx), + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/player/retrieve-captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) if captions: return { lang: [{ @@ -413,7 +421,7 @@ query viewClip { # TODO: other languages? 
subtitles = self.extract_subtitles( - author, clip_idx, 'en', name, duration, display_id) + author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) return { 'id': clip_id, From 1400387b91714490edfced30c7504dced31c1093 Mon Sep 17 00:00:00 2001 From: Enes <enessolak99@gmail.com> Date: Mon, 1 Oct 2018 17:48:59 +0300 Subject: [PATCH 070/132] [openload] Add support for oload.cloud (closes #17710) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d264fe206..dc01b6346 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -307,6 +307,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.download/f/kUEfGclsU9o', 'only_matching': True, + }, { + 'url': 'https://oload.cloud/f/4ZDnBXRWiB8', + 'only_matching': True, }, { # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', From 3d45b00ef4481d0a36a14dceedeac3ab92424efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 22:05:18 +0700 Subject: [PATCH 071/132] [jamendo] Add support for licensing.jamendo.com (closes #17724) --- youtube_dl/extractor/jamendo.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 595d7a5b7..c21827618 100644 --- 
a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -26,8 +26,15 @@ class JamendoBaseIE(InfoExtractor): class JamendoIE(JamendoBaseIE): - _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + licensing\.jamendo\.com/[^/]+| + (?:www\.)?jamendo\.com + ) + /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) + ''' + _TESTS = [{ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', 'md5': '6e9e82ed6db98678f171c25a8ed09ffd', 'info_dict': { @@ -40,14 +47,19 @@ class JamendoIE(JamendoBaseIE): 'duration': 210, 'thumbnail': r're:^https?://.*\.jpg' } - } + }, { + 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', + 'only_matching': True, + }] def _real_extract(self, url): mobj = self._VALID_URL_RE.match(url) track_id = mobj.group('id') display_id = mobj.group('display_id') - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage( + 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), + display_id) title, artist, track = self._extract_meta(webpage) From f18e58c5319fb71159b5e717e7a52724eadff673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 23:29:24 +0700 Subject: [PATCH 072/132] [philharmoniedeparis] Fix extraction and add support for pad.philharmoniedeparis.fr (closes #17705) --- youtube_dl/extractor/philharmoniedeparis.py | 118 ++++++++++++-------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f1008ae51..f723a2b3b 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -2,31 +2,38 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - float_or_none, - int_or_none, - parse_iso8601, - 
xpath_text, + try_get, + urljoin, ) class PhilharmonieDeParisIE(InfoExtractor): IE_DESC = 'Philharmonie de Paris' - _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + pad\.philharmoniedeparis\.fr/doc/CIMU/ + ) + (?P<id>\d+) + ''' _TESTS = [{ + 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower', + 'md5': 'a0a4b195f544645073631cbec166a2c2', + 'info_dict': { + 'id': '1086697', + 'ext': 'mp4', + 'title': 'Jazz à la Villette : Knower', + }, + }, { 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', 'info_dict': { 'id': '1032066', - 'ext': 'flv', - 'title': 'md5:d1f5585d87d041d07ce9434804bc8425', - 'timestamp': 1428179400, - 'upload_date': '20150404', - 'duration': 6592.278, + 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0', }, - 'params': { - # rtmp download - 'skip_download': True, - } + 'playlist_mincount': 2, }, { 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', 'only_matching': True, @@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor): 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, }] + _LIVE_URL = 'https://live.philharmoniedeparis.fr' def _real_extract(self, url): video_id = self._match_id(url) - concert = self._download_xml( - 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, - video_id).find('./concert') + config = self._download_json( + '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={ + 'id': video_id, + 'lang': 'fr-FR', + }) - formats = [] - info_dict = { - 'id': video_id, - 'title': xpath_text(concert, './titre', 'title', fatal=True), - 'formats': formats, - } - - fichiers = concert.find('./fichiers') - stream = fichiers.attrib['serveurstream'] - for fichier in fichiers.findall('./fichier'): - 
info_dict['duration'] = float_or_none(fichier.get('timecodefin')) - for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): - format_url = fichier.get('url%s' % suffix) - if not format_url: + def extract_entry(source): + if not isinstance(source, dict): + return + title = source.get('title') + if not title: + return + files = source.get('files') + if not isinstance(files, dict): + return + format_urls = set() + formats = [] + for format_id in ('mobile', 'desktop'): + format_url = try_get( + files, lambda x: x[format_id]['file'], compat_str) + if not format_url or format_url in format_urls: continue - formats.append({ - 'url': stream, - 'play_path': format_url, - 'ext': 'flv', - 'format_id': format_id, - 'width': int_or_none(concert.get('largeur%s' % suffix)), - 'height': int_or_none(concert.get('hauteur%s' % suffix)), - 'quality': quality, - }) - self._sort_formats(formats) + format_urls.add(format_url) + m3u8_url = urljoin(self._LIVE_URL, format_url) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + if not formats: + return + self._sort_formats(formats) + return { + 'title': title, + 'formats': formats, + } - date, hour = concert.get('date'), concert.get('heure') - if date and hour: - info_dict['timestamp'] = parse_iso8601( - '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) - elif date: - info_dict['upload_date'] = date + thumbnail = urljoin(self._LIVE_URL, config.get('image')) - return info_dict + info = extract_entry(config) + if info: + info.update({ + 'id': video_id, + 'thumbnail': thumbnail, + }) + return info + + entries = [] + for num, chapter in enumerate(config['chapters'], start=1): + entry = extract_entry(chapter) + entry['id'] = '%s-%d' % (video_id, num) + entries.append(entry) + + return self.playlist_result(entries, video_id, config.get('title')) From 20e93ea80e3f43b33fa0114307b05d8d630cb884 Mon Sep 17 00:00:00 2001 From: Remita Amine 
<remitamine@gmail.com> Date: Tue, 2 Oct 2018 06:07:06 +0100 Subject: [PATCH 073/132] [hotstar] fix extraction in python 2(closes #17696) --- youtube_dl/extractor/hotstar.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 354ac00dc..bf5717f1b 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -11,6 +11,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + try_get, ) @@ -72,7 +73,11 @@ class HotStarIE(HotStarBaseIE): app_state = self._parse_json(self._search_regex( r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', webpage, 'app state'), video_id) - video_data = list(app_state.values())[0]['initialState']['contentData']['content'] + video_data = {} + for v in app_state.values(): + content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict) + if content and content.get('contentId') == video_id: + video_data = content title = video_data['title'] From 57188f0a0989814b80567e8fdf7df0aef33b9500 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 2 Oct 2018 19:43:06 +0100 Subject: [PATCH 074/132] [crunchyroll] switch to HTTPS for RpcApi(closes #17749) --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index af786d096..045be0ab5 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -45,7 +45,7 @@ class CrunchyrollBaseIE(InfoExtractor): data['req'] = 'RpcApi' + method data = compat_urllib_parse_urlencode(data).encode('utf-8') return self._download_xml( - 'http://www.crunchyroll.com/xml/', + 'https://www.crunchyroll.com/xml/', video_id, note, fatal=False, data=data, headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) From d3b866bcb0eefe6faaffddca35492ef0b668f28d Mon Sep 17 00:00:00 2001 From: Enes 
<enessolak99@gmail.com> Date: Sat, 29 Sep 2018 13:28:56 +0300 Subject: [PATCH 075/132] [dailymotion] Fix extraction (closes #17699) --- youtube_dl/extractor/dailymotion.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 040f0bd02..842d9a259 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -24,6 +24,7 @@ from ..utils import ( str_to_int, unescapeHTML, urlencode_postdata, + try_get, ) @@ -172,7 +173,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor): webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) - metadata = player['metadata'] + metadata = try_get( + player, lambda x: x['metadata'], dict) or self._download_json( + 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) if metadata.get('error', {}).get('type') == 'password_protected': password = self._downloader.params.get('videopassword') From 410fcbd9802b853b63ebb54442d073d779415161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:02:58 +0700 Subject: [PATCH 076/132] [dailymotion] Improve metadata extraction (closes #17706) --- youtube_dl/extractor/dailymotion.py | 32 ++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 842d9a259..1816c559e 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -22,9 +22,11 @@ from ..utils import ( parse_iso8601, sanitized_Request, str_to_int, - unescapeHTML, - urlencode_postdata, try_get, + unescapeHTML, + update_url_query, + url_or_none, + urlencode_postdata, ) @@ -172,15 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 
'player v5', default=None) if player_v5: - player = self._parse_json(player_v5, video_id) - metadata = try_get( - player, lambda x: x['metadata'], dict) or self._download_json( - 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ - 'integration': 'inline', - 'GK_PV5_NEON': '1', - }) + player = self._parse_json(player_v5, video_id, fatal=False) or {} + metadata = try_get(player, lambda x: x['metadata'], dict) + if not metadata: + metadata_url = url_or_none(try_get( + player, lambda x: x['context']['metadata_template_url1'])) + if metadata_url: + metadata_url = metadata_url.replace(':videoId', video_id) + else: + metadata_url = update_url_query( + 'https://www.dailymotion.com/player/metadata/video/%s' + % video_id, { + 'embedder': url, + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) + metadata = self._download_json( + metadata_url, video_id, 'Downloading metadata JSON') - if metadata.get('error', {}).get('type') == 'password_protected': + if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': password = self._downloader.params.get('videopassword') if password: r = int(metadata['id'][1:], 36) From 1a6d14cd7a0bf967ce0262d99be494bb66f46407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:27:14 +0700 Subject: [PATCH 077/132] [pluralsight] Improve authentication (closes #17762) --- youtube_dl/extractor/pluralsight.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index daf172570..eafe56897 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -4,6 +4,7 @@ import collections import json import os import random +import re from .common import InfoExtractor from ..compat import ( @@ -196,7 +197,10 @@ query viewClip { if error: raise ExtractorError('Unable to login: %s' % error, expected=True) - if all(p not in response for p 
in ('__INITIAL_STATE__', '"currentUser"')): + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): BLOCKED = 'Your account has been blocked due to suspicious activity' if BLOCKED in response: raise ExtractorError( From ab811983e8f85655a3ed2d07e8c4e2ff4bceb24f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:29:52 +0700 Subject: [PATCH 078/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 241712037..e2757f891 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version <unreleased> + +Extractors +* [pluralsight] Improve authentication (#17762) +* [dailymotion] Fix extraction (#17699) +* [crunchyroll] Switch to HTTPS for RpcApi (#17749) ++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705) +* [philharmoniedeparis] Fix extraction (#17705) ++ [jamendo] Add support for licensing.jamendo.com (#17724) ++ [openload] Add support for oload.cloud (#17710) +* [pluralsight] Fix subtitles extraction (#17726, #17728) ++ [vimeo] Add another config regular expression (#17690) +* [spike] Fix Paramount Network extraction (#17677) +* [hotstar] Fix extraction (#14694, #14931, #17637) + + version 2018.09.26 Extractors From 0b4b6171543d6714dfdc793a8d17299645413f54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:31:30 +0700 Subject: [PATCH 079/132] release 2018.10.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ed3e0a157..058eb4321 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run 
`youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.26 +[debug] youtube-dl version 2018.10.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e2757f891..86cf489b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.10.05 Extractors * [pluralsight] Improve authentication (#17762) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 736ab6da7..f167a6ddc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -360,7 +360,7 @@ - **HitRecord** - **HornBunny** - **HotNewHipHop** - - **HotStar** + - **hotstar** - 
**hotstar:playlist** - **Howcast** - **HowStuffWorks** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6f2cc31df..7d3f25019 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.26' +__version__ = '2018.10.05' From f30697681d096357b1fcaca36407825ed37d2cdd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 5 Oct 2018 20:11:01 +0100 Subject: [PATCH 080/132] [patreon] fix extraction(closes #14502)(closes #10471) --- youtube_dl/extractor/patreon.py | 160 ++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 72 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 9eb027679..6f73ed68d 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -2,52 +2,63 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + parse_iso8601, +) class PatreonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' - _TESTS = [ - { - 'url': 'http://www.patreon.com/creation?hid=743933', - 'md5': 'e25505eec1053a6e6813b8ed369875cc', - 'info_dict': { - 'id': '743933', - 'ext': 'mp3', - 'title': 'Episode 166: David Smalley of Dogma Debate', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.patreon.com/creation?hid=743933', + 'md5': 'e25505eec1053a6e6813b8ed369875cc', + 'info_dict': { + 'id': '743933', + 'ext': 'mp3', + 'title': 'Episode 166: David Smalley of Dogma Debate', + 'description': 'md5:713b08b772cd6271b9f3906683cfacdf', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', + 'timestamp': 1406473987, + 'upload_date': 
'20140727', }, - { - 'url': 'http://www.patreon.com/creation?hid=754133', - 'md5': '3eb09345bf44bf60451b8b0b81759d0a', - 'info_dict': { - 'id': '754133', - 'ext': 'mp3', - 'title': 'CD 167 Extra', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + }, { + 'url': 'http://www.patreon.com/creation?hid=754133', + 'md5': '3eb09345bf44bf60451b8b0b81759d0a', + 'info_dict': { + 'id': '754133', + 'ext': 'mp3', + 'title': 'CD 167 Extra', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', }, - { - 'url': 'https://www.patreon.com/creation?hid=1682498', - 'info_dict': { - 'id': 'SU4fj_aEMVw', - 'ext': 'mp4', - 'title': 'I\'m on Patreon!', - 'uploader': 'TraciJHines', - 'thumbnail': 're:^https?://.*$', - 'upload_date': '20150211', - 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', - 'uploader_id': 'TraciJHines', - }, - 'params': { - 'noplaylist': True, - 'skip_download': True, - } + 'skip': 'Patron-only content', + }, { + 'url': 'https://www.patreon.com/creation?hid=1682498', + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'uploader': 'TraciJHines', + 'thumbnail': 're:^https?://.*$', + 'upload_date': '20150211', + 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', + 'uploader_id': 'TraciJHines', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, } - ] + }, { + 'url': 'https://www.patreon.com/posts/episode-166-of-743933', + 'only_matching': True, + }, { + 'url': 'https://www.patreon.com/posts/743933', + 'only_matching': True, + }] # Currently Patreon exposes download URL via hidden CSS, so login is not # needed. Keeping this commented for when this inevitably changes. 
@@ -78,38 +89,43 @@ class PatreonIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage).strip() - - attach_fn = self._html_search_regex( - r'<div class="attach"><a target="_blank" href="([^"]+)">', - webpage, 'attachment URL', default=None) - embed = self._html_search_regex( - r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"', - webpage, 'embedded URL', default=None) - - if attach_fn is not None: - video_url = 'http://www.patreon.com' + attach_fn - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._html_search_regex( - r'<strong>(.*?)</strong> is creating', webpage, 'uploader') - elif embed is not None: - return self.url_result(embed) - else: - playlist = self._parse_json(self._search_regex( - r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', - webpage, 'playlist JSON'), - video_id, transform_source=js_to_json) - data = playlist[0] - video_url = self._proto_relative_url(data['mp3']) - thumbnail = self._proto_relative_url(data.get('cover')) - uploader = data.get('artist') - - return { + post = self._download_json( + 'https://www.patreon.com/api/posts/' + video_id, video_id) + attributes = post['data']['attributes'] + title = attributes['title'].strip() + image = attributes.get('image') or {} + info = { 'id': video_id, - 'url': video_url, - 'ext': 'mp3', 'title': title, - 'uploader': uploader, - 'thumbnail': thumbnail, + 'description': clean_html(attributes.get('content')), + 'thumbnail': image.get('large_url') or image.get('url'), + 'timestamp': parse_iso8601(attributes.get('published_at')), + 'like_count': int_or_none(attributes.get('like_count')), + 'comment_count': int_or_none(attributes.get('comment_count')), } + + for i in post.get('included', []): + i_type = i.get('type') + if i_type == 'attachment': + attachment_attributes = i.get('attributes') or {} + attachment_url = attachment_attributes.get('url') 
+ if attachment_url: + info.update({ + 'url': attachment_url, + 'ext': determine_ext(attachment_attributes.get('name'), 'mp3'), + }) + elif i_type == 'user': + user_attributes = i.get('attributes') + if user_attributes: + info.update({ + 'uploader': user_attributes.get('full_name'), + 'uploader_url': user_attributes.get('url'), + }) + + if not info.get('url'): + info.update({ + '_type': 'url', + 'url': attributes['embed']['url'], + }) + + return info From d8cb62abd171e99da81a58582679fb10d68dca3f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 5 Oct 2018 22:45:04 +0100 Subject: [PATCH 081/132] [patreon] extract post_file url(#17792) --- youtube_dl/extractor/patreon.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 6f73ed68d..426dd8121 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -104,16 +104,18 @@ class PatreonIE(InfoExtractor): 'comment_count': int_or_none(attributes.get('comment_count')), } + def add_file(file_data): + file_url = file_data.get('url') + if file_url: + info.update({ + 'url': file_url, + 'ext': determine_ext(file_data.get('name'), 'mp3'), + }) + for i in post.get('included', []): i_type = i.get('type') if i_type == 'attachment': - attachment_attributes = i.get('attributes') or {} - attachment_url = attachment_attributes.get('url') - if attachment_url: - info.update({ - 'url': attachment_url, - 'ext': determine_ext(attachment_attributes.get('name'), 'mp3'), - }) + add_file(i.get('attributes') or {}) elif i_type == 'user': user_attributes = i.get('attributes') if user_attributes: @@ -122,6 +124,9 @@ class PatreonIE(InfoExtractor): 'uploader_url': user_attributes.get('url'), }) + if not info.get('url'): + add_file(attributes.get('post_file') or {}) + if not info.get('url'): info.update({ '_type': 'url', From 908c87855d4e0331d02189e8b9ff6364456598f3 Mon Sep 17 00:00:00 2001 
From: yonaikerlol <39972049+yonaikerlol@users.noreply.github.com> Date: Sun, 7 Oct 2018 09:05:45 -0400 Subject: [PATCH 082/132] [openload] Add support for oload.cc --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index dc01b6346..c652603a5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -314,6 +314,9 @@ class OpenloadIE(InfoExtractor): # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', 'only_matching': True, + }, { + 'url': 'https://oload.cc/embed/5NEAbI2BDSk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 869084a8508659c95ce2e3a8801612c0990133df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 11:51:40 +0100 Subject: [PATCH 083/132] [ted] fix extraction for http and rtmp formats(closes #5941)(closes #17572)(closes #17894) --- youtube_dl/extractor/ted.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 212ac80ab..f9b6aa48f 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -212,8 +212,6 @@ class TEDIE(InfoExtractor): http_url = None for format_id, resources in resources_.items(): - if not isinstance(resources, 
dict): - continue if format_id == 'h264': for resource in resources: h264_url = resource.get('file') @@ -242,6 +240,8 @@ class TEDIE(InfoExtractor): 'tbr': int_or_none(resource.get('bitrate')), }) elif format_id == 'hls': + if not isinstance(resources, dict): + continue stream_url = url_or_none(resources.get('stream')) if not stream_url: continue From 02df1ed10473b8633bf7df3c6656f6d00aec34bf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 16:26:29 +0100 Subject: [PATCH 084/132] [tv3] remove extractor(closes #10461)(closes #15339) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/tv3.py | 34 ------------------------------ 2 files changed, 35 deletions(-) delete mode 100644 youtube_dl/extractor/tv3.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f69e40e1d..11376eb9d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1157,7 +1157,6 @@ from .tv2 import ( TV2ArticleIE, ) from .tv2hu import TV2HuIE -from .tv3 import TV3IE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tva import TVAIE diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py deleted file mode 100644 index 3867ec90d..000000000 --- a/youtube_dl/extractor/tv3.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TV3IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx' - _TEST = { - 'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx', - 'info_dict': { - 'id': '4659127992001', - 'ext': 'mp4', - 'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3', - 'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at 
Hampton Downs raceway.', - 'uploader_id': '3812193411001', - 'upload_date': '20151213', - 'timestamp': 1449975272, - }, - 'expected_warnings': [ - 'Failed to download MPD manifest' - ], - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id') - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From 78f9ee9317bb51c4bd1e7ecbcc0cc8c284ba1ef7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 17:54:38 +0100 Subject: [PATCH 085/132] [brightcove:legacy] fall back to brightcove:new(#13912) --- youtube_dl/extractor/brightcove.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 14f9a14ed..5dbd71e12 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -356,7 +356,9 @@ class BrightcoveLegacyIE(InfoExtractor): def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) + publisher_id = video_info.get('publisherId') + info = { 'id': video_id, 'title': video_info['displayName'].strip(), @@ -444,8 +446,16 @@ class BrightcoveLegacyIE(InfoExtractor): else: return ad_info - if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % video_id) + if not info.get('url') and not info.get('formats'): + uploader_id = info.get('uploader_id') + if uploader_id: + info.update({ + '_type': 'url', + 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (uploader_id, video_id), + 'ie_key': 
BrightcoveNewIE.ie_key(), + }) + else: + raise ExtractorError('Unable to extract video url for %s' % video_id) return info From 20b896842a28291b9e858af379210c65e46d7643 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 18:41:57 +0100 Subject: [PATCH 086/132] [brightcove:legacy] add another fall back to brightcove:new --- youtube_dl/extractor/brightcove.py | 39 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5dbd71e12..40c3959fd 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,8 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import base64 import json +import re +import struct from .common import InfoExtractor from .adobepass import AdobePassIE @@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor): 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', expected=True) + def _brightcove_new_url_result(self, publisher_id, video_id): + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + def _get_video_info(self, video_id, query, referer=None): headers = {} linkBase = query.get('linkBaseURL') @@ -323,6 +329,29 @@ class BrightcoveLegacyIE(InfoExtractor): r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage, 'error message', default=None) if error_msg is not None: + publisher_id = query.get('publisherId') + if publisher_id and publisher_id[0].isdigit(): + publisher_id = publisher_id[0] + if not publisher_id: + valid_key = lambda key: key and ',' in key + player_key = query.get('playerKey') + if player_key and ',' in player_key[0]: + player_key = player_key[0] + else: + player_id = query.get('playerID') + if player_id and player_id[0].isdigit(): + 
player_page = self._download_webpage( + 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + video_id, headers=headers, fatal=False) + if player_page: + player_key = self._search_regex( + r'<param\s+name="playerKey"\s+value="([\w~,-]+)"', + player_page, 'player key', fatal=False) + if player_key: + enc_pub_id = player_key.split(',')[1].replace('~', '=') + publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] + if publisher_id: + return self._brightcove_new_url_result(publisher_id, video_id) raise ExtractorError( 'brightcove said: %s' % error_msg, expected=True) @@ -356,9 +385,7 @@ class BrightcoveLegacyIE(InfoExtractor): def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) - publisher_id = video_info.get('publisherId') - info = { 'id': video_id, 'title': video_info['displayName'].strip(), @@ -449,11 +476,7 @@ class BrightcoveLegacyIE(InfoExtractor): if not info.get('url') and not info.get('formats'): uploader_id = info.get('uploader_id') if uploader_id: - info.update({ - '_type': 'url', - 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (uploader_id, video_id), - 'ie_key': BrightcoveNewIE.ie_key(), - }) + info.update(self._brightcove_new_url_result(uploader_id, video_id)) else: raise ExtractorError('Unable to extract video url for %s' % video_id) return info From 3f84de8a976bff02db87b2d8635d8c607fec88eb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 20:47:12 +0100 Subject: [PATCH 087/132] [brightcove] remove unused variable --- youtube_dl/extractor/brightcove.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 40c3959fd..465ae396e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -333,7 +333,6 @@ class BrightcoveLegacyIE(InfoExtractor): if publisher_id and publisher_id[0].isdigit(): publisher_id = 
publisher_id[0] if not publisher_id: - valid_key = lambda key: key and ',' in key player_key = query.get('playerKey') if player_key and ',' in player_key[0]: player_key = player_key[0] From 41a3140e6ca76d86068ce3fb7f8413a6f6b0d7e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 16 Oct 2018 23:19:44 +0700 Subject: [PATCH 088/132] [rutube] Use geo verification headers (closes #17897) --- youtube_dl/extractor/rutube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 261bcbb83..10ac8ed1f 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE): options = self._download_json( 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON') + video_id, 'Downloading options JSON', + headers=self.geo_verification_headers()) formats = [] for format_id, format_url in options['video_balancer'].items(): From 7b365b569b0f8bb9c80a2ffb2a9a0ce9ef5aa13b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 17 Oct 2018 06:22:07 +0100 Subject: [PATCH 089/132] [cwtv] handle api errors(closes #17905) --- youtube_dl/extractor/cwtv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 224a1fb5d..f9bd535f6 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_iso8601, @@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( + data = self._download_json( 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id, - video_id)['video'] + 
video_id) + if data.get('result') != 'ok': + raise ExtractorError(data['msg'], expected=True) + video_data = data['video'] title = video_data['title'] mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id From 76546c493aa9dc2e2c8f8b1c663a7f5284e4637f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 26 Oct 2018 05:40:49 +0100 Subject: [PATCH 090/132] [viewster] reduce format requests --- youtube_dl/extractor/viewster.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index d5d5b4c69..6e318479c 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor): def concat(suffix, sep='-'): return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix - for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): - media = self._download_json( - 'https://public-api.viewster.com/movies/%s/video' % entry_id, - video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={ - 'mediaType': media_type, - 'language': audio, - 'subtitle': subtitle, - }) - if not media: - continue + medias = self._download_json( + 'https://public-api.viewster.com/movies/%s/videos' % entry_id, + video_id, fatal=False, query={ + 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'], + 'language': audio, + 'subtitle': subtitle, + }) + if not medias: + continue + for media in medias: video_url = media.get('Uri') if not video_url: continue From 68a30502b49574b05422e9d2611bc0e92b876ef7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 26 Oct 2018 05:41:57 +0100 Subject: [PATCH 091/132] [dailymail] fix format extraction(closes #17976) --- youtube_dl/extractor/dailymail.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py index af3978035..4f75a2a30 100644 --- a/youtube_dl/extractor/dailymail.py +++ b/youtube_dl/extractor/dailymail.py @@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor): 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id) video_sources = self._download_json(sources_url, video_id) + body = video_sources.get('body') + if body: + video_sources = body formats = [] for rendition in video_sources['renditions']: From 73fcd53c96ee42c41aa8f1d13fcc263a16e91265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Oct 2018 22:12:54 +0700 Subject: [PATCH 092/132] [crunchyroll] Improve extraction failsafeness (closes #17991) --- youtube_dl/extractor/crunchyroll.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 045be0ab5..4a68d092b 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re import json +import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -398,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if sub_doc is None: + if not isinstance(sub_doc, etree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -515,7 +516,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if streamdata is not None: + if isinstance(streamdata, etree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -526,7 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, 
MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if stream_info is not None: + if isinstance(stream_info, etree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -598,10 +599,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text series = self._html_search_regex( r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', webpage, 'series', fatal=False) - season = xpath_text(metadata, 'series_title') - episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title') - episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number')) + season = episode = episode_number = duration = thumbnail = None + + if isinstance(metadata, etree.Element): + season = xpath_text(metadata, 'series_title') + episode = xpath_text(metadata, 'episode_title') + episode_number = int_or_none(xpath_text(metadata, 'episode_number')) + duration = float_or_none(media_metadata.get('duration'), 1000) + thumbnail = xpath_text(metadata, 'episode_image_url') + + if not episode: + episode = media_metadata.get('title') + if not episode_number: + episode_number = int_or_none(media_metadata.get('episode_number')) + if not thumbnail: + thumbnail = media_metadata.get('thumbnail', {}).get('url') season_number = int_or_none(self._search_regex( r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', @@ -611,8 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'id': video_id, 'title': video_title, 'description': video_description, - 'duration': float_or_none(media_metadata.get('duration'), 1000), - 'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), + 'duration': duration, + 'thumbnail': thumbnail, 'uploader': video_uploader, 'upload_date': 
video_upload_date, 'series': series, From 3811bd50c1b3ba44ed02a78e93ee649e3fd7113b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Oct 2018 22:49:10 +0700 Subject: [PATCH 093/132] [ivi] Add support for ivi.tv --- youtube_dl/extractor/ivi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index cb51cef2d..86c014b07 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -15,7 +15,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' _GEO_BYPASS = False _GEO_COUNTRIES = ['RU'] @@ -65,7 +65,11 @@ class IviIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', - } + }, + { + 'url': 'https://www.ivi.tv/watch/33560/', + 'only_matching': True, + }, ] # Sorted by quality From df76d3eb1327f8719029d7f0cf1438b81ead99e2 Mon Sep 17 00:00:00 2001 From: yonaikerlol <lawlietrs7@gmail.com> Date: Sun, 28 Oct 2018 11:51:29 -0400 Subject: [PATCH 094/132] [openload] Add support for oload.icu --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c652603a5..a91f29f5c 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = 
r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -317,6 +317,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.cc/embed/5NEAbI2BDSk', 'only_matching': True, + }, { + 'url': 'https://oload.icu/f/-_i4y_F_Hs8', + 'only_matching': True }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From cdf763e9aed77745604c51b6ccaf0ffd334eb9bd Mon Sep 17 00:00:00 2001 From: sichuan-pepper <huajiao.sichuan.pepper@gmail.com> Date: Sun, 28 Oct 2018 01:46:32 +0900 Subject: [PATCH 095/132] [screencast] Fix extraction (closes #14590) --- youtube_dl/extractor/screencast.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 62a6a8337..c6554c905 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -90,6 +90,14 @@ class ScreencastIE(InfoExtractor): r'src=(.*?)(?:$|&)', video_meta, 'meta tag video URL', default=None) + if video_url is None: + video_url = self._html_search_regex( + r'"MediaContentUrl":"([^"]+)"', webpage, 'media content url', default=None) + + if video_url is None: + video_url = self._html_search_meta( + 'og:video', webpage, default=None) + if video_url is None: raise ExtractorError('Cannot find video') From 59862fcd65c059cfe00d1aeafa78bb8ac4a39a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Oct 2018 23:23:32 +0700 Subject: [PATCH 096/132] [screencast] Improve extraction (closes #14617, closes #17990) --- youtube_dl/extractor/screencast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c6554c905..69a0d01f3 100644 --- a/youtube_dl/extractor/screencast.py +++ 
b/youtube_dl/extractor/screencast.py @@ -92,7 +92,8 @@ class ScreencastIE(InfoExtractor): if video_url is None: video_url = self._html_search_regex( - r'"MediaContentUrl":"([^"]+)"', webpage, 'media content url', default=None) + r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', default=None, group='url') if video_url is None: video_url = self._html_search_meta( From 6252c6de19364f724e97edf84222a2cb2b3b8b66 Mon Sep 17 00:00:00 2001 From: Alexey Trofimov <dmzkrsk@gmail.com> Date: Fri, 26 Oct 2018 15:00:55 +0700 Subject: [PATCH 097/132] [sportbox] Fix extraction --- youtube_dl/extractor/sportbox.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 54497c880..9413cf27a 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -18,7 +18,7 @@ class SportBoxEmbedIE(InfoExtractor): 'info_dict': { 'id': '211355', 'ext': 'mp4', - 'title': '211355', + 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 292, 'view_count': int, @@ -48,9 +48,18 @@ class SportBoxEmbedIE(InfoExtractor): wjplayer_data = self._parse_json( self._search_regex( - r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), + r'(?s)var\s+playerOptions\s*=\s*({.+?});', webpage, 'wjplayer settings'), video_id, transform_source=js_to_json) + wjplayer_data['sources'] = self._parse_json( + self._search_regex( + r'(?s)playerOptions\.sources\s*=\s*(\[.+?\]);', webpage, 'wjplayer sources'), + video_id, transform_source=js_to_json) + + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage) or self._html_search_regex( + r'<title>(.+?)', webpage, 'title', fatal=False) or video_id + formats = [] for source in wjplayer_data['sources']: src = source.get('src') @@ -71,7 +80,7 @@ class SportBoxEmbedIE(InfoExtractor): return { 'id': video_id, - 'title': video_id, + 
'title': title, 'thumbnail': wjplayer_data.get('poster'), 'duration': int_or_none(wjplayer_data.get('duration')), 'view_count': view_count, From 87ab0e23377a4f9d87919ebb76ebf574f92158c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:19:08 +0700 Subject: [PATCH 098/132] [extractor/common] Add validation for JSON-LD URLs --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2dbf81e6e..8452125c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,6 +69,7 @@ from ..utils import ( update_url_query, urljoin, url_basename, + url_or_none, xpath_element, xpath_text, xpath_with_ns, @@ -1213,10 +1214,10 @@ class InfoExtractor(object): def extract_video_object(e): assert e['@type'] == 'VideoObject' info.update({ - 'url': e.get('contentUrl'), + 'url': url_or_none(e.get('contentUrl')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), + 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), 'filesize': float_or_none(e.get('contentSize')), From 9d5aceb0554f515c5b417d8c9541f6d9ccfb016f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:20:29 +0700 Subject: [PATCH 099/132] [sportbox] Improve extraction, add support for matchtv.ru and fix video id (closes #17978) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/generic.py | 6 ++-- youtube_dl/extractor/sportbox.py | 55 ++++++++++++++++++------------ 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 11376eb9d..1c371c30b 100644 --- a/youtube_dl/extractor/extractors.py +++ 
b/youtube_dl/extractor/extractors.py @@ -1047,7 +1047,7 @@ from .spike import ( ) from .stitcher import StitcherIE from .sport5 import Sport5IE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a48667f0..545e03371 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .tvc import TVCIE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE @@ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor): return self.url_result(tvc_url, 'TVC') # Look for embedded SportBox player - sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) + sportbox_urls = SportBoxIE._extract_urls(webpage) if sportbox_urls: - return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') + return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 9413cf27a..b9017fd2a 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -8,20 +8,24 @@ from ..utils import ( determine_ext, int_or_none, js_to_json, + merge_dicts, ) -class SportBoxEmbedIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' +class SportBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' _TESTS = [{ 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 
'info_dict': { - 'id': '211355', + 'id': '109158', 'ext': 'mp4', 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 292, 'view_count': int, + 'timestamp': 1426237001, + 'upload_date': '20150313', }, 'params': { # m3u8 download @@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor): }, { 'url': 'https://news.sportbox.ru/vdl/player/media/193095', 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/109158', + 'only_matching': True, + }, { + 'url': 'https://matchtv.ru/vdl/player/media/109158', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return re.findall( - r']+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', + r']+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', webpage) def _real_extract(self, url): @@ -46,22 +56,14 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - wjplayer_data = self._parse_json( + sources = self._parse_json( self._search_regex( - r'(?s)var\s+playerOptions\s*=\s*({.+?});', webpage, 'wjplayer settings'), + r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', + webpage, 'sources'), video_id, transform_source=js_to_json) - wjplayer_data['sources'] = self._parse_json( - self._search_regex( - r'(?s)playerOptions\.sources\s*=\s*(\[.+?\]);', webpage, 'wjplayer sources'), - video_id, transform_source=js_to_json) - - title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage) or self._html_search_regex( - r'(.+?)', webpage, 'title', fatal=False) or video_id - formats = [] - for source in wjplayer_data['sources']: + for source in sources: src = source.get('src') if not src: continue @@ -75,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor): }) self._sort_formats(formats) + player = self._parse_json( + self._search_regex( + r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', 
webpage, + 'player options', default='{}'), + video_id, transform_source=js_to_json) + media_id = player['mediaId'] + + info = self._search_json_ld(webpage, media_id, default={}) + view_count = int_or_none(self._search_regex( r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) - return { - 'id': video_id, - 'title': title, - 'thumbnail': wjplayer_data.get('poster'), - 'duration': int_or_none(wjplayer_data.get('duration')), + return merge_dicts(info, { + 'id': media_id, + 'title': self._og_search_title(webpage, default=None) or media_id, + 'thumbnail': player.get('poster'), + 'duration': int_or_none(player.get('duration')), 'view_count': view_count, 'formats': formats, - } + }) From 7cdcc5c24ed4b52498f2f8c77254b5e488cbaa2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:38:06 +0700 Subject: [PATCH 100/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 86cf489b1..a21177dac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version + +Core ++ [extractor/common] Add validation for JSON-LD URLs + +Extractors ++ [sportbox] Add support for matchtv.ru +* [sportbox] Fix extraction (#17978) +* [screencast] Fix extraction (#14590, #14617, #17990) ++ [openload] Add support for oload.icu ++ [ivi] Add support for ivi.tv +* [crunchyroll] Improve extraction failsafeness (#17991) +* [dailymail] Fix formats extraction (#17976) +* [viewster] Reduce format requests +* [cwtv] Handle API errors (#17905) ++ [rutube] Use geo verification headers (#17897) ++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912) +- [tv3] Remove extractor (#10461, #15339) +* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894) ++ [openload] Add support for oload.cc (#17823) ++ [patreon] Extract post_file URL (#17792) +* [patreon] Fix extraction (#14502, #10471) + + version 2018.10.05 Extractors From 
7186d81525fffdfd20836063ffcbdc99261aab50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:39:29 +0700 Subject: [PATCH 101/132] release 2018.10.29 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 058eb4321..aefed163a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.10.05 +[debug] youtube-dl version 2018.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a21177dac..57dbde12d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.10.29 Core + [extractor/common] Add validation for JSON-LD URLs diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f167a6ddc..e5a6879bc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -818,7 +818,7 @@ - **Spiegeltv** - **sport.francetvinfo.fr** - **Sport5** - - **SportBoxEmbed** + - **SportBox** - **SportDeutschland** - **SpringboardPlatform** - **Sprout** @@ -909,7 +909,6 @@ - **TV2** - **tv2.hu** - **TV2Article** - - **TV3** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **TVA** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7d3f25019..ae9a77966 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.10.05' +__version__ = '2018.10.29' From 47d18d7751635fde2b4ce08a5b25706489294ab0 Mon Sep 17 00:00:00 2001 From: Ali Irani Date: Thu, 6 Sep 2018 02:08:38 +0430 Subject: [PATCH 102/132] [aparat] Fix 
extraction --- youtube_dl/extractor/aparat.py | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 6eb8bbb6e..780439e17 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -34,32 +34,32 @@ class AparatIE(InfoExtractor): 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, video_id) - title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') - file_list = self._parse_json( self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, + r'var options\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) + title = file_list['plugins']['sabaPlayerPlugin']['title'] + formats = [] - for item in file_list[0]: - file_url = url_or_none(item.get('file')) - if not file_url: - continue - ext = mimetype2ext(item.get('type')) - label = item.get('label') - formats.append({ - 'url': file_url, - 'ext': ext, - 'format_id': label or ext, - 'height': int_or_none(self._search_regex( - r'(\d+)[pP]', label or '', 'height', default=None)), - }) + for list in file_list['plugins']['sabaPlayerPlugin']['multiSRC']: + for item in list: + file_url = url_or_none(item.get('src')) + if not file_url: + continue + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': label or ext, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', default=None)), + }) self._sort_formats(formats) - thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + thumbnail = file_list['poster'] return { 'id': video_id, From 1b7763aa9d2786361e26ce3db5c77de1e7a4438e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 23:29:05 +0700 Subject: [PATCH 103/132] [aparat] Improve extraction and extract more metadata (closes 
#17445, closes #18008) --- youtube_dl/extractor/aparat.py | 89 ++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 780439e17..883dcee7a 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + merge_dicts, mimetype2ext, url_or_none, ) @@ -12,59 +13,83 @@ from ..utils import ( class AparatIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.aparat.com/v/wP8On', 'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 'info_dict': { 'id': 'wP8On', 'ext': 'mp4', 'title': 'تیم گلکسی 11 - زومیت', - 'age_limit': 0, + 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028', + 'duration': 231, + 'timestamp': 1387394859, + 'upload_date': '20131218', + 'view_count': int, }, - # 'skip': 'Extremely unreliable', - } + }, { + # multiple formats + 'url': 'https://www.aparat.com/v/8dflw/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - # Note: There is an easier-to-parse configuration at - # http://www.aparat.com/video/video/config/videohash/%video_id - # but the URL in there does not work - webpage = self._download_webpage( - 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, - video_id) + # Provides more metadata + webpage = self._download_webpage(url, video_id, fatal=False) - file_list = self._parse_json( + if not webpage: + # Note: There is an easier-to-parse configuration at + # http://www.aparat.com/video/video/config/videohash/%video_id + # but the URL in there does not work + webpage = self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) + + options = 
self._parse_json( self._search_regex( - r'var options\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, - 'file list'), + r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P(?:(?!\1).)+)\1\s*\)', + webpage, 'options', group='value'), video_id) - title = file_list['plugins']['sabaPlayerPlugin']['title'] + player = options['plugins']['sabaPlayerPlugin'] formats = [] - for list in file_list['plugins']['sabaPlayerPlugin']['multiSRC']: - for item in list: + for sources in player['multiSRC']: + for item in sources: + if not isinstance(item, dict): + continue file_url = url_or_none(item.get('src')) if not file_url: continue - ext = mimetype2ext(item.get('type')) - label = item.get('label') - formats.append({ - 'url': file_url, - 'ext': ext, - 'format_id': label or ext, - 'height': int_or_none(self._search_regex( - r'(\d+)[pP]', label or '', 'height', default=None)), - }) - self._sort_formats(formats) + item_type = item.get('type') + if item_type == 'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': 'http-%s' % (label or ext), + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', + default=None)), + }) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) - thumbnail = file_list['poster'] + info = self._search_json_ld(webpage, video_id, default={}) - return { + if not info.get('title'): + info['title'] = player['title'] + + return merge_dicts(info, { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'age_limit': self._family_friendly_search(webpage), + 'thumbnail': url_or_none(options.get('poster')), + 'duration': int_or_none(player.get('duration')), 'formats': formats, - } + }) From fe118e27f340c12c11811ee2c97c9b135f3cc98e Mon Sep 17 00:00:00 2001 From: 
gfabiano Date: Mon, 30 Jul 2018 18:15:20 +0200 Subject: [PATCH 104/132] [cbnc] Add support for new URL schema (closes #14193) --- youtube_dl/extractor/cnbc.py | 41 +++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 5 +++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index d354d9f95..35c0b6124 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -1,8 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals + from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import ( + js_to_json, + smuggle_url, +) class CNBCIE(InfoExtractor): @@ -34,3 +38,38 @@ class CNBCIE(InfoExtractor): {'force_smil_url': True}), 'id': video_id, } + + +class CNBCNewIE(InfoExtractor): + IE_NAME = 'CNBC:new' + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P[^.]+)' + _TEST = { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000031301', + 'ext': 'mp4', + 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', + 'timestamp': 1531958400, + 'upload_date': '20180719', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._parse_json( + self._search_regex( + r'(?s).*]*>.*?({.+?content_id.+?}).*?', + webpage, display_id), + display_id, transform_source=js_to_json + )['content_id'] + + return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1c371c30b..2668b9992 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ 
-209,7 +209,10 @@ from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import CNBCIE +from .cnbc import ( + CNBCIE, + CNBCNewIE, +) from .cnn import ( CNNIE, CNNBlogsIE, From aa26d02ee2c6654a4a670cb5eea2d7ac365160eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 23:53:39 +0700 Subject: [PATCH 105/132] [cnbc] Simplify extraction (closes #14280, closes #17110) --- youtube_dl/extractor/cnbc.py | 29 ++++++++++------------------- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 35c0b6124..81b0c9fc4 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -3,10 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - js_to_json, - smuggle_url, -) +from ..utils import smuggle_url class CNBCIE(InfoExtractor): @@ -40,36 +37,30 @@ class CNBCIE(InfoExtractor): } -class CNBCNewIE(InfoExtractor): - IE_NAME = 'CNBC:new' - _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P[^.]+)' +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' _TEST = { 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'info_dict': { 'id': '7000031301', 'ext': 'mp4', - 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'title': "Trump: I don't necessarily agree with raising rates", 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'timestamp': 1531958400, 'upload_date': '20180719', 'uploader': 'NBCU-CNBC', }, 'params': { - # m3u8 download 'skip_download': True, }, } - CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._parse_json( - 
self._search_regex( - r'(?s).*]*>.*?({.+?content_id.+?}).*?', - webpage, display_id), - display_id, transform_source=js_to_json - )['content_id'] - - return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') + video_id = self._search_regex( + r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, + 'video id') + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%s' % video_id, + CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2668b9992..eb55c9370 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -211,7 +211,7 @@ from .clyp import ClypIE from .cmt import CMTIE from .cnbc import ( CNBCIE, - CNBCNewIE, + CNBCVideoIE, ) from .cnn import ( CNNIE, From 808261b89283dd7584d0662f1a197dc0261dc5c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Oct 2018 00:22:18 +0700 Subject: [PATCH 106/132] [theplatform] Improve error detection (#13222) --- youtube_dl/extractor/theplatform.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index ffef5bf06..181620615 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE): smil_url, video_id, note=note, query={'format': 'SMIL'}, headers=self.geo_verification_headers()) error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') - if error_element is not None and error_element.attrib['src'].startswith( - 'http://link.theplatform.%s/s/errorFiles/Unavailable.' 
% self._TP_TLD): - raise ExtractorError(error_element.attrib['abstract'], expected=True) + if error_element is not None: + exception = find_xpath_attr( + error_element, _x('.//smil:param'), 'name', 'exception') + if exception is not None: + if exception.get('value') == 'GeoLocationBlocked': + self.raise_geo_restricted(error_element.attrib['abstract']) + elif error_element.attrib['src'].startswith( + 'http://link.theplatform.%s/s/errorFiles/Unavailable.' + % self._TP_TLD): + raise ExtractorError( + error_element.attrib['abstract'], expected=True) smil_formats = self._parse_smil_formats( meta, smil_url, video_id, namespace=default_ns, From 0c12c80bc9e2a9629d721cf97f78468861d31007 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 29 Oct 2018 19:28:09 +0100 Subject: [PATCH 107/132] [linkedin:learning] Add new extractor(closes #13545) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/linkedin.py | 175 +++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 youtube_dl/extractor/linkedin.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eb55c9370..4891d4ae8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -572,6 +572,10 @@ from .limelight import ( LimelightChannelListIE, ) from .line import LineTVIE +from .linkedin import ( + LinkedInLearningIE, + LinkedInLearningCourseIE, +) from .litv import LiTVIE from .liveleak import ( LiveLeakIE, diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py new file mode 100644 index 000000000..6333a8fd3 --- /dev/null +++ b/youtube_dl/extractor/linkedin.py @@ -0,0 +1,175 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + urlencode_postdata, +) + + +class LinkedInLearningBaseIE(InfoExtractor): + _NETRC_MACHINE = 'linkedin' + + def _call_api(self, 
course_slug, fields, video_slug=None, resolution=None): + query = { + 'courseSlug': course_slug, + 'fields': fields, + 'q': 'slugs', + } + sub = '' + if video_slug: + query.update({ + 'videoSlug': video_slug, + 'resolution': '_%s' % resolution, + }) + sub = ' %dp' % resolution + api_url = 'https://www.linkedin.com/learning-api/detailedCourses' + return self._download_json( + api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ + 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, + }, query=query)['elements'][0] + + def _get_video_id(self, urn, course_slug, video_slug): + if urn: + mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) + if mobj: + return mobj.group(1) + return '%s/%s' % (course_slug, video_slug) + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + + login_page = self._download_webpage( + 'https://www.linkedin.com/uas/login?trk=learning', + None, 'Downloading login page') + action_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', login_page, 'post url', + default='https://www.linkedin.com/uas/login-submit', group='url') + data = self._hidden_inputs(login_page) + data.update({ + 'session_key': email, + 'session_password': password, + }) + login_submit_page = self._download_webpage( + action_url, None, 'Logging in', + data=urlencode_postdata(data)) + error = self._search_regex( + r']+class="error"[^>]*>\s*(.+?)\s*', + login_submit_page, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + + +class LinkedInLearningIE(LinkedInLearningBaseIE): + IE_NAME = 'linkedin:learning' + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P[^/]+)/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true', + 'md5': 'a1d74422ff0d5e66a792deb996693167', + 'info_dict': { + 'id': '90426', + 'ext': 'mp4', + 'title': 'Welcome', + 'timestamp': 1430396150.82, + 'upload_date': 
'20150430', + }, + } + + def _real_extract(self, url): + course_slug, video_slug = re.match(self._VALID_URL, url).groups() + + video_data = None + formats = [] + for width, height in ((640, 360), (960, 540), (1280, 720)): + video_data = self._call_api( + course_slug, 'selectedVideo', video_slug, height)['selectedVideo'] + + video_url_data = video_data.get('url') or {} + progressive_url = video_url_data.get('progressiveUrl') + if progressive_url: + formats.append({ + 'format_id': 'progressive-%dp' % height, + 'url': progressive_url, + 'height': height, + 'width': width, + 'source_preference': 1, + }) + + title = video_data['title'] + + audio_url = video_data.get('audio', {}).get('progressiveUrl') + if audio_url: + formats.append({ + 'abr': 64, + 'ext': 'm4a', + 'format_id': 'audio', + 'url': audio_url, + 'vcodec': 'none', + }) + + streaming_url = video_url_data.get('streamingUrl') + if streaming_url: + formats.extend(self._extract_m3u8_formats( + streaming_url, video_slug, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + + self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) + + return { + 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), + 'title': title, + 'formats': formats, + 'thumbnail': video_data.get('defaultThumbnail'), + 'timestamp': float_or_none(video_data.get('publishedOn'), 1000), + 'duration': int_or_none(video_data.get('durationInSeconds')), + } + + +class LinkedInLearningCourseIE(LinkedInLearningBaseIE): + IE_NAME = 'linkedin:learning:course' + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals', + 'info_dict': { + 'id': 'programming-foundations-fundamentals', + 'title': 'Programming Foundations: Fundamentals', + 'description': 'md5:76e580b017694eb89dc8e8923fff5c86', + }, + 'playlist_mincount': 61, + } + + @classmethod + def suitable(cls, url): + return False if 
LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_slug = self._match_id(url) + course_data = self._call_api(course_slug, 'chapters,description,title') + + entries = [] + for chapter in course_data.get('chapters', []): + chapter_title = chapter.get('title') + for video in chapter.get('videos', []): + video_slug = video.get('slug') + if not video_slug: + continue + entries.append({ + '_type': 'url', + 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), + 'title': video.get('title'), + 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), + 'chapter': chapter_title, + 'ie_key': LinkedInLearningIE.ie_key(), + }) + + return self.playlist_result( + entries, course_slug, + course_data.get('title'), + course_data.get('description')) From 00365179aecec0ff6dca479bfa0d90fac1b55d38 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 29 Oct 2018 21:49:12 +0100 Subject: [PATCH 108/132] [linkedin:learning:course] use url_transparent type for playlist entries --- youtube_dl/extractor/linkedin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 6333a8fd3..259fc4c5e 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -161,7 +161,7 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE): if not video_slug: continue entries.append({ - '_type': 'url', + '_type': 'url_transparent', 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), 'title': video.get('title'), 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), From 6de56f023a91a7d88fa0c7e7d3415a99369aacc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Oct 2018 04:57:28 +0700 Subject: [PATCH 109/132] [cnbc:video] Fix _VALID_URL (#17110) --- youtube_dl/extractor/cnbc.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 81b0c9fc4..6889b0f40 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -38,7 +38,7 @@ class CNBCIE(InfoExtractor): class CNBCVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' + _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' _TEST = { 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'info_dict': { From 09879c07f5b3597f1465b21d937aa204798561a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Nov 2018 01:35:32 +0700 Subject: [PATCH 110/132] [njpwworld] Fix authentication (closes #17427) --- youtube_dl/extractor/njpwworld.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py index febef097a..025c5d249 100644 --- a/youtube_dl/extractor/njpwworld.py +++ b/youtube_dl/extractor/njpwworld.py @@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor): 'skip': 'Requires login', } + _LOGIN_URL = 'https://front.njpwworld.com/auth/login' + def _real_initialize(self): self._login() @@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor): if not username: return True + # Setup session (will set necessary cookies) + self._request_webpage( + 'https://njpwworld.com/', None, note='Setting up session') + webpage, urlh = self._download_webpage_handle( - 'https://njpwworld.com/auth/login', None, + self._LOGIN_URL, None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({'login_id': username, 'pw': password}), - headers={'Referer': 'https://njpwworld.com/auth'}) + headers={'Referer': 'https://front.njpwworld.com/auth'}) # /auth/login will return 302 for successful logins - if urlh.geturl() == 'https://njpwworld.com/auth/login': + if urlh.geturl() == self._LOGIN_URL: self.report_warning('unable to login') 
return False From c4364ce7d28b13255fb09091961097454100f886 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Fri, 2 Nov 2018 12:08:41 -0400 Subject: [PATCH 111/132] [openload] Add support for oload.fun --- youtube_dl/extractor/openload.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a91f29f5c..2473536fd 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -319,7 +319,10 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }, { 'url': 'https://oload.icu/f/-_i4y_F_Hs8', - 'only_matching': True + 'only_matching': True, + }, { + 'url': 'https://oload.fun/f/gb6G1H4sHXY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From fb68d031e40e586160e3e7b5a95814bd24c655c1 Mon Sep 17 00:00:00 2001 From: Sebastian Haas Date: Tue, 30 Oct 2018 23:44:50 +0100 Subject: [PATCH 112/132] [orf:tvthek] Fix extraction (closes #17737) use _extract_m3u8_formats and _extract_f4m_formats helper functions closes #17737 --- youtube_dl/extractor/orf.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index c1fb580ca..da8031ad2 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -80,14 +80,16 @@ class ORFTVthekIE(InfoExtractor): if not video_id or not title: continue video_id = 
compat_str(video_id) - formats = [{ - 'preference': -10 if fd['delivery'] == 'hls' else None, - 'format_id': '%s-%s-%s' % ( - fd['delivery'], fd['quality'], fd['quality_string']), - 'url': fd['src'], - 'protocol': fd['protocol'], - 'quality': quality_to_int(fd['quality']), - } for fd in sd['sources']] + formats = [] + for fd in sd['sources']: + format_id = '%s-%s-%s' % ( + fd['delivery'], fd['quality'], fd['quality_string']) + if determine_ext(fd['src']) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + fd['src'], video_id, 'mp4', m3u8_id=format_id)) + elif determine_ext(fd['src']) == 'f4m': + formats.extend(self._extract_f4m_formats( + fd['src'], video_id, f4m_id=format_id)) # Check for geoblocking. # There is a property is_geoprotection, but that's always false From 0f2c96bf336669bb662045a0792ee7f6e42047b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Nov 2018 23:46:56 +0700 Subject: [PATCH 113/132] [orf:tvthek] Improve extraction and remove unused code (closes #17956, closes #18024) --- youtube_dl/extractor/orf.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index da8031ad2..d432e3449 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -15,6 +15,7 @@ from ..utils import ( strip_jsonp, unescapeHTML, unified_strdate, + url_or_none, ) @@ -68,12 +69,6 @@ class ORFTVthekIE(InfoExtractor): webpage, 'playlist', group='json'), playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - def quality_to_int(s): - m = re.search('([0-9]+)', s) - if m is None: - return -1 - return int(m.group(1)) - entries = [] for sd in data_jsb: video_id, title = sd.get('id'), sd.get('title') @@ -82,14 +77,27 @@ class ORFTVthekIE(InfoExtractor): video_id = compat_str(video_id) formats = [] for fd in sd['sources']: - format_id = '%s-%s-%s' % ( - fd['delivery'], fd['quality'], fd['quality_string']) + src = 
url_or_none(fd.get('src')) + if not src: + continue + format_id_list = [] + for key in ('delivery', 'quality', 'quality_string'): + value = fd.get(key) + if value: + format_id_list.append(value) + format_id = '-'.join(format_id_list) if determine_ext(fd['src']) == 'm3u8': formats.extend(self._extract_m3u8_formats( fd['src'], video_id, 'mp4', m3u8_id=format_id)) elif determine_ext(fd['src']) == 'f4m': formats.extend(self._extract_f4m_formats( fd['src'], video_id, f4m_id=format_id)) + else: + formats.append({ + 'format_id': format_id, + 'url': src, + 'protocol': fd.get('protocol'), + }) # Check for geoblocking. # There is a property is_geoprotection, but that's always false From de27a78e97c1a1fd70b05e40db54adf571ca08ed Mon Sep 17 00:00:00 2001 From: sichuan-pepper Date: Sat, 27 Oct 2018 03:40:44 +0900 Subject: [PATCH 114/132] [twitcasting] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitcasting.py | 44 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/twitcasting.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4891d4ae8..3ab6fd620 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1200,6 +1200,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import TwitcastingIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py new file mode 100644 index 000000000..856df5c0b --- /dev/null +++ b/youtube_dl/extractor/twitcasting.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + + +class TwitcastingIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:(?:www|ssl|en|pt|es|ja|ko)\.)?twitcasting\.tv/(?P[^\/]+)/movie/(?P[0-9]+)' + _TEST = { + 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', + 'md5': '745243cad58c4681dc752490f7540d7f', + 'info_dict': { + 'id': '2357609', + 'ext': 'mp4', + 'title': 'Recorded Live #2357609', + 'uploader_id': 'ivetesangalo', + 'description': "Moi! I'm live on TwitCasting from my iPhone.", + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + uploader_id = mobj.group('uploader_id') + + webpage = self._download_webpage(url, video_id) + + playlist_url = self._html_search_regex(r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage, name='playlist url', group='url') + formats = self._extract_m3u8_formats(playlist_url, video_id, ext='mp4') + thumbnail = self._og_search_thumbnail(webpage) + title = self._html_search_meta('twitter:title', webpage) + description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage) + return{ + 'id': video_id, + 'url': url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader_id': uploader_id, + 'formats': formats, + } From 64bd49390c6e747b5081e5a87d719d74d0f354a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Nov 2018 00:27:36 +0700 Subject: [PATCH 115/132] [twitcasting] Improve extraction and fix issues (closes #17981) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/twitcasting.py | 36 +++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3ab6fd620..c173f41e0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1200,7 +1200,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from 
.twentythreevideo import TwentyThreeVideoIE -from .twitcasting import TwitcastingIE +from .twitcasting import TwitCastingIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py index 856df5c0b..05f8aa9ce 100644 --- a/youtube_dl/extractor/twitcasting.py +++ b/youtube_dl/extractor/twitcasting.py @@ -6,8 +6,8 @@ from .common import InfoExtractor import re -class TwitcastingIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|ssl|en|pt|es|ja|ko)\.)?twitcasting\.tv/(?P[^\/]+)/movie/(?P[0-9]+)' +class TwitCastingIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P[^/]+)/movie/(?P\d+)' _TEST = { 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', 'md5': '745243cad58c4681dc752490f7540d7f', @@ -18,24 +18,40 @@ class TwitcastingIE(InfoExtractor): 'uploader_id': 'ivetesangalo', 'description': "Moi! I'm live on TwitCasting from my iPhone.", 'thumbnail': r're:^https?://.*\.jpg$', - } + }, + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = mobj.group('id') uploader_id = mobj.group('uploader_id') webpage = self._download_webpage(url, video_id) - playlist_url = self._html_search_regex(r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage, name='playlist url', group='url') - formats = self._extract_m3u8_formats(playlist_url, video_id, ext='mp4') + title = self._html_search_regex( + r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)(?:(?!\1).)+)\1', + r'(["\'])(?Phttp.+?\.m3u8.*?)\1'), + webpage, 'm3u8 url', group='url') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + thumbnail = self._og_search_thumbnail(webpage) - title = self._html_search_meta('twitter:title', webpage) - description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage) - return{ + 
description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage) + + return { 'id': video_id, - 'url': url, 'title': title, 'description': description, 'thumbnail': thumbnail, From 87f6340cc6c59fd783874f6b328d2165579b8a88 Mon Sep 17 00:00:00 2001 From: Xiao Di Guan Date: Sat, 3 Nov 2018 05:18:20 +1100 Subject: [PATCH 116/132] [extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447) --- test/helper.py | 10 ++++++++ test/test_InfoExtractor.py | 42 ++++++++++++++++++++++++++++++++-- test/test_downloader_http.py | 12 +--------- test/test_http.py | 10 +------- youtube_dl/extractor/common.py | 5 ++++ 5 files changed, 57 insertions(+), 22 deletions(-) diff --git a/test/helper.py b/test/helper.py index dfee217a9..aa9a1c9b2 100644 --- a/test/helper.py +++ b/test/helper.py @@ -7,6 +7,7 @@ import json import os.path import re import types +import ssl import sys import youtube_dl.extractor @@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re): real_warning(w) ydl.report_warning = _report_warning + + +def http_server_port(httpd): + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + else: + sock = httpd.socket + return sock.getsockname()[1] diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 4833396a5..06be72616 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -9,11 +9,30 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value -from youtube_dl.compat import compat_etree_fromstring +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from youtube_dl.compat import compat_etree_fromstring, compat_http_server from 
youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False class TestIE(InfoExtractor): @@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. + + httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + if __name__ == '__main__': unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 5cf2bf1a5..750472281 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -9,26 +9,16 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import try_rm +from test.helper import http_server_port, try_rm from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server from youtube_dl.downloader.http import HttpFD from youtube_dl.utils import encodeFilename -import ssl import 
threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - TEST_SIZE = 10 * 1024 diff --git a/test/test_http.py b/test/test_http.py index 409fec9c8..3ee0a5dda 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,6 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import http_server_port from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl @@ -16,15 +17,6 @@ import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8452125c8..e5f8136fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -606,6 +606,11 @@ class InfoExtractor(object): except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): + # Retain reference to error to prevent file object from + # being closed before it can be read. Works around the + # effects of + # introduced in Python 3.4.1. 
+ err.fp._error = err return err.fp if errnote is False: From f647b106ffa01d934579c6b3162c0c271d5af1f2 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Tue, 2 Oct 2018 14:49:01 +0200 Subject: [PATCH 117/132] [azmedien] Adopt to major site redesign (closes #17745) --- youtube_dl/extractor/azmedien.py | 222 +++++++---------------------- youtube_dl/extractor/extractors.py | 6 +- 2 files changed, 53 insertions(+), 175 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 68f26e2ca..9d606ee67 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -1,19 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - get_element_by_class, - get_element_by_id, - strip_or_none, - urljoin, -) class AZMedienBaseIE(InfoExtractor): + _PARTNER_ID = '1719221' + def _kaltura_video(self, partner_id, entry_id): return self.url_result( 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), @@ -25,189 +22,74 @@ class AZMedienIE(AZMedienBaseIE): _VALID_URL = r'''(?x) https?:// (?:www\.)? - (?: + (?P telezueri\.ch| telebaern\.tv| telem1\.ch )/ - [0-9]+-show-[^/\#]+ - (?: - /[0-9]+-episode-[^/\#]+ - (?: - /[0-9]+-segment-(?:[^/\#]+\#)?| - \# - )| - \# + [^/]+/ + (?P + [^/]+-(?P\d+) ) - (?P[^\#]+) + (?: + \#video= + (?P + [_0-9a-z]+ + ) + )? 
''' _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569', 'info_dict': { - 'id': '1_2444peh4', + 'id': '1_anruz3wy', 'ext': 'mp4', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, + 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen', + 'description': 'md5:dd9f96751ec9c35e409a698a328402f3', + 'uploader_id': 'TVOnline', + 'upload_date': '20180930', + 'timestamp': 1538328802, }, 'params': { 'skip_download': True, }, }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', 'only_matching': True }] def _real_extract(self, url): video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + entry_id = mobj.group('kaltura_id') - webpage = self._download_webpage(url, video_id) + if not entry_id: + webpage = self._download_webpage(url, video_id) + api_path = self._search_regex( + r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']', + webpage, 'api path') + api_url = 'https://www.%s%s' % (mobj.group('host'), api_path) + payload = { + 'query': 
'''query VideoContext($articleId: ID!) { + article: node(id: $articleId) { + ... on Article { + mainAssetRelation { + asset { + ... on VideoAsset { + kalturaId + } + } + } + } + } + }''', + 'variables': {'articleId': 'Article:%s' % mobj.group('article_id')}, + } + json_data = self._download_json( + api_url, video_id, headers={ + 'Content-Type': 'application/json', + }, + data=json.dumps(payload).encode()) + entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId'] - partner_id = self._search_regex( - r']+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', - webpage, 'kaltura partner id') - entry_id = self._html_search_regex( - r']+data-id=(["\'])(?P(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' - % re.escape(video_id), webpage, 'kaltura entry id', group='id') - - return self._kaltura_video(partner_id, entry_id) - - -class AZMedienPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien playlists' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - telezueri\.ch| - telebaern\.tv| - telem1\.ch - )/ - (?P[0-9]+- - (?: - show| - topic| - themen - )-[^/\#]+ - (?: - /[0-9]+-episode-[^/\#]+ - )? - )$ - ''' - - _TESTS = [{ - # URL with 'episode' - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News - Donnerstag, 15. 
Dezember 2016', - }, - 'playlist_count': 9, - }, { - # URL with 'themen' - 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics', - 'info_dict': { - 'id': '258-themen-tele-m1-classics', - 'title': 'Tele M1 Classics', - }, - 'playlist_mincount': 15, - }, { - # URL with 'topic', contains nested playlists - 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen', - 'only_matching': True, - }, { - # URL with 'show' only - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) - - entries = [] - - partner_id = self._search_regex( - r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', - webpage, 'kaltura partner id', default=None) - - if partner_id: - entries = [ - self._kaltura_video(partner_id, m.group('id')) - for m in re.finditer( - r'data-id=(["\'])(?P(?:(?!\1).)+)\1', webpage)] - - if not entries: - entries = [ - self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) - for m in re.finditer( - r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] - - if not entries: - entries = [ - # May contain nested playlists (e.g. [1]) thus no explicit - # ie_key - # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen) - self.url_result(urljoin(url, m.group('url'))) - for m in re.finditer( - r']+name=[^>]+href=(["\'])(?P/.+?)\1', webpage)] - - title = self._search_regex( - r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'title', - default=strip_or_none(get_element_by_id( - 'video-title', webpage)), group='title') - - return self.playlist_result(entries, show_id, title) - - -class AZMedienShowPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien show playlists' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? 
- (?: - telezueri\.ch| - telebaern\.tv| - telem1\.ch - )/ - (?: - all-episodes| - alle-episoden - )/ - (?P<id>[^/?#&]+) - ''' - - _TEST = { - 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', - 'info_dict': { - 'id': 'astrotalk', - 'title': 'TeleZüri: AstroTalk - alle episoden', - 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', - }, - 'playlist_mincount': 13, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - episodes = get_element_by_class('search-mobile-box', webpage) - entries = [self.url_result( - urljoin(url, m.group('url'))) for m in re.finditer( - r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)] - title = self._og_search_title(webpage, fatal=False) - description = self._og_search_description(webpage) - return self.playlist_result(entries, playlist_id, title, description) + return self._kaltura_video(self._PARTNER_ID, entry_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c173f41e0..842464188 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,11 +88,7 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedien import ( - AZMedienIE, - AZMedienPlaylistIE, - AZMedienShowPlaylistIE, -) +from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE From 5d74d0efb4f1dde90f724251bb6fa9451f6371e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 01:32:29 +0700 Subject: [PATCH 118/132] [azmedien] Simplify (closes #17746) --- youtube_dl/extractor/azmedien.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 9d606ee67..a57a5f114 100644 --- a/youtube_dl/extractor/azmedien.py +++ 
b/youtube_dl/extractor/azmedien.py @@ -8,16 +8,7 @@ from .common import InfoExtractor from .kaltura import KalturaIE -class AZMedienBaseIE(InfoExtractor): - _PARTNER_ID = '1719221' - - def _kaltura_video(self, partner_id, entry_id): - return self.url_result( - 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), - video_id=entry_id) - - -class AZMedienIE(AZMedienBaseIE): +class AZMedienIE(InfoExtractor): IE_DESC = 'AZ Medien videos' _VALID_URL = r'''(?x) https?:// @@ -58,9 +49,11 @@ class AZMedienIE(AZMedienBaseIE): 'only_matching': True }] + _PARTNER_ID = '1719221' + def _real_extract(self, url): - video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') entry_id = mobj.group('kaltura_id') if not entry_id: @@ -92,4 +85,6 @@ class AZMedienIE(AZMedienBaseIE): data=json.dumps(payload).encode()) entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId'] - return self._kaltura_video(self._PARTNER_ID, entry_id) + return self.url_result( + 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + ie=KalturaIE.ie_key(), video_id=entry_id) From 0a93ed177a105f68e27fc8e4ee06c7ad58543136 Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Tue, 1 May 2018 05:36:03 +0200 Subject: [PATCH 119/132] [ehftv] Add extractor (closes #15408) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/laola1tv.py | 115 ++++++++++++++++++----------- 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 842464188..b54578877 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -539,6 +539,7 @@ from .la7 import LA7IE from .laola1tv import ( Laola1TvEmbedIE, Laola1TvIE, + EHFTVIE, ITTFIE, ) from .lci import LCIIE diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index c7f813370..d985bd3ca 100644 --- a/youtube_dl/extractor/laola1tv.py +++ 
b/youtube_dl/extractor/laola1tv.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..utils import ( @@ -119,9 +120,59 @@ class Laola1TvEmbedIE(InfoExtractor): } -class Laola1TvIE(Laola1TvEmbedIE): +class Laola1TvBaseIE(Laola1TvEmbedIE): + def _extract_video(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + if 'Dieser Livestream ist bereits beendet.' in webpage: + raise ExtractorError('This live stream has already finished.', expected=True) + + conf = self._parse_json(self._search_regex( + r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), + display_id, + transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s))) + video_id = conf['videoid'] + + config = self._download_json(conf['configUrl'], video_id, query={ + 'videoid': video_id, + 'partnerid': conf['partnerid'], + 'language': conf.get('language', ''), + 'portal': conf.get('portalid', ''), + }) + error = config.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + video_data = config['video'] + title = video_data['title'] + is_live = video_data.get('isLivestream') and video_data.get('isLive') + meta = video_data.get('metaInformation') + sports = meta.get('sports') + categories = sports.split(',') if sports else [] + + token_url = self._extract_token_url( + video_data['streamAccess'], video_id, + video_data['abo']['required']) + + formats = self._extract_formats(token_url, video_id) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('image'), + 'categories': categories, + 'formats': formats, + 'is_live': is_live, + } + + +class Laola1TvIE(Laola1TvBaseIE): IE_NAME = 'laola1tv' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 
'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { @@ -169,52 +220,30 @@ class Laola1TvIE(Laola1TvEmbedIE): }] def _real_extract(self, url): - display_id = self._match_id(url) + return self._extract_video(url) - webpage = self._download_webpage(url, display_id) - if 'Dieser Livestream ist bereits beendet.' in webpage: - raise ExtractorError('This live stream has already finished.', expected=True) +class EHFTVIE(Laola1TvBaseIE): + IE_NAME = 'ehftv' + _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' - conf = self._parse_json(self._search_regex( - r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), - display_id, js_to_json) + _TESTS = [{ + 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761', + 'info_dict': { + 'id': '1166761', + 'display_id': 'paris-saint-germain-handball-pge-vive-kielce', + 'ext': 'mp4', + 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce', + 'is_live': False, + 'categories': ['Handball'], + }, + 'params': { + 'skip_download': True, + }, + }] - video_id = conf['videoid'] - - config = self._download_json(conf['configUrl'], video_id, query={ - 'videoid': video_id, - 'partnerid': conf['partnerid'], - 'language': conf.get('language', ''), - 'portal': conf.get('portalid', ''), - }) - error = config.get('error') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - - video_data = config['video'] - title = video_data['title'] - is_live = video_data.get('isLivestream') and video_data.get('isLive') - meta = video_data.get('metaInformation') - sports = meta.get('sports') - categories = sports.split(',') if sports else [] - - token_url = self._extract_token_url( - video_data['streamAccess'], video_id, - video_data['abo']['required']) - - formats = self._extract_formats(token_url, video_id) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': self._live_title(title) if is_live else title, 
- 'description': video_data.get('description'), - 'thumbnail': video_data.get('image'), - 'categories': categories, - 'formats': formats, - 'is_live': is_live, - } + def _real_extract(self, url): + return self._extract_video(url) class ITTFIE(InfoExtractor): From b79985e7d919352d3139c689f105c17562017f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 02:44:35 +0700 Subject: [PATCH 120/132] [laola1tv:embed] Set correct stream access URL scheme (closes #16341) --- youtube_dl/extractor/laola1tv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d985bd3ca..fa217365a 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -33,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor): def _extract_token_url(self, stream_access_url, video_id, data): return self._download_json( - stream_access_url, video_id, headers={ + self._proto_relative_url(stream_access_url, 'https:'), video_id, + headers={ 'Content-Type': 'application/json', }, data=json.dumps(data).encode())['data']['stream-access'][0] @@ -225,7 +226,7 @@ class Laola1TvIE(Laola1TvBaseIE): class EHFTVIE(Laola1TvBaseIE): IE_NAME = 'ehftv' - _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761', From 9a23aa46d7a53a5fd81734c242ee6447a19465e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 02:56:14 +0700 Subject: [PATCH 121/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 57dbde12d..05857596a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version <unreleased> 
+ +Core +* [extractor/common] Ensure response handle is not prematurely closed before + it can be read if it matches expected_status (#17195, #17846, #17447) + +Extractors +* [laola1tv:embed] Set correct stream access URL scheme (#16341) ++ [ehftv] Add support for ehftv.com (#15408) +* [azmedien] Adapt to major site redesign (#17745, #17746) ++ [twitcasting] Add support for twitcasting.tv (#17981) +* [orf:tvthek] Fix extraction (#17737, #17956, #18024) ++ [openload] Add support for oload.fun (#18045) +* [njpwworld] Fix authentication (#17427) ++ [linkedin:learning] Add support for linkedin.com/learning (#13545) +* [theplatform] Improve error detection (#13222) +* [cnbc] Simplify extraction (#14280, #17110) ++ [cnbc] Add support for new URL schema (#14193) +* [aparat] Improve extraction and extract more metadata (#17445, #18008) +* [aparat] Fix extraction + + version 2018.10.29 Core From c4dfbacca01270c88f42f46ee3a869e445145590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 02:57:48 +0700 Subject: [PATCH 122/132] release 2018.11.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index aefed163a..eb8cef8ef 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.03*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.10.29 +[debug] youtube-dl version 2018.11.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 05857596a..11e1ba333 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.11.03 Core * [extractor/common] Ensure response handle is not prematurely closed before diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e5a6879bc..24c3254c3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -84,8 +84,6 @@ - **awaan:season** - **awaan:video** - **AZMedien**: AZ Medien videos - - **AZMedienPlaylist**: AZ Medien playlists - - **AZMedienShowPlaylist**: AZ Medien show playlists - **BaiduVideo**: 百度视频 - **bambuser** - **bambuser:channel** @@ -178,6 +176,7 @@ - **Clyp** - **cmt.com** - **CNBC** + - **CNBCVideo** - **CNN** - **CNNArticle** - **CNNBlogs** @@ -251,6 +250,7 @@ - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson + - **ehftv** - **eHow** - **EinsUndEinsTV** - **Einthusan** 
@@ -445,6 +445,8 @@ - **limelight:channel** - **limelight:channel_list** - **LineTV** + - **linkedin:learning** + - **linkedin:learning:course** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -930,6 +932,7 @@ - **TVPlayer** - **TVPlayHome** - **Tweakers** + - **TwitCasting** - **twitch:chapter** - **twitch:clips** - **twitch:profile** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ae9a77966..90de01214 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.10.29' +__version__ = '2018.11.03' From ae41150a618e9e620fa15b4466250d8a1adc6019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 06:26:16 +0700 Subject: [PATCH 123/132] [youtube] Add fallback metadata extraction from videoDetails (closes #18052) --- youtube_dl/extractor/youtube.py | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 78203ef84..abadfa545 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -41,6 +41,7 @@ from ..utils import ( remove_quotes, remove_start, smuggle_url, + str_or_none, str_to_int, try_get, unescapeHTML, @@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, + 'view_count': int, 'like_count': int, 'dislike_count': int, 'start_time': 1, @@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, + 'view_count': int, 'like_count': int, 'dislike_count': int, }, @@ -1538,6 +1541,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + player_response = {} + # Get video info embed_webpage = None if 
re.search(r'player-age-gate-content">', video_webpage) is not None: @@ -1580,6 +1585,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True sts = ytplayer_config.get('sts') + if not player_response: + pl_response = str_or_none(args.get('player_response')) + if pl_response: + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + player_response = pl_response if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): # We also try looking in get_video_info since it may contain different dashmpd # URL that points to a DASH manifest with possibly different itag set (some itags @@ -1608,6 +1619,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info_webpage: continue get_video_info = compat_parse_qs(video_info_webpage) + if not player_response: + pl_response = get_video_info.get('player_response', [None])[0] + if isinstance(pl_response, dict): + player_response = pl_response add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) @@ -1653,9 +1668,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '"token" parameter not in video info for unknown reason', video_id=video_id) + video_details = try_get( + player_response, lambda x: x['videoDetails'], dict) or {} + # title if 'title' in video_info: video_title = video_info['title'][0] + elif 'title' in player_response: + video_title = video_details['title'] else: self._downloader.report_warning('Unable to extract video title') video_title = '_' @@ -1718,6 +1738,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if view_count is None: view_count = extract_view_count(video_info) + if view_count is None and video_details: + view_count = int_or_none(video_details.get('viewCount')) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: @@ -1898,7 +1920,9 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') # uploader - video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str) + video_uploader = try_get( + video_info, lambda x: x['author'][0], + compat_str) or str_or_none(video_details.get('author')) if video_uploader: video_uploader = compat_urllib_parse_unquote_plus(video_uploader) else: @@ -2011,12 +2035,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): like_count = _extract_count('like') dislike_count = _extract_count('dislike') + if view_count is None: + view_count = str_to_int(self._search_regex( + r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage, + 'view count', default=None)) + # subtitles video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) video_duration = try_get( video_info, lambda x: int_or_none(x['length_seconds'][0])) + if not video_duration: + video_duration = int_or_none(video_details.get('lengthSeconds')) if not video_duration: video_duration = parse_duration(self._html_search_meta( 'duration', video_webpage, 'video duration')) @@ -2244,6 +2275,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 'categories': ['People & Blogs'], 'tags': list, + 'view_count': int, 'like_count': int, 'dislike_count': int, }, From e0d740595efbb54baafd06269f3590f4918533a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 00:11:36 +0700 Subject: [PATCH 124/132] [README.md] Improve documentation on safe metadata extraction and add more examples --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fdd115c9b..35c3de512 100644 --- a/README.md +++ b/README.md @@ -1168,7 +1168,28 @@ title = self._search_regex( ### Use 
safe conversion functions -Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON. + +Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` # EMBEDDING YOUTUBE-DL From 68ce19905ef2ec7dfae9814e1e5b261fe7ed4b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 15:52:46 +0700 Subject: [PATCH 125/132] [zattoo] Arrange API hosts for derived extractors (closes #18035) --- youtube_dl/extractor/zattoo.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index bbe0aecb6..cb1bac3a3 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor): _power_guide_hash = None def _host_url(self): - return 'https://%s' % self._HOST + return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST) def _login(self): username, password = self._get_login_info() @@ 
-286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE): class NetPlusIE(ZattooIE): _NETRC_MACHINE = 'netplus' _HOST = 'netplus.tv' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', + 'url': 'https://tvplus.m-net.de/watch/abc/123-abc', 'only_matching': True, }] @@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.player.waly.tv/watch/abc/123-abc', + 'url': 'https://player.waly.tv/watch/abc/123-abc', 'only_matching': True, }] @@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE): class BBVTVIE(ZattooIE): _NETRC_MACHINE = 'bbvtv' _HOST = 'bbv-tv.net' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE): class VTXTVIE(ZattooIE): _NETRC_MACHINE = 'vtxtv' _HOST = 'vtxtv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE): class MyVisionTVIE(ZattooIE): _NETRC_MACHINE = 'myvisiontv' _HOST = 'myvisiontv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', + 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc', 'only_matching': True, }] @@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE): class SAKTVIE(ZattooIE): _NETRC_MACHINE = 'saktv' _HOST = 'saktv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -377,7 +382,7 @@ class EWETVIE(ZattooIE): 
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', + 'url': 'https://tvonline.ewe.de/watch/abc/123-abc', 'only_matching': True, }] @@ -385,6 +390,7 @@ class EWETVIE(ZattooIE): class QuantumTVIE(ZattooIE): _NETRC_MACHINE = 'quantumtv' _HOST = 'quantum-tv.com' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -399,7 +405,7 @@ class OsnatelTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', + 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc', 'only_matching': True, }] @@ -407,6 +413,7 @@ class OsnatelTVIE(ZattooIE): class EinsUndEinsTVIE(ZattooIE): _NETRC_MACHINE = '1und1tv' _HOST = '1und1.tv' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ From 10c48dbbfab809eaa6c69092f957b716f152db2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 17:09:57 +0700 Subject: [PATCH 126/132] [osnateltv] Update host --- youtube_dl/extractor/zattoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index cb1bac3a3..896276301 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -401,7 +401,7 @@ class QuantumTVIE(ZattooIE): class OsnatelTVIE(ZattooIE): _NETRC_MACHINE = 'osnateltv' - _HOST = 'onlinetv.osnatel.de' + _HOST = 'tvonline.osnatel.de' _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ From f48b0b46294f5116fb4eee96e06ee618b0861132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 19:08:39 +0700 Subject: [PATCH 127/132] [youtube:playlist] Add support for invidio.us (closes #18077) --- youtube_dl/extractor/youtube.py | 9 
++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index abadfa545..6ab2db274 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2162,7 +2162,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (?:https?://)? (?:\w+\.)? (?: - youtube\.com/ + (?: + youtube\.com| + invidio\.us + ) + / (?: (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) \? (?:.*?[&;])*? (?:p|a|list)= @@ -2314,6 +2318,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # music album playlist 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM', 'only_matching': True, + }, { + 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', + 'only_matching': True, }] def _real_initialize(self): From c0740fdd626d2da50622a9a3cd0e5c14ece60a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Nov 2018 23:29:42 +0700 Subject: [PATCH 128/132] [cliphinter] Fix extraction (closes #18083) --- youtube_dl/extractor/cliphunter.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index ab651d1c8..f2ca7a337 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -1,19 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none - - -_translation_table = { - 'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', - 'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r', - 'y': 'l', 'z': 'i', - '$': ':', '&': '.', '(': '=', '^': '&', '=': '/', -} - - -def _decode(s): - return ''.join(_translation_table.get(c, c) for c in s) +from ..utils import ( + int_or_none, + url_or_none, +) class CliphunterIE(InfoExtractor): @@ -60,14 +51,14 @@ class 
CliphunterIE(InfoExtractor): formats = [] for format_id, f in gexo_files.items(): - video_url = f.get('url') + video_url = url_or_none(f.get('url')) if not video_url: continue fmt = f.get('fmt') height = f.get('h') format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id formats.append({ - 'url': _decode(video_url), + 'url': video_url, 'format_id': format_id, 'width': int_or_none(f.get('w')), 'height': int_or_none(height), From 87a79d956f5185bdb43cb0d10f782d16741d74cc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 6 Nov 2018 21:22:00 +0100 Subject: [PATCH 129/132] [facebook] fix tahoe request(closes #17171) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 97cfe0fc3..74954049d 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor): _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' - _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true' + _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' _TESTS = [{ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', From bc34725f667a3d025bc591dd69e6f83d4eaa796f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Nov 2018 09:55:59 +0700 Subject: [PATCH 130/132] [youtube] Add another JS signature function name regex (closes #18091, closes #18093, closes #18094) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6ab2db274..3f49f3889 100644 
--- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1192,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('), + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From 17d9f9d8721fb69c8e677807e8cebfed8ef50b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Nov 2018 09:58:08 +0700 Subject: [PATCH 131/132] [ChangeLog] Actualize [ci skip] --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 11e1ba333..920a4855a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Extractors ++ [youtube] Add another JS signature function name regex (#18091, #18093, + #18094) +* [facebook] Fix tahoe request (#17171) +* [cliphunter] Fix extraction (#18083) ++ [youtube:playlist] Add support for invidio.us (#18077) +* [zattoo] Arrange API hosts for derived extractors (#18035) ++ [youtube] Add fallback metadata extraction from videoDetails (#18052) + + version 2018.11.03 Core From d2d423b3935067d0c2d7d091d2f46aca905298b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Nov 2018 01:38:25 +0700 Subject: [PATCH 132/132] release 2018.11.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 23 ++++++++++++++++++++++- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index eb8cef8ef..7607e0e03 100644 --- a/.github/ISSUE_TEMPLATE.md +++ 
b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.11.03 +[debug] youtube-dl version 2018.11.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 333acee80..bbcb78808 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -296,5 +296,26 @@ title = self._search_regex( ### Use safe conversion functions -Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. 
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON. + +Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` diff --git a/ChangeLog b/ChangeLog index 920a4855a..fa5de8b04 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.11.07 Extractors + [youtube] Add another JS signature function name regex (#18091, #18093, diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 90de01214..7f32ad36c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.11.03' +__version__ = '2018.11.07'