From 178ee88319a384b66d9b2da27a819f32ba870425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Mar 2018 23:57:07 +0700 Subject: [PATCH 01/77] [generic] Add support for xfileshare embeds (closes #15879) --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/xfileshare.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a98f3636a..dbd565066 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -104,6 +104,7 @@ from .mediasite import MediasiteIE from .springboardplatform import SpringboardPlatformIE from .yapfiles import YapFilesIE from .vice import ViceIE +from .xfileshare import XFileShareIE class GenericIE(InfoExtractor): @@ -2971,6 +2972,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( vice_urls, video_id, video_title, ie=ViceIE.ie_key()) + xfileshare_urls = XFileShareIE._extract_urls(webpage) + if xfileshare_urls: + return self.playlist_from_matches( + xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ad747978d..bc3239f68 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor): 'only_matching': True }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' + % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]), + webpage)] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') From 96b8b9abaecb7518d901dc9d6a617f19c3161236 Mon Sep 17 00:00:00 2001 From: Ricardo Constantino Date: Wed, 7 Mar 2018 21:31:53 +0000 Subject: [PATCH 02/77] [extractor/generic] Support relative URIs in _parse_xspf can have relative URIs, not just absolute. --- test/test_InfoExtractor.py | 42 ++++++++++++++++++++++++++++++++ test/testdata/xspf/foo_xspf.xspf | 34 ++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 6 ++--- youtube_dl/extractor/generic.py | 4 ++- 4 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 test/testdata/xspf/foo_xspf.xspf diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 7b31d5198..a695ce64b 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -694,6 +694,48 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/', + [{ + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], + 'id': 'foo_xspf', + 'title': 'Pandemonium' + }, + { + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)' + }, + { + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [ + {'url': 'https://example.org/src/track3.mp3'}, + {'url': 'https://example.com/track3.mp3'} + ], + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale' + }] + ), + ] + + for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: + with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode('utf-8')), + xspf_file, xspf_base_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf new file mode 100644 index 000000000..b7f0086b3 --- /dev/null +++ b/test/testdata/xspf/foo_xspf.xspf @@ -0,0 +1,34 @@ + + + 2018-03-09T18:01:43Z + + + cd1/track%201.mp3 + Pandemonium + Foilverb + Visit http://bigbrother404.bandcamp.com + Pandemonium EP + 1 + 202416 + + + ../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3 + Final Cartridge (Nichico Twelve Remix) + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 2 + 255857 + + + track3.mp3 + https://example.com/track3.mp3 + Rebuilding Nightingale + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 3 + 287915 + + + diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fcdd0fd14..c1e1012e7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1700,9 +1700,9 @@ class InfoExtractor(object): 'Unable to download xspf manifest', fatal=fatal) if xspf is False: return [] - return self._parse_xspf(xspf, playlist_id) + return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) - def _parse_xspf(self, playlist, playlist_id): + def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', @@ -1720,7 +1720,7 @@ class InfoExtractor(object): xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) formats = [{ - 'url': location.text, + 'url': urljoin(playlist_base_url, location.text), 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dbd565066..023ccbc9b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2232,7 +2232,9 @@ class GenericIE(InfoExtractor): self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': - return self.playlist_result(self._parse_xspf(doc, video_id), video_id) + return self.playlist_result( + self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), + video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, From e0d198c18d4a5f191adbfb43259c104d16e30596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Mar 2018 02:17:34 +0700 Subject: [PATCH 03/77] [extractor/common] Add _download_xml_handle --- youtube_dl/extractor/common.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c1e1012e7..a50778509 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -644,19 +644,31 @@ class InfoExtractor(object): content, _ = res return content + def _download_xml_handle( + self, url_or_request, video_id, note='Downloading XML', + errnote='Unable to download XML', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)""" + res = self._download_webpage_handle( + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding, data=data, headers=headers, query=query) + if res is False: + return res + xml_string, urlh = res + return self._parse_xml( + xml_string, video_id, transform_source=transform_source, + fatal=fatal), urlh + def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" - xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding, data=data, headers=headers, query=query) - if xml_string is False: - return xml_string - return self._parse_xml( - xml_string, video_id, transform_source=transform_source, - fatal=fatal) + res = self._download_xml_handle( + url_or_request, video_id, note=note, errnote=errnote, + transform_source=transform_source, fatal=fatal, encoding=encoding, + data=data, headers=headers, query=query) + return res if res is False else res[0] def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): if transform_source: From 47a5cb77344536ca79d81a04904ac9ef9b02050f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Mar 2018 02:46:50 +0700 Subject: [PATCH 04/77] Generalize XML manifest processing code and improve XSPF parsing (closes #15794) --- test/test_InfoExtractor.py | 41 ++++++++++++++++++------------- youtube_dl/extractor/common.py | 43 +++++++++++++++++++-------------- youtube_dl/extractor/generic.py | 4 ++- 3 files changed, 52 insertions(+), 36 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a695ce64b..4833396a5 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -698,40 +698,47 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ _TEST_CASES = [ ( 'foo_xspf', - 'https://example.org/src/', + 'https://example.org/src/foo_xspf.xspf', [{ + 'id': 'foo_xspf', + 'title': 'Pandemonium', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 202.416, - 'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/cd1/track%201.mp3', + }], + }, { 'id': 'foo_xspf', - 'title': 'Pandemonium' - }, - { + 'title': 'Final Cartridge (Nichico Twelve Remix)', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 255.857, - 'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', + }], + }, { 'id': 'foo_xspf', - 'title': 'Final Cartridge (Nichico Twelve Remix)' - }, - { + 'title': 'Rebuilding Nightingale', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 287.915, - 'formats': [ - {'url': 'https://example.org/src/track3.mp3'}, - {'url': 'https://example.com/track3.mp3'} - ], - 'id': 'foo_xspf', - 'title': 'Rebuilding Nightingale' + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/track3.mp3', + }, { + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.com/track3.mp3', + }] }] ), ] - for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: + for xspf_file, xspf_url, expected_entries in _TEST_CASES: with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, mode='r', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode('utf-8')), - xspf_file, xspf_base_url) + xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) expect_value(self, entries, expected_entries, None) for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a50778509..2e2a02948 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1706,22 +1706,24 @@ class InfoExtractor(object): }) return subtitles - def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): + def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): xspf = self._download_xml( - playlist_url, playlist_id, 'Downloading xpsf playlist', + xspf_url, playlist_id, 'Downloading xpsf playlist', 'Unable to download xspf manifest', fatal=fatal) if xspf is False: return [] - return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) + return self._parse_xspf( + xspf, playlist_id, xspf_url=xspf_url, + xspf_base_url=base_url(xspf_url)) - def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): + def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', } entries = [] - for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): + for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): title = xpath_text( track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) description = xpath_text( @@ -1731,12 +1733,18 @@ class InfoExtractor(object): duration = float_or_none( xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) - formats = [{ - 'url': urljoin(playlist_base_url, location.text), - 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), - 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), - 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), - } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] + formats = [] + for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)): + format_url = urljoin(xspf_base_url, location.text) + if not format_url: + continue + formats.append({ + 'url': format_url, + 'manifest_url': xspf_url, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + }) self._sort_formats(formats) entries.append({ @@ -1750,18 +1758,18 @@ class InfoExtractor(object): return entries def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): - res = self._download_webpage_handle( + res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', fatal=fatal) if res is False: return [] - mpd, urlh = res + mpd_doc, urlh = res mpd_base_url = base_url(urlh.geturl()) return self._parse_mpd_formats( - compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, + mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url, formats_dict=formats_dict, mpd_url=mpd_url) def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): @@ -2035,17 +2043,16 @@ class InfoExtractor(object): return formats def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): - res = self._download_webpage_handle( + res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', fatal=fatal) if res is False: return [] - ism, urlh = res + ism_doc, urlh = res - return self._parse_ism_formats( - compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) + return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): """ diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 023ccbc9b..1cc491b19 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2233,7 +2233,9 @@ class GenericIE(InfoExtractor): return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': return self.playlist_result( - self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), + self._parse_xspf( + doc, video_id, xspf_url=url, + xspf_base_url=compat_str(full_response.geturl())), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( From 6e3f23d912ae2b7018a13f87ff89572dfac10d02 Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Sun, 18 Mar 2018 21:14:33 +0000 Subject: [PATCH 05/77] [prosiebensat1] Add support for galileo.tv (closes #15894) --- youtube_dl/extractor/prosiebensat1.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7efff4566..d0955d079 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?: prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| - ran\.de|fem\.com|advopedia\.de + ran\.de|fem\.com|advopedia\.de|galileo\.tv/video ) /(?P.+) ''' @@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'only_matching': True, }, + { + # geo restricted to Germany + 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', + 'only_matching': True, + }, { 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'only_matching': True, @@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', r'clip[iI]d=(\d+)', - r'clip[iI]d\s*=\s*["\'](\d+)', + r'clip[iI][dD]\s*=\s*["\'](\d+)', r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r'proMamsId"\s*:\s*"(\d+)', r'proMamsId"\s*:\s*"(\d+)', From 9a054fcbbadf06101b081f8be0594b38b654364f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Mar 2018 23:28:37 +0700 Subject: [PATCH 06/77] [ceskatelevize] Add support for iframe embeds (closes #15918) --- youtube_dl/extractor/ceskatelevize.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e250de18c..6bad90859 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -13,6 +13,7 @@ from ..utils import ( float_or_none, sanitized_Request, unescapeHTML, + update_url_query, urlencode_postdata, USER_AGENTS, ) @@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, }] def _real_extract(self, url): @@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - data_url = unescapeHTML(self._search_regex( - r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'iframe player url', group='url')) + data_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={ + 'autoStart': 'true', + }) return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) From 38f59e2793dfcb5f493977857304ab50b784e6ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Mar 2018 23:40:19 +0700 Subject: [PATCH 07/77] [canalc2] Add support for HTML5 videos (closes #15916, closes #15919) --- youtube_dl/extractor/canalc2.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index acd87e371..407cc8084 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor): webpage = self._download_webpage( 'http://www.canalc2.tv/video/%s' % video_id, video_id) + title = self._html_search_regex( + r'(?s)class="[^"]*col_description[^"]*">.*?

(.+?)

', + webpage, 'title') + formats = [] for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): if video_url.startswith('rtmp://'): @@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor): 'url': video_url, 'format_id': 'http', }) - self._sort_formats(formats) - title = self._html_search_regex( - r'(?s)class="[^"]*col_description[^"]*">.*?

(.*?)

', webpage, 'title') - duration = parse_duration(self._search_regex( - r'id=["\']video_duree["\'][^>]*>([^<]+)', - webpage, 'duration', fatal=False)) + if formats: + info = { + 'formats': formats, + } + else: + info = self._parse_html5_media_entries(url, webpage, url)[0] - return { + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'duration': duration, - 'formats': formats, - } + 'duration': parse_duration(self._search_regex( + r'id=["\']video_duree["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)), + }) + return info From 6780154e6bcdabdb35a24d2b1c5049c94fbe27a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Mar 2018 23:43:53 +0700 Subject: [PATCH 08/77] [extractor/common] Improve thumbnail extraction for HTML5 entries --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2e2a02948..890232586 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2150,8 +2150,8 @@ class InfoExtractor(object): return formats def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): - def absolute_url(video_url): - return compat_urlparse.urljoin(base_url, video_url) + def absolute_url(item_url): + return urljoin(base_url, item_url) def parse_content_type(content_type): if not content_type: @@ -2208,7 +2208,7 @@ class InfoExtractor(object): if src: _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) - media_info['thumbnail'] = media_attributes.get('poster') + media_info['thumbnail'] = absolute_url(media_attributes.get('poster')) if media_content: for source_tag in re.findall(r']+>', media_content): source_attributes = extract_attributes(source_tag) From 21dedcb5804b070bea143e4670df3b6f2951a078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 00:27:39 +0700 Subject: [PATCH 09/77] [cbs] Skip unavailable assets (closes #13490, closes #13506, closes #15776) --- youtube_dl/extractor/cbs.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 1268e38ef..f425562ab 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .theplatform import ThePlatformFeedIE from ..utils import ( + ExtractorError, int_or_none, find_xpath_attr, xpath_element, @@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE): asset_types = [] subtitles = {} formats = [] + last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') if not asset_type or asset_type in asset_types: @@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE): query['formats'] = 'MPEG4,M3U' elif asset_type in ('RTMP', 'WIFI', '3G'): query['formats'] = 'MPEG4,FLV' - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + except ExtractorError as e: + last_e = e + continue formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) + if last_e and not formats: + raise last_e self._sort_formats(formats) info = self._extract_theplatform_metadata(tp_path, content_id) From 832f9d5258ac53e916515ad0b6b1490c872d6174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:06:58 +0700 Subject: [PATCH 10/77] [9now] Bypass geo restriction (closes #15920) --- youtube_dl/extractor/ninenow.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 351bea7ba..f32f530f7 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -4,15 +4,17 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, float_or_none, - ExtractorError, + smuggle_url, ) class NineNowIE(InfoExtractor): IE_NAME = '9now.com.au' _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P[^/?#]+)' + _GEO_COUNTRIES = ['AU'] _TESTS = [{ # clip 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', @@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': self._GEO_COUNTRIES}), 'id': video_id, 'title': title, 'description': common_data.get('description'), From d9e2240f7c5d6b1a8ecd133625827f2c806dc9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:40:53 +0700 Subject: [PATCH 11/77] [7plus] Extract series metadata (closes #15862, closes #15906) --- youtube_dl/extractor/sevenplus.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py index 9792f820a..84568ac69 100644 --- a/youtube_dl/extractor/sevenplus.py +++ b/youtube_dl/extractor/sevenplus.py @@ -4,22 +4,30 @@ from __future__ import unicode_literals import re from .brightcove import BrightcoveNewIE -from ..utils import update_url_query +from ..compat import compat_str +from ..utils import ( + try_get, + update_url_query, +) class SevenPlusIE(BrightcoveNewIE): IE_NAME = '7plus' _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P[^?]+\?.*?\bepisode-id=(?P[^&#]+))' _TESTS = [{ - 'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001', + 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003', 'info_dict': { - 'id': 'BEAT-001', + 'id': 'MTYS7-003', 'ext': 'mp4', - 'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds', - 'description': 'md5:37718bea20a8eedaca7f7361af566131', + 'title': 'S7 E3 - Wind Surf', + 'description': 'md5:29c6a69f21accda7601278f81b46483d', 'uploader_id': '5303576322001', - 'upload_date': '20171031', - 'timestamp': 1509440068, + 'upload_date': '20171201', + 'timestamp': 1512106377, + 'series': 'Mighty Ships', + 'season_number': 7, + 'episode_number': 3, + 'episode': 'Wind Surf', }, 'params': { 'format': 'bestvideo', @@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE): value = item.get(src_key) if value: info[dst_key] = value + info['series'] = try_get( + item, lambda x: x['seriesLogo']['name'], compat_str) + mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title']) + if mobj: + info.update({ + 'season_number': int(mobj.group(1)), + 'episode_number': int(mobj.group(2)), + 'episode': mobj.group(3), + }) return info From c651de39d51cddf5ddefb446a89a62a6a424c39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:49:22 +0700 Subject: [PATCH 12/77] [ChangeLog] Actualize [ci skip] --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 47736e076..f3a1ca60d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version + +Core +* [extractor/common] Improve thumbnail extraction for HTML5 entries +* Generalize XML manifest processing code and improve XSPF parsing ++ [extractor/common] Add _download_xml_handle ++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794) + +Extractors ++ [7plus] Extract series metadata (#15862, #15906) +* [9now] Bypass geo restriction (#15920) +* [cbs] Skip unavailable assets (#13490, #13506, #15776) ++ [canalc2] Add support for HTML5 videos (#15916, #15919) ++ [ceskatelevize] Add support for iframe embeds (#15918) ++ [prosiebensat1] Add support for galileo.tv (#15894) ++ [generic] Add support for xfileshare embeds (#15879) +* [bilibili] Switch to v2 playurl API +* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863) +* [heise] Improve extraction (#15496, #15784, #15026) +* [instagram] Fix user videos extraction (#15858) + + version 2018.03.14 Extractors From a66d1d079a3c2f2791b0a67c97cc9cec8c2faffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:55:48 +0700 Subject: [PATCH 13/77] release 2018.03.20 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 481e2ed74..75c5b2226 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.03.14 +[debug] youtube-dl version 2018.03.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f3a1ca60d..0d748316e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.03.20 Core * [extractor/common] Improve thumbnail extraction for HTML5 entries diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6ce11c39b..c686714f0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.03.14' +__version__ = '2018.03.20' From 3395958d2befc710181bbde872074ce81eee9158 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 20 Mar 2018 23:07:11 +0100 Subject: [PATCH 14/77] libsyn: adapt to new page structure and replace testcase --- youtube_dl/extractor/libsyn.py | 52 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index 4750b03a3..f7311f483 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -1,24 +1,28 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + parse_duration, + unified_strdate, +) class LibsynIE(InfoExtractor): _VALID_URL = r'(?Phttps?://html5-player\.libsyn\.com/embed/episode/id/(?P[0-9]+))' _TESTS = [{ - 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/', - 'md5': '443360ee1b58007bc3dcf09b41d093bb', + 'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/', + 'md5': '2a55e75496c790cdeb058e7e6c087746', 'info_dict': { - 'id': '3377616', + 'id': '6385796', 'ext': 'mp3', - 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", - 'description': 'md5:601cb790edd05908957dae8aaa866465', - 'upload_date': '20150220', + 'title': "Champion Minded - Developing a Growth Mindset", + 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', + 'upload_date': '20180320', 'thumbnail': 're:^https?://.*', }, }, { @@ -39,31 +43,45 @@ class LibsynIE(InfoExtractor): url = m.group('mainurl') webpage = self._download_webpage(url, video_id) - formats = [{ - 'url': media_url, - } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))] - podcast_title = self._search_regex( - r'

([^<]+)

', webpage, 'podcast title', default=None) + r'

([^<]+)

', webpage, 'podcast title', default=None) + if podcast_title: + podcast_title = podcast_title.strip() episode_title = self._search_regex( - r'(?:
|

)([^<]+)|

)([^<]+)(.+?)

', webpage, + r'(.+?)

', webpage, 'description', default=None) - thumbnail = self._search_regex( - r']+class="info-show-icon"[^>]+src="([^"]+)"', - webpage, 'thumbnail', fatal=False) + if description: + # Strip non-breaking and normal spaces + description = description.replace('\u00A0', ' ').strip() release_date = unified_strdate(self._search_regex( r'
Released: ([^<]+)<', webpage, 'release date', fatal=False)) + data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block') + data = json.loads(data_json) + + formats = [{ + 'url': data['media_url'], + 'format_id': 'main', + }, { + 'url': data['media_url_libsyn'], + 'format_id': 'libsyn', + }] + thumbnail = data.get('thumbnail_url') + duration = parse_duration(data.get('duration')) + return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': release_date, + 'duration': duration, 'formats': formats, } From 328ddf56a151830ae002842b7088464e4e391b5d Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Wed, 21 Mar 2018 12:13:31 +0530 Subject: [PATCH 15/77] [Youku] Update ccode --- youtube_dl/extractor/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 5b0b248cd..2f5a7b023 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0507', + 'ccode': '0590', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, From cba5d1b6b36d79fcafe0600d9805e6b82ed5388f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 21 Mar 2018 23:43:03 +0700 Subject: [PATCH 16/77] [instagram:user] Add pagination (closes #15934) --- youtube_dl/extractor/instagram.py | 96 +++++++++++++++++++------------ 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index ac9d92a8d..f9cd11b8e 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import itertools import json import re @@ -242,48 +243,69 @@ class InstagramUserIE(InfoExtractor): return int_or_none(try_get( node, lambda x: x['edge_media_' + suffix]['count'])) - edges = self._download_json( - 'https://www.instagram.com/graphql/query/', uploader_id, query={ - 'query_hash': '472f257a40c653c64c666ce877d59d2b', - 'variables': json.dumps({ - 'id': uploader_id, - 'first': 999999999, + cursor = '' + for page_num in itertools.count(1): + media = self._download_json( + 'https://www.instagram.com/graphql/query/', uploader_id, + 'Downloading JSON page %d' % page_num, query={ + 'query_hash': '472f257a40c653c64c666ce877d59d2b', + 'variables': json.dumps({ + 'id': uploader_id, + 'first': 100, + 'after': cursor, + }) + })['data']['user']['edge_owner_to_timeline_media'] + + edges = media.get('edges') + if not edges or not isinstance(edges, list): + break + + for edge in edges: + node = edge.get('node') + if not node or not isinstance(node, dict): + continue + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + video_id = node.get('shortcode') + if not video_id: + continue + + info = self.url_result( + 'https://instagram.com/p/%s/' % video_id, + ie=InstagramIE.ie_key(), video_id=video_id) + + description = try_get( + node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], + compat_str) + thumbnail = node.get('thumbnail_src') or node.get('display_src') + timestamp = int_or_none(node.get('taken_at_timestamp')) + + comment_count = get_count('to_comment') + like_count = get_count('preview_like') + view_count = int_or_none(node.get('video_view_count')) + + info.update({ + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'comment_count': comment_count, + 'like_count': like_count, + 'view_count': view_count, }) - })['data']['user']['edge_owner_to_timeline_media']['edges'] - for edge in edges: - node = edge['node'] + yield info - if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: - continue - video_id = node.get('shortcode') - if not video_id: - continue + page_info = media.get('page_info') + if not page_info or not isinstance(page_info, dict): + break - info = self.url_result( - 'https://instagram.com/p/%s/' % video_id, - ie=InstagramIE.ie_key(), video_id=video_id) + has_next_page = page_info.get('has_next_page') + if not has_next_page: + break - description = try_get( - node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], - compat_str) - thumbnail = node.get('thumbnail_src') or node.get('display_src') - timestamp = int_or_none(node.get('taken_at_timestamp')) - - comment_count = get_count('to_comment') - like_count = get_count('preview_like') - view_count = int_or_none(node.get('video_view_count')) - - info.update({ - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'comment_count': comment_count, - 'like_count': like_count, - 'view_count': view_count, - }) - - yield info + cursor = page_info.get('end_cursor') + if not cursor or not isinstance(cursor, compat_str): + break def _real_extract(self, url): username = self._match_id(url) From 8b7340a45eb0e3aeaa996896ff8690b6c3a32af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Mar 2018 22:55:28 +0700 Subject: [PATCH 17/77] [lenta] Add extractor (closes #15953) --- youtube_dl/extractor/extractors.py | 5 +-- youtube_dl/extractor/generic.py | 18 ---------- youtube_dl/extractor/lenta.py | 53 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 20 deletions(-) create mode 100644 youtube_dl/extractor/lenta.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3bde40eb3..de48a37ad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -532,13 +532,14 @@ from .lcp import ( ) from .learnr import LearnrIE from .lecture2go import Lecture2GoIE -from .lego import LEGOIE -from .lemonde import LemondeIE from .leeco import ( LeIE, LePlaylistIE, LetvCloudIE, ) +from .lego import LEGOIE +from .lemonde import LemondeIE +from .lenta import LentaIE from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1cc491b19..cf64398e3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1270,24 +1270,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # EaglePlatform embed (generic URL) - { - 'url': 'http://lenta.ru/news/2015/03/06/navalny/', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - 'params': { - 'skip_download': True, - }, - }, # referrer protected EaglePlatform embed { 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', diff --git a/youtube_dl/extractor/lenta.py b/youtube_dl/extractor/lenta.py new file mode 100644 index 000000000..2ebd4e577 --- /dev/null +++ b/youtube_dl/extractor/lenta.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LentaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/', + 'info_dict': { + 'id': '964400', + 'ext': 'mp4', + 'title': 'Надежду Савченко задержали', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 61, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + # EaglePlatform iframe embed + 'url': 'http://lenta.ru/news/2015/03/06/navalny/', + 'info_dict': { + 'id': '227304', + 'ext': 'mp4', + 'title': 'Навальный вышел на свободу', + 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 87, + 'view_count': int, + 'age_limit': 0, + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id', + default=None) + if video_id: + return self.url_result( + 'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id, + ie='EaglePlatform', video_id=video_id) + + return self.url_result(url, ie='Generic') From b9f5a41207bc704cca9e9e357f79d525828a39b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Mar 2018 23:53:18 +0700 Subject: [PATCH 18/77] [crackle] Fix extraction (closes #15969) --- youtube_dl/extractor/crackle.py | 207 ++++++++++++++++---------------- 1 file changed, 102 insertions(+), 105 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 13f425b2b..57d84241a 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,31 +1,41 @@ # coding: utf-8 from __future__ import unicode_literals, division +import re + from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + parse_age_limit, + parse_duration, +) class CrackleIE(InfoExtractor): _GEO_COUNTRIES = ['US'] _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P\d+)' _TEST = { - 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', + 'url': 'https://www.crackle.com/andromeda/2502343', 'info_dict': { - 'id': '2498934', + 'id': '2502343', 'ext': 'mp4', - 'title': 'Everybody Respects A Bloody Nose', - 'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 906, - 'series': 'Comedians In Cars Getting Coffee', - 'season_number': 8, - 'episode_number': 4, - 'subtitles': { - 'en-US': [ - {'ext': 'vtt'}, - {'ext': 'tt'}, - ] - }, + 'title': 'Under The Night', + 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a', + 'duration': 2583, + 'view_count': int, + 'average_rating': 0, + 'age_limit': 14, + 'genre': 'Action, Sci-Fi', + 'creator': 'Allan Kroeker', + 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe', + 'release_year': 2000, + 'series': 'Andromeda', + 'episode': 'Under The Night', + 'season_number': 1, + 'episode_number': 1, }, 'params': { # m3u8 download @@ -33,108 +43,95 @@ class CrackleIE(InfoExtractor): } } - _THUMBNAIL_RES = [ - (120, 90), - (208, 156), - (220, 124), - (220, 220), - (240, 180), - (250, 141), - (315, 236), - (320, 180), - (360, 203), - (400, 300), - (421, 316), - (460, 330), - (460, 460), - (462, 260), - (480, 270), - (587, 330), - (640, 480), - (700, 330), - (700, 394), - (854, 480), - (1024, 1024), - (1920, 1080), - ] - - # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx - _MEDIA_FILE_SLOTS = { - 'c544.flv': { - 'width': 544, - 'height': 306, - }, - '360p.mp4': { - 'width': 640, - 'height': 360, - }, - '480p.mp4': { - 'width': 852, - 'height': 478, - }, - '480p_1mbps.mp4': { - 'width': 852, - 'height': 478, - }, - } - def _real_extract(self, url): video_id = self._match_id(url) - config_doc = self._download_xml( - 'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16', - video_id, 'Downloading config') + media = self._download_json( + 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' + % (video_id, self._GEO_COUNTRIES[0]), video_id, query={ + 'disableProtocols': 'true', + 'format': 'json' + }) - item = self._download_xml( - 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, - video_id, headers=self.geo_verification_headers()).find('i') - title = item.attrib['t'] + title = media['Title'] + + formats = [] + for e in media['MediaURLs']: + if e.get('UseDRM') is True: + continue + format_url = e.get('Path') + if not format_url or not isinstance(format_url, compat_str): + continue + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + description = media.get('Description') + duration = int_or_none(media.get( + 'DurationInSeconds')) or parse_duration(media.get('Duration')) + view_count = int_or_none(media.get('CountViews')) + average_rating = float_or_none(media.get('UserRating')) + age_limit = parse_age_limit(media.get('Rating')) + genre = media.get('Genre') + release_year = int_or_none(media.get('ReleaseYear')) + creator = media.get('Directors') + artist = media.get('Cast') + + if media.get('MediaTypeDisplayValue') == 'Full Episode': + series = media.get('ShowName') + episode = title + season_number = int_or_none(media.get('Season')) + episode_number = int_or_none(media.get('Episode')) + else: + series = episode = season_number = episode_number = None subtitles = {} - formats = self._extract_m3u8_formats( - 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), - video_id, 'mp4', m3u8_id='hls', fatal=None) + cc_files = media.get('ClosedCaptionFiles') + if isinstance(cc_files, list): + for cc_file in cc_files: + if not isinstance(cc_file, dict): + continue + cc_url = cc_file.get('Path') + if not cc_url or not isinstance(cc_url, compat_str): + continue + lang = cc_file.get('Locale') or 'en' + subtitles.setdefault(lang, []).append({'url': cc_url}) + thumbnails = [] - path = item.attrib.get('p') - if path: - for width, height in self._THUMBNAIL_RES: - res = '%dx%d' % (width, height) + images = media.get('Images') + if isinstance(images, list): + for image_key, image_url in images.items(): + mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) + if not mobj: + continue thumbnails.append({ - 'id': res, - 'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res), - 'width': width, - 'height': height, - 'resolution': res, + 'url': image_url, + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), }) - http_base_url = 'http://ahttp.crackle.com/' + path - for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): - formats.append({ - 'url': http_base_url + mfs_path, - 'format_id': 'http-' + mfs_path.split('.')[0], - 'width': mfs_info['width'], - 'height': mfs_info['height'], - }) - for cc in item.findall('cc'): - locale = cc.attrib.get('l') - v = cc.attrib.get('v') - if locale and v: - if locale not in subtitles: - subtitles[locale] = [] - for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')): - subtitles.setdefault(locale, []).append({ - 'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext), - 'ext': ext, - }) - self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) return { 'id': video_id, 'title': title, - 'description': item.attrib.get('d'), - 'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None, - 'series': item.attrib.get('sn'), - 'season_number': int_or_none(item.attrib.get('se')), - 'episode_number': int_or_none(item.attrib.get('ep')), + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'average_rating': average_rating, + 'age_limit': age_limit, + 'genre': genre, + 'creator': creator, + 'artist': artist, + 'release_year': release_year, + 'series': series, + 'episode': episode, + 'season_number': season_number, + 'episode_number': episode_number, 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, From 7d34016fb0c7ef489f382bd106dcfedf401b617f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Mar 2018 01:49:50 +0700 Subject: [PATCH 19/77] [crackle] Bypass geo restriction --- youtube_dl/extractor/crackle.py | 192 ++++++++++++++++++-------------- 1 file changed, 109 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 57d84241a..fc014f8b5 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -4,20 +4,24 @@ from __future__ import unicode_literals, division import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_HTTPError, +) from ..utils import ( determine_ext, float_or_none, int_or_none, parse_age_limit, parse_duration, + ExtractorError ) class CrackleIE(InfoExtractor): - _GEO_COUNTRIES = ['US'] _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P\d+)' _TEST = { + # geo restricted to CA 'url': 'https://www.crackle.com/andromeda/2502343', 'info_dict': { 'id': '2502343', @@ -46,93 +50,115 @@ class CrackleIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - media = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' - % (video_id, self._GEO_COUNTRIES[0]), video_id, query={ - 'disableProtocols': 'true', - 'format': 'json' - }) + country_code = self._downloader.params.get('geo_bypass_country', None) + countries = [country_code] if country_code else ( + 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI') - title = media['Title'] + last_e = None - formats = [] - for e in media['MediaURLs']: - if e.get('UseDRM') is True: + for country in countries: + try: + media = self._download_json( + 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' + % (video_id, country), video_id, + 'Downloading media JSON as %s' % country, + 'Unable to download media JSON', query={ + 'disableProtocols': 'true', + 'format': 'json' + }) + except ExtractorError as e: + # 401 means geo restriction, trying next country + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + last_e = e + continue + raise + + media_urls = media.get('MediaURLs') + if not media_urls or not isinstance(media_urls, list): continue - format_url = e.get('Path') - if not format_url or not isinstance(format_url, compat_str): - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - self._sort_formats(formats) - description = media.get('Description') - duration = int_or_none(media.get( - 'DurationInSeconds')) or parse_duration(media.get('Duration')) - view_count = int_or_none(media.get('CountViews')) - average_rating = float_or_none(media.get('UserRating')) - age_limit = parse_age_limit(media.get('Rating')) - genre = media.get('Genre') - release_year = int_or_none(media.get('ReleaseYear')) - creator = media.get('Directors') - artist = media.get('Cast') + title = media['Title'] - if media.get('MediaTypeDisplayValue') == 'Full Episode': - series = media.get('ShowName') - episode = title - season_number = int_or_none(media.get('Season')) - episode_number = int_or_none(media.get('Episode')) - else: - series = episode = season_number = episode_number = None - - subtitles = {} - cc_files = media.get('ClosedCaptionFiles') - if isinstance(cc_files, list): - for cc_file in cc_files: - if not isinstance(cc_file, dict): + formats = [] + for e in media['MediaURLs']: + if e.get('UseDRM') is True: continue - cc_url = cc_file.get('Path') - if not cc_url or not isinstance(cc_url, compat_str): + format_url = e.get('Path') + if not format_url or not isinstance(format_url, compat_str): continue - lang = cc_file.get('Locale') or 'en' - subtitles.setdefault(lang, []).append({'url': cc_url}) + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) - thumbnails = [] - images = media.get('Images') - if isinstance(images, list): - for image_key, image_url in images.items(): - mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) - if not mobj: - continue - thumbnails.append({ - 'url': image_url, - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - }) + description = media.get('Description') + duration = int_or_none(media.get( + 'DurationInSeconds')) or parse_duration(media.get('Duration')) + view_count = int_or_none(media.get('CountViews')) + average_rating = float_or_none(media.get('UserRating')) + age_limit = parse_age_limit(media.get('Rating')) + genre = media.get('Genre') + release_year = int_or_none(media.get('ReleaseYear')) + creator = media.get('Directors') + artist = media.get('Cast') - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'average_rating': average_rating, - 'age_limit': age_limit, - 'genre': genre, - 'creator': creator, - 'artist': artist, - 'release_year': release_year, - 'series': series, - 'episode': episode, - 'season_number': season_number, - 'episode_number': episode_number, - 'thumbnails': thumbnails, - 'subtitles': subtitles, - 'formats': formats, - } + if media.get('MediaTypeDisplayValue') == 'Full Episode': + series = media.get('ShowName') + episode = title + season_number = int_or_none(media.get('Season')) + episode_number = int_or_none(media.get('Episode')) + else: + series = episode = season_number = episode_number = None + + subtitles = {} + cc_files = media.get('ClosedCaptionFiles') + if isinstance(cc_files, list): + for cc_file in cc_files: + if not isinstance(cc_file, dict): + continue + cc_url = cc_file.get('Path') + if not cc_url or not isinstance(cc_url, compat_str): + continue + lang = cc_file.get('Locale') or 'en' + subtitles.setdefault(lang, []).append({'url': cc_url}) + + thumbnails = [] + images = media.get('Images') + if isinstance(images, list): + for image_key, image_url in images.items(): + mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) + if not mobj: + continue + thumbnails.append({ + 'url': image_url, + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), + }) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'average_rating': average_rating, + 'age_limit': age_limit, + 'genre': genre, + 'creator': creator, + 'artist': artist, + 'release_year': release_year, + 'series': series, + 'episode': episode, + 'season_number': season_number, + 'episode_number': episode_number, + 'thumbnails': thumbnails, + 'subtitles': subtitles, + 'formats': formats, + } + + raise last_e From b015cb1af3453c6f27c1b8ebd1916c1fc4f94923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Mar 2018 14:11:27 +0700 Subject: [PATCH 20/77] [24video] Add support for 24video.sexy (closes #15973) --- youtube_dl/extractor/twentyfourvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 96e0b96e3..4b3b3e705 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -14,7 +14,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' + _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', From 86e1958944952afbe208101802c90f9a096adea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Mar 2018 14:21:08 +0700 Subject: [PATCH 21/77] [afreecatv] Update referrer (closes #15947) --- youtube_dl/extractor/afreecatv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index df2a3fc4a..0f4535804 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -187,11 +187,11 @@ class AfreecaTVIE(InfoExtractor): r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs') video_id = self._search_regex( r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id) - + print(video_id, station_id, bbs_id) video_xml = self._download_xml( 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', video_id, headers={ - 'Referer': 'http://vod.afreecatv.com/embed.php', + 'Referer': url, }, query={ 'nTitleNo': video_id, 'nStationNo': station_id, From 16132cff7231d591bc4e6e3a12c02f9110d54e11 Mon Sep 17 00:00:00 2001 From: Joseph Spiros Date: Sat, 24 Mar 2018 03:57:34 -0400 Subject: [PATCH 22/77] [vrv] Fix extraction on python2 (closes #15928) --- youtube_dl/extractor/vrv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 9959627c0..64b13f0ed 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -12,7 +12,7 @@ import time from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlencode, - compat_urlparse, + compat_urllib_parse, ) from ..utils import ( float_or_none, @@ -39,11 +39,11 @@ class VRVBaseIE(InfoExtractor): data = json.dumps(data).encode() headers['Content-Type'] = 'application/json' method = 'POST' if data else 'GET' - base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')]) + base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')]) oauth_signature = base64.b64encode(hmac.new( (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'), base_string.encode(), hashlib.sha1).digest()).decode() - encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '') + encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '') return self._download_json( '?'.join([base_url, encoded_query]), video_id, note='Downloading %s JSON metadata' % note, headers=headers, data=data) From 0ff2c1ecb67b61e1410e1d0fe1966a7680e18947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Mar 2018 15:59:48 +0700 Subject: [PATCH 23/77] [downloader/fragment] Fix download finalization when writing file to stdout (closes #15799) --- youtube_dl/downloader/fragment.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index ea5e3a4b5..927c7e491 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -241,12 +241,16 @@ class FragmentFD(FileDownloader): if os.path.isfile(ytdl_filename): os.remove(ytdl_filename) elapsed = time.time() - ctx['started'] - self.try_rename(ctx['tmpfilename'], ctx['filename']) - fsize = os.path.getsize(encodeFilename(ctx['filename'])) + + if ctx['tmpfilename'] == '-': + downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + else: + self.try_rename(ctx['tmpfilename'], ctx['filename']) + downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) self._hook_progress({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, + 'downloaded_bytes': downloaded_bytes, + 'total_bytes': downloaded_bytes, 'filename': ctx['filename'], 'status': 'finished', 'elapsed': elapsed, From 80aa24609415af36ac30caa392e85f8c20349535 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sat, 8 Oct 2016 09:27:24 -0400 Subject: [PATCH 24/77] [downloader/external] Fix download finalization when writing file to stdout (closes #10809) An OSError or IOError generally indicates something a little more wrong than a "simple" UnavailableVideoError, so print the actual traceback that leads to the exception. Otherwise meaningful postmortem debugging a bug report is essentially infeasible. --- youtube_dl/downloader/common.py | 10 ++++++---- youtube_dl/downloader/external.py | 24 +++++++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index cc16bbb83..7062eee8b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -249,12 +249,14 @@ class FileDownloader(object): if self.params.get('noprogress', False): self.to_screen('[download] Download completed') else: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '100%% of %(_total_bytes_str)s' + else: + msg_template = 'Completed' if s.get('elapsed') is not None: s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' - else: - msg_template = '100%% of %(_total_bytes_str)s' + msg_template += ' in %(_elapsed_str)s' self._report_progress_status( msg_template % s, is_last_line=True) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index db018fa89..48c255ddc 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -41,15 +41,21 @@ class ExternalFD(FileDownloader): self.to_screen('[%s] Interrupted by user' % self.get_basename()) if retval == 0: - fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) - self.try_rename(tmpfilename, filename) - self._hook_progress({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, - 'filename': filename, - 'status': 'finished', - }) + if filename == '-': + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + }) + else: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) return True else: self.to_stderr('\n') From 2ea212628e0ffc0d66858817841643c4579c5d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Mar 2018 16:27:36 +0700 Subject: [PATCH 25/77] [downloader/common] Improve progress reporting when no total bytes available --- youtube_dl/downloader/common.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 7062eee8b..edd125ee2 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -249,11 +249,10 @@ class FileDownloader(object): if self.params.get('noprogress', False): self.to_screen('[download] Download completed') else: + msg_template = '100%%' if s.get('total_bytes') is not None: s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template = '100%% of %(_total_bytes_str)s' - else: - msg_template = 'Completed' + msg_template += ' of %(_total_bytes_str)s' if s.get('elapsed') is not None: s['_elapsed_str'] = self.format_seconds(s['elapsed']) msg_template += ' in %(_elapsed_str)s' From f0298f653e2199d6e4488882e40eea8e31140d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Mar 2018 16:29:03 +0700 Subject: [PATCH 26/77] [downloader/external] Simplify finished progress hook reporting and add elapsed time (closes #10876) --- youtube_dl/downloader/external.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 48c255ddc..958d00aac 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals import os.path +import re import subprocess import sys -import re +import time from .common import FileDownloader from ..compat import ( @@ -30,6 +31,7 @@ class ExternalFD(FileDownloader): tmpfilename = self.temp_name(filename) try: + started = time.time() retval = self._call_downloader(tmpfilename, info_dict) except KeyboardInterrupt: if not info_dict.get('is_live'): @@ -41,21 +43,20 @@ class ExternalFD(FileDownloader): self.to_screen('[%s] Interrupted by user' % self.get_basename()) if retval == 0: - if filename == '-': - self._hook_progress({ - 'filename': filename, - 'status': 'finished', - }) - else: + status = { + 'filename': filename, + 'status': 'finished', + 'elapsed': time.time() - started, + } + if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) self.try_rename(tmpfilename, filename) - self._hook_progress({ + status.update({ 'downloaded_bytes': fsize, 'total_bytes': fsize, - 'filename': filename, - 'status': 'finished', }) + self._hook_progress(status) return True else: self.to_stderr('\n') From 29d9594561fd92b07d1c2cff04ae5a4c144946b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Mar 2018 22:11:01 +0700 Subject: [PATCH 27/77] [ChangeLog] Actualize [ci skip] --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index 0d748316e..d4f442421 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version + +Core ++ [downloader/external] Add elapsed time to progress hook (#10876) +* [downloader/external,fragment] Fix download finalization when writing file + to stdout (#10809, #10876, #15799) + +Extractors +* [vrv] Fix extraction on python2 (#15928) +* [afreecatv] Update referrer (#15947) ++ [24video] Add support for 24video.sexy (#15973) +* [crackle] Bypass geo restriction +* [crackle] Fix extraction (#15969) ++ [lenta] Add support for lenta.ru (#15953) ++ [instagram:user] Add pagination (#15934) +* [youku] Update ccode (#15939) +* [libsyn] Adapt to new page structure + + version 2018.03.20 Core From 671e241bfbf5d1954ff07c98e0ba2c3d7c2405c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Mar 2018 05:03:47 +0700 Subject: [PATCH 28/77] release 2018.03.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 75c5b2226..86912f5e7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.03.20 +[debug] youtube-dl version 2018.03.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d4f442421..0d43b580f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.03.26 Core + [downloader/external] Add elapsed time to progress hook (#10876) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 80358bb14..0d7d7fbb3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -419,6 +419,7 @@ - **Lecture2Go** - **LEGO** - **Lemonde** + - **Lenta** - **LePlaylist** - **LetvCloud**: 乐视云 - **Libsyn** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c686714f0..d6d87ad74 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.03.20' +__version__ = '2018.03.26' From c3cfc71a0c822c86a01ad9c150415724d0b2b045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Mar 2018 22:30:11 +0700 Subject: [PATCH 29/77] [ChangeLog] Actualize [ci skip] --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0d43b580f..d4f442421 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version 2018.03.26 +version Core + [downloader/external] Add elapsed time to progress hook (#10876) From bbd9d8c17075055ddfd9873092a29a3e21566805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Mar 2018 22:32:03 +0700 Subject: [PATCH 30/77] release 2018.03.26.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 86912f5e7..0cd090e40 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.26.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.26.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.03.26 +[debug] youtube-dl version 2018.03.26.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d4f442421..f9d04ffd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.03.26.1 Core + [downloader/external] Add elapsed time to progress hook (#10876) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d6d87ad74..d38fde039 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.03.26' +__version__ = '2018.03.26.1' From 99c3091850118d08c14c78f5cc6ab5ce73f4196a Mon Sep 17 00:00:00 2001 From: Attila-Mihaly Balazs Date: Tue, 27 Mar 2018 18:02:04 +0300 Subject: [PATCH 31/77] [videa] Extend _VALID_URL --- youtube_dl/extractor/videa.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index 311df58f4..d0e34c819 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -16,7 +16,7 @@ from ..utils import ( class VideaIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - videa\.hu/ + videa(?:kid)?\.hu/ (?: videok/(?:[^/]+/)*[^?#&]+-| player\?.*?\bv=| @@ -31,7 +31,7 @@ class VideaIE(InfoExtractor): 'id': '8YfIAjxwWGwT8HVQ', 'ext': 'mp4', 'title': 'Az őrült kígyász 285 kígyót enged szabadon', - 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', + 'thumbnail': r're:^https?://.*', 'duration': 21, }, }, { @@ -43,6 +43,15 @@ class VideaIE(InfoExtractor): }, { 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 'only_matching': True, + }, { + 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', + 'only_matching': True, + }, { + 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ', + 'only_matching': True, + }, { + 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', + 'only_matching': True, }] @staticmethod From 9e6a4180158026e78f65563d0586923fef8ccece Mon Sep 17 00:00:00 2001 From: xofe <22776566+xofe@users.noreply.github.com> Date: Tue, 27 Mar 2018 15:08:40 +0000 Subject: [PATCH 32/77] [abc:iview] Unescape title and series meta fields --- youtube_dl/extractor/abc.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 87017ed39..512f04684 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, parse_iso8601, try_get, + unescapeHTML, update_url_query, ) @@ -109,16 +110,17 @@ class ABCIViewIE(InfoExtractor): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00', + 'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00', 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'info_dict': { - 'id': 'ZW0898A003S00', + 'id': 'ZY9247A021S00', 'ext': 'mp4', - 'title': 'Series 5 Ep 3', - 'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79', - 'upload_date': '20171228', - 'uploader_id': 'abc1', - 'timestamp': 1514499187, + 'title': "Gaston's Visit", + 'series': "Ben And Holly's Little Kingdom", + 'description': 'md5:18db170ad71cf161e006a4c688e33155', + 'upload_date': '20180318', + 'uploader_id': 'abc4kids', + 'timestamp': 1521400959, }, 'params': { 'skip_download': True, @@ -169,12 +171,12 @@ class ABCIViewIE(InfoExtractor): return { 'id': video_id, - 'title': title, + 'title': unescapeHTML(title), 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), 'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage), 'duration': int_or_none(video_params.get('eventDuration')), 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), - 'series': video_params.get('seriesTitle'), + 'series': unescapeHTML(video_params.get('seriesTitle')), 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)), 'episode': self._html_search_meta('episode_title', webpage, default=None), From 5d60b9971784289acd4325a8ed7b5afd7bea05ca Mon Sep 17 00:00:00 2001 From: "Arend v. Reinersdorff" Date: Tue, 27 Mar 2018 17:25:29 +0200 Subject: [PATCH 33/77] [options] Mention comments support in --batch-file --- youtube_dl/options.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 7d1bbc021..3e4ac03a2 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -676,7 +676,8 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-a', '--batch-file', dest='batchfile', metavar='FILE', - help='File containing URLs to download (\'-\' for stdin)') + help="File containing URLs to download ('-' for stdin), one URL per line. " + "Lines starting with '#', ';' or ']' are considered as comments and ignored.") filesystem.add_option( '--id', default=False, action='store_true', dest='useid', help='Use only video ID in file name') From 02f6ccbce3a50d8db3eac06a5820347cf674ca86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Mar 2018 23:06:13 +0700 Subject: [PATCH 34/77] [dramafever] Partially switch to API v5 (closes #16026) --- youtube_dl/extractor/dramafever.py | 156 +++++++++++++++++++---------- 1 file changed, 102 insertions(+), 54 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 6b60e542b..c7a048f9d 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -3,25 +3,26 @@ from __future__ import unicode_literals import itertools -from .amp import AMPIE +from .common import InfoExtractor from ..compat import ( - compat_HTTPError, + compat_str, compat_urlparse, ) from ..utils import ( - ExtractorError, clean_html, + ExtractorError, int_or_none, - remove_end, + parse_age_limit, + parse_duration, sanitized_Request, + unified_timestamp, urlencode_postdata ) -class DramaFeverBaseIE(AMPIE): +class DramaFeverBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' - _GEO_COUNTRIES = ['US', 'CA'] _CONSUMER_SECRET = 'DA59dtVXYLxajktV' @@ -70,18 +71,20 @@ class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+/[0-9]+)(?:/|$)' _TESTS = [{ - 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', + 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/', 'info_dict': { - 'id': '4512.1', - 'ext': 'flv', - 'title': 'Cooking with Shin', - 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', + 'id': '4274.1', + 'ext': 'wvm', + 'title': 'Heirs - Episode 1', + 'description': 'md5:362a24ba18209f6276e032a651c50bc2', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 3783, + 'timestamp': 1381354993, + 'upload_date': '20131009', + 'series': 'Heirs', + 'season_number': 1, 'episode': 'Episode 1', 'episode_number': 1, - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1404336058, - 'upload_date': '20140702', - 'duration': 344, }, 'params': { # m3u8 download @@ -110,50 +113,95 @@ class DramaFeverIE(DramaFeverBaseIE): 'only_matching': True, }] + def _call_api(self, path, video_id, note, fatal=False): + return self._download_json( + 'https://www.dramafever.com/api/5/' + path, + video_id, note=note, headers={ + 'x-consumer-key': self._consumer_secret, + }, fatal=fatal) + + def _get_subtitles(self, video_id): + subtitles = {} + subs = self._call_api( + 'video/%s/subtitles/webvtt/' % video_id, video_id, + 'Downloading subtitles JSON', fatal=False) + if not subs or not isinstance(subs, list): + return subtitles + for sub in subs: + if not isinstance(sub, dict): + continue + sub_url = sub.get('url') + if not sub_url or not isinstance(sub_url, compat_str): + continue + subtitles.setdefault( + sub.get('code') or sub.get('language') or 'en', []).append({ + 'url': sub_url + }) + return subtitles + def _real_extract(self, url): video_id = self._match_id(url).replace('/', '.') - try: - info = self._extract_feed_info( - 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - self.raise_geo_restricted( - msg='Currently unavailable in your country', - countries=self._GEO_COUNTRIES) - raise - - # title is postfixed with video id for some reason, removing - if info.get('title'): - info['title'] = remove_end(info['title'], video_id).strip() - series_id, episode_number = video_id.split('.') - episode_info = self._download_json( - # We only need a single episode info, so restricting page size to one episode - # and dealing with page number as with episode number - r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1' - % (self._consumer_secret, series_id, episode_number), - video_id, 'Downloading episode info JSON', fatal=False) - if episode_info: - value = episode_info.get('value') - if isinstance(value, list): - for v in value: - if v.get('type') == 'Episode': - subfile = v.get('subfile') or v.get('new_subfile') - if subfile and subfile != 'http://www.dramafever.com/st/': - info.setdefault('subtitles', {}).setdefault('English', []).append({ - 'ext': 'srt', - 'url': subfile, - }) - episode_number = int_or_none(v.get('number')) - episode_fallback = 'Episode' - if episode_number: - episode_fallback += ' %d' % episode_number - info['episode'] = v.get('title') or episode_fallback - info['episode_number'] = episode_number - break - return info + video = self._call_api( + 'series/%s/episodes/%s/' % (series_id, episode_number), video_id, + 'Downloading video JSON') + + formats = [] + download_assets = video.get('download_assets') + if download_assets and isinstance(download_assets, dict): + for format_id, format_dict in download_assets.items(): + if not isinstance(format_dict, dict): + continue + format_url = format_dict.get('url') + if not format_url or not isinstance(format_url, compat_str): + continue + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'filesize': int_or_none(video.get('filesize')), + }) + + stream = self._call_api( + 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON', + fatal=False) + if stream: + stream_url = stream.get('stream_url') + if stream_url: + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + title = video.get('title') or 'Episode %s' % episode_number + description = video.get('description') + thumbnail = video.get('thumbnail') + timestamp = unified_timestamp(video.get('release_date')) + duration = parse_duration(video.get('duration')) + age_limit = parse_age_limit(video.get('tv_rating')) + series = video.get('series_title') + season_number = int_or_none(video.get('season')) + + if series: + title = '%s - %s' % (series, title) + + subtitles = self.extract_subtitles(video_id) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'age_limit': age_limit, + 'series': series, + 'season_number': season_number, + 'episode_number': int_or_none(episode_number), + 'formats': formats, + 'subtitles': subtitles, + } class DramaFeverSeriesIE(DramaFeverBaseIE): From 190f6c936be0ec03ed999cbf34e73f38c9beb022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Mar 2018 23:49:09 +0700 Subject: [PATCH 35/77] [naver] Fix extraction (closes #16029) --- youtube_dl/extractor/naver.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 2047d4402..bb3d94413 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -43,9 +41,14 @@ class NaverIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"', - webpage) - if m_id is None: + vid = self._search_regex( + r'videoId["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'video id', fatal=None, group='value') + in_key = self._search_regex( + r'inKey["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'key', default=None, group='value') + + if not vid or not in_key: error = self._html_search_regex( r'(?s)
\s*(?:)?\s*

(?P.+?)

\s*
', webpage, 'error', default=None) @@ -53,9 +56,9 @@ class NaverIE(InfoExtractor): raise ExtractorError(error, expected=True) raise ExtractorError('couldn\'t extract vid and key') video_data = self._download_json( - 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1), + 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid, video_id, query={ - 'key': m_id.group(2), + 'key': in_key, }) meta = video_data['meta'] title = meta['subject'] From 3e78d23b5783d01f60bcb515febd5a590a734ee4 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Fri, 30 Mar 2018 18:25:43 +0200 Subject: [PATCH 36/77] [openload] Add support for oload.site --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index eaaaf8a08..af7db6e12 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', From 0b4bbcdcb6f62e080e70c026eb28a5e92f46dfc8 Mon Sep 17 00:00:00 2001 From: kenavera Date: Sat, 31 Mar 2018 17:14:49 +0200 Subject: [PATCH 37/77] [medialaan] Fix vod id --- youtube_dl/extractor/medialaan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py index f8c30052f..50d5db802 100644 --- a/youtube_dl/extractor/medialaan.py +++ b/youtube_dl/extractor/medialaan.py @@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE): vod_id = config.get('vodId') or self._search_regex( (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"', + r'"vodId"\s*:\s*"(.+?)"', r'<[^>]+id=["\']vod-(\d+)'), webpage, 'video_id', default=None) From 0669f8fd8f19fbe0783974654fc2a6925d6162b0 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Sat, 31 Mar 2018 11:46:08 -0500 Subject: [PATCH 38/77] [xvideos] Fix thumbnail extraction (closes #15978) --- youtube_dl/extractor/xvideos.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 085c8d4f3..efee95651 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor): group='title') or self._og_search_title(webpage) thumbnail = self._search_regex( - r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) + (r'setThumbUrl\(\s*(["\'])(?P(?:(?!\1).)+)\1', + r'url_bigthumb=(?P.+?)&'), + webpage, 'thumbnail', fatal=False, group='thumbnail') duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( From 95a1322bc10687efac0b00fb3fd55708e556baf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Apr 2018 02:06:14 +0700 Subject: [PATCH 39/77] [bilibili] Remove debug from player params regexes --- youtube_dl/extractor/bilibili.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 90697c4a7..3e3348ef5 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -117,9 +117,9 @@ class BiliBiliIE(InfoExtractor): r'cid(?:["\']:|=)(\d+)', webpage, 'cid', default=None ) or compat_parse_qs(self._search_regex( - [r'1EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'1EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', - r'1]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', + r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], webpage, 'player parameters'))['cid'][0] else: if 'no_bangumi_tip' not in smuggled_data: From 03fcde10ced29291268f39cb8ccf7ee5dd40f676 Mon Sep 17 00:00:00 2001 From: kenavera Date: Sun, 1 Apr 2018 16:22:51 +0200 Subject: [PATCH 40/77] [nationalgeographic] Add support for new URL schema (closes #16001) --- youtube_dl/extractor/nationalgeographic.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 246f6795a..4d2ee6408 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor): class NationalGeographicIE(ThePlatformIE, AdobePassIE): IE_NAME = 'natgeo' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P[^/?]+)' + _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P[^/?]+)' _TESTS = [ { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', + 'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/', 'md5': '518c9aa655686cf81493af5cc21e2a04', 'info_dict': { 'id': 'vKInpacll2pC', @@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): 'add_ie': ['ThePlatform'], }, { - 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', + 'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/', 'md5': 'c4912f656b4cbe58f3e000c489360989', 'info_dict': { 'id': 'Pok5lWCkiEFA', @@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): { 'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/', 'only_matching': True, + }, + { + 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', + 'only_matching': True, + }, + { + 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', + 'only_matching': True, } ] From e51762be19289da50977fd6f2d0ee2a1722765a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Apr 2018 22:47:39 +0700 Subject: [PATCH 41/77] [afreecatv] Add support for authentication (#14450) --- youtube_dl/extractor/afreecatv.py | 47 +++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 0f4535804..bb3728bb0 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -9,6 +9,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + urlencode_postdata, xpath_text, ) @@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor): ) (?P\d+) ''' + _NETRC_MACHINE = 'afreecatv' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', @@ -172,6 +174,51 @@ class AfreecaTVIE(InfoExtractor): video_key['part'] = int(m.group('part')) return video_key + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_form = { + 'szWork': 'login', + 'szType': 'json', + 'szUid': username, + 'szPassword': password, + 'isSaveId': 'false', + 'szScriptVar': 'oLoginRet', + 'szAction': '', + } + + response = self._download_json( + 'https://login.afreecatv.com/app/LoginAction.php', None, + 'Logging in', data=urlencode_postdata(login_form)) + + _ERRORS = { + -4: 'Your account has been suspended due to a violation of our terms and policies.', + -5: 'https://member.afreecatv.com/app/user_delete_progress.php', + -6: 'https://login.afreecatv.com/membership/changeMember.php', + -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", + -9: 'https://member.afreecatv.com/app/pop_login_block.php', + -11: 'https://login.afreecatv.com/afreeca/second_login.php', + -12: 'https://member.afreecatv.com/app/user_security.php', + 0: 'The username does not exist or you have entered the wrong password.', + -1: 'The username does not exist or you have entered the wrong password.', + -3: 'You have entered your username/password incorrectly.', + -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', + -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', + -32008: 'You have failed to log in. Please contact our Help Center.', + } + + result = int_or_none(response.get('RESULT')) + if result != 1: + error = _ERRORS.get(result, 'You have failed to log in.') + raise ExtractorError( + 'Unable to login: %s said: %s' % (self.IE_NAME, error), + expected=True) + def _real_extract(self, url): video_id = self._match_id(url) From d563fb32ba5ef4b1a8061fca27edf3b1ad7eb8fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 Apr 2018 23:07:54 +0700 Subject: [PATCH 42/77] [afreecatv] Remove debug output --- youtube_dl/extractor/afreecatv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index bb3728bb0..095e6204f 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -234,7 +234,7 @@ class AfreecaTVIE(InfoExtractor): r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs') video_id = self._search_regex( r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id) - print(video_id, station_id, bbs_id) + video_xml = self._download_xml( 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', video_id, headers={ From 86693c4930b98e8df33736d87361400422b1adab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Apr 2018 00:00:45 +0700 Subject: [PATCH 43/77] [afreecatv] Use partial view only when necessary (closes #14450) --- youtube_dl/extractor/afreecatv.py | 56 +++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 095e6204f..4b3d97136 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -141,22 +141,22 @@ class AfreecaTVIE(InfoExtractor): 'skip_download': True, }, }, { - # adult video - 'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731', + # PARTIAL_ADULT + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439', 'info_dict': { - 'id': '20171001_F1AE1711_196617479_1', + 'id': '20180327_27901457_202289533_1', 'ext': 'mp4', - 'title': '[생]서아 초심 찾기 방송 (part 1)', + 'title': '[생]빨개요♥ (part 1)', 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'BJ서아', + 'uploader': '[SA]서아', 'uploader_id': 'bjdyrksu', - 'upload_date': '20171001', - 'duration': 3600, - 'age_limit': 18, + 'upload_date': '20180327', + 'duration': 3601, }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['adult content'], }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, @@ -235,21 +235,41 @@ class AfreecaTVIE(InfoExtractor): video_id = self._search_regex( r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id) - video_xml = self._download_xml( - 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', - video_id, headers={ - 'Referer': url, - }, query={ + partial_view = False + for _ in range(2): + query = { 'nTitleNo': video_id, 'nStationNo': station_id, 'nBbsNo': bbs_id, - 'partialView': 'SKIP_ADULT', - }) + } + if partial_view: + query['partialView'] = 'SKIP_ADULT' + video_xml = self._download_xml( + 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', + video_id, 'Downloading video info XML%s' + % (' (skipping adult)' if partial_view else ''), + video_id, headers={ + 'Referer': url, + }, query=query) - flag = xpath_text(video_xml, './track/flag', 'flag', default=None) - if flag and flag != 'SUCCEED': + flag = xpath_text(video_xml, './track/flag', 'flag', default=None) + if flag and flag == 'SUCCEED': + break + if flag == 'PARTIAL_ADULT': + self._downloader.report_warning( + 'In accordance with local laws and regulations, underage users are restricted from watching adult content. ' + 'Only content suitable for all ages will be downloaded. ' + 'Provide account credentials if you wish to download restricted content.') + partial_view = True + continue + elif flag == 'ADULT': + error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.' + else: + error = flag raise ExtractorError( - '%s said: %s' % (self.IE_NAME, flag), expected=True) + '%s said: %s' % (self.IE_NAME, error), expected=True) + else: + raise ExtractorError('Unable to download video info') video_element = video_xml.findall(compat_xpath('./track/video'))[-1] if video_element is None or video_element.text is None: From 8bd1df3c316970f15662831c28311560884356a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Apr 2018 22:19:42 +0700 Subject: [PATCH 44/77] [dramafever] Fix authentication (closes #16067) --- youtube_dl/extractor/dramafever.py | 41 ++++++++++++++++++------------ 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index c7a048f9d..ffbd2623d 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -2,9 +2,11 @@ from __future__ import unicode_literals import itertools +import json from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_str, compat_urlparse, ) @@ -14,14 +16,11 @@ from ..utils import ( int_or_none, parse_age_limit, parse_duration, - sanitized_Request, unified_timestamp, - urlencode_postdata ) class DramaFeverBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' _CONSUMER_SECRET = 'DA59dtVXYLxajktV' @@ -39,8 +38,8 @@ class DramaFeverBaseIE(InfoExtractor): 'consumer secret', default=self._CONSUMER_SECRET) def _real_initialize(self): - self._login() self._consumer_secret = self._get_consumer_secret() + self._login() def _login(self): (username, password) = self._get_login_info() @@ -52,19 +51,29 @@ class DramaFeverBaseIE(InfoExtractor): 'password': password, } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - response = self._download_webpage( - request, None, 'Logging in') + try: + response = self._download_json( + 'https://www.dramafever.com/api/users/login', None, 'Logging in', + data=json.dumps(login_form).encode('utf-8'), headers={ + 'x-consumer-key': self._consumer_secret, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404): + response = self._parse_json( + e.cause.read().decode('utf-8'), None) + else: + raise - if all(logout_pattern not in response - for logout_pattern in ['href="/accounts/logout/"', '>Log out<']): - error = self._html_search_regex( - r'(?s)]+\bclass="hidden-xs prompt"[^>]*>(.+?) Date: Mon, 12 Mar 2018 08:57:41 +0100 Subject: [PATCH 45/77] [tvnow] Add support for shows --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvnow.py | 73 +++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de48a37ad..e3a67cc5b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1136,6 +1136,7 @@ from .tvnoe import TVNoeIE from .tvnow import ( TVNowIE, TVNowListIE, + TVNowListChannelIE, ) from .tvp import ( TVPEmbedIE, diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py index 1bf472444..8e0ac6be5 100644 --- a/youtube_dl/extractor/tvnow.py +++ b/youtube_dl/extractor/tvnow.py @@ -19,7 +19,7 @@ class TVNowBaseIE(InfoExtractor): 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode', 'manifest.dashclear', 'format.title', 'format.defaultImage169Format', - 'format.defaultImage169Logo') + 'format.defaultImage169Logo', 'replaceMovieInformation') def _call_api(self, path, video_id, query): return self._download_json( @@ -58,7 +58,7 @@ class TVNowBaseIE(InfoExtractor): duration = parse_duration(info.get('duration')) f = info.get('format', {}) - thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') + thumbnail = ('https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % info.get('replaceMovieInformation')) or f.get('defaultImage169Format') or f.get('defaultImage169Logo') return { 'id': video_id, @@ -133,7 +133,27 @@ class TVNowIE(TVNowBaseIE): return self._extract_video(info, display_id) -class TVNowListIE(TVNowBaseIE): +class TVNowListBaseIE(TVNowBaseIE): + def _extend_query(self, show, season, video=None): + fields = [] + fields.extend(show) + fields.extend('formatTabs.%s' % field for field in season) + if video: + fields.extend( + 'formatTabs.formatTabPages.container.movies.%s' % field + for field in video) + + return fields + + def _tvnow_list_info(self, list_id, show_id, fields): + return self._call_api( + 'formats/seo', list_id, query={ + 'fields': ','.join(fields), + 'name': show_id + '.php' + }) + + +class TVNowListIE(TVNowListBaseIE): _VALID_URL = r'(?Phttps?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+)/)list/(?P[^?/#&]+)$' _SHOW_FIELDS = ('title', ) @@ -152,18 +172,7 @@ class TVNowListIE(TVNowBaseIE): def _real_extract(self, url): base_url, show_id, season_id = re.match(self._VALID_URL, url).groups() - fields = [] - fields.extend(self._SHOW_FIELDS) - fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) - fields.extend( - 'formatTabs.formatTabPages.container.movies.%s' % field - for field in self._VIDEO_FIELDS) - - list_info = self._call_api( - 'formats/seo', season_id, query={ - 'fields': ','.join(fields), - 'name': show_id + '.php' - }) + list_info = self._tvnow_list_info(season_id, show_id, self._extend_query(self._SHOW_FIELDS, self._SEASON_FIELDS, self._VIDEO_FIELDS)) season = next( season for season in list_info['formatTabs']['items'] @@ -177,8 +186,40 @@ class TVNowListIE(TVNowBaseIE): seo_url = info.get('seoUrl') if not seo_url: continue + entries.append(self.url_result( - base_url + seo_url + '/player', 'TVNow', info.get('id'))) + base_url + seo_url + '/player', 'TVNow', str(info.get('id', seo_url)))) return self.playlist_result( entries, compat_str(season.get('id') or season_id), title) + + +class TVNowListChannelIE(TVNowListBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+))' + + _SHOW_FIELDS = ('id', 'title', ) + _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) + + _TESTS = [{ + 'url': 'https://www.tvnow.at/vox/ab-ins-beet', + 'only_matching': 'True', + }] + + @classmethod + def suitable(cls, url): + return False if TVNowIE.suitable(url) or TVNowListIE.suitable(url) else super(TVNowListChannelIE, cls).suitable(url) + + def _real_extract(self, url): + base_url, show_id = re.match(self._VALID_URL, url).groups() + + list_info = self._tvnow_list_info(show_id, show_id, self._extend_query(self._SHOW_FIELDS, self._SEASON_FIELDS)) + + entries = [] + for season_info in list_info['formatTabs']['items']: + season_url = season_info.get('seoheadline') + if not season_url: + continue + entries.append(self.url_result( + base_url + "/list/" + season_url, 'TVNowList', compat_str(season_info.get('id')), season_info.get('headline'))) + + return self.playlist_result(entries) From ea6679fbeb1fb91131022886a0a8697e4c75f07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Apr 2018 00:08:22 +0700 Subject: [PATCH 46/77] [tvnow] Fix issues, simplify and improve (closes #15837) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/tvnow.py | 124 +++++++++++++++++++++-------- 2 files changed, 90 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e3a67cc5b..bded6e144 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1136,7 +1136,7 @@ from .tvnoe import TVNoeIE from .tvnow import ( TVNowIE, TVNowListIE, - TVNowListChannelIE, + TVNowShowIE, ) from .tvp import ( TVPEmbedIE, diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py index 8e0ac6be5..808571ece 100644 --- a/youtube_dl/extractor/tvnow.py +++ b/youtube_dl/extractor/tvnow.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, parse_iso8601, parse_duration, + try_get, update_url_query, ) @@ -19,7 +20,7 @@ class TVNowBaseIE(InfoExtractor): 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode', 'manifest.dashclear', 'format.title', 'format.defaultImage169Format', - 'format.defaultImage169Logo', 'replaceMovieInformation') + 'format.defaultImage169Logo') def _call_api(self, path, video_id, query): return self._download_json( @@ -58,14 +59,22 @@ class TVNowBaseIE(InfoExtractor): duration = parse_duration(info.get('duration')) f = info.get('format', {}) - thumbnail = ('https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % info.get('replaceMovieInformation')) or f.get('defaultImage169Format') or f.get('defaultImage169Logo') + + thumbnails = [{ + 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id, + }] + thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') + if thumbnail: + thumbnails.append({ + 'url': thumbnail, + }) return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'timestamp': timestamp, 'duration': duration, 'series': f.get('title'), @@ -77,7 +86,12 @@ class TVNowBaseIE(InfoExtractor): class TVNowIE(TVNowBaseIE): - _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P[^/]+)/(?:player|preview)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/ + (?P[^/]+)/ + (?!(?:list|jahr)(?:/|$))(?P[^/?\#&]+) + ''' _TESTS = [{ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player', @@ -99,27 +113,30 @@ class TVNowIE(TVNowBaseIE): }, { # rtl2 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player', - 'only_matching': 'True', + 'only_matching': True, }, { # rtlnitro 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player', - 'only_matching': 'True', + 'only_matching': True, }, { # superrtl 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player', - 'only_matching': 'True', + 'only_matching': True, }, { # ntv 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player', - 'only_matching': 'True', + 'only_matching': True, }, { # vox 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player', - 'only_matching': 'True', + 'only_matching': True, }, { # rtlplus 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player', - 'only_matching': 'True', + 'only_matching': True, + }, { + 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3', + 'only_matching': True, }] def _real_extract(self, url): @@ -134,27 +151,29 @@ class TVNowIE(TVNowBaseIE): class TVNowListBaseIE(TVNowBaseIE): - def _extend_query(self, show, season, video=None): - fields = [] - fields.extend(show) - fields.extend('formatTabs.%s' % field for field in season) - if video: - fields.extend( - 'formatTabs.formatTabPages.container.movies.%s' % field - for field in video) + _SHOW_VALID_URL = r'''(?x) + (?P + https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/ + (?P[^/]+) + ) + ''' - return fields - - def _tvnow_list_info(self, list_id, show_id, fields): + def _extract_list_info(self, display_id, show_id): + fields = list(self._SHOW_FIELDS) + fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) + fields.extend( + 'formatTabs.formatTabPages.container.movies.%s' % field + for field in self._VIDEO_FIELDS) return self._call_api( - 'formats/seo', list_id, query={ + 'formats/seo', display_id, query={ 'fields': ','.join(fields), 'name': show_id + '.php' }) class TVNowListIE(TVNowListBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+)/)list/(?P[^?/#&]+)$' + _VALID_URL = r'%s/(?:list|jahr)/(?P[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL _SHOW_FIELDS = ('title', ) _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) @@ -167,59 +186,94 @@ class TVNowListIE(TVNowListBaseIE): 'title': '30 Minuten Deutschland - Aktuell', }, 'playlist_mincount': 1, + }, { + 'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14', + 'only_matching': True, + }, { + 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False if TVNowIE.suitable(url) + else super(TVNowListIE, cls).suitable(url)) + def _real_extract(self, url): base_url, show_id, season_id = re.match(self._VALID_URL, url).groups() - list_info = self._tvnow_list_info(season_id, show_id, self._extend_query(self._SHOW_FIELDS, self._SEASON_FIELDS, self._VIDEO_FIELDS)) + list_info = self._extract_list_info(season_id, show_id) season = next( season for season in list_info['formatTabs']['items'] if season.get('seoheadline') == season_id) - title = '%s - %s' % (list_info['title'], season['headline']) + title = list_info.get('title') + headline = season.get('headline') + if title and headline: + title = '%s - %s' % (title, headline) + else: + title = headline or title entries = [] for container in season['formatTabPages']['items']: - for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []: + items = try_get( + container, lambda x: x['container']['movies']['items'], + list) or [] + for info in items: seo_url = info.get('seoUrl') if not seo_url: continue - + video_id = info.get('id') entries.append(self.url_result( - base_url + seo_url + '/player', 'TVNow', str(info.get('id', seo_url)))) + '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(), + compat_str(video_id) if video_id else None)) return self.playlist_result( entries, compat_str(season.get('id') or season_id), title) -class TVNowListChannelIE(TVNowListBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+))' +class TVNowShowIE(TVNowListBaseIE): + _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL _SHOW_FIELDS = ('id', 'title', ) _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) + _VIDEO_FIELDS = () _TESTS = [{ 'url': 'https://www.tvnow.at/vox/ab-ins-beet', - 'only_matching': 'True', + 'info_dict': { + 'id': 'ab-ins-beet', + 'title': 'Ab ins Beet!', + }, + 'playlist_mincount': 7, + }, { + 'url': 'https://www.tvnow.at/vox/ab-ins-beet/list', + 'only_matching': True, + }, { + 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/', + 'only_matching': True, }] @classmethod def suitable(cls, url): - return False if TVNowIE.suitable(url) or TVNowListIE.suitable(url) else super(TVNowListChannelIE, cls).suitable(url) + return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url) + else super(TVNowShowIE, cls).suitable(url)) def _real_extract(self, url): base_url, show_id = re.match(self._VALID_URL, url).groups() - list_info = self._tvnow_list_info(show_id, show_id, self._extend_query(self._SHOW_FIELDS, self._SEASON_FIELDS)) + list_info = self._extract_list_info(show_id, show_id) entries = [] for season_info in list_info['formatTabs']['items']: season_url = season_info.get('seoheadline') if not season_url: continue + season_id = season_info.get('id') entries.append(self.url_result( - base_url + "/list/" + season_url, 'TVNowList', compat_str(season_info.get('id')), season_info.get('headline'))) + '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(), + compat_str(season_id) if season_id else None, + season_info.get('headline'))) - return self.playlist_result(entries) + return self.playlist_result(entries, show_id, list_info.get('title')) From 10f9caec048ca0c7c85a568d1dab12d7d7f7b45d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Apr 2018 00:23:03 +0700 Subject: [PATCH 47/77] [ChangeLog] Actualize [ci skip] --- ChangeLog | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index f9d04ffd9..89c58aba2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +version + +Extractors ++ [tvnow] Add support for shows (#15837) +* [dramafever] Fix authentication (#16067) +* [afreecatv] Use partial view only when necessary (#14450) ++ [afreecatv] Add support for authentication (#14450) ++ [nationalgeographic] Add support for new URL schema (#16001, #16054) +* [xvideos] Fix thumbnail extraction (#15978, #15979) +* [medialaan] Fix vod id (#16038) ++ [openload] Add support for oload.site (#16039) +* [naver] Fix extraction (#16029) +* [dramafever] Partially switch to API v5 (#16026) +* [abc:iview] Unescape title and series meta fields (#15994) +* [videa] Extend URL regular expression (#16003) + + version 2018.03.26.1 Core From e8dfecb3842ba54a4260af81e859e487e36eba41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Apr 2018 00:26:11 +0700 Subject: [PATCH 48/77] release 2018.04.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 4 +++- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0cd090e40..99e8acd33 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.26.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.26.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.03.26.1 +[debug] youtube-dl version 2018.04.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 89c58aba2..89dfbd8b8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.04.03 Extractors + [tvnow] Add support for shows (#15837) diff --git a/README.md b/README.md index 7dba5775d..5af0f387b 100644 --- a/README.md +++ b/README.md @@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo ## Filesystem Options: -a, --batch-file FILE File containing URLs to download ('-' for - stdin) + stdin), one URL per line. Lines starting + with '#', ';' or ']' are considered as + comments and ignored. --id Use only video ID in file name -o, --output TEMPLATE Output filename template, see the "OUTPUT TEMPLATE" for all the info diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0d7d7fbb3..17baac5ab 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -887,6 +887,7 @@ - **TVNoe** - **TVNow** - **TVNowList** + - **TVNowShow** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d38fde039..a3163509c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.03.26.1' +__version__ = '2018.04.03' From fd97fa7bfc59983d315892c26f861842820a9579 Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Fri, 30 Mar 2018 20:02:09 +0200 Subject: [PATCH 49/77] [svtplay:series] Add extractor Related to #11130 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/svt.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bded6e144..b46a304ac 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1031,6 +1031,7 @@ from .sunporno import SunPornoIE from .svt import ( SVTIE, SVTPlayIE, + SVTPlaylistIE, ) from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 48bc4529e..d02fd9450 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -9,6 +9,8 @@ from ..utils import ( dict_get, int_or_none, try_get, + urljoin, + compat_str, ) @@ -189,3 +191,58 @@ class SVTPlayIE(SVTBaseIE): r'\s*\|\s*.+?$', '', info_dict.get('episode') or self._og_search_title(webpage)) return info_dict + + +class SVTPlaylistIE(InfoExtractor): + IE_DESC = 'SVT Play serie' + _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P[^/?&#]+)' + IE_NAME = 'svtplay:serie' + _TESTS = [{ + 'url': 'https://www.svtplay.se/rederiet', + 'info_dict': { + 'id': 'rederiet', + 'title': 'Rederiet', + 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', + }, + 'playlist_mincount': 318, + }] + + @classmethod + def suitable(cls, url): + return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + video_id = self._match_id(url) + + page = self._download_webpage( + url, video_id, + note='Downloading serie page', + errnote='unable to fetch serie page') + + root_json = self._search_regex( + r'root\[\'__svtplay\'\]\s*=(.+);\n', + page, 'root') + root = self._parse_json(root_json, video_id) + + metadata = root.get('metaData', {}) + related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion'] + + entries = [] + for season in related_videos_accordion: + videos = season.get('videos') + if not isinstance(videos, list): + continue + + for video in videos: + content_url = video.get('contentUrl') + if not isinstance(content_url, compat_str): + continue + entries.append( + self.url_result( + urljoin(url, content_url), + ie=SVTPlayIE.ie_key(), + video_title=video.get('title') + )) + + return self.playlist_result( + entries, video_id, metadata.get('title'), metadata.get('description')) From b71bb3ba8be711abab4c05527d28c4b5e4552401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Apr 2018 23:52:00 +0700 Subject: [PATCH 50/77] [svtplay:series] Improve extraction (closes #16059) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/svt.py | 36 ++++++++++++++---------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b46a304ac..c9f60114d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1031,7 +1031,7 @@ from .sunporno import SunPornoIE from .svt import ( SVTIE, SVTPlayIE, - SVTPlaylistIE, + SVTSeriesIE, ) from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index d02fd9450..45b4b8bf7 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -193,10 +193,8 @@ class SVTPlayIE(SVTBaseIE): return info_dict -class SVTPlaylistIE(InfoExtractor): - IE_DESC = 'SVT Play serie' +class SVTSeriesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P[^/?&#]+)' - IE_NAME = 'svtplay:serie' _TESTS = [{ 'url': 'https://www.svtplay.se/rederiet', 'info_dict': { @@ -209,33 +207,28 @@ class SVTPlaylistIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url) + return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url) def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage( - url, video_id, - note='Downloading serie page', - errnote='unable to fetch serie page') + webpage = self._download_webpage( + url, video_id, 'Downloading serie page') - root_json = self._search_regex( - r'root\[\'__svtplay\'\]\s*=(.+);\n', - page, 'root') - root = self._parse_json(root_json, video_id) - - metadata = root.get('metaData', {}) - related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion'] + root = self._parse_json( + self._search_regex( + r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P{.+?})\s*;\s*\n', + webpage, 'content', group='json'), + video_id) entries = [] - for season in related_videos_accordion: + for season in root['relatedVideoContent']['relatedVideosAccordion']: videos = season.get('videos') if not isinstance(videos, list): continue - for video in videos: content_url = video.get('contentUrl') - if not isinstance(content_url, compat_str): + if not content_url or not isinstance(content_url, compat_str): continue entries.append( self.url_result( @@ -244,5 +237,10 @@ class SVTPlaylistIE(InfoExtractor): video_title=video.get('title') )) + metadata = root.get('metaData') + if not isinstance(metadata, dict): + metadata = {} + return self.playlist_result( - entries, video_id, metadata.get('title'), metadata.get('description')) + entries, video_id, metadata.get('title'), + metadata.get('description')) From df146eb2827a97da507833c08169d84d708dfb02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Apr 2018 00:05:09 +0700 Subject: [PATCH 51/77] [svtplay:series] Add support for season URLs --- youtube_dl/extractor/svt.py | 43 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 45b4b8bf7..d1d601b1f 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( determine_ext, dict_get, @@ -203,6 +207,14 @@ class SVTSeriesIE(InfoExtractor): 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', }, 'playlist_mincount': 318, + }, { + 'url': 'https://www.svtplay.se/rederiet?tab=sasong2', + 'info_dict': { + 'id': 'rederiet-sasong2', + 'title': 'Rederiet - Säsong 2', + 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', + }, + 'playlist_count': 12, }] @classmethod @@ -210,19 +222,33 @@ class SVTSeriesIE(InfoExtractor): return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url) def _real_extract(self, url): - video_id = self._match_id(url) + series_id = self._match_id(url) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + season_slug = qs.get('tab', [None])[0] + + if season_slug: + series_id += '-%s' % season_slug webpage = self._download_webpage( - url, video_id, 'Downloading serie page') + url, series_id, 'Downloading series page') root = self._parse_json( self._search_regex( r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P{.+?})\s*;\s*\n', webpage, 'content', group='json'), - video_id) + series_id) + + season_name = None entries = [] for season in root['relatedVideoContent']['relatedVideosAccordion']: + if not isinstance(season, dict): + continue + if season_slug: + if season.get('slug') != season_slug: + continue + season_name = season.get('name') videos = season.get('videos') if not isinstance(videos, list): continue @@ -241,6 +267,13 @@ class SVTSeriesIE(InfoExtractor): if not isinstance(metadata, dict): metadata = {} + title = metadata.get('title') + season_name = season_name or season_slug + + if title and season_name: + title = '%s - %s' % (title, season_name) + elif season_slug: + title = season_slug + return self.playlist_result( - entries, video_id, metadata.get('title'), - metadata.get('description')) + entries, series_id, title, metadata.get('description')) From 1236ac6b0bc5ef49e4065ddfc310d15651633093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Apr 2018 00:28:36 +0700 Subject: [PATCH 52/77] [svtplay] Share svtplay regex --- youtube_dl/extractor/svt.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index d1d601b1f..b544da414 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -128,7 +128,11 @@ class SVTIE(SVTBaseIE): return info_dict -class SVTPlayIE(SVTBaseIE): +class SVTPlayBaseIE(SVTBaseIE): + _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P{.+?})\s*;\s*\n' + + +class SVTPlayIE(SVTPlayBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P[0-9]+)' _TESTS = [{ @@ -163,8 +167,8 @@ class SVTPlayIE(SVTBaseIE): data = self._parse_json( self._search_regex( - r'root\["__svtplay"\]\s*=\s*([^;]+);', - webpage, 'embedded data', default='{}'), + self._SVTPLAY_RE, webpage, 'embedded data', default='{}', + group='json'), video_id, fatal=False) thumbnail = self._og_search_thumbnail(webpage) @@ -197,7 +201,7 @@ class SVTPlayIE(SVTBaseIE): return info_dict -class SVTSeriesIE(InfoExtractor): +class SVTSeriesIE(SVTPlayBaseIE): _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.svtplay.se/rederiet', @@ -235,8 +239,7 @@ class SVTSeriesIE(InfoExtractor): root = self._parse_json( self._search_regex( - r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P{.+?})\s*;\s*\n', - webpage, 'content', group='json'), + self._SVTPLAY_RE, webpage, 'content', group='json'), series_id) season_name = None From 235d828b7b113f22309a0f30048678baea210620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Apr 2018 23:49:15 +0700 Subject: [PATCH 53/77] [openload] Fix extraction (closes #16099) --- youtube_dl/extractor/openload.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index af7db6e12..3e0a7a9a2 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -334,10 +334,11 @@ class OpenloadIE(InfoExtractor): decoded_id = (get_element_by_id('streamurl', webpage) or get_element_by_id('streamuri', webpage) or - get_element_by_id('streamurj', webpage)) - - if not decoded_id: - raise ExtractorError('Can\'t find stream URL', video_id=video_id) + get_element_by_id('streamurj', webpage) or + self._search_regex( + (r'>\s*([\da-zA-Z]+~\d{10,}~\d+\.\d+\.0\.0~[\da-zA-Z]+)\s*<', + r'>\s*([\w~]+~\d+\.\d+\.\d+\.\d+~[\w~]+)'), webpage, + 'stream URL')) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id From fdfb32a0dd80de4be67b0fcf93764bfa2a4ce7a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Apr 2018 00:15:22 +0700 Subject: [PATCH 54/77] [openload] Relax stream URL regex --- youtube_dl/extractor/openload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 3e0a7a9a2..9f5bebe40 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -336,8 +336,8 @@ class OpenloadIE(InfoExtractor): get_element_by_id('streamuri', webpage) or get_element_by_id('streamurj', webpage) or self._search_regex( - (r'>\s*([\da-zA-Z]+~\d{10,}~\d+\.\d+\.0\.0~[\da-zA-Z]+)\s*<', - r'>\s*([\w~]+~\d+\.\d+\.\d+\.\d+~[\w~]+)'), webpage, + (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', + r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)'), webpage, 'stream URL')) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id From e944737c597a2f5e8e6ade93a25fc812119d4eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Apr 2018 23:40:15 +0700 Subject: [PATCH 55/77] [openload] Add support for oload.xyz --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 9f5bebe40..650f95656 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -298,6 +298,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.stream/f/KnG-kKZdcfY', 'only_matching': True, + }, { + 'url': 'https://oload.xyz/f/WwRBpzW8Wtk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From e2750e1437497925c5a058947b850ddadd7ee7d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 7 Apr 2018 20:55:01 +0700 Subject: [PATCH 56/77] [liveleak] Extend _VALID_URL (closes #16117) --- youtube_dl/extractor/liveleak.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 246aac576..26671753c 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -7,7 +7,7 @@ from ..utils import int_or_none class LiveLeakIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P[\w_]+)(?:.*)' + _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P[\w_]+)' _TESTS = [{ 'url': 'http://www.liveleak.com/view?i=757_1364311680', 'md5': '0813c2430bea7a46bf13acf3406992f4', @@ -79,6 +79,9 @@ class LiveLeakIE(InfoExtractor): 'title': 'Fuel Depot in China Explosion caught on video', }, 'playlist_count': 3, + }, { + 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227', + 'only_matching': True, }] @staticmethod From 9d15be3a5b1f0764d8493ccfc312fc0d0a2df164 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Sat, 7 Apr 2018 09:39:21 -0500 Subject: [PATCH 57/77] [drtuber] Fix title extraction (closes #16107) --- youtube_dl/extractor/drtuber.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index c88b3126b..5c41c8022 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -66,7 +66,9 @@ class DrTuberIE(InfoExtractor): self._sort_formats(formats) title = self._html_search_regex( - (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', + (r']+class=["\']title[^>]+>([^<]+)', + r'([^<]+)\s*@\s+DrTuber', + r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'), webpage, 'title') From ff826177cc154ba8c67b8162a25e067783dc4caa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Apr 2018 23:57:32 +0700 Subject: [PATCH 58/77] [instagram:user] Fix extraction (closes #16119) --- youtube_dl/extractor/instagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index f9cd11b8e..9f570249f 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -243,6 +243,8 @@ class InstagramUserIE(InfoExtractor): return int_or_none(try_get( node, lambda x: x['edge_media_' + suffix]['count'])) + self._set_cookie('instagram.com', 'ig_pr', '1') + cursor = '' for page_num in itertools.count(1): media = self._download_json( From 1c9b1a449430bd8b267c9c43ce7ed7cb73ac4433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Apr 2018 00:08:45 +0700 Subject: [PATCH 59/77] [acast] Fix extraction (closes #16118) --- youtube_dl/extractor/acast.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 5871e72dc..4ad549c92 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -7,7 +7,7 @@ import functools from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - int_or_none, + float_or_none, unified_timestamp, OnDemandPagedList, ) @@ -46,18 +46,22 @@ class ACastIE(InfoExtractor): def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() + s = self._download_json( + 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id), + display_id)['result'] + media_url = s['url'] cast_data = self._download_json( 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id) e = cast_data['result']['episode'] return { 'id': compat_str(e['id']), 'display_id': display_id, - 'url': e['mediaUrl'], + 'url': media_url, 'title': e['name'], 'description': e.get('description'), 'thumbnail': e.get('image'), 'timestamp': unified_timestamp(e.get('publishingDate')), - 'duration': int_or_none(e.get('duration')), + 'duration': float_or_none(s.get('duration') or e.get('duration')), } From cae5d9705c28ffc0bf5e149a5f92d31a48208e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Apr 2018 00:21:55 +0700 Subject: [PATCH 60/77] [acast] Extract more metadata --- youtube_dl/extractor/acast.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 4ad549c92..6d846ea7a 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -8,6 +8,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( float_or_none, + int_or_none, + try_get, unified_timestamp, OnDemandPagedList, ) @@ -24,23 +26,29 @@ class ACastIE(InfoExtractor): 'id': '57de3baa-4bb0-487e-9418-2692c1277a34', 'ext': 'mp3', 'title': '"Where Are You?": Taipei 101, Taiwan', + 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'timestamp': 1196172000, 'upload_date': '20071127', - 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'duration': 211, + 'creator': 'Concierge', + 'series': 'Condé Nast Traveler Podcast', + 'episode': '"Where Are You?": Taipei 101, Taiwan', } }, { # test with multiple blings 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', - 'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0', + 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', 'info_dict': { 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'ext': 'mp3', 'title': '2. Raggarmordet - Röster ur det förflutna', + 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', 'timestamp': 1477346700, 'upload_date': '20161024', - 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', - 'duration': 2766, + 'duration': 2766.602563, + 'creator': 'Anton Berg & Martin Johnson', + 'series': 'Spår', + 'episode': '2. Raggarmordet - Röster ur det förflutna', } }] @@ -51,17 +59,25 @@ class ACastIE(InfoExtractor): display_id)['result'] media_url = s['url'] cast_data = self._download_json( - 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id) - e = cast_data['result']['episode'] + 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), + display_id)['result'] + e = cast_data['episode'] + title = e['name'] return { 'id': compat_str(e['id']), 'display_id': display_id, 'url': media_url, - 'title': e['name'], - 'description': e.get('description'), + 'title': title, + 'description': e.get('description') or e.get('summary'), 'thumbnail': e.get('image'), 'timestamp': unified_timestamp(e.get('publishingDate')), 'duration': float_or_none(s.get('duration') or e.get('duration')), + 'filesize': int_or_none(e.get('contentLength')), + 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), + 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), + 'season_number': int_or_none(e.get('seasonNumber')), + 'episode': title, + 'episode_number': int_or_none(e.get('episodeNumber')), } From 717ea4e14e59bded0c2fb20e84b6513d82644b43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Apr 2018 00:29:43 +0700 Subject: [PATCH 61/77] [steam] Bypass mature content check (closes #16113) --- youtube_dl/extractor/steam.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index e5ac586a7..a6a191ceb 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -75,6 +75,9 @@ class SteamIE(InfoExtractor): gameID = m.group('gameID') playlist_id = gameID videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id + + self._set_cookie('steampowered.com', 'mature_content', '1') + webpage = self._download_webpage(videourl, playlist_id) if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: From 66b686727b198a6b14ddcbcfdcbaadd5b203362f Mon Sep 17 00:00:00 2001 From: aeph6Ee0 <aeph6Ee0@users.noreply.github.com> Date: Sat, 7 Apr 2018 22:09:42 +0200 Subject: [PATCH 62/77] [extractor/common] Relax JSON-LD context check (closes #16006) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 890232586..59b9d3739 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,7 +1025,7 @@ class InfoExtractor(object): }) for e in json_ld: - if e.get('@context') == 'http://schema.org': + if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: return info From 608c738c7d8e6be21f0cc0bb7a844bad9d841964 Mon Sep 17 00:00:00 2001 From: GDR! <gdr@gdr.name> Date: Sun, 8 Apr 2018 17:13:00 +0200 Subject: [PATCH 63/77] [odnoklassniki] Extend _VALID_URL (closes #16081) --- youtube_dl/extractor/odnoklassniki.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 5c8b37e18..d87d0960f 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -19,7 +19,7 @@ from ..utils import ( class OdnoklassnikiIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)' + _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?/|web-api/video/moviePlayer/|live/|dk\?.*?st\.mvId=)(?P<id>[\d-]+)' _TESTS = [{ # metadata in JSON 'url': 'http://ok.ru/video/20079905452', @@ -101,6 +101,9 @@ class OdnoklassnikiIE(InfoExtractor): }, { 'url': 'https://www.ok.ru/live/484531969818', 'only_matching': True, + }, { + 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#', + 'only_matching': True, }] def _real_extract(self, url): From d04ca9761615e2ed3fdf89d8d87a4b9adfffacc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Apr 2018 22:21:21 +0700 Subject: [PATCH 64/77] [odnoklassniki] Improve _VALID_URL readability --- youtube_dl/extractor/odnoklassniki.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index d87d0960f..190d8af4d 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -19,7 +19,18 @@ from ..utils import ( class OdnoklassnikiIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?/|web-api/video/moviePlayer/|live/|dk\?.*?st\.mvId=)(?P<id>[\d-]+)' + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|m|mobile)\.)? + (?:odnoklassniki|ok)\.ru/ + (?: + video(?:embed)?/| + web-api/video/moviePlayer/| + live/| + dk\?.*?st\.mvId= + ) + (?P<id>[\d-]+) + ''' _TESTS = [{ # metadata in JSON 'url': 'http://ok.ru/video/20079905452', From 1fc37ca3f181159c98bccf081766abb73b9d344f Mon Sep 17 00:00:00 2001 From: Surya Oktafendri <f2face@f2face.com> Date: Mon, 9 Apr 2018 00:19:23 +0700 Subject: [PATCH 65/77] [generic] Add support for share-videos.se embeds (closes #16089) --- youtube_dl/extractor/generic.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index cf64398e3..4b210da72 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1967,6 +1967,16 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, + { + 'url': 'http://share-videos.se/auto/video/83645793?uid=13', + 'md5': 'b68d276de422ab07ee1d49388103f457', + 'info_dict': { + 'id': '83645793', + 'title': 'Lock up and get excited', + 'thumbnail': r're:^https?://.*\.jpg(\?.*)?$', + 'ext': 'mp4' + } } # { # # TODO: find another test @@ -2978,6 +2988,14 @@ class GenericIE(InfoExtractor): merged[k] = v return merged + # Look for Share-Videos.se embeds + sharevideosse_urls = [m.group('url') for m in re.finditer( + r'<iframe[^>]+?src\s*=\s*(["\'])(?P<url>https?://embed\.share-videos\.se/auto/embed/\d+.+?)\1', + webpage)] + if sharevideosse_urls: + return self.playlist_from_matches( + sharevideosse_urls, video_id, video_title) + # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: From d3431dcb90ea72fed502ecfd8f34e7499009a53a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Apr 2018 00:25:44 +0700 Subject: [PATCH 66/77] [generic] Restrict share-videos.se embeds regex to filter bogus URLs (#16115) --- youtube_dl/extractor/generic.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4b210da72..8922d1914 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1974,10 +1974,10 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '83645793', 'title': 'Lock up and get excited', - 'thumbnail': r're:^https?://.*\.jpg(\?.*)?$', 'ext': 'mp4' - } - } + }, + 'skip': 'TODO: fix nested playlists processing in tests', + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2973,6 +2973,13 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key()) + sharevideos_urls = [mobj.group('url') for mobj in re.finditer( + r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', + webpage)] + if sharevideos_urls: + return self.playlist_from_matches( + sharevideos_urls, video_id, video_title) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): @@ -2988,14 +2995,6 @@ class GenericIE(InfoExtractor): merged[k] = v return merged - # Look for Share-Videos.se embeds - sharevideosse_urls = [m.group('url') for m in re.finditer( - r'<iframe[^>]+?src\s*=\s*(["\'])(?P<url>https?://embed\.share-videos\.se/auto/embed/\d+.+?)\1', - webpage)] - if sharevideosse_urls: - return self.playlist_from_matches( - sharevideosse_urls, video_id, video_title) - # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: From 069937151e429a2127569910d204c03eec167f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Apr 2018 00:37:15 +0700 Subject: [PATCH 67/77] [generic] Add support for tube8 embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/tube8.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8922d1914..e3cb5c5ce 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -58,6 +58,7 @@ from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE +from .tube8 import Tube8IE from .vimeo import VimeoIE from .dailymotion import DailymotionIE from .dailymail import DailyMailIE @@ -2556,6 +2557,11 @@ class GenericIE(InfoExtractor): if redtube_urls: return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key()) + # Look for embedded Tube8 player + tube8_urls = Tube8IE._extract_urls(webpage) + if tube8_urls: + return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 1853a1104..368c45729 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -31,6 +31,12 @@ class Tube8IE(KeezMoviesIE): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)', + webpage) + def _real_extract(self, url): webpage, info = self._extract_info(url) From 94c3442e6ae176b01b3b5eae0a3adc355319b569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Apr 2018 01:03:55 +0700 Subject: [PATCH 68/77] [YoutubeDL] Do not save/restore console title while simulate (closes #16103) --- youtube_dl/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 523dd1f7d..fca4999eb 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -532,6 +532,8 @@ class YoutubeDL(object): def save_console_title(self): if not self.params.get('consoletitle', False): return + if self.params.get('simulate', False): + return if compat_os_name != 'nt' and 'TERM' in os.environ: # Save the title on stack self._write_string('\033[22;0t', self._screen_file) @@ -539,6 +541,8 @@ class YoutubeDL(object): def restore_console_title(self): if not self.params.get('consoletitle', False): return + if self.params.get('simulate', False): + return if compat_os_name != 'nt' and 'TERM' in os.environ: # Restore the title from stack self._write_string('\033[23;0t', self._screen_file) From 880ed89d491af9d85680777422c49e07f747e095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Apr 2018 01:14:47 +0700 Subject: [PATCH 69/77] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 89dfbd8b8..9b01a8062 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version <unreleased> + +Core +* [YoutubeDL] Do not save/restore console title while simulate (#16103) +* [extractor/common] Relax JSON-LD context check (#16006) + +Extractors ++ [generic] Add support for tube8 embeds ++ [generic] Add support for share-videos.se embeds (#16089, #16115) +* [odnoklassniki] Extend URL regular expression (#16081) +* [steam] Bypass mature content check (#16113) ++ [acast] Extract more metadata +* [acast] Fix extraction (#16118) +* [instagram:user] Fix extraction (#16119) +* [drtuber] Fix title extraction (#16107, #16108) +* [liveleak] Extend URL regular expression (#16117) ++ [openload] Add support for oload.xyz +* [openload] Relax stream URL regular expression +* [openload] Fix extraction (#16099) ++ [svtplay:series] Add support for season URLs ++ [svtplay:series] Add support for series (#11130, #16059) + + version 2018.04.03 Extractors From f7f9757efcd4f5eaaa31e16ff14fc6627f515393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Apr 2018 01:19:27 +0700 Subject: [PATCH 70/77] release 2018.04.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 99e8acd33..ed622afd1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.04.03 +[debug] youtube-dl version 2018.04.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 9b01a8062..4385c4091 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.04.09 Core * [YoutubeDL] Do not save/restore console title while simulate (#16103) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 17baac5ab..1c13199d4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -804,6 +804,7 @@ - **SunPorno** - **SVT** - **SVTPlay**: SVT Play and Öppet arkiv + - **SVTSeries** - **SWRMediathek** - **Syfy** - **SztvHu** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a3163509c..307d6041a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.04.03' +__version__ = '2018.04.09' From fce7962691a0f5874753cad431a8bb6ed31efc69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 10 Apr 2018 23:07:37 +0700 Subject: [PATCH 71/77] [twitch] Add support for mobile URLs (closes #16146) --- youtube_dl/extractor/twitch.py | 47 ++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 1981b4d4a..f736283e9 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -28,7 +28,7 @@ from ..utils import ( class TwitchBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv' + _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv' _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' @@ -226,7 +226,7 @@ class TwitchVodIE(TwitchItemBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/| + (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/| player\.twitch\.tv/\?.*?\bvideo=v ) (?P<id>\d+) @@ -279,6 +279,9 @@ class TwitchVodIE(TwitchItemBaseIE): }, { 'url': 'https://www.twitch.tv/videos/6528877', 'only_matching': True, + }, { + 'url': 'https://m.twitch.tv/beagsandjam/v/247478721', + 'only_matching': True, }] def _real_extract(self, url): @@ -390,14 +393,17 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE _PLAYLIST_TYPE = 'profile' - _TEST = { + _TESTS = [{ 'url': 'http://www.twitch.tv/vanillatv/profile', 'info_dict': { 'id': 'vanillatv', 'title': 'VanillaTV', }, 'playlist_mincount': 412, - } + }, { + 'url': 'http://m.twitch.tv/vanillatv/profile', + 'only_matching': True, + }] class TwitchVideosBaseIE(TwitchPlaylistBaseIE): @@ -411,14 +417,17 @@ class TwitchAllVideosIE(TwitchVideosBaseIE): _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight' _PLAYLIST_TYPE = 'all videos' - _TEST = { + _TESTS = [{ 'url': 'https://www.twitch.tv/spamfish/videos/all', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 869, - } + }, { + 'url': 'https://m.twitch.tv/spamfish/videos/all', + 'only_matching': True, + }] class TwitchUploadsIE(TwitchVideosBaseIE): @@ -427,14 +436,17 @@ class TwitchUploadsIE(TwitchVideosBaseIE): _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' _PLAYLIST_TYPE = 'uploads' - _TEST = { + _TESTS = [{ 'url': 'https://www.twitch.tv/spamfish/videos/uploads', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 0, - } + }, { + 'url': 'https://m.twitch.tv/spamfish/videos/uploads', + 'only_matching': True, + }] class TwitchPastBroadcastsIE(TwitchVideosBaseIE): @@ -443,14 +455,17 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE): _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' _PLAYLIST_TYPE = 'past broadcasts' - _TEST = { + _TESTS = [{ 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 0, - } + }, { + 'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts', + 'only_matching': True, + }] class TwitchHighlightsIE(TwitchVideosBaseIE): @@ -459,14 +474,17 @@ class TwitchHighlightsIE(TwitchVideosBaseIE): _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight' _PLAYLIST_TYPE = 'highlights' - _TEST = { + _TESTS = [{ 'url': 'https://www.twitch.tv/spamfish/videos/highlights', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 805, - } + }, { + 'url': 'https://m.twitch.tv/spamfish/videos/highlights', + 'only_matching': True, + }] class TwitchStreamIE(TwitchBaseIE): @@ -474,7 +492,7 @@ class TwitchStreamIE(TwitchBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:(?:www|go)\.)?twitch\.tv/| + (?:(?:www|go|m)\.)?twitch\.tv/| player\.twitch\.tv/\?.*?\bchannel= ) (?P<id>[^/#?]+) @@ -508,6 +526,9 @@ class TwitchStreamIE(TwitchBaseIE): }, { 'url': 'https://go.twitch.tv/food', 'only_matching': True, + }, { + 'url': 'https://m.twitch.tv/food', + 'only_matching': True, }] @classmethod From dd9aea8cbdabc7622446d387ed6ed59e47b79de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 Apr 2018 01:25:41 +0700 Subject: [PATCH 72/77] [instagram:user] Add request signing (closes #16119) --- youtube_dl/extractor/instagram.py | 161 +++++++++++++++++++++++++++--- 1 file changed, 149 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 9f570249f..1c917bc95 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -2,14 +2,20 @@ from __future__ import unicode_literals import itertools import json +import os import re +import subprocess +import tempfile from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + check_executable, + ExtractorError, get_element_by_attribute, int_or_none, lowercase_escape, + std_headers, try_get, ) @@ -238,24 +244,140 @@ class InstagramUserIE(InfoExtractor): } } - def _entries(self, uploader_id): + _SIGN_CODE = ''' +"use strict"; +function i(e, t) { + var r = (65535 & e) + (65535 & t); + return (e >> 16) + (t >> 16) + (r >> 16) << 16 | 65535 & r +} +function a(e, t, r, n, o, a) { + return i((s = i(i(t, e), i(n, a))) << (c = o) | s >>> 32 - c, r); + var s, c +} +function s(e, t, r, n, o, i, s) { + return a(t & r | ~t & n, e, t, o, i, s) +} +function c(e, t, r, n, o, i, s) { + return a(t & n | r & ~n, e, t, o, i, s) +} +function u(e, t, r, n, o, i, s) { + return a(t ^ r ^ n, e, t, o, i, s) +} +function l(e, t, r, n, o, i, s) { + return a(r ^ (t | ~n), e, t, o, i, s) +} +function p(e, t) { + var r, n, o, a, p; + e[t >> 5] |= 128 << t % 32, + e[14 + (t + 64 >>> 9 << 4)] = t; + var d = 1732584193 + , f = -271733879 + , h = -1732584194 + , g = 271733878; + for (r = 0; r < e.length; r += 16) + n = d, + o = f, + a = h, + p = g, + f = l(f = l(f = l(f = l(f = u(f = u(f = u(f = u(f = c(f = c(f = c(f = c(f = s(f = s(f = s(f = s(f, h = s(h, g = s(g, d = s(d, f, h, g, e[r], 7, -680876936), f, h, e[r + 1], 12, -389564586), d, f, e[r + 2], 17, 606105819), g, d, e[r + 3], 22, -1044525330), h = s(h, g = s(g, d = s(d, f, h, g, e[r + 4], 7, -176418897), f, h, e[r + 5], 12, 1200080426), d, f, e[r + 6], 17, -1473231341), g, d, e[r + 7], 22, -45705983), h = s(h, g = s(g, d = s(d, f, h, g, e[r + 8], 7, 1770035416), f, h, e[r + 9], 12, -1958414417), d, f, e[r + 10], 17, -42063), g, d, e[r + 11], 22, -1990404162), h = s(h, g = s(g, d = s(d, f, h, g, e[r + 12], 7, 1804603682), f, h, e[r + 13], 12, -40341101), d, f, e[r + 14], 17, -1502002290), g, d, e[r + 15], 22, 1236535329), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 1], 5, -165796510), f, h, e[r + 6], 9, -1069501632), d, f, e[r + 11], 14, 643717713), g, d, e[r], 20, -373897302), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 5], 5, -701558691), f, h, e[r + 10], 9, 38016083), d, f, e[r + 15], 14, -660478335), g, d, e[r + 4], 20, -405537848), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 9], 5, 568446438), f, h, e[r + 14], 9, -1019803690), d, f, e[r + 3], 14, -187363961), g, d, e[r + 8], 20, 1163531501), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 13], 5, -1444681467), f, h, e[r + 2], 9, -51403784), d, f, e[r + 7], 14, 1735328473), g, d, e[r + 12], 20, -1926607734), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 5], 4, -378558), f, h, e[r + 8], 11, -2022574463), d, f, e[r + 11], 16, 1839030562), g, d, e[r + 14], 23, -35309556), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 1], 4, -1530992060), f, h, e[r + 4], 11, 1272893353), d, f, e[r + 7], 16, -155497632), g, d, e[r + 10], 23, -1094730640), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 13], 4, 681279174), f, h, e[r], 11, -358537222), d, f, e[r + 3], 16, -722521979), g, d, e[r + 6], 23, 76029189), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 9], 4, -640364487), f, h, e[r + 12], 11, -421815835), d, f, e[r + 15], 16, 530742520), g, d, e[r + 2], 23, -995338651), h = l(h, g = l(g, d = l(d, f, h, g, e[r], 6, -198630844), f, h, e[r + 7], 10, 1126891415), d, f, e[r + 14], 15, -1416354905), g, d, e[r + 5], 21, -57434055), h = l(h, g = l(g, d = l(d, f, h, g, e[r + 12], 6, 1700485571), f, h, e[r + 3], 10, -1894986606), d, f, e[r + 10], 15, -1051523), g, d, e[r + 1], 21, -2054922799), h = l(h, g = l(g, d = l(d, f, h, g, e[r + 8], 6, 1873313359), f, h, e[r + 15], 10, -30611744), d, f, e[r + 6], 15, -1560198380), g, d, e[r + 13], 21, 1309151649), h = l(h, g = l(g, d = l(d, f, h, g, e[r + 4], 6, -145523070), f, h, e[r + 11], 10, -1120210379), d, f, e[r + 2], 15, 718787259), g, d, e[r + 9], 21, -343485551), + d = i(d, n), + f = i(f, o), + h = i(h, a), + g = i(g, p); + return [d, f, h, g] +} +function d(e) { + var t, r = "", n = 32 * e.length; + for (t = 0; t < n; t += 8) + r += String.fromCharCode(e[t >> 5] >>> t % 32 & 255); + return r +} +function f(e) { + var t, r = []; + for (r[(e.length >> 2) - 1] = void 0, + t = 0; t < r.length; t += 1) + r[t] = 0; + var n = 8 * e.length; + for (t = 0; t < n; t += 8) + r[t >> 5] |= (255 & e.charCodeAt(t / 8)) << t % 32; + return r +} +function h(e) { + var t, r, n = ""; + for (r = 0; r < e.length; r += 1) + t = e.charCodeAt(r), + n += "0123456789abcdef".charAt(t >>> 4 & 15) + "0123456789abcdef".charAt(15 & t); + return n +} +function g(e) { + return unescape(encodeURIComponent(e)) +} +function b(e) { + return function(e) { + return d(p(f(e), 8 * e.length)) + }(g(e)) +} +function m(e, t) { + return function(e, t) { + var r, n, o = f(e), i = [], a = []; + for (i[15] = a[15] = void 0, + o.length > 16 && (o = p(o, 8 * e.length)), + r = 0; r < 16; r += 1) + i[r] = 909522486 ^ o[r], + a[r] = 1549556828 ^ o[r]; + return n = p(i.concat(f(t)), 512 + 8 * t.length), + d(p(a.concat(n), 640)) + }(g(e), g(t)) +} +function v(e, t, r) { + return t ? r ? m(t, e) : h(m(t, e)) : r ? b(e) : h(b(e)) +} +function sign(s) { + return v(s); +} +''' + + def _entries(self, data): def get_count(suffix): return int_or_none(try_get( node, lambda x: x['edge_media_' + suffix]['count'])) + uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + csrf_token = data['config']['csrf_token'] + rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8' + self._set_cookie('instagram.com', 'ig_pr', '1') + def sign(s): + js_code = self._SIGN_CODE + "console.log(sign('%s')); phantom.exit();" % s + with open(self._phantomjs_script.name, 'w') as f: + f.write(js_code) + p = subprocess.Popen( + ['phantomjs', '--ssl-protocol=any', f.name], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + gis, err = p.communicate() + if p.returncode != 0: + raise ExtractorError('Failed to sign request\n:' + err.decode('utf-8')) + return gis.decode('utf-8').strip() + cursor = '' for page_num in itertools.count(1): + variables = json.dumps({ + 'id': uploader_id, + 'first': 100, + 'after': cursor, + }) + gis = sign( + '%s:%s:%s:%s' + % (rhx_gis, csrf_token, std_headers['User-Agent'], variables)) media = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, - 'Downloading JSON page %d' % page_num, query={ + 'Downloading JSON page %d' % page_num, headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'X-Instagram-GIS': gis, + }, query={ 'query_hash': '472f257a40c653c64c666ce877d59d2b', - 'variables': json.dumps({ - 'id': uploader_id, - 'first': 100, - 'after': cursor, - }) + 'variables': variables, })['data']['user']['edge_owner_to_timeline_media'] edges = media.get('edges') @@ -309,11 +431,26 @@ class InstagramUserIE(InfoExtractor): if not cursor or not isinstance(cursor, compat_str): break + def _real_initialize(self): + if not check_executable('phantomjs', ['-v']): + raise ExtractorError( + 'PhantomJS executable not found in PATH, download it from http://phantomjs.org', + expected=True) + self._phantomjs_script = tempfile.NamedTemporaryFile(delete=False) + self._phantomjs_script.close() + + def __del__(self): + os.unlink(self._phantomjs_script.name) + def _real_extract(self, url): username = self._match_id(url) - uploader_id = self._download_json( - 'https://instagram.com/%s/' % username, username, query={ - '__a': 1, - })['graphql']['user']['id'] + + webpage = self._download_webpage(url, username) + + data = self._parse_json( + self._search_regex( + r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), + username) + return self.playlist_result( - self._entries(uploader_id), username, username) + self._entries(data), username, username) From 315ab3d500964f1d8442135889e1886ca6d90100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 Apr 2018 01:51:57 +0700 Subject: [PATCH 73/77] [instagram:user] Simplify signing (#16119) --- youtube_dl/extractor/instagram.py | 128 +----------------------------- 1 file changed, 3 insertions(+), 125 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 1c917bc95..76452a6a1 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -1,17 +1,13 @@ from __future__ import unicode_literals import itertools +import hashlib import json -import os import re -import subprocess -import tempfile from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - check_executable, - ExtractorError, get_element_by_attribute, int_or_none, lowercase_escape, @@ -244,99 +240,6 @@ class InstagramUserIE(InfoExtractor): } } - _SIGN_CODE = ''' -"use strict"; -function i(e, t) { - var r = (65535 & e) + (65535 & t); - return (e >> 16) + (t >> 16) + (r >> 16) << 16 | 65535 & r -} -function a(e, t, r, n, o, a) { - return i((s = i(i(t, e), i(n, a))) << (c = o) | s >>> 32 - c, r); - var s, c -} -function s(e, t, r, n, o, i, s) { - return a(t & r | ~t & n, e, t, o, i, s) -} -function c(e, t, r, n, o, i, s) { - return a(t & n | r & ~n, e, t, o, i, s) -} -function u(e, t, r, n, o, i, s) { - return a(t ^ r ^ n, e, t, o, i, s) -} -function l(e, t, r, n, o, i, s) { - return a(r ^ (t | ~n), e, t, o, i, s) -} -function p(e, t) { - var r, n, o, a, p; - e[t >> 5] |= 128 << t % 32, - e[14 + (t + 64 >>> 9 << 4)] = t; - var d = 1732584193 - , f = -271733879 - , h = -1732584194 - , g = 271733878; - for (r = 0; r < e.length; r += 16) - n = d, - o = f, - a = h, - p = g, - f = l(f = l(f = l(f = l(f = u(f = u(f = u(f = u(f = c(f = c(f = c(f = c(f = s(f = s(f = s(f = s(f, h = s(h, g = s(g, d = s(d, f, h, g, e[r], 7, -680876936), f, h, e[r + 1], 12, -389564586), d, f, e[r + 2], 17, 606105819), g, d, e[r + 3], 22, -1044525330), h = s(h, g = s(g, d = s(d, f, h, g, e[r + 4], 7, -176418897), f, h, e[r + 5], 12, 1200080426), d, f, e[r + 6], 17, -1473231341), g, d, e[r + 7], 22, -45705983), h = s(h, g = s(g, d = s(d, f, h, g, e[r + 8], 7, 1770035416), f, h, e[r + 9], 12, -1958414417), d, f, e[r + 10], 17, -42063), g, d, e[r + 11], 22, -1990404162), h = s(h, g = s(g, d = s(d, f, h, g, e[r + 12], 7, 1804603682), f, h, e[r + 13], 12, -40341101), d, f, e[r + 14], 17, -1502002290), g, d, e[r + 15], 22, 1236535329), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 1], 5, -165796510), f, h, e[r + 6], 9, -1069501632), d, f, e[r + 11], 14, 643717713), g, d, e[r], 20, -373897302), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 5], 5, -701558691), f, h, e[r + 10], 9, 38016083), d, f, e[r + 15], 14, -660478335), g, d, e[r + 4], 20, -405537848), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 9], 5, 568446438), f, h, e[r + 14], 9, -1019803690), d, f, e[r + 3], 14, -187363961), g, d, e[r + 8], 20, 1163531501), h = c(h, g = c(g, d = c(d, f, h, g, e[r + 13], 5, -1444681467), f, h, e[r + 2], 9, -51403784), d, f, e[r + 7], 14, 1735328473), g, d, e[r + 12], 20, -1926607734), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 5], 4, -378558), f, h, e[r + 8], 11, -2022574463), d, f, e[r + 11], 16, 1839030562), g, d, e[r + 14], 23, -35309556), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 1], 4, -1530992060), f, h, e[r + 4], 11, 1272893353), d, f, e[r + 7], 16, -155497632), g, d, e[r + 10], 23, -1094730640), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 13], 4, 681279174), f, h, e[r], 11, -358537222), d, f, e[r + 3], 16, -722521979), g, d, e[r + 6], 23, 76029189), h = u(h, g = u(g, d = u(d, f, h, g, e[r + 9], 4, -640364487), f, h, e[r + 12], 11, -421815835), d, f, e[r + 15], 16, 530742520), g, d, e[r + 2], 23, -995338651), h = l(h, g = l(g, d = l(d, f, h, g, e[r], 6, -198630844), f, h, e[r + 7], 10, 1126891415), d, f, e[r + 14], 15, -1416354905), g, d, e[r + 5], 21, -57434055), h = l(h, g = l(g, d = l(d, f, h, g, e[r + 12], 6, 1700485571), f, h, e[r + 3], 10, -1894986606), d, f, e[r + 10], 15, -1051523), g, d, e[r + 1], 21, -2054922799), h = l(h, g = l(g, d = l(d, f, h, g, e[r + 8], 6, 1873313359), f, h, e[r + 15], 10, -30611744), d, f, e[r + 6], 15, -1560198380), g, d, e[r + 13], 21, 1309151649), h = l(h, g = l(g, d = l(d, f, h, g, e[r + 4], 6, -145523070), f, h, e[r + 11], 10, -1120210379), d, f, e[r + 2], 15, 718787259), g, d, e[r + 9], 21, -343485551), - d = i(d, n), - f = i(f, o), - h = i(h, a), - g = i(g, p); - return [d, f, h, g] -} -function d(e) { - var t, r = "", n = 32 * e.length; - for (t = 0; t < n; t += 8) - r += String.fromCharCode(e[t >> 5] >>> t % 32 & 255); - return r -} -function f(e) { - var t, r = []; - for (r[(e.length >> 2) - 1] = void 0, - t = 0; t < r.length; t += 1) - r[t] = 0; - var n = 8 * e.length; - for (t = 0; t < n; t += 8) - r[t >> 5] |= (255 & e.charCodeAt(t / 8)) << t % 32; - return r -} -function h(e) { - var t, r, n = ""; - for (r = 0; r < e.length; r += 1) - t = e.charCodeAt(r), - n += "0123456789abcdef".charAt(t >>> 4 & 15) + "0123456789abcdef".charAt(15 & t); - return n -} -function g(e) { - return unescape(encodeURIComponent(e)) -} -function b(e) { - return function(e) { - return d(p(f(e), 8 * e.length)) - }(g(e)) -} -function m(e, t) { - return function(e, t) { - var r, n, o = f(e), i = [], a = []; - for (i[15] = a[15] = void 0, - o.length > 16 && (o = p(o, 8 * e.length)), - r = 0; r < 16; r += 1) - i[r] = 909522486 ^ o[r], - a[r] = 1549556828 ^ o[r]; - return n = p(i.concat(f(t)), 512 + 8 * t.length), - d(p(a.concat(n), 640)) - }(g(e), g(t)) -} -function v(e, t, r) { - return t ? r ? m(t, e) : h(m(t, e)) : r ? b(e) : h(b(e)) -} -function sign(s) { - return v(s); -} -''' - def _entries(self, data): def get_count(suffix): return int_or_none(try_get( @@ -348,18 +251,6 @@ function sign(s) { self._set_cookie('instagram.com', 'ig_pr', '1') - def sign(s): - js_code = self._SIGN_CODE + "console.log(sign('%s')); phantom.exit();" % s - with open(self._phantomjs_script.name, 'w') as f: - f.write(js_code) - p = subprocess.Popen( - ['phantomjs', '--ssl-protocol=any', f.name], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - gis, err = p.communicate() - if p.returncode != 0: - raise ExtractorError('Failed to sign request\n:' + err.decode('utf-8')) - return gis.decode('utf-8').strip() - cursor = '' for page_num in itertools.count(1): variables = json.dumps({ @@ -367,14 +258,12 @@ function sign(s) { 'first': 100, 'after': cursor, }) - gis = sign( - '%s:%s:%s:%s' - % (rhx_gis, csrf_token, std_headers['User-Agent'], variables)) + s = '%s:%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent'], variables) media = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, 'Downloading JSON page %d' % page_num, headers={ 'X-Requested-With': 'XMLHttpRequest', - 'X-Instagram-GIS': gis, + 'X-Instagram-GIS': hashlib.md5(s.encode('utf-8')).hexdigest(), }, query={ 'query_hash': '472f257a40c653c64c666ce877d59d2b', 'variables': variables, @@ -431,17 +320,6 @@ function sign(s) { if not cursor or not isinstance(cursor, compat_str): break - def _real_initialize(self): - if not check_executable('phantomjs', ['-v']): - raise ExtractorError( - 'PhantomJS executable not found in PATH, download it from http://phantomjs.org', - expected=True) - self._phantomjs_script = tempfile.NamedTemporaryFile(delete=False) - self._phantomjs_script.close() - - def __del__(self): - os.unlink(self._phantomjs_script.name) - def _real_extract(self, url): username = self._match_id(url) From d783aee56a720ce15cf2775afc330b2ed5d53baf Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Wed, 11 Apr 2018 09:11:24 -0400 Subject: [PATCH 74/77] [fxnetworks] Add support for https theplatform URLs (closes #16125) --- youtube_dl/extractor/fxnetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 37549fb01..00e67426b 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -41,7 +41,7 @@ class FXNetworksIE(AdobePassIE): if 'The content you are trying to access is not available in your region.' in webpage: self.raise_geo_restricted() video_data = extract_attributes(self._search_regex( - r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')) + r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')) player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) release_url = video_data['rel'] title = video_data['data-title'] From 64f03e5b4c86f7c7e6d660267d77e02da621a94d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 Apr 2018 23:28:55 +0700 Subject: [PATCH 75/77] [cbc:watch] Re-acquire device token when expired (closes #16160) --- youtube_dl/extractor/cbc.py | 59 +++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 3be0c646b..54b4b9be9 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -5,7 +5,10 @@ import json import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_HTTPError, +) from ..utils import ( js_to_json, smuggle_url, @@ -206,30 +209,48 @@ class CBCWatchBaseIE(InfoExtractor): def _call_api(self, path, video_id): url = path if path.startswith('http') else self._API_BASE_URL + path - result = self._download_xml(url, video_id, headers={ - 'X-Clearleap-DeviceId': self._device_id, - 'X-Clearleap-DeviceToken': self._device_token, - }) + for _ in range(2): + try: + result = self._download_xml(url, video_id, headers={ + 'X-Clearleap-DeviceId': self._device_id, + 'X-Clearleap-DeviceToken': self._device_token, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + # Device token has expired, re-acquiring device token + self._register_device() + continue + raise error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage') if error_message: raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message)) return result def _real_initialize(self): - if not self._device_id or not self._device_token: - device = self._downloader.cache.load('cbcwatch', 'device') or {} - self._device_id, self._device_token = device.get('id'), device.get('token') - if not self._device_id or not self._device_token: - result = self._download_xml( - self._API_BASE_URL + 'device/register', - None, data=b'<device><type>web</type></device>') - self._device_id = xpath_text(result, 'deviceId', fatal=True) - self._device_token = xpath_text(result, 'deviceToken', fatal=True) - self._downloader.cache.store( - 'cbcwatch', 'device', { - 'id': self._device_id, - 'token': self._device_token, - }) + if self._valid_device_token(): + return + device = self._downloader.cache.load('cbcwatch', 'device') or {} + self._device_id, self._device_token = device.get('id'), device.get('token') + if self._valid_device_token(): + return + self._register_device() + + def _valid_device_token(self): + return self._device_id and self._device_token + + def _register_device(self): + self._device_id = self._device_token = None + result = self._download_xml( + self._API_BASE_URL + 'device/register', + None, 'Acquiring device token', + data=b'<device><type>web</type></device>') + self._device_id = xpath_text(result, 'deviceId', fatal=True) + self._device_token = xpath_text(result, 'deviceToken', fatal=True) + self._downloader.cache.store( + 'cbcwatch', 'device', { + 'id': self._device_id, + 'token': self._device_token, + }) def _parse_rss_feed(self, rss): channel = xpath_element(rss, 'channel', fatal=True) From 92ded33a05e0760d0ae0e804a1dca0d8a84f3d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 12 Apr 2018 04:53:45 +0700 Subject: [PATCH 76/77] [pornhub] Relax _VALID_URLs (closes #16165) --- youtube_dl/extractor/pornhub.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 9ce513aeb..23e24d216 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) @@ -264,7 +264,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.pornhub.com/playlist/4667351', 'info_dict': { @@ -272,11 +272,14 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): 'title': 'Nataly Hot', }, 'playlist_mincount': 2, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, }] class PornHubUserVideosIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos' + _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos' _TESTS = [{ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { @@ -305,6 +308,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): # Most Viewed Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', + 'only_matching': True, }] def _real_extract(self, url): From 68ddba20ae4e0ab28146e80d3e112a5a2661c386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 Apr 2018 22:27:52 +0700 Subject: [PATCH 77/77] [instagram:user] Remove User-Agent from signature (closes #16119) --- youtube_dl/extractor/instagram.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 76452a6a1..8da1d5f2f 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -11,7 +11,6 @@ from ..utils import ( get_element_by_attribute, int_or_none, lowercase_escape, - std_headers, try_get, ) @@ -258,7 +257,7 @@ class InstagramUserIE(InfoExtractor): 'first': 100, 'after': cursor, }) - s = '%s:%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent'], variables) + s = '%s:%s:%s' % (rhx_gis, csrf_token, variables) media = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, 'Downloading JSON page %d' % page_num, headers={