From fe8fb82c1eee3f1ee01fe36cc115322db7f49ff6 Mon Sep 17 00:00:00 2001 From: Kade Date: Sun, 11 Feb 2018 09:35:28 -0500 Subject: [PATCH] Revert "updating again" (#11) * Revert "[francetv] Separate main extractor and rework others to delegate to it" This reverts commit 24b97ef14a944edeb00bf81ed0ddc990459e3bc3. * Revert "[francetv] Improve manifest URL signing (closes #15536)" This reverts commit 6f54dce07d5ab9780e653193a9db53013712f19d. * Revert "[francetv] Sign m3u8 manifest URLs (closes #15565)" This reverts commit 4eb4ace765dae302460291a790c5e81f373ce5cc. * Revert "[veoh] Add support for embed URLs" This reverts commit 6adfc88ce6034563f6e275c72131cdf87e2d2ab6. * Revert "[dvtv] Skip download on failing test" This reverts commit ad4167ed33a00c70d411e68e5745482880ad2855. * Revert "[afreecatv] Fix extraction (closes #15556)" This reverts commit 8cc0cd0a2ff7bfe27d3571aa75cab841f6f96f91. * Revert "[periscope] Use accessVideoPublic endpoint (closes #15554)" This reverts commit b6aef8f69f16724ca8705823c86a349b97eceeff. * Revert "[YoutubeDL] Add support for filesize_approx in format selector (closes #15550)" This reverts commit eb048b366869f19e3138a76af7c31325c7a66b70. * Revert "[discovery] Fix auth request (closes #15542)" This reverts commit 14adea5bc7d0c0324716403717b3f234aec4d7bb. * Revert "[6play] Extract subtitles (closes #15541)" This reverts commit aee441166254bf3d6810ddaaca2ca6de7f327899. * Revert "Credit @mweinelt for #15124" This reverts commit 82eaab7f58f0a1a0ff85d2829ce48dd61bbbcefe. * Revert "Credit @iamleot for internazionale (#14973)" This reverts commit fdd87ce20957a903653336621797cea866db204f. * Revert "Credit @che0 for seznamzpravy (#14616) and dvtv (#15442)" This reverts commit 9b904aaeba37fa218ddad01bb04a5ea7c8d585aa. * Revert "[newgrounds] Fix metadata extraction (closes #15531)" This reverts commit 8be35d3cd121029b0c134cd3b5672c46add7b88d. * Revert "[nbc] add support for NBC Olympics Streams(closes #10295)" This reverts commit ba6e116f59c296f78668b1b665657b33e7c40410. * Revert "[dvtv] Simplify (closes #15442)" This reverts commit 2da0581a746fe6a98a331a646e64840d7ef78bc0. * Revert "[dvtv] Fix live streams extraction" This reverts commit 773234624f2a8e63a382cba91228a0445261d783. * Revert "release 2018.02.08" This reverts commit 0204b2a7b183a412c5a13bc306f8d77a0de6f9a8. * Revert "[ChangeLog] Actualize" This reverts commit 72aa0a29e54dee40f1b52da5c2e2aa46886dee1b. * Revert "[extractors] Import for myvi:embed" This reverts commit 38e519ff0327b5b4067d5c3b0c40d651c8a508f9. * Revert "[pokemon] PEP 8" This reverts commit 7b11f92ae02d791a1115537c6bd3c129198a7810. * Revert "[gameinformer] PEP 8" This reverts commit aa71431e28de547fb55ab55e47817769c8e91029. * Revert "[myvi] Extend _VALID_URL" This reverts commit 441ec65fd33e224bee2e49092a45af9b1c92fd90. * Revert "[myvi:embed] Add extractor (closes #15521)" This reverts commit e35a984bc7623226bfba13e6c94a2f3cf8fc76fe. * Revert "[prosiebensat1] Extend _VALID_URL (closes #15520)" This reverts commit 0e1a732e390e0e90b48fc54e3b6b61c9accb043a. * Revert "[pokemon] Relax _VALID_URL and extend title extraction (closes #15518)" This reverts commit 8a768fe854833d29eab2cb695ed2e154d2b2cf28. * Revert "[gameinformer] Use geo verification headers" This reverts commit 6cdc00e9989adf7d2e4a09e838791f39dce7c118. * Revert "[la7] Fix extraction (closes #15501)" This reverts commit 8d2fbb06a9fe5821d143039716b058cbd2ae0267. * Revert "[gameinformer] Fix brightcove id extraction" This reverts commit dc1171da811f5f055aab82bb34b068bd6496ca47. * Revert "[afreecatv] Pass referrer to video info request (closes #15507)" This reverts commit c16af2cb4ea0011b3103751574e0b2e2404b3783. * Revert "[telebruxelles] Relax _VALID_URL and add support for live streams" This reverts commit 1f916ea01b9fc6df933d7220100ef14a859fd63c. * Revert "[telebruxelles] Fix extraction (closes #15504)" This reverts commit ea7a8f529844d5ccc201d2d689c07bddce91f3c5. * Revert "[extractor/common] Respect secure schemes in _extract_wowza_formats" This reverts commit b44f7f4fc8d69ea8da69287c4c94f2dfe4c8aec0. * Revert "release 2018.02.04" This reverts commit ed4de10767c633eaa13c4415b4b6819e2cc358c7. * Revert "[ChangeLog] Actualize" This reverts commit f277eb6c9d5d995697fc8f37cb8bcb0780be19b8. * Revert "[brightcove] Pass embed page URL as referrer (closes #15486)" This reverts commit a690cb67422b4d1b1a4976fa4e51444467ac6656. * Revert "[downloader/http] Randomize HTTP chunk size" This reverts commit c315342edb0caa310a184e7c6863cee38abca916. * Revert "[youtube] Enforce using chunked HTTP downloading for DASH formats" This reverts commit 7412fe8d5e8f6229135f3f9d2ca70c91cc5a9e11. * Revert "[downloader/http] Add ability to pass downloader options via info dict" This reverts commit 6cfd25a8e0bd236e1d276f44430e9a6d2545237c. * Revert "[downloader/http] Fix 302 infinite loops by not reusing requests" This reverts commit 57c3eea4e579a0a2e259c409d2139577e382e8f8. * Revert "Document http_chunk_size" This reverts commit ff3f520741a721b2022baa99fedbcb94b1c37632. --- .github/ISSUE_TEMPLATE.md | 6 +- AUTHORS | 3 - ChangeLog | 30 ---- docs/supportedsites.md | 1 - youtube_dl/YoutubeDL.py | 5 +- youtube_dl/downloader/common.py | 3 - youtube_dl/downloader/http.py | 25 +-- youtube_dl/extractor/afreecatv.py | 15 +- youtube_dl/extractor/brightcove.py | 13 +- youtube_dl/extractor/common.py | 9 +- youtube_dl/extractor/discovery.py | 22 +-- youtube_dl/extractor/dvtv.py | 27 +-- youtube_dl/extractor/extractors.py | 7 +- youtube_dl/extractor/francetv.py | 235 +++++++++----------------- youtube_dl/extractor/gameinformer.py | 11 +- youtube_dl/extractor/generic.py | 5 +- youtube_dl/extractor/la7.py | 4 +- youtube_dl/extractor/myvi.py | 71 ++------ youtube_dl/extractor/nbc.py | 53 ------ youtube_dl/extractor/newgrounds.py | 16 +- youtube_dl/extractor/periscope.py | 19 ++- youtube_dl/extractor/pokemon.py | 37 ++-- youtube_dl/extractor/prosiebensat1.py | 1 - youtube_dl/extractor/sixplay.py | 5 - youtube_dl/extractor/telebruxelles.py | 27 +-- youtube_dl/extractor/veoh.py | 5 +- youtube_dl/extractor/youtube.py | 5 - youtube_dl/version.py | 2 +- 28 files changed, 174 insertions(+), 488 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f7d951de2..12e7f02ce 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.02.08 +[debug] youtube-dl version 2018.02.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/AUTHORS b/AUTHORS index 6223212aa..40215a5cf 100644 --- a/AUTHORS +++ b/AUTHORS @@ -233,6 +233,3 @@ Daniel Weber Kay Bouché Yang Hongbo Lei Wang -Petr Novák -Leonardo Taccari -Martin Weinelt diff --git a/ChangeLog b/ChangeLog index 72f239b8d..db212c4a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,33 +1,3 @@ -version 2018.02.08 - -Extractors -+ [myvi] Extend URL regular expression -+ [myvi:embed] Add support for myvi.tv embeds (#15521) -+ [prosiebensat1] Extend URL regular expression (#15520) -* [pokemon] Relax URL regular expression and extend title extraction (#15518) -+ [gameinformer] Use geo verification headers -* [la7] Fix extraction (#15501, #15502) -* [gameinformer] Fix brightcove id extraction (#15416) -+ [afreecatv] Pass referrer to video info request (#15507) -+ [telebruxelles] Add support for live streams -* [telebruxelles] Relax URL regular expression -* [telebruxelles] Fix extraction (#15504) -* [extractor/common] Respect secure schemes in _extract_wowza_formats - - -version 2018.02.04 - -Core -* [downloader/http] Randomize HTTP chunk size -+ [downloader/http] Add ability to pass downloader options via info dict -* [downloader/http] Fix 302 infinite loops by not reusing requests -+ Document http_chunk_size - -Extractors -+ [brightcove] Pass embed page URL as referrer (#15486) -+ [youtube] Enforce using chunked HTTP downloading for DASH formats - - version 2018.02.03 Core diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3fa5d22b..c15b5eec5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -502,7 +502,6 @@ - **MySpass** - **Myvi** - **MyVidster** - - **MyviEmbed** - **n-tv.de** - **natgeo** - **natgeo:episodeguide** diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 709a57e3d..9e0539ed2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -298,8 +298,7 @@ class YoutubeDL(object): the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args, hls_use_mpegts, - http_chunk_size. + xattr_set_filesize, external_downloader_args, hls_use_mpegts. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -1033,7 +1032,7 @@ class YoutubeDL(object): '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s* - (?Pwidth|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps) + (?Pwidth|height|tbr|abr|vbr|asr|filesize|fps) \s*(?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) $ diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index a7f62ae53..75b8166c5 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -49,9 +49,6 @@ class FileDownloader(object): external_downloader_args: A list of additional command-line arguments for the external downloader. hls_use_mpegts: Use the mpegts container for HLS videos. - http_chunk_size: Size of a chunk for chunk-based HTTP downloading.May be - useful for bypassing bandwidth throttling imposed by - a webserver (experimental) Subclasses of this one must re-define the real_download method. """ diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index a22875f69..dc2b37beb 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,7 +4,6 @@ import errno import os import socket import time -import random import re from .common import FileDownloader @@ -43,10 +42,11 @@ class HttpFD(FileDownloader): add_headers = info_dict.get('http_headers') if add_headers: headers.update(add_headers) + basic_request = sanitized_Request(url, None, headers) + request = sanitized_Request(url, None, headers) is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - info_dict.get('downloader_options', {}).get('http_chunk_size') or self.params.get('http_chunk_size') or 0) ctx.open_mode = 'wb' @@ -54,7 +54,6 @@ class HttpFD(FileDownloader): ctx.data_len = None ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() - ctx.chunk_size = None if self.params.get('continuedl', True): # Establish possible resume length @@ -84,24 +83,21 @@ class HttpFD(FileDownloader): req.add_header('Range', range_header) def establish_connection(): - ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) - if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' - elif ctx.chunk_size > 0: + elif chunk_size > 0: range_start = 0 else: range_start = None ctx.is_resume = False - range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None + range_end = range_start + chunk_size - 1 if chunk_size else None if range_end and ctx.data_len is not None and range_end >= ctx.data_len: range_end = ctx.data_len - 1 has_range = range_start is not None ctx.has_range = has_range - request = sanitized_Request(url, None, headers) if has_range: set_range(request, range_start, range_end) # Establish connection @@ -123,7 +119,7 @@ class HttpFD(FileDownloader): content_len = int_or_none(content_range_m.group(3)) accept_content_len = ( # Non-chunked download - not ctx.chunk_size or + not chunk_size or # Chunked download and requested piece or # its part is promised to be served content_range_end == range_end or @@ -144,8 +140,7 @@ class HttpFD(FileDownloader): # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header - ctx.data = self.ydl.urlopen( - sanitized_Request(url, None, headers)) + ctx.data = self.ydl.urlopen(basic_request) content_length = ctx.data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: @@ -176,6 +171,12 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.open_mode = 'wb' return + elif err.code == 302: + if not chunk_size: + raise + # HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop. + # may happen during chunk downloading. This is usually fixed + # with a retry. elif err.code < 500 or err.code >= 600: # Unexpected HTTP error raise @@ -301,7 +302,7 @@ class HttpFD(FileDownloader): if is_test and byte_counter == data_len: break - if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: + if not is_test and chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: ctx.resume_len = byte_counter # ctx.block_size = block_size raise NextFragment() diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 148a0999c..513dd81df 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -175,23 +175,10 @@ class AfreecaTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - station_id = self._search_regex( - r'nStationNo\s*=\s*(\d+)', webpage, 'station') - bbs_id = self._search_regex( - r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs') - video_id = self._search_regex( - r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id) - video_xml = self._download_xml( 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', - video_id, headers={ - 'Referer': 'http://vod.afreecatv.com/embed.php', - }, query={ + video_id, query={ 'nTitleNo': video_id, - 'nStationNo': station_id, - 'nBbsNo': bbs_id, 'partialView': 'SKIP_ADULT', }) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f3c0dc3fa..f04505011 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -690,17 +690,10 @@ class BrightcoveNewIE(AdobePassIE): webpage, 'policy key', group='pk') api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) - headers = { - 'Accept': 'application/json;pk=%s' % policy_key, - } - referrer = smuggled_data.get('referrer') - if referrer: - headers.update({ - 'Referer': referrer, - 'Origin': re.search(r'https?://[^/]+', referrer).group(0), - }) try: - json_data = self._download_json(api_url, video_id, headers=headers) + json_data = self._download_json(api_url, video_id, headers={ + 'Accept': 'application/json;pk=%s' % policy_key + }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: json_data = self._parse_json(e.cause.read().decode(), video_id)[0] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 568250aa1..1aad00aea 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -174,8 +174,6 @@ class InfoExtractor(object): width : height ratio as float. * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. - * downloader_options A dictionary of downloader options as - described in FileDownloader url: Final video URL. ext: Video filename extension. @@ -2266,10 +2264,9 @@ class InfoExtractor(object): def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): query = compat_urlparse.urlparse(url).query url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) - mobj = re.search( - r'(?:(?:http|rtmp|rtsp)(?Ps)?:)?(?P//[^?]+)', url) - url_base = mobj.group('url') - http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base) + url_base = self._search_regex( + r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url') + http_base_url = '%s:%s' % ('http', url_base) formats = [] def manifest_url(manifest): diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 91449dcd8..f9cec1d23 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -5,16 +5,15 @@ import re import string from .discoverygo import DiscoveryGoBaseIE -from ..compat import compat_str from ..utils import ( ExtractorError, - try_get, + update_url_query, ) from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?P + _VALID_URL = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| discoverylife| @@ -45,7 +44,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): _GEO_BYPASS = False def _real_extract(self, url): - site, path, display_id = re.match(self._VALID_URL, url).groups() + path, display_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) react_data = self._parse_json(self._search_regex( @@ -56,13 +55,14 @@ class DiscoveryIE(DiscoveryGoBaseIE): video_id = video['id'] access_token = self._download_json( - 'https://www.%s.com/anonymous' % site, display_id, query={ - 'authRel': 'authorization', - 'client_id': try_get( - react_data, lambda x: x['application']['apiClientId'], - compat_str) or '3020a40c2356a645b4b4', - 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), - 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, + 'https://www.discovery.com/anonymous', display_id, query={ + 'authLink': update_url_query( + 'https://login.discovery.com/v1/oauth2/authorize', { + 'client_id': react_data['application']['apiClientId'], + 'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html', + 'response_type': 'anonymous', + 'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + }) })['access_token'] try: diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index 3f760888e..e85c58bd5 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { - 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'id': '973eb3bc854e11e498be002590604f2e', }, 'playlist': [{ @@ -91,24 +91,10 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'only_matching': True, - }, { - 'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/', - 'md5': '87defe16681b1429c91f7a74809823c6', - 'info_dict': { - 'id': 'f5ae72f6fad611e794dbac1f6b220ee8', - 'ext': 'mp4', - 'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová', - }, - 'params': { - 'skip_download': True, - }, }] - def _parse_video_metadata(self, js, video_id, live_js=None): + def _parse_video_metadata(self, js, video_id): data = self._parse_json(js, video_id, transform_source=js_to_json) - if live_js: - data.update(self._parse_json( - live_js, video_id, transform_source=js_to_json)) title = unescapeHTML(data['title']) @@ -156,18 +142,13 @@ class DVTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - # live content - live_item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - # single video item = self._search_regex( r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', - webpage, 'video', default=None) + webpage, 'video', default=None, fatal=False) if item: - return self._parse_video_metadata(item, video_id, live_item) + return self._parse_video_metadata(item, video_id) # playlist items = re.findall( diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6e56293f5..b442256fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -373,7 +373,6 @@ from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, - FranceTVSiteIE, FranceTVEmbedIE, FranceTVInfoIE, GenerationWhatIE, @@ -631,10 +630,7 @@ from .musicplayon import MusicPlayOnIE from .mwave import MwaveIE, MwaveMeetGreetIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE -from .myvi import ( - MyviIE, - MyviEmbedIE, -) +from .myvi import MyviIE from .myvidster import MyVidsterIE from .nationalgeographic import ( NationalGeographicVideoIE, @@ -648,7 +644,6 @@ from .nbc import ( NBCIE, NBCNewsIE, NBCOlympicsIE, - NBCOlympicsStreamIE, NBCSportsIE, NBCSportsVPlayerIE, ) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e0734d59a..095bb3954 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -5,10 +5,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( clean_html, ExtractorError, @@ -20,68 +17,7 @@ from .dailymotion import DailymotionIE class FranceTVBaseInfoExtractor(InfoExtractor): - def _make_url_result(self, video_id, catalog=None): - full_id = 'francetv:%s' % video_id - if catalog: - full_id += '@%s' % catalog - return self.url_result( - full_id, ie=FranceTVIE.ie_key(), video_id=video_id) - - -class FranceTVIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - https?:// - sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\? - .*?\bidDiffusion=[^&]+| - (?: - https?://videos\.francetv\.fr/video/| - francetv: - ) - (?P[^@]+)(?:@(?P.+))? - ) - ''' - - _TESTS = [{ - # without catalog - 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0', - 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f', - 'info_dict': { - 'id': '162311093', - 'ext': 'mp4', - 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', - 'timestamp': 1502623500, - 'upload_date': '20170813', - }, - }, { - # with catalog - 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4', - 'only_matching': True, - }, { - 'url': 'http://videos.francetv.fr/video/NI_657393@Regions', - 'only_matching': True, - }, { - 'url': 'francetv:162311093', - 'only_matching': True, - }, { - 'url': 'francetv:NI_1004933@Zouzous', - 'only_matching': True, - }, { - 'url': 'francetv:NI_983319@Info-web', - 'only_matching': True, - }, { - 'url': 'francetv:NI_983319', - 'only_matching': True, - }, { - 'url': 'francetv:NI_657393@Regions', - 'only_matching': True, - }] - def _extract_video(self, video_id, catalogue=None): - # Videos are identified by idDiffusion so catalogue part is optional. - # However when provided, some extra formats may be returned so we pass - # it if available. info = self._download_json( 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', video_id, 'Downloading video JSON', query={ @@ -91,8 +27,7 @@ class FranceTVIE(InfoExtractor): if info.get('status') == 'NOK': raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, info['message']), - expected=True) + '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True) allowed_countries = info['videos'][0].get('geoblocage') if allowed_countries: georestricted = True @@ -107,19 +42,6 @@ class FranceTVIE(InfoExtractor): else: georestricted = False - def sign(manifest_url, manifest_id): - for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = self._download_webpage( - 'https://%s/esi/TA' % host, video_id, - 'Downloading signed %s manifest URL' % manifest_id, - fatal=False, query={ - 'url': manifest_url, - }) - if (signed_url and isinstance(signed_url, compat_str) and - re.search(r'^(?:https?:)?//', signed_url)): - return signed_url - return manifest_url - formats = [] for video in info['videos']: if video['statut'] != 'ONLINE': @@ -134,14 +56,17 @@ class FranceTVIE(InfoExtractor): # See https://github.com/rg3/youtube-dl/issues/3963 # m3u8 urls work fine continue - formats.extend(self._extract_f4m_formats( - sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id=format_id, fatal=False)) + f4m_url = self._download_webpage( + 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url, + video_id, 'Downloading f4m manifest token', fatal=False) + if f4m_url: + formats.extend(self._extract_f4m_formats( + f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', + video_id, f4m_id=format_id, fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - sign(video_url, format_id), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id=format_id, - fatal=False)) + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, @@ -181,38 +106,24 @@ class FranceTVIE(InfoExtractor): 'subtitles': subtitles, } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - catalog = mobj.group('catalog') - if not video_id: - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - video_id = qs.get('idDiffusion', [None])[0] - catalog = qs.get('catalogue', [None])[0] - if not video_id: - raise ExtractorError('Invalid URL', expected=True) - - return self._extract_video(video_id, catalog) - - -class FranceTVSiteIE(FranceTVBaseInfoExtractor): +class FranceTVIE(FranceTVBaseInfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P[^/]+)\.html' _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': '162311093', + 'id': '157550144', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', - 'timestamp': 1502623500, - 'upload_date': '20170813', + 'timestamp': 1494156300, + 'upload_date': '20170507', }, 'params': { + # m3u8 downloads 'skip_download': True, }, - 'add_ie': [FranceTVIE.ie_key()], }, { # france3 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', @@ -261,14 +172,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): video_id, catalogue = self._html_search_regex( r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video ID').split('@') - - return self._make_url_result(video_id, catalogue) + return self._extract_video(video_id, catalogue) class FranceTVEmbedIE(FranceTVBaseInfoExtractor): _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P[^&]+)' - _TESTS = [{ + _TEST = { 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961', 'info_dict': { 'id': 'NI_983319', @@ -278,11 +188,7 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor): 'timestamp': 1493981780, 'duration': 16, }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] + } def _real_extract(self, url): video_id = self._match_id(url) @@ -291,12 +197,12 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor): 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id, video_id) - return self._make_url_result(video['video_id'], video.get('catalog')) + return self._extract_video(video['video_id'], video.get('catalog')) class FranceTVInfoIE(FranceTVBaseInfoExtractor): IE_NAME = 'francetvinfo.fr' - _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P[^/?#&.]+)' + _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P[^/?#&.]+)' _TESTS = [{ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', @@ -311,18 +217,51 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): }, }, 'params': { + # m3u8 downloads 'skip_download': True, }, - 'add_ie': [FranceTVIE.ie_key()], }, { 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', - 'only_matching': True, + 'info_dict': { + 'id': 'EV_20019', + 'ext': 'mp4', + 'title': 'Débat des candidats à la Commission européenne', + 'description': 'Débat des candidats à la Commission européenne', + }, + 'params': { + 'skip_download': 'HLS (reqires ffmpeg)' + }, + 'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.', }, { 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html', - 'only_matching': True, + 'md5': 'f485bda6e185e7d15dbc69b72bae993e', + 'info_dict': { + 'id': 'NI_173343', + 'ext': 'mp4', + 'title': 'Les entreprises familiales : le secret de la réussite', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'timestamp': 1433273139, + 'upload_date': '20150602', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, }, { 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html', - 'only_matching': True, + 'md5': 'f485bda6e185e7d15dbc69b72bae993e', + 'info_dict': { + 'id': 'NI_657393', + 'ext': 'mp4', + 'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"', + 'description': 'md5:a3264114c9d29aeca11ced113c37b16c', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'timestamp': 1458300695, + 'upload_date': '20160318', + }, + 'params': { + 'skip_download': True, + }, }, { # Dailymotion embed 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html', @@ -344,9 +283,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): }] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) + mobj = re.match(self._VALID_URL, url) + page_title = mobj.group('title') + webpage = self._download_webpage(url, page_title) dailymotion_urls = DailymotionIE._extract_urls(webpage) if dailymotion_urls: @@ -358,13 +297,12 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): (r'id-video=([^@]+@[^"]+)', r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), webpage, 'video id').split('@') - - return self._make_url_result(video_id, catalogue) + return self._extract_video(video_id, catalogue) class GenerationWhatIE(InfoExtractor): IE_NAME = 'france2.fr:generation-what' - _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', @@ -376,10 +314,6 @@ class GenerationWhatIE(InfoExtractor): 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', 'upload_date': '20160411', }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], }, { 'url': 'http://generation-what.francetv.fr/europe/video/present-arms', 'only_matching': True, @@ -387,47 +321,42 @@ class GenerationWhatIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - youtube_id = self._search_regex( r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", webpage, 'youtube id') - - return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id) + return self.url_result(youtube_id, 'Youtube', youtube_id) class CultureboxIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)' + IE_NAME = 'culturebox.francetvinfo.fr' + _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' - _TESTS = [{ - 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689', + _TEST = { + 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', + 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6', 'info_dict': { - 'id': 'EV_134885', - 'ext': 'mp4', - 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7', - 'description': 'md5:19c44af004b88219f4daa50fa9a351d4', - 'upload_date': '20180206', - 'timestamp': 1517945220, - 'duration': 5981, + 'id': 'EV_50111', + 'ext': 'flv', + 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", + 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', + 'upload_date': '20150320', + 'timestamp': 1426892400, + 'duration': 2760.9, }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] + } def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage(url, name) if ">Ce live n'est plus disponible en replay<" in webpage: - raise ExtractorError( - 'Video %s is not available' % display_id, expected=True) + raise ExtractorError('Video %s is not available' % name, expected=True) video_id, catalogue = self._search_regex( r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', webpage, 'video id').split('@') - return self._make_url_result(video_id, catalogue) + return self._extract_video(video_id, catalogue) diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py index a2920a793..a66e309de 100644 --- a/youtube_dl/extractor/gameinformer.py +++ b/youtube_dl/extractor/gameinformer.py @@ -23,11 +23,6 @@ class GameInformerIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage( - url, display_id, headers=self.geo_verification_headers()) - brightcove_id = self._search_regex( - [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"], - webpage, 'brightcove id') - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', - brightcove_id) + webpage = self._download_webpage(url, display_id) + brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id') + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c3f201949..1d9da8115 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2280,10 +2280,7 @@ class GenericIE(InfoExtractor): # Look for Brightcove New Studio embeds bc_urls = BrightcoveNewIE._extract_urls(self, webpage) if bc_urls: - return self.playlist_from_matches( - bc_urls, video_id, video_title, - getter=lambda x: smuggle_url(x, {'referrer': url}), - ie='BrightcoveNew') + return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') # Look for Nexx embeds nexx_urls = NexxIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index 6373268c4..da5a5de4a 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -49,9 +49,7 @@ class LA7IE(InfoExtractor): webpage = self._download_webpage(url, video_id) player_data = self._parse_json( - self._search_regex( - [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'], - webpage, 'player data'), + self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'), video_id, transform_source=js_to_json) return { diff --git a/youtube_dl/extractor/myvi.py b/youtube_dl/extractor/myvi.py index 75d286365..621ae74a7 100644 --- a/youtube_dl/extractor/myvi.py +++ b/youtube_dl/extractor/myvi.py @@ -3,31 +3,22 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor from .vimple import SprutoBaseIE class MyviIE(SprutoBaseIE): _VALID_URL = r'''(?x) - (?: - https?:// - (?:www\.)? - myvi\. + https?:// + myvi\.(?:ru/player|tv)/ + (?: (?: - (?:ru/player|tv)/ - (?: - (?: - embed/html| - flash| - api/Video/Get - )/| - content/preloader\.swf\?.*\bid= - )| - ru/watch/ - )| - myvi: - ) - (?P<id>[\da-zA-Z_-]+) + embed/html| + flash| + api/Video/Get + )/| + content/preloader\.swf\?.*\bid= + ) + (?P<id>[\da-zA-Z_-]+) ''' _TESTS = [{ 'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0', @@ -51,12 +42,6 @@ class MyviIE(SprutoBaseIE): }, { 'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30', 'only_matching': True, - }, { - 'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2', - 'only_matching': True, - }, { - 'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2', - 'only_matching': True, }] @classmethod @@ -73,39 +58,3 @@ class MyviIE(SprutoBaseIE): 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData'] return self._extract_spruto(spruto, video_id) - - -class MyviEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)' - _TESTS = [{ - 'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r', - 'info_dict': { - 'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1', - 'ext': 'mp4', - 'title': 'Твоя (original song).mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 277, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if MyviIE.suitable(url) else super(MyviEmbedIE, cls).suitable(url) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.myvi.tv/embed/%s' % video_id, video_id) - - myvi_id = self._search_regex( - r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)', - webpage, 'video id') - - return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key()) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 9dc8f9ebc..554dec36e 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import base64 from .common import InfoExtractor from .theplatform import ThePlatformIE @@ -359,7 +358,6 @@ class NBCNewsIE(ThePlatformIE): class NBCOlympicsIE(InfoExtractor): - IE_NAME = 'nbcolympics' _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)' _TEST = { @@ -397,54 +395,3 @@ class NBCOlympicsIE(InfoExtractor): 'ie_key': ThePlatformIE.ie_key(), 'display_id': display_id, } - - -class NBCOlympicsStreamIE(AdobePassIE): - IE_NAME = 'nbcolympics:stream' - _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)' - _TEST = { - 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8', - 'info_dict': { - 'id': '203493', - 'ext': 'mp4', - 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid') - resource = self._search_regex( - r"resource\s*=\s*'(.+)';", webpage, - 'resource').replace("' + pid + '", pid) - event_config = self._download_json( - self._DATA_URL_TEMPLATE % ('event_config', pid), - pid)['eventConfig'] - title = self._live_title(event_config['eventTitle']) - source_url = self._download_json( - self._DATA_URL_TEMPLATE % ('live_sources', pid), - pid)['videoSources'][0]['sourceUrl'] - media_token = self._extract_mvpd_auth( - url, pid, event_config.get('requestorId', 'NBCOlympics'), resource) - formats = self._extract_m3u8_formats(self._download_webpage( - 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={ - 'cdn': 'akamai', - 'mediaToken': base64.b64encode(media_token.encode()), - 'resource': base64.b64encode(resource.encode()), - 'url': source_url, - }), pid, 'mp4') - self._sort_formats(formats) - - return { - 'id': pid, - 'display_id': display_id, - 'title': title, - 'formats': formats, - 'is_live': True, - } diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 82e7cf522..0e26f8399 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -87,21 +87,19 @@ class NewgroundsIE(InfoExtractor): self._check_formats(formats, media_id) self._sort_formats(formats) - uploader = self._html_search_regex( - (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>', - r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader', + uploader = self._search_regex( + r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False) - timestamp = unified_timestamp(self._html_search_regex( - (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)', - r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp', + timestamp = unified_timestamp(self._search_regex( + r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp', default=None)) duration = parse_duration(self._search_regex( - r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage, - 'duration', default=None)) + r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration', + default=None)) filesize_approx = parse_filesize(self._html_search_regex( - r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize', + r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize', default=None)) if len(formats) == 1: formats[0]['filesize_approx'] = filesize_approx diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 8afe541ec..e5e08538c 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -56,16 +56,18 @@ class PeriscopeIE(PeriscopeBaseIE): def _real_extract(self, url): token = self._match_id(url) - stream = self._call_api( - 'accessVideoPublic', {'broadcast_id': token}, token) + broadcast_data = self._call_api( + 'getBroadcastPublic', {'broadcast_id': token}, token) + broadcast = broadcast_data['broadcast'] + status = broadcast['status'] - broadcast = stream['broadcast'] - title = broadcast['status'] + user = broadcast_data.get('user', {}) - uploader = broadcast.get('user_display_name') or broadcast.get('username') - uploader_id = (broadcast.get('user_id') or broadcast.get('username')) + uploader = broadcast.get('user_display_name') or user.get('display_name') + uploader_id = (broadcast.get('username') or user.get('username') or + broadcast.get('user_id') or user.get('id')) - title = '%s - %s' % (uploader, title) if uploader else title + title = '%s - %s' % (uploader, status) if uploader else status state = broadcast.get('state').lower() if state == 'running': title = self._live_title(title) @@ -75,6 +77,9 @@ class PeriscopeIE(PeriscopeBaseIE): 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + stream = self._call_api( + 'getAccessPublic', {'broadcast_id': token}, token) + video_urls = set() formats = [] for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py index dd5f17f11..2d87e7e70 100644 --- a/youtube_dl/extractor/pokemon.py +++ b/youtube_dl/extractor/pokemon.py @@ -11,34 +11,19 @@ from ..utils import ( class PokemonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' + _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))' _TESTS = [{ - 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', - 'md5': '2fe8eaec69768b25ef898cda9c43062e', + 'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true', + 'md5': '9fb209ae3a569aac25de0f5afc4ee08f', 'info_dict': { - 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', + 'id': 'd0436c00c3ce4071ac6cee8130ac54a1', 'ext': 'mp4', - 'title': 'The Ol’ Raise and Switch!', - 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', - 'timestamp': 1511824728, - 'upload_date': '20171127', - }, - 'add_id': ['LimelightMedia'], - }, { - # no data-video-title - 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008', - 'info_dict': { - 'id': '99f3bae270bf4e5097274817239ce9c8', - 'ext': 'mp4', - 'title': 'Pokémon: The Rise of Darkrai', - 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d', - 'timestamp': 1417778347, - 'upload_date': '20141205', - }, - 'add_id': ['LimelightMedia'], - 'params': { - 'skip_download': True, + 'title': 'From A to Z!', + 'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!', + 'timestamp': 1460478136, + 'upload_date': '20160412', }, + 'add_id': ['LimelightMedia'] }, { 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', 'only_matching': True, @@ -57,9 +42,7 @@ class PokemonIE(InfoExtractor): r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), webpage, 'video data element')) video_id = video_data['data-video-id'] - title = video_data.get('data-video-title') or self._html_search_meta( - 'pkm-title', webpage, ' title', default=None) or self._search_regex( - r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') + title = video_data['data-video-title'] return { '_type': 'url_transparent', 'id': video_id, diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7efff4566..48757fd4f 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -129,7 +129,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): https?:// (?:www\.)? (?: - (?:beta\.)? (?: prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 69951e387..d435f7157 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -52,7 +52,6 @@ class SixPlayIE(InfoExtractor): urls = [] quality_key = qualities(['lq', 'sd', 'hq', 'hd']) formats = [] - subtitles = {} for asset in clip_data['assets']: asset_url = asset.get('full_physical_path') protocol = asset.get('protocol') @@ -61,9 +60,6 @@ class SixPlayIE(InfoExtractor): urls.append(asset_url) container = asset.get('video_container') ext = determine_ext(asset_url) - if protocol == 'http_subtitle' or ext == 'vtt': - subtitles.setdefault('fr', []).append({'url': asset_url}) - continue if container == 'm3u8' or ext == 'm3u8': if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]: asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url) @@ -106,5 +102,4 @@ class SixPlayIE(InfoExtractor): 'duration': int_or_none(clip_data.get('duration')), 'series': get(lambda x: x['program']['title']), 'formats': formats, - 'subtitles': subtitles, } diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py index a0353fe3a..5886e9c1b 100644 --- a/youtube_dl/extractor/telebruxelles.py +++ b/youtube_dl/extractor/telebruxelles.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class TeleBruxellesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(?:[^/]+/)*(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)' _TESTS = [{ 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/', 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9', @@ -31,16 +31,6 @@ class TeleBruxellesIE(InfoExtractor): }, { 'url': 'http://bx1.be/emission/bxenf1-gastronomie/', 'only_matching': True, - }, { - 'url': 'https://bx1.be/berchem-sainte-agathe/personnel-carrefour-de-berchem-sainte-agathe-inquiet/', - 'only_matching': True, - }, { - 'url': 'https://bx1.be/dernier-jt/', - 'only_matching': True, - }, { - # live stream - 'url': 'https://bx1.be/lives/direct-tv/', - 'only_matching': True, }] def _real_extract(self, url): @@ -48,29 +38,22 @@ class TeleBruxellesIE(InfoExtractor): webpage = self._download_webpage(url, display_id) article_id = self._html_search_regex( - r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None) + r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None) title = self._html_search_regex( - r'<h1[^>]*>(.+?)</h1>', webpage, 'title', - default=None) or self._og_search_title(webpage) + r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title') description = self._og_search_description(webpage, default=None) rtmp_url = self._html_search_regex( - r'file["\']?\s*:\s*"(r(?:tm|mt)ps?://[^/]+/(?:vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*"\.mp4|stream/live))"', + r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', webpage, 'RTMP url') - # Yes, they have a typo in scheme name for live stream URLs (e.g. - # https://bx1.be/lives/direct-tv/) - rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url) rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) formats = self._extract_wowza_formats(rtmp_url, article_id or display_id) self._sort_formats(formats) - is_live = 'stream/live' in rtmp_url - return { 'id': article_id or display_id, 'display_id': display_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': description, 'formats': formats, - 'is_live': is_live, } diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 071774a6f..b20dddc5c 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -12,7 +12,7 @@ from ..utils import ( class VeohIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)' _TESTS = [{ 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', @@ -24,9 +24,6 @@ class VeohIE(InfoExtractor): 'uploader': 'LUMOback', 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ', }, - }, { - 'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3', - 'only_matching': True, }, { 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 17543d6f4..43051512b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1944,11 +1944,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): break if codecs: dct.update(parse_codecs(codecs)) - if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none': - dct['downloader_options'] = { - # Youtube throttles chunks >~10M - 'http_chunk_size': 10485760, - } formats.append(dct) elif video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dcca60fcf..7ae919523 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.02.08' +__version__ = '2018.02.03'