1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 05:22:51 +08:00

Revert "updating again" (#11)

* Revert "[francetv] Separate main extractor and rework others to delegate to it"

This reverts commit 24b97ef14a.

* Revert "[francetv] Improve manifest URL signing (closes #15536)"

This reverts commit 6f54dce07d.

* Revert "[francetv] Sign m3u8 manifest URLs (closes #15565)"

This reverts commit 4eb4ace765.

* Revert "[veoh] Add support for embed URLs"

This reverts commit 6adfc88ce6.

* Revert "[dvtv] Skip download on failing test"

This reverts commit ad4167ed33.

* Revert "[afreecatv] Fix extraction (closes #15556)"

This reverts commit 8cc0cd0a2f.

* Revert "[periscope] Use accessVideoPublic endpoint (closes #15554)"

This reverts commit b6aef8f69f.

* Revert "[YoutubeDL] Add support for filesize_approx in format selector (closes #15550)"

This reverts commit eb048b3668.

* Revert "[discovery] Fix auth request (closes #15542)"

This reverts commit 14adea5bc7.

* Revert "[6play] Extract subtitles (closes #15541)"

This reverts commit aee4411662.

* Revert "Credit @mweinelt for #15124"

This reverts commit 82eaab7f58.

* Revert "Credit @iamleot for internazionale (#14973)"

This reverts commit fdd87ce209.

* Revert "Credit @che0 for seznamzpravy (#14616) and dvtv (#15442)"

This reverts commit 9b904aaeba.

* Revert "[newgrounds] Fix metadata extraction (closes #15531)"

This reverts commit 8be35d3cd1.

* Revert "[nbc] add support for NBC Olympics Streams(closes #10295)"

This reverts commit ba6e116f59.

* Revert "[dvtv] Simplify (closes #15442)"

This reverts commit 2da0581a74.

* Revert "[dvtv] Fix live streams extraction"

This reverts commit 773234624f.

* Revert "release 2018.02.08"

This reverts commit 0204b2a7b1.

* Revert "[ChangeLog] Actualize"

This reverts commit 72aa0a29e5.

* Revert "[extractors] Import for myvi:embed"

This reverts commit 38e519ff03.

* Revert "[pokemon] PEP 8"

This reverts commit 7b11f92ae0.

* Revert "[gameinformer] PEP 8"

This reverts commit aa71431e28.

* Revert "[myvi] Extend _VALID_URL"

This reverts commit 441ec65fd3.

* Revert "[myvi:embed] Add extractor (closes #15521)"

This reverts commit e35a984bc7.

* Revert "[prosiebensat1] Extend _VALID_URL (closes #15520)"

This reverts commit 0e1a732e39.

* Revert "[pokemon] Relax _VALID_URL and extend title extraction (closes #15518)"

This reverts commit 8a768fe854.

* Revert "[gameinformer] Use geo verification headers"

This reverts commit 6cdc00e998.

* Revert "[la7] Fix extraction (closes #15501)"

This reverts commit 8d2fbb06a9.

* Revert "[gameinformer] Fix brightcove id extraction"

This reverts commit dc1171da81.

* Revert "[afreecatv] Pass referrer to video info request (closes #15507)"

This reverts commit c16af2cb4e.

* Revert "[telebruxelles] Relax _VALID_URL and add support for live streams"

This reverts commit 1f916ea01b.

* Revert "[telebruxelles] Fix extraction (closes #15504)"

This reverts commit ea7a8f5298.

* Revert "[extractor/common] Respect secure schemes in _extract_wowza_formats"

This reverts commit b44f7f4fc8.

* Revert "release 2018.02.04"

This reverts commit ed4de10767.

* Revert "[ChangeLog] Actualize"

This reverts commit f277eb6c9d.

* Revert "[brightcove] Pass embed page URL as referrer (closes #15486)"

This reverts commit a690cb6742.

* Revert "[downloader/http] Randomize HTTP chunk size"

This reverts commit c315342edb.

* Revert "[youtube] Enforce using chunked HTTP downloading for DASH formats"

This reverts commit 7412fe8d5e.

* Revert "[downloader/http] Add ability to pass downloader options via info dict"

This reverts commit 6cfd25a8e0.

* Revert "[downloader/http] Fix 302 infinite loops by not reusing requests"

This reverts commit 57c3eea4e5.

* Revert "Document http_chunk_size"

This reverts commit ff3f520741.
This commit is contained in:
Kade 2018-02-11 09:35:28 -05:00 committed by GitHub
parent 24b97ef14a
commit fe8fb82c1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 174 additions and 488 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.08** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.03**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.02.08 [debug] youtube-dl version 2018.02.03
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -233,6 +233,3 @@ Daniel Weber
Kay Bouché Kay Bouché
Yang Hongbo Yang Hongbo
Lei Wang Lei Wang
Petr Novák
Leonardo Taccari
Martin Weinelt

View File

@ -1,33 +1,3 @@
version 2018.02.08
Extractors
+ [myvi] Extend URL regular expression
+ [myvi:embed] Add support for myvi.tv embeds (#15521)
+ [prosiebensat1] Extend URL regular expression (#15520)
* [pokemon] Relax URL regular expression and extend title extraction (#15518)
+ [gameinformer] Use geo verification headers
* [la7] Fix extraction (#15501, #15502)
* [gameinformer] Fix brightcove id extraction (#15416)
+ [afreecatv] Pass referrer to video info request (#15507)
+ [telebruxelles] Add support for live streams
* [telebruxelles] Relax URL regular expression
* [telebruxelles] Fix extraction (#15504)
* [extractor/common] Respect secure schemes in _extract_wowza_formats
version 2018.02.04
Core
* [downloader/http] Randomize HTTP chunk size
+ [downloader/http] Add ability to pass downloader options via info dict
* [downloader/http] Fix 302 infinite loops by not reusing requests
+ Document http_chunk_size
Extractors
+ [brightcove] Pass embed page URL as referrer (#15486)
+ [youtube] Enforce using chunked HTTP downloading for DASH formats
version 2018.02.03 version 2018.02.03
Core Core

View File

@ -502,7 +502,6 @@
- **MySpass** - **MySpass**
- **Myvi** - **Myvi**
- **MyVidster** - **MyVidster**
- **MyviEmbed**
- **n-tv.de** - **n-tv.de**
- **natgeo** - **natgeo**
- **natgeo:episodeguide** - **natgeo:episodeguide**

View File

@ -298,8 +298,7 @@ class YoutubeDL(object):
the downloader (see youtube_dl/downloader/common.py): the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle, noresizebuffer, retries, continuedl, noprogress, consoletitle,
xattr_set_filesize, external_downloader_args, hls_use_mpegts, xattr_set_filesize, external_downloader_args, hls_use_mpegts.
http_chunk_size.
The following options are used by the post processors: The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
@ -1033,7 +1032,7 @@ class YoutubeDL(object):
'!=': operator.ne, '!=': operator.ne,
} }
operator_rex = re.compile(r'''(?x)\s* operator_rex = re.compile(r'''(?x)\s*
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps) (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
$ $

View File

@ -49,9 +49,6 @@ class FileDownloader(object):
external_downloader_args: A list of additional command-line arguments for the external_downloader_args: A list of additional command-line arguments for the
external downloader. external downloader.
hls_use_mpegts: Use the mpegts container for HLS videos. hls_use_mpegts: Use the mpegts container for HLS videos.
http_chunk_size: Size of a chunk for chunk-based HTTP downloading.May be
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
Subclasses of this one must re-define the real_download method. Subclasses of this one must re-define the real_download method.
""" """

View File

@ -4,7 +4,6 @@ import errno
import os import os
import socket import socket
import time import time
import random
import re import re
from .common import FileDownloader from .common import FileDownloader
@ -43,10 +42,11 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers') add_headers = info_dict.get('http_headers')
if add_headers: if add_headers:
headers.update(add_headers) headers.update(add_headers)
basic_request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False) is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else ( chunk_size = self._TEST_FILE_SIZE if is_test else (
info_dict.get('downloader_options', {}).get('http_chunk_size') or
self.params.get('http_chunk_size') or 0) self.params.get('http_chunk_size') or 0)
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
@ -54,7 +54,6 @@ class HttpFD(FileDownloader):
ctx.data_len = None ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024) ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time() ctx.start_time = time.time()
ctx.chunk_size = None
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
@ -84,24 +83,21 @@ class HttpFD(FileDownloader):
req.add_header('Range', range_header) req.add_header('Range', range_header)
def establish_connection(): def establish_connection():
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
if not is_test and chunk_size else chunk_size)
if ctx.resume_len > 0: if ctx.resume_len > 0:
range_start = ctx.resume_len range_start = ctx.resume_len
if ctx.is_resume: if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len) self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab' ctx.open_mode = 'ab'
elif ctx.chunk_size > 0: elif chunk_size > 0:
range_start = 0 range_start = 0
else: else:
range_start = None range_start = None
ctx.is_resume = False ctx.is_resume = False
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None range_end = range_start + chunk_size - 1 if chunk_size else None
if range_end and ctx.data_len is not None and range_end >= ctx.data_len: if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
range_end = ctx.data_len - 1 range_end = ctx.data_len - 1
has_range = range_start is not None has_range = range_start is not None
ctx.has_range = has_range ctx.has_range = has_range
request = sanitized_Request(url, None, headers)
if has_range: if has_range:
set_range(request, range_start, range_end) set_range(request, range_start, range_end)
# Establish connection # Establish connection
@ -123,7 +119,7 @@ class HttpFD(FileDownloader):
content_len = int_or_none(content_range_m.group(3)) content_len = int_or_none(content_range_m.group(3))
accept_content_len = ( accept_content_len = (
# Non-chunked download # Non-chunked download
not ctx.chunk_size or not chunk_size or
# Chunked download and requested piece or # Chunked download and requested piece or
# its part is promised to be served # its part is promised to be served
content_range_end == range_end or content_range_end == range_end or
@ -144,8 +140,7 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable) # Unable to resume (requested range not satisfiable)
try: try:
# Open the connection again without the range header # Open the connection again without the range header
ctx.data = self.ydl.urlopen( ctx.data = self.ydl.urlopen(basic_request)
sanitized_Request(url, None, headers))
content_length = ctx.data.info()['Content-Length'] content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err: except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600: if err.code < 500 or err.code >= 600:
@ -176,6 +171,12 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0 ctx.resume_len = 0
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
return return
elif err.code == 302:
if not chunk_size:
raise
# HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop.
# may happen during chunk downloading. This is usually fixed
# with a retry.
elif err.code < 500 or err.code >= 600: elif err.code < 500 or err.code >= 600:
# Unexpected HTTP error # Unexpected HTTP error
raise raise
@ -301,7 +302,7 @@ class HttpFD(FileDownloader):
if is_test and byte_counter == data_len: if is_test and byte_counter == data_len:
break break
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: if not is_test and chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
ctx.resume_len = byte_counter ctx.resume_len = byte_counter
# ctx.block_size = block_size # ctx.block_size = block_size
raise NextFragment() raise NextFragment()

View File

@ -175,23 +175,10 @@ class AfreecaTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
station_id = self._search_regex(
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
bbs_id = self._search_regex(
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
video_xml = self._download_xml( video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, headers={ video_id, query={
'Referer': 'http://vod.afreecatv.com/embed.php',
}, query={
'nTitleNo': video_id, 'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
'partialView': 'SKIP_ADULT', 'partialView': 'SKIP_ADULT',
}) })

View File

@ -690,17 +690,10 @@ class BrightcoveNewIE(AdobePassIE):
webpage, 'policy key', group='pk') webpage, 'policy key', group='pk')
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
headers = {
'Accept': 'application/json;pk=%s' % policy_key,
}
referrer = smuggled_data.get('referrer')
if referrer:
headers.update({
'Referer': referrer,
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
})
try: try:
json_data = self._download_json(api_url, video_id, headers=headers) json_data = self._download_json(api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
})
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0] json_data = self._parse_json(e.cause.read().decode(), video_id)[0]

View File

@ -174,8 +174,6 @@ class InfoExtractor(object):
width : height ratio as float. width : height ratio as float.
* no_resume The server does not support resuming the * no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean. (HTTP or RTMP) download. Boolean.
* downloader_options A dictionary of downloader options as
described in FileDownloader
url: Final video URL. url: Final video URL.
ext: Video filename extension. ext: Video filename extension.
@ -2266,10 +2264,9 @@ class InfoExtractor(object):
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
mobj = re.search( url_base = self._search_regex(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url) r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
url_base = mobj.group('url') http_base_url = '%s:%s' % ('http', url_base)
http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
formats = [] formats = []
def manifest_url(manifest): def manifest_url(manifest):

View File

@ -5,16 +5,15 @@ import re
import string import string
from .discoverygo import DiscoveryGoBaseIE from .discoverygo import DiscoveryGoBaseIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
try_get, update_url_query,
) )
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE): class DiscoveryIE(DiscoveryGoBaseIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site> _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
discovery| discovery|
investigationdiscovery| investigationdiscovery|
discoverylife| discoverylife|
@ -45,7 +44,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
_GEO_BYPASS = False _GEO_BYPASS = False
def _real_extract(self, url): def _real_extract(self, url):
site, path, display_id = re.match(self._VALID_URL, url).groups() path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
react_data = self._parse_json(self._search_regex( react_data = self._parse_json(self._search_regex(
@ -56,13 +55,14 @@ class DiscoveryIE(DiscoveryGoBaseIE):
video_id = video['id'] video_id = video['id']
access_token = self._download_json( access_token = self._download_json(
'https://www.%s.com/anonymous' % site, display_id, query={ 'https://www.discovery.com/anonymous', display_id, query={
'authRel': 'authorization', 'authLink': update_url_query(
'client_id': try_get( 'https://login.discovery.com/v1/oauth2/authorize', {
react_data, lambda x: x['application']['apiClientId'], 'client_id': react_data['application']['apiClientId'],
compat_str) or '3020a40c2356a645b4b4', 'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'response_type': 'anonymous',
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, 'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
})
})['access_token'] })['access_token']
try: try:

View File

@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor):
}, { }, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': { 'info_dict': {
'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e', 'id': '973eb3bc854e11e498be002590604f2e',
}, },
'playlist': [{ 'playlist': [{
@ -91,24 +91,10 @@ class DVTVIE(InfoExtractor):
}, { }, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
'md5': '87defe16681b1429c91f7a74809823c6',
'info_dict': {
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
'ext': 'mp4',
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
},
'params': {
'skip_download': True,
},
}] }]
def _parse_video_metadata(self, js, video_id, live_js=None): def _parse_video_metadata(self, js, video_id):
data = self._parse_json(js, video_id, transform_source=js_to_json) data = self._parse_json(js, video_id, transform_source=js_to_json)
if live_js:
data.update(self._parse_json(
live_js, video_id, transform_source=js_to_json))
title = unescapeHTML(data['title']) title = unescapeHTML(data['title'])
@ -156,18 +142,13 @@ class DVTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# live content
live_item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
webpage, 'video', default=None)
# single video # single video
item = self._search_regex( item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None) webpage, 'video', default=None, fatal=False)
if item: if item:
return self._parse_video_metadata(item, video_id, live_item) return self._parse_video_metadata(item, video_id)
# playlist # playlist
items = re.findall( items = re.findall(

View File

@ -373,7 +373,6 @@ from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE from .franceinter import FranceInterIE
from .francetv import ( from .francetv import (
FranceTVIE, FranceTVIE,
FranceTVSiteIE,
FranceTVEmbedIE, FranceTVEmbedIE,
FranceTVInfoIE, FranceTVInfoIE,
GenerationWhatIE, GenerationWhatIE,
@ -631,10 +630,7 @@ from .musicplayon import MusicPlayOnIE
from .mwave import MwaveIE, MwaveMeetGreetIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE from .myspass import MySpassIE
from .myvi import ( from .myvi import MyviIE
MyviIE,
MyviEmbedIE,
)
from .myvidster import MyVidsterIE from .myvidster import MyVidsterIE
from .nationalgeographic import ( from .nationalgeographic import (
NationalGeographicVideoIE, NationalGeographicVideoIE,
@ -648,7 +644,6 @@ from .nbc import (
NBCIE, NBCIE,
NBCNewsIE, NBCNewsIE,
NBCOlympicsIE, NBCOlympicsIE,
NBCOlympicsStreamIE,
NBCSportsIE, NBCSportsIE,
NBCSportsVPlayerIE, NBCSportsVPlayerIE,
) )

View File

@ -5,10 +5,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
@ -20,68 +17,7 @@ from .dailymotion import DailymotionIE
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
def _make_url_result(self, video_id, catalog=None):
full_id = 'francetv:%s' % video_id
if catalog:
full_id += '@%s' % catalog
return self.url_result(
full_id, ie=FranceTVIE.ie_key(), video_id=video_id)
class FranceTVIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
.*?\bidDiffusion=[^&]+|
(?:
https?://videos\.francetv\.fr/video/|
francetv:
)
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
)
'''
_TESTS = [{
# without catalog
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
'info_dict': {
'id': '162311093',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500,
'upload_date': '20170813',
},
}, {
# with catalog
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
'only_matching': True,
}, {
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
'only_matching': True,
}, {
'url': 'francetv:162311093',
'only_matching': True,
}, {
'url': 'francetv:NI_1004933@Zouzous',
'only_matching': True,
}, {
'url': 'francetv:NI_983319@Info-web',
'only_matching': True,
}, {
'url': 'francetv:NI_983319',
'only_matching': True,
}, {
'url': 'francetv:NI_657393@Regions',
'only_matching': True,
}]
def _extract_video(self, video_id, catalogue=None): def _extract_video(self, video_id, catalogue=None):
# Videos are identified by idDiffusion so catalogue part is optional.
# However when provided, some extra formats may be returned so we pass
# it if available.
info = self._download_json( info = self._download_json(
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
video_id, 'Downloading video JSON', query={ video_id, 'Downloading video JSON', query={
@ -91,8 +27,7 @@ class FranceTVIE(InfoExtractor):
if info.get('status') == 'NOK': if info.get('status') == 'NOK':
raise ExtractorError( raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, info['message']), '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
expected=True)
allowed_countries = info['videos'][0].get('geoblocage') allowed_countries = info['videos'][0].get('geoblocage')
if allowed_countries: if allowed_countries:
georestricted = True georestricted = True
@ -107,19 +42,6 @@ class FranceTVIE(InfoExtractor):
else: else:
georestricted = False georestricted = False
def sign(manifest_url, manifest_id):
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
signed_url = self._download_webpage(
'https://%s/esi/TA' % host, video_id,
'Downloading signed %s manifest URL' % manifest_id,
fatal=False, query={
'url': manifest_url,
})
if (signed_url and isinstance(signed_url, compat_str) and
re.search(r'^(?:https?:)?//', signed_url)):
return signed_url
return manifest_url
formats = [] formats = []
for video in info['videos']: for video in info['videos']:
if video['statut'] != 'ONLINE': if video['statut'] != 'ONLINE':
@ -134,14 +56,17 @@ class FranceTVIE(InfoExtractor):
# See https://github.com/rg3/youtube-dl/issues/3963 # See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine # m3u8 urls work fine
continue continue
formats.extend(self._extract_f4m_formats( f4m_url = self._download_webpage(
sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
video_id, f4m_id=format_id, fatal=False)) video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
formats.extend(self._extract_f4m_formats(
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
sign(video_url, format_id), video_id, 'mp4', video_url, video_id, 'mp4', entry_protocol='m3u8_native',
entry_protocol='m3u8_native', m3u8_id=format_id, m3u8_id=format_id, fatal=False))
fatal=False))
elif video_url.startswith('rtmp'): elif video_url.startswith('rtmp'):
formats.append({ formats.append({
'url': video_url, 'url': video_url,
@ -181,38 +106,24 @@ class FranceTVIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
catalog = mobj.group('catalog')
if not video_id: class FranceTVIE(FranceTVBaseInfoExtractor):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = qs.get('idDiffusion', [None])[0]
catalog = qs.get('catalogue', [None])[0]
if not video_id:
raise ExtractorError('Invalid URL', expected=True)
return self._extract_video(video_id, catalog)
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html' _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': { 'info_dict': {
'id': '162311093', 'id': '157550144',
'ext': 'mp4', 'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus', 'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500, 'timestamp': 1494156300,
'upload_date': '20170813', 'upload_date': '20170507',
}, },
'params': { 'params': {
# m3u8 downloads
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [FranceTVIE.ie_key()],
}, { }, {
# france3 # france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
@ -261,14 +172,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
video_id, catalogue = self._html_search_regex( video_id, catalogue = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@') webpage, 'video ID').split('@')
return self._extract_video(video_id, catalogue)
return self._make_url_result(video_id, catalogue)
class FranceTVEmbedIE(FranceTVBaseInfoExtractor): class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)' _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
_TESTS = [{ _TEST = {
'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961', 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
'info_dict': { 'info_dict': {
'id': 'NI_983319', 'id': 'NI_983319',
@ -278,11 +188,7 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
'timestamp': 1493981780, 'timestamp': 1493981780,
'duration': 16, 'duration': 16,
}, },
'params': { }
'skip_download': True,
},
'add_ie': [FranceTVIE.ie_key()],
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -291,12 +197,12 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id, 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
video_id) video_id)
return self._make_url_result(video['video_id'], video.get('catalog')) return self._extract_video(video['video_id'], video.get('catalog'))
class FranceTVInfoIE(FranceTVBaseInfoExtractor): class FranceTVInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr' IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)' _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@ -311,18 +217,51 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
}, },
}, },
'params': { 'params': {
# m3u8 downloads
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [FranceTVIE.ie_key()],
}, { }, {
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
'only_matching': True, 'info_dict': {
'id': 'EV_20019',
'ext': 'mp4',
'title': 'Débat des candidats à la Commission européenne',
'description': 'Débat des candidats à la Commission européenne',
},
'params': {
'skip_download': 'HLS (reqires ffmpeg)'
},
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
}, { }, {
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html', 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
'only_matching': True, 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
'info_dict': {
'id': 'NI_173343',
'ext': 'mp4',
'title': 'Les entreprises familiales : le secret de la réussite',
'thumbnail': r're:^https?://.*\.jpe?g$',
'timestamp': 1433273139,
'upload_date': '20150602',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
}, { }, {
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html', 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
'only_matching': True, 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
'info_dict': {
'id': 'NI_657393',
'ext': 'mp4',
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de lArmor"',
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
'thumbnail': r're:^https?://.*\.jpe?g$',
'timestamp': 1458300695,
'upload_date': '20160318',
},
'params': {
'skip_download': True,
},
}, { }, {
# Dailymotion embed # Dailymotion embed
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html', 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
@ -344,9 +283,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, page_title)
dailymotion_urls = DailymotionIE._extract_urls(webpage) dailymotion_urls = DailymotionIE._extract_urls(webpage)
if dailymotion_urls: if dailymotion_urls:
@ -358,13 +297,12 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
(r'id-video=([^@]+@[^"]+)', (r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
webpage, 'video id').split('@') webpage, 'video id').split('@')
return self._extract_video(video_id, catalogue)
return self._make_url_result(video_id, catalogue)
class GenerationWhatIE(InfoExtractor): class GenerationWhatIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-what' IE_NAME = 'france2.fr:generation-what'
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
@ -376,10 +314,6 @@ class GenerationWhatIE(InfoExtractor):
'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
'upload_date': '20160411', 'upload_date': '20160411',
}, },
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
}, { }, {
'url': 'http://generation-what.francetv.fr/europe/video/present-arms', 'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
'only_matching': True, 'only_matching': True,
@ -387,47 +321,42 @@ class GenerationWhatIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
youtube_id = self._search_regex( youtube_id = self._search_regex(
r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
webpage, 'youtube id') webpage, 'youtube id')
return self.url_result(youtube_id, 'Youtube', youtube_id)
return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
class CultureboxIE(FranceTVBaseInfoExtractor): class CultureboxIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)' IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TESTS = [{ _TEST = {
'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689', 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
'info_dict': { 'info_dict': {
'id': 'EV_134885', 'id': 'EV_50111',
'ext': 'mp4', 'ext': 'flv',
'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7', 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
'description': 'md5:19c44af004b88219f4daa50fa9a351d4', 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
'upload_date': '20180206', 'upload_date': '20150320',
'timestamp': 1517945220, 'timestamp': 1426892400,
'duration': 5981, 'duration': 2760.9,
}, },
'params': { }
'skip_download': True,
},
'add_ie': [FranceTVIE.ie_key()],
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, name)
if ">Ce live n'est plus disponible en replay<" in webpage: if ">Ce live n'est plus disponible en replay<" in webpage:
raise ExtractorError( raise ExtractorError('Video %s is not available' % name, expected=True)
'Video %s is not available' % display_id, expected=True)
video_id, catalogue = self._search_regex( video_id, catalogue = self._search_regex(
r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
webpage, 'video id').split('@') webpage, 'video id').split('@')
return self._make_url_result(video_id, catalogue) return self._extract_video(video_id, catalogue)

View File

@ -23,11 +23,6 @@ class GameInformerIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(url, display_id)
url, display_id, headers=self.geo_verification_headers()) brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
brightcove_id = self._search_regex( return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
[r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
webpage, 'brightcove id')
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
brightcove_id)

View File

@ -2280,10 +2280,7 @@ class GenericIE(InfoExtractor):
# Look for Brightcove New Studio embeds # Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(self, webpage) bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
if bc_urls: if bc_urls:
return self.playlist_from_matches( return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
bc_urls, video_id, video_title,
getter=lambda x: smuggle_url(x, {'referrer': url}),
ie='BrightcoveNew')
# Look for Nexx embeds # Look for Nexx embeds
nexx_urls = NexxIE._extract_urls(webpage) nexx_urls = NexxIE._extract_urls(webpage)

View File

@ -49,9 +49,7 @@ class LA7IE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data = self._parse_json( player_data = self._parse_json(
self._search_regex( self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
[r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
webpage, 'player data'),
video_id, transform_source=js_to_json) video_id, transform_source=js_to_json)
return { return {

View File

@ -3,31 +3,22 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor
from .vimple import SprutoBaseIE from .vimple import SprutoBaseIE
class MyviIE(SprutoBaseIE): class MyviIE(SprutoBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: https?://
https?:// myvi\.(?:ru/player|tv)/
(?:www\.)? (?:
myvi\.
(?: (?:
(?:ru/player|tv)/ embed/html|
(?: flash|
(?: api/Video/Get
embed/html| )/|
flash| content/preloader\.swf\?.*\bid=
api/Video/Get )
)/| (?P<id>[\da-zA-Z_-]+)
content/preloader\.swf\?.*\bid=
)|
ru/watch/
)|
myvi:
)
(?P<id>[\da-zA-Z_-]+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0', 'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
@ -51,12 +42,6 @@ class MyviIE(SprutoBaseIE):
}, { }, {
'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30', 'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
'only_matching': True,
}, {
'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
'only_matching': True,
}] }]
@classmethod @classmethod
@ -73,39 +58,3 @@ class MyviIE(SprutoBaseIE):
'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData'] 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
return self._extract_spruto(spruto, video_id) return self._extract_spruto(spruto, video_id)
class MyviEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
_TESTS = [{
'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
'info_dict': {
'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
'ext': 'mp4',
'title': 'Твоя (original song).mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 277,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if MyviIE.suitable(url) else super(MyviEmbedIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.myvi.tv/embed/%s' % video_id, video_id)
myvi_id = self._search_regex(
r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
webpage, 'video id')
return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key())

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import base64
from .common import InfoExtractor from .common import InfoExtractor
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
@ -359,7 +358,6 @@ class NBCNewsIE(ThePlatformIE):
class NBCOlympicsIE(InfoExtractor): class NBCOlympicsIE(InfoExtractor):
IE_NAME = 'nbcolympics'
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)' _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
_TEST = { _TEST = {
@ -397,54 +395,3 @@ class NBCOlympicsIE(InfoExtractor):
'ie_key': ThePlatformIE.ie_key(), 'ie_key': ThePlatformIE.ie_key(),
'display_id': display_id, 'display_id': display_id,
} }
class NBCOlympicsStreamIE(AdobePassIE):
IE_NAME = 'nbcolympics:stream'
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
_TEST = {
'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
'info_dict': {
'id': '203493',
'ext': 'mp4',
'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
_DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
resource = self._search_regex(
r"resource\s*=\s*'(.+)';", webpage,
'resource').replace("' + pid + '", pid)
event_config = self._download_json(
self._DATA_URL_TEMPLATE % ('event_config', pid),
pid)['eventConfig']
title = self._live_title(event_config['eventTitle'])
source_url = self._download_json(
self._DATA_URL_TEMPLATE % ('live_sources', pid),
pid)['videoSources'][0]['sourceUrl']
media_token = self._extract_mvpd_auth(
url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
formats = self._extract_m3u8_formats(self._download_webpage(
'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
'cdn': 'akamai',
'mediaToken': base64.b64encode(media_token.encode()),
'resource': base64.b64encode(resource.encode()),
'url': source_url,
}), pid, 'mp4')
self._sort_formats(formats)
return {
'id': pid,
'display_id': display_id,
'title': title,
'formats': formats,
'is_live': True,
}

View File

@ -87,21 +87,19 @@ class NewgroundsIE(InfoExtractor):
self._check_formats(formats, media_id) self._check_formats(formats, media_id)
self._sort_formats(formats) self._sort_formats(formats)
uploader = self._html_search_regex( uploader = self._search_regex(
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>', r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
fatal=False) fatal=False)
timestamp = unified_timestamp(self._html_search_regex( timestamp = unified_timestamp(self._search_regex(
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)', r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
default=None)) default=None))
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage, r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
'duration', default=None)) default=None))
filesize_approx = parse_filesize(self._html_search_regex( filesize_approx = parse_filesize(self._html_search_regex(
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize', r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
default=None)) default=None))
if len(formats) == 1: if len(formats) == 1:
formats[0]['filesize_approx'] = filesize_approx formats[0]['filesize_approx'] = filesize_approx

View File

@ -56,16 +56,18 @@ class PeriscopeIE(PeriscopeBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
token = self._match_id(url) token = self._match_id(url)
stream = self._call_api( broadcast_data = self._call_api(
'accessVideoPublic', {'broadcast_id': token}, token) 'getBroadcastPublic', {'broadcast_id': token}, token)
broadcast = broadcast_data['broadcast']
status = broadcast['status']
broadcast = stream['broadcast'] user = broadcast_data.get('user', {})
title = broadcast['status']
uploader = broadcast.get('user_display_name') or broadcast.get('username') uploader = broadcast.get('user_display_name') or user.get('display_name')
uploader_id = (broadcast.get('user_id') or broadcast.get('username')) uploader_id = (broadcast.get('username') or user.get('username') or
broadcast.get('user_id') or user.get('id'))
title = '%s - %s' % (uploader, title) if uploader else title title = '%s - %s' % (uploader, status) if uploader else status
state = broadcast.get('state').lower() state = broadcast.get('state').lower()
if state == 'running': if state == 'running':
title = self._live_title(title) title = self._live_title(title)
@ -75,6 +77,9 @@ class PeriscopeIE(PeriscopeBaseIE):
'url': broadcast[image], 'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)] } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
stream = self._call_api(
'getAccessPublic', {'broadcast_id': token}, token)
video_urls = set() video_urls = set()
formats = [] formats = []
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):

View File

@ -11,34 +11,19 @@ from ..utils import (
class PokemonIE(InfoExtractor): class PokemonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
_TESTS = [{ _TESTS = [{
'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', 'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
'md5': '2fe8eaec69768b25ef898cda9c43062e', 'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
'info_dict': { 'info_dict': {
'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', 'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Ol Raise and Switch!', 'title': 'From A to Z!',
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', 'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
'timestamp': 1511824728, 'timestamp': 1460478136,
'upload_date': '20171127', 'upload_date': '20160412',
},
'add_id': ['LimelightMedia'],
}, {
# no data-video-title
'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
'info_dict': {
'id': '99f3bae270bf4e5097274817239ce9c8',
'ext': 'mp4',
'title': 'Pokémon: The Rise of Darkrai',
'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
'timestamp': 1417778347,
'upload_date': '20141205',
},
'add_id': ['LimelightMedia'],
'params': {
'skip_download': True,
}, },
'add_id': ['LimelightMedia']
}, { }, {
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
'only_matching': True, 'only_matching': True,
@ -57,9 +42,7 @@ class PokemonIE(InfoExtractor):
r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
webpage, 'video data element')) webpage, 'video data element'))
video_id = video_data['data-video-id'] video_id = video_data['data-video-id']
title = video_data.get('data-video-title') or self._html_search_meta( title = video_data['data-video-title']
'pkm-title', webpage, ' title', default=None) or self._search_regex(
r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,

View File

@ -129,7 +129,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
https?:// https?://
(?:www\.)? (?:www\.)?
(?: (?:
(?:beta\.)?
(?: (?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
)\.(?:de|at|ch)| )\.(?:de|at|ch)|

View File

@ -52,7 +52,6 @@ class SixPlayIE(InfoExtractor):
urls = [] urls = []
quality_key = qualities(['lq', 'sd', 'hq', 'hd']) quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = [] formats = []
subtitles = {}
for asset in clip_data['assets']: for asset in clip_data['assets']:
asset_url = asset.get('full_physical_path') asset_url = asset.get('full_physical_path')
protocol = asset.get('protocol') protocol = asset.get('protocol')
@ -61,9 +60,6 @@ class SixPlayIE(InfoExtractor):
urls.append(asset_url) urls.append(asset_url)
container = asset.get('video_container') container = asset.get('video_container')
ext = determine_ext(asset_url) ext = determine_ext(asset_url)
if protocol == 'http_subtitle' or ext == 'vtt':
subtitles.setdefault('fr', []).append({'url': asset_url})
continue
if container == 'm3u8' or ext == 'm3u8': if container == 'm3u8' or ext == 'm3u8':
if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]: if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url) asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
@ -106,5 +102,4 @@ class SixPlayIE(InfoExtractor):
'duration': int_or_none(clip_data.get('duration')), 'duration': int_or_none(clip_data.get('duration')),
'series': get(lambda x: x['program']['title']), 'series': get(lambda x: x['program']['title']),
'formats': formats, 'formats': formats,
'subtitles': subtitles,
} }

View File

@ -7,7 +7,7 @@ from .common import InfoExtractor
class TeleBruxellesIE(InfoExtractor): class TeleBruxellesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(?:[^/]+/)*(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/', 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9', 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
@ -31,16 +31,6 @@ class TeleBruxellesIE(InfoExtractor):
}, { }, {
'url': 'http://bx1.be/emission/bxenf1-gastronomie/', 'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://bx1.be/berchem-sainte-agathe/personnel-carrefour-de-berchem-sainte-agathe-inquiet/',
'only_matching': True,
}, {
'url': 'https://bx1.be/dernier-jt/',
'only_matching': True,
}, {
# live stream
'url': 'https://bx1.be/lives/direct-tv/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -48,29 +38,22 @@ class TeleBruxellesIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
article_id = self._html_search_regex( article_id = self._html_search_regex(
r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None) r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
title = self._html_search_regex( title = self._html_search_regex(
r'<h1[^>]*>(.+?)</h1>', webpage, 'title', r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
default=None) or self._og_search_title(webpage)
description = self._og_search_description(webpage, default=None) description = self._og_search_description(webpage, default=None)
rtmp_url = self._html_search_regex( rtmp_url = self._html_search_regex(
r'file["\']?\s*:\s*"(r(?:tm|mt)ps?://[^/]+/(?:vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*"\.mp4|stream/live))"', r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
webpage, 'RTMP url') webpage, 'RTMP url')
# Yes, they have a typo in scheme name for live stream URLs (e.g.
# https://bx1.be/lives/direct-tv/)
rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
formats = self._extract_wowza_formats(rtmp_url, article_id or display_id) formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
self._sort_formats(formats) self._sort_formats(formats)
is_live = 'stream/live' in rtmp_url
return { return {
'id': article_id or display_id, 'id': article_id or display_id,
'display_id': display_id, 'display_id': display_id,
'title': self._live_title(title) if is_live else title, 'title': title,
'description': description, 'description': description,
'formats': formats, 'formats': formats,
'is_live': is_live,
} }

View File

@ -12,7 +12,7 @@ from ..utils import (
class VeohIE(InfoExtractor): class VeohIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
@ -24,9 +24,6 @@ class VeohIE(InfoExtractor):
'uploader': 'LUMOback', 'uploader': 'LUMOback',
'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ', 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
}, },
}, {
'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
'only_matching': True,
}, { }, {
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',

View File

@ -1944,11 +1944,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break break
if codecs: if codecs:
dct.update(parse_codecs(codecs)) dct.update(parse_codecs(codecs))
if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
dct['downloader_options'] = {
# Youtube throttles chunks >~10M
'http_chunk_size': 10485760,
}
formats.append(dct) formats.append(dct)
elif video_info.get('hlsvp'): elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0] manifest_url = video_info['hlsvp'][0]

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.02.08' __version__ = '2018.02.03'