1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 13:12:52 +08:00

Merge pull request #9 from rg3/master

updating again
This commit is contained in:
Kade 2018-02-03 14:01:00 -05:00 committed by GitHub
commit 8fc4b4ce94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 361 additions and 191 deletions

View File

@ -6,12 +6,13 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.27**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.03**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
### What is the purpose of your *issue*?
- [ ] Bug report (encountered problems with youtube-dl)
@ -35,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.01.27
[debug] youtube-dl version 2018.02.03
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -12,6 +12,7 @@
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
### What is the purpose of your *issue*?
- [ ] Bug report (encountered problems with youtube-dl)

View File

@ -1,3 +1,22 @@
version 2018.02.03
Core
+ Introduce --http-chunk-size for chunk-based HTTP downloading
+ Add support for IronPython
* [downloader/ism] Fix Python 3.2 support
Extractors
* [redbulltv] Fix extraction (#15481)
* [redtube] Fix metadata extraction (#15472)
* [pladform] Respect platform id and extract HLS formats (#15468)
- [rtlnl] Remove progressive formats (#15459)
* [6play] Do no modify asset URLs with a token (#15248)
* [nationalgeographic] Relax URL regular expression
* [dplay] Relax URL regular expression (#15458)
* [cbsinteractive] Fix data extraction (#15451)
+ [amcnetworks] Add support for sundancetv.com (#9260)
version 2018.01.27
Core

View File

@ -198,6 +198,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
size. By default, the buffer size is
automatically resized from an initial value
of SIZE.
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
downloading (e.g. 10485760 or 10M) (default
is disabled). May be useful for bypassing
bandwidth throttling imposed by a webserver
(experimental)
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with

View File

@ -3,4 +3,4 @@ universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
ignore = E402,E501,E731
ignore = E402,E501,E731,E741

View File

@ -0,0 +1,125 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import re
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
TEST_SIZE = 10 * 1024
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
def send_content_range(self, total=None):
range_header = self.headers.get('Range')
start = end = None
if range_header:
mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
if mobj:
start = int(mobj.group(1))
end = int(mobj.group(2))
valid_range = start is not None and end is not None
if valid_range:
content_range = 'bytes %d-%d' % (start, end)
if total:
content_range += '/%d' % total
self.send_header('Content-Range', content_range)
return (end - start + 1) if valid_range else total
def serve(self, range=True, content_length=True):
self.send_response(200)
self.send_header('Content-Type', 'video/mp4')
size = TEST_SIZE
if range:
size = self.send_content_range(TEST_SIZE)
if content_length:
self.send_header('Content-Length', size)
self.end_headers()
self.wfile.write(b'#' * size)
def do_GET(self):
if self.path == '/regular':
self.serve()
elif self.path == '/no-content-length':
self.serve(content_length=False)
elif self.path == '/no-range':
self.serve(range=False)
elif self.path == '/no-range-no-content-length':
self.serve(range=False, content_length=False)
else:
assert False
class FakeLogger(object):
def debug(self, msg):
pass
def warning(self, msg):
pass
def error(self, msg):
pass
class TestHttpFD(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
def download(self, params, ep):
params['logger'] = FakeLogger()
ydl = YoutubeDL(params)
downloader = HttpFD(ydl, params)
filename = 'testfile.mp4'
try_rm(encodeFilename(filename))
self.assertTrue(downloader.real_download(filename, {
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
}))
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
try_rm(encodeFilename(filename))
def download_all(self, params):
for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
self.download(params, ep)
def test_regular(self):
self.download_all({})
def test_chunked(self):
self.download_all({
'http_chunk_size': 1000,
})
if __name__ == '__main__':
unittest.main()

View File

@ -47,7 +47,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
self.end_headers()
return
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
self.send_response(302)
self.send_header(b'Location', new_url.encode('utf-8'))
self.end_headers()
@ -74,7 +74,7 @@ class FakeLogger(object):
class TestHTTP(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
('localhost', 0), HTTPTestRequestHandler)
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
@ -86,15 +86,15 @@ class TestHTTP(unittest.TestCase):
return
ydl = YoutubeDL({'logger': FakeLogger()})
r = ydl.extract_info('http://localhost:%d/302' % self.port)
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
class TestHTTPS(unittest.TestCase):
def setUp(self):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
self.httpd = compat_http_server.HTTPServer(
('localhost', 0), HTTPTestRequestHandler)
('127.0.0.1', 0), HTTPTestRequestHandler)
self.httpd.socket = ssl.wrap_socket(
self.httpd.socket, certfile=certfn, server_side=True)
self.port = http_server_port(self.httpd)
@ -107,11 +107,11 @@ class TestHTTPS(unittest.TestCase):
ydl = YoutubeDL({'logger': FakeLogger()})
self.assertRaises(
Exception,
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
def _build_proxy_handler(name):
@ -132,23 +132,23 @@ def _build_proxy_handler(name):
class TestProxy(unittest.TestCase):
def setUp(self):
self.proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('normal'))
('127.0.0.1', 0), _build_proxy_handler('normal'))
self.port = http_server_port(self.proxy)
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
self.proxy_thread.daemon = True
self.proxy_thread.start()
self.geo_proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('geo'))
('127.0.0.1', 0), _build_proxy_handler('geo'))
self.geo_port = http_server_port(self.geo_proxy)
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
self.geo_proxy_thread.daemon = True
self.geo_proxy_thread.start()
def test_proxy(self):
geo_proxy = 'localhost:{0}'.format(self.geo_port)
geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
ydl = YoutubeDL({
'proxy': 'localhost:{0}'.format(self.port),
'proxy': '127.0.0.1:{0}'.format(self.port),
'geo_verification_proxy': geo_proxy,
})
url = 'http://foo.com/bar'
@ -162,7 +162,7 @@ class TestProxy(unittest.TestCase):
def test_proxy_with_idn(self):
ydl = YoutubeDL({
'proxy': 'localhost:{0}'.format(self.port),
'proxy': '127.0.0.1:{0}'.format(self.port),
})
url = 'http://中文.tw/'
response = ydl.urlopen(url).read().decode('utf-8')

View File

@ -191,6 +191,11 @@ def _real_main(argv=None):
if numeric_buffersize is None:
parser.error('invalid buffer size specified')
opts.buffersize = numeric_buffersize
if opts.http_chunk_size is not None:
numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
if not numeric_chunksize:
parser.error('invalid http chunk size specified')
opts.http_chunk_size = numeric_chunksize
if opts.playliststart <= 0:
raise ValueError('Playlist start must be positive')
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
@ -346,6 +351,7 @@ def _real_main(argv=None):
'keep_fragments': opts.keep_fragments,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'http_chunk_size': opts.http_chunk_size,
'continuedl': opts.continue_dl,
'noprogress': opts.noprogress,
'progress_with_newline': opts.progress_with_newline,

View File

@ -2909,8 +2909,8 @@ else:
if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
class compat_Struct(struct.Struct):
def unpack(self, string):
if not isinstance(string, buffer):
string = buffer(string)
if not isinstance(string, buffer): # noqa: F821
string = buffer(string) # noqa: F821
return super(compat_Struct, self).unpack(string)
else:
compat_Struct = struct.Struct

View File

@ -7,10 +7,14 @@ import time
import re
from .common import FileDownloader
from ..compat import compat_urllib_error
from ..compat import (
compat_str,
compat_urllib_error,
)
from ..utils import (
ContentTooShortError,
encodeFilename,
int_or_none,
sanitize_open,
sanitized_Request,
write_xattr,
@ -42,17 +46,22 @@ class HttpFD(FileDownloader):
request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)
if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
chunk_size = self._TEST_FILE_SIZE if is_test else (
self.params.get('http_chunk_size') or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
if self.params.get('continuedl', True):
# Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
ctx.resume_len = os.path.getsize(
encodeFilename(ctx.tmpfilename))
ctx.is_resume = ctx.resume_len > 0
count = 0
retries = self.params.get('retries', 0)
@ -64,11 +73,33 @@ class HttpFD(FileDownloader):
def __init__(self, source_error):
self.source_error = source_error
class NextFragment(Exception):
pass
def set_range(req, start, end):
range_header = 'bytes=%d-' % start
if end:
range_header += compat_str(end)
req.add_header('Range', range_header)
def establish_connection():
if ctx.resume_len != 0:
if ctx.resume_len > 0:
range_start = ctx.resume_len
if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len)
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
ctx.open_mode = 'ab'
elif chunk_size > 0:
range_start = 0
else:
range_start = None
ctx.is_resume = False
range_end = range_start + chunk_size - 1 if chunk_size else None
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
if has_range:
set_range(request, range_start, range_end)
# Establish connection
try:
ctx.data = self.ydl.urlopen(request)
@ -77,12 +108,24 @@ class HttpFD(FileDownloader):
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
if ctx.resume_len > 0:
if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
content_range_m = re.search(r'bytes (\d+)-', content_range)
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
# Content-Range is present and matches requested Range, resume is possible
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
if content_range_m:
if range_start == int(content_range_m.group(1)):
content_range_end = int_or_none(content_range_m.group(2))
content_len = int_or_none(content_range_m.group(3))
accept_content_len = (
# Non-chunked download
not chunk_size or
# Chunked download and requested piece or
# its part is promised to be served
content_range_end == range_end or
content_len < range_end)
if accept_content_len:
ctx.data_len = content_len
return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
@ -90,12 +133,10 @@ class HttpFD(FileDownloader):
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
return
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
@ -130,6 +171,15 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
elif err.code == 302:
if not chunk_size:
raise
# HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop.
# may happen during chunk downloading. This is usually fixed
# with a retry.
elif err.code < 500 or err.code >= 600:
# Unexpected HTTP error
raise
raise RetryDownload(err)
except socket.error as err:
if err.errno != errno.ECONNRESET:
@ -160,7 +210,7 @@ class HttpFD(FileDownloader):
return False
byte_counter = 0 + ctx.resume_len
block_size = self.params.get('buffersize', 1024)
block_size = ctx.block_size
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
@ -233,25 +283,30 @@ class HttpFD(FileDownloader):
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
if data_len is None:
if ctx.data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'total_bytes': ctx.data_len,
'tmpfilename': ctx.tmpfilename,
'filename': ctx.filename,
'eta': eta,
'speed': speed,
'elapsed': now - start,
'elapsed': now - ctx.start_time,
})
if is_test and byte_counter == data_len:
break
if not is_test and chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
if ctx.stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
@ -276,7 +331,7 @@ class HttpFD(FileDownloader):
'total_bytes': byte_counter,
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - start,
'elapsed': time.time() - ctx.start_time,
})
return True
@ -290,6 +345,8 @@ class HttpFD(FileDownloader):
if count <= retries:
self.report_retry(e.source_error, count, retries)
continue
except NextFragment:
continue
except SucceedDownload:
return True

View File

@ -11,7 +11,7 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
@ -51,6 +51,9 @@ class AMCNetworksIE(ThePlatformIE):
}, {
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
'only_matching': True,
}, {
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -75,10 +75,10 @@ class CBSInteractiveIE(CBSIE):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
webpage, 'data json')
data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0]
vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
video_id = vdata['mpxRefId']

View File

@ -26,7 +26,7 @@ from ..utils import (
class DPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)'
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
_TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL
@ -89,9 +89,12 @@ class DPlayIE(InfoExtractor):
'skip_download': True,
},
}, {
# geo restricted, bypassable via X-Forwarded-For
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
'only_matching': True,
}, {
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -68,7 +68,7 @@ class NationalGeographicVideoIE(InfoExtractor):
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
_TESTS = [
{
@ -102,6 +102,10 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
'only_matching': True,
}
]

View File

@ -4,7 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
xpath_text,
@ -26,17 +28,15 @@ class PladformIE(InfoExtractor):
(?P<id>\d+)
'''
_TESTS = [{
# http://muz-tv.ru/kinozal/view/7400/
'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
'md5': '61f37b575dd27f1bb2e1854777fe31f4',
'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
'md5': '53362fac3a27352da20fa2803cc5cd6f',
'info_dict': {
'id': '100183293',
'id': '3777899',
'ext': 'mp4',
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 694,
'age_limit': 0,
'duration': 3190,
},
}, {
'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
@ -56,22 +56,48 @@ class PladformIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
pl = qs.get('pl', ['1'])[0]
video = self._download_xml(
'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
video_id)
'http://out.pladform.ru/getVideo', video_id, query={
'pl': pl,
'videoid': video_id,
})
def fail(text):
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, text),
expected=True)
if video.tag == 'error':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, video.text),
expected=True)
fail(video.text)
quality = qualities(('ld', 'sd', 'hd'))
formats = [{
formats = []
for src in video.findall('./src'):
if src is None:
continue
format_url = src.text
if not format_url:
continue
if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': src.text,
'format_id': src.get('quality'),
'quality': quality(src.get('quality')),
} for src in video.findall('./src')]
})
if not formats:
error = xpath_text(video, './cap', 'error', default=None)
if error:
fail(error)
self._sort_formats(formats)
webpage = self._download_webpage(

View File

@ -5,135 +5,93 @@ from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
int_or_none,
try_get,
# unified_timestamp,
ExtractorError,
)
class RedBullTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
'md5': 'fb0445b98aa4394e504b413d98031d1f',
'info_dict': {
'id': 'AP-1Q756YYX51W11',
'id': 'AP-1Q6XCDTAN1W11',
'ext': 'mp4',
'title': 'ABC of...WRC',
'title': 'ABC of... WRC - ABC of... S1E6',
'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
'duration': 1582.04,
# 'timestamp': 1488405786,
# 'upload_date': '20170301',
},
}, {
# episode
'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web',
'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11',
'info_dict': {
'id': 'AP-1PMT5JCWH1W11',
'id': 'AP-1PMHKJFCW1W11',
'ext': 'mp4',
'title': 'Grime - Hashtags S2E4',
'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
'duration': 904.6,
# 'timestamp': 1487290093,
# 'upload_date': '20170217',
'series': 'Hashtags',
'season_number': 2,
'episode_number': 4,
},
'params': {
'skip_download': True,
},
}, {
# segment
'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
'info_dict': {
'id': 'AP-1QSAQJ6V52111',
'ext': 'mp4',
'title': 'Semi Finals - Vans Park Series Pro Tour',
'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
'duration': 11791.991,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
session = self._download_json(
'https://api-v2.redbull.tv/session', video_id,
'https://api.redbull.tv/v3/session', video_id,
note='Downloading access token', query={
'build': '4.370.0',
'category': 'personal_computer',
'os_version': '1.0',
'os_family': 'http',
})
if session.get('code') == 'error':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, session['message']))
auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token'])
token = session['token']
try:
info = self._download_json(
'https://api-v2.redbull.tv/content/%s' % video_id,
video = self._download_json(
'https://api.redbull.tv/v3/products/' + video_id,
video_id, note='Downloading video information',
headers={'Authorization': auth}
headers={'Authorization': token}
)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
error_message = self._parse_json(
e.cause.read().decode(), video_id)['message']
e.cause.read().decode(), video_id)['error']
raise ExtractorError('%s said: %s' % (
self.IE_NAME, error_message), expected=True)
raise
video = info['video_product']
title = info['title'].strip()
title = video['title'].strip()
formats = self._extract_m3u8_formats(
video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
subtitles = {}
for _, captions in (try_get(
video, lambda x: x['attachments']['captions'],
dict) or {}).items():
if not captions or not isinstance(captions, list):
continue
for caption in captions:
caption_url = caption.get('url')
if not caption_url:
continue
ext = caption.get('format')
if ext == 'xml':
ext = 'ttml'
subtitles.setdefault(caption.get('lang') or 'en', []).append({
'url': caption_url,
'ext': ext,
for resource in video.get('resources', []):
if resource.startswith('closed_caption_'):
splitted_resource = resource.split('_')
if splitted_resource[2]:
subtitles.setdefault('en', []).append({
'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource),
'ext': splitted_resource[2],
})
subheading = info.get('subheading')
subheading = video.get('subheading')
if subheading:
title += ' - %s' % subheading
return {
'id': video_id,
'title': title,
'description': info.get('long_description') or info.get(
'description': video.get('long_description') or video.get(
'short_description'),
'duration': float_or_none(video.get('duration'), scale=1000),
# 'timestamp': unified_timestamp(info.get('published')),
'series': info.get('show_title'),
'season_number': int_or_none(info.get('season_number')),
'episode_number': int_or_none(info.get('episode_number')),
'formats': formats,
'subtitles': subtitles,
}

View File

@ -46,9 +46,10 @@ class RedTubeIE(InfoExtractor):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex(
(r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
webpage, 'title', group='title')
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
webpage, 'title', group='title',
default=None) or self._og_search_title(webpage)
formats = []
sources = self._parse_json(
@ -87,12 +88,13 @@ class RedTubeIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
r'<span[^>]+>ADDED ([^<]+)<',
webpage, 'upload date', fatal=False))
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
(r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
webpage, 'view count', fatal=False))
# No self-labeling, but they describe themselves as

View File

@ -93,58 +93,11 @@ class RtlNlIE(InfoExtractor):
meta = info.get('meta', {})
# m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
# To workaround this previously adaptive -> flash trick was used to obtain
# unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
# and bypass georestrictions as well.
# Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
# unusable albeit can be fixed by simple string replacement (see
# https://github.com/rg3/youtube-dl/pull/6337)
# Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
# streams are used now.
videopath = material['videopath']
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
formats = self._extract_m3u8_formats(
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
video_urlpart = videopath.split('/adaptive/')[1][:-5]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
PG_FORMATS = (
('a2t', 512, 288),
('a3t', 704, 400),
('nettv', 1280, 720),
)
def pg_format(format_id, width, height):
return {
'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
'format_id': 'pg-%s' % format_id,
'protocol': 'http',
'width': width,
'height': height,
}
if not formats:
formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
else:
pg_formats = []
for format_id, width, height in PG_FORMATS:
try:
# Find hls format with the same width and height corresponding
# to progressive format and copy metadata from it.
f = next(f for f in formats if f.get('height') == height)
# hls formats may have invalid width
f['width'] = width
f_copy = f.copy()
f_copy.update(pg_format(format_id, width, height))
pg_formats.append(f_copy)
except StopIteration:
# Missing hls format does mean that no progressive format with
# such width and height exists either.
pass
formats.extend(pg_formats)
self._sort_formats(formats)
thumbnails = []

View File

@ -159,7 +159,6 @@ class SeznamZpravyArticleIE(InfoExtractor):
webpage = self._download_webpage(url, article_id)
info = self._search_json_ld(webpage, article_id, default={})
print(info)
title = info.get('title') or self._og_search_title(webpage, fatal=False)
description = info.get('description') or self._og_search_description(webpage)

View File

@ -4,7 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
int_or_none,
@ -57,7 +61,7 @@ class SixPlayIE(InfoExtractor):
container = asset.get('video_container')
ext = determine_ext(asset_url)
if container == 'm3u8' or ext == 'm3u8':
if protocol == 'usp':
if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',

View File

@ -158,7 +158,6 @@ class SoundcloudIE(InfoExtractor):
]
_CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
def _extract_urls(webpage):

View File

@ -478,6 +478,11 @@ def parseOpts(overrideArguments=None):
'--no-resize-buffer',
action='store_true', dest='noresizebuffer', default=False,
help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
downloader.add_option(
'--http-chunk-size',
dest='http_chunk_size', metavar='SIZE', default=None,
help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
downloader.add_option(
'--test',
action='store_true', dest='test', default=False,

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.01.27'
__version__ = '2018.02.03'