mirror of
https://github.com/l1ving/youtube-dl
synced 2025-01-24 05:22:51 +08:00
Merge branch 'master' into BlenderCloud-issue-13282
This commit is contained in:
commit
7feaf65da7
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.08.23
|
||||
[debug] youtube-dl version 2017.09.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
40
ChangeLog
40
ChangeLog
@ -1,3 +1,43 @@
|
||||
version 2017.09.02
|
||||
|
||||
Extractors
|
||||
* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
|
||||
#14077, #14079, #14082, #14083, #14094, #14095, #14096)
|
||||
* [youtube] Fix upload date extraction (#14065)
|
||||
+ [charlierose] Add support for episodes (#14062)
|
||||
+ [bbccouk] Add support for w-prefixed ids (#14056)
|
||||
* [googledrive] Extend URL regular expression (#9785)
|
||||
+ [googledrive] Add support for source format (#14046)
|
||||
* [pornhd] Fix extraction (#14005)
|
||||
|
||||
|
||||
version 2017.08.27.1
|
||||
|
||||
Extractors
|
||||
|
||||
* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
|
||||
|
||||
|
||||
version 2017.08.27
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract height and format id for HTML5 videos (#14034)
|
||||
* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
|
||||
#8625, #9483)
|
||||
* Simplify code and split into separate routines to facilitate maintaining
|
||||
* Make retry mechanism work on errors during actual download not only
|
||||
during connection establishment phase
|
||||
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
|
||||
* Retry on content too short
|
||||
* Show error description on retry
|
||||
|
||||
Extractors
|
||||
* [generic] Lower preference for extraction from LD-JSON
|
||||
* [rai] Fix audio formats extraction (#14024)
|
||||
* [youtube] Fix controversy videos extraction (#14027, #14029)
|
||||
* [mixcloud] Fix extraction (#14015, #14020)
|
||||
|
||||
|
||||
version 2017.08.23
|
||||
|
||||
Core
|
||||
|
4
Makefile
4
Makefile
@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
mkdir -p zip
|
||||
for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -a $$d/*.py zip/$$d/ ;\
|
||||
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||
done
|
||||
touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
|
||||
mv zip/youtube_dl/__main__.py zip/
|
||||
cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
|
||||
cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
|
||||
rm -rf zip
|
||||
echo '#!$(PYTHON)' > youtube-dl
|
||||
cat youtube-dl.zip >> youtube-dl
|
||||
|
@ -304,11 +304,11 @@ class FileDownloader(object):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
def report_retry(self, err, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
|
||||
% (count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
|
@ -22,8 +22,16 @@ from ..utils import (
|
||||
class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
class DownloadContext(dict):
|
||||
__getattr__ = dict.get
|
||||
__setattr__ = dict.__setitem__
|
||||
__delattr__ = dict.__delitem__
|
||||
|
||||
ctx = DownloadContext()
|
||||
ctx.filename = filename
|
||||
ctx.tmpfilename = self.temp_name(filename)
|
||||
ctx.stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
|
||||
if is_test:
|
||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
else:
|
||||
resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.resume_len = 0
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', True):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
else:
|
||||
resume_len = 0
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
while count <= retries:
|
||||
|
||||
class SucceedDownload(Exception):
|
||||
pass
|
||||
|
||||
class RetryDownload(Exception):
|
||||
def __init__(self, source_error):
|
||||
self.source_error = source_error
|
||||
|
||||
def establish_connection():
|
||||
if ctx.resume_len != 0:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
|
||||
ctx.open_mode = 'ab'
|
||||
# Establish connection
|
||||
try:
|
||||
data = self.ydl.urlopen(request)
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
# set in response despite of requested Range (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||
if resume_len > 0:
|
||||
content_range = data.headers.get('Content-Range')
|
||||
if ctx.resume_len > 0:
|
||||
content_range = ctx.data.headers.get('Content-Range')
|
||||
if content_range:
|
||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||
# Content-Range is present and matches requested Range, resume is possible
|
||||
if content_range_m and resume_len == int(content_range_m.group(1)):
|
||||
break
|
||||
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
|
||||
return
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = self.ydl.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
ctx.data = self.ydl.urlopen(basic_request)
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None and
|
||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
||||
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
@ -102,152 +115,184 @@ class HttpFD(FileDownloader):
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(filename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self.report_file_already_downloaded(ctx.filename)
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': resume_len,
|
||||
'total_bytes': resume_len,
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
})
|
||||
return True
|
||||
raise SucceedDownload()
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
except socket.error as e:
|
||||
if e.errno != errno.ECONNRESET:
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
raise RetryDownload(err)
|
||||
except socket.error as err:
|
||||
if err.errno != errno.ECONNRESET:
|
||||
# Connection reset is no problem, just retry
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(count, retries)
|
||||
def download():
|
||||
data_len = ctx.data.info().get('Content-length', None)
|
||||
|
||||
if count > retries:
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get('min_filesize')
|
||||
max_data_len = self.params.get('max_filesize')
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
while True:
|
||||
|
||||
# Download and write
|
||||
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# exit loop when download is finished
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
|
||||
# Open destination file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
assert stream is not None
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + ctx.resume_len
|
||||
min_data_len = self.params.get('min_filesize')
|
||||
max_data_len = self.params.get('max_filesize')
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
byte_counter = 0 + ctx.resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
|
||||
def retry(e):
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
# socket.timeout is a subclass of socket.error but may not have
|
||||
# errno set
|
||||
except socket.timeout as e:
|
||||
retry(e)
|
||||
except socket.error as e:
|
||||
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||
raise
|
||||
retry(e)
|
||||
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# exit loop when download is finished
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
|
||||
# Open destination file just in time
|
||||
if ctx.stream is None:
|
||||
try:
|
||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
ctx.stream, ctx.tmpfilename = sanitize_open(
|
||||
ctx.tmpfilename, ctx.open_mode)
|
||||
assert ctx.stream is not None
|
||||
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||
self.report_destination(ctx.filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
ctx.stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, now, byte_counter - ctx.resume_len)
|
||||
|
||||
# end measuring of one loop run
|
||||
now = time.time()
|
||||
after = now
|
||||
|
||||
# Adjust block size
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
before = after
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||
if data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': ctx.tmpfilename,
|
||||
'filename': ctx.filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, now, byte_counter - resume_len)
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
err = ContentTooShortError(byte_counter, int(data_len))
|
||||
if count <= retries:
|
||||
retry(err)
|
||||
raise err
|
||||
|
||||
# end measuring of one loop run
|
||||
now = time.time()
|
||||
after = now
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
|
||||
# Adjust block size
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
before = after
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
return True
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
while count <= retries:
|
||||
try:
|
||||
establish_connection()
|
||||
download()
|
||||
return True
|
||||
except RetryDownload as e:
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(e.source_error, count, retries)
|
||||
continue
|
||||
except SucceedDownload:
|
||||
return True
|
||||
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
})
|
||||
|
||||
return True
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
||||
|
@ -29,7 +29,7 @@ from ..compat import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'[pb][\da-z]{7}'
|
||||
_ID_REGEX = r'[pbw][\da-z]{7}'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
@ -5,7 +5,7 @@ from ..utils import remove_end
|
||||
|
||||
|
||||
class CharlieRoseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://charlierose.com/videos/27996',
|
||||
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
|
||||
@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://charlierose.com/videos/27996',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://charlierose.com/episodes/30887?autoplay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
|
||||
|
@ -2184,6 +2184,12 @@ class InfoExtractor(object):
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
is_plain_url, formats = _media_formats(src, media_type, f)
|
||||
if is_plain_url:
|
||||
# res attribute is not standard but seen several times
|
||||
# in the wild
|
||||
f.update({
|
||||
'height': int_or_none(source_attributes.get('res')),
|
||||
'format_id': source_attributes.get('label'),
|
||||
})
|
||||
f.update(formats[0])
|
||||
media_info['formats'].append(f)
|
||||
else:
|
||||
|
@ -2871,12 +2871,6 @@ class GenericIE(InfoExtractor):
|
||||
merged[k] = v
|
||||
return merged
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
if json_ld.get('url'):
|
||||
return merge_dicts(json_ld, info_dict)
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
@ -2895,6 +2889,12 @@ class GenericIE(InfoExtractor):
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
if json_ld.get('url'):
|
||||
return merge_dicts(json_ld, info_dict)
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
@ -4,6 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@ -12,27 +13,53 @@ from ..utils import (
|
||||
|
||||
|
||||
class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:docs|drive)\.google\.com/
|
||||
(?:
|
||||
(?:uc|open)\?.*?id=|
|
||||
file/d/
|
||||
)|
|
||||
video\.google\.com/get_player\?.*?docid=
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9_-]{28,})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||
'md5': '5c602afbbf2c1db91831f5d82f678554',
|
||||
'info_dict': {
|
||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 45,
|
||||
}
|
||||
}, {
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
|
||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||
'info_dict': {
|
||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
||||
}
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||
'md5': 'c230c67252874fddd8170e3fd1a45886',
|
||||
'info_dict': {
|
||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
||||
'duration': 189,
|
||||
},
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
@ -147,47 +174,84 @@ class GoogleDriveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
raise ExtractorError(reason)
|
||||
|
||||
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
|
||||
title = self._search_regex(
|
||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||
default=None))
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
||||
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
|
||||
formats = []
|
||||
for fmt_stream in fmt_stream_map:
|
||||
fmt_stream_split = fmt_stream.split('|')
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
f.update({
|
||||
'width': resolution[0],
|
||||
'height': resolution[1],
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map', default='').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt_list', default='').split(',')
|
||||
if fmt_stream_map and fmt_list:
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
mobj = re.search(
|
||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||
if mobj:
|
||||
resolutions[mobj.group('format_id')] = (
|
||||
int(mobj.group('width')), int(mobj.group('height')))
|
||||
|
||||
for fmt_stream in fmt_stream_map:
|
||||
fmt_stream_split = fmt_stream.split('|')
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
f.update({
|
||||
'width': resolution[0],
|
||||
'height': resolution[1],
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
source_url = update_url_query(
|
||||
'https://drive.google.com/uc', {
|
||||
'id': video_id,
|
||||
'export': 'download',
|
||||
})
|
||||
urlh = self._request_webpage(
|
||||
source_url, video_id, note='Requesting source file',
|
||||
errnote='Unable to request source file', fatal=False)
|
||||
if urlh:
|
||||
def add_source_format(src_url):
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
'ext': determine_ext(title, 'mp4').lower(),
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
})
|
||||
formats.append(f)
|
||||
if urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(source_url)
|
||||
else:
|
||||
confirmation_webpage = self._webpage_read_content(
|
||||
urlh, url, video_id, note='Downloading confirmation page',
|
||||
errnote='Unable to confirm download', fatal=False)
|
||||
if confirmation_webpage:
|
||||
confirm = self._search_regex(
|
||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||
'confirmation code', fatal=False)
|
||||
if confirm:
|
||||
add_source_format(update_url_query(source_url, {
|
||||
'confirm': confirm,
|
||||
}))
|
||||
|
||||
if not formats:
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
hl = self._search_regex(
|
||||
|
@ -92,7 +92,7 @@ class MixcloudIE(InfoExtractor):
|
||||
js = self._download_webpage(js_url, track_id, fatal=False)
|
||||
if js:
|
||||
KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
|
||||
for key_name in ('value', 'key_value', 'key_value_two'):
|
||||
for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'):
|
||||
key = self._search_regex(
|
||||
KEY_RE_TEMPLATE % key_name, js, 'key',
|
||||
default=None, group='key')
|
||||
|
@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
|
||||
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
|
||||
|
||||
sources = self._parse_json(js_to_json(self._search_regex(
|
||||
r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
|
||||
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
|
||||
webpage, 'sources', default='{}')), video_id)
|
||||
|
||||
if not sources:
|
||||
@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
|
||||
thumbnail = self._search_regex(
|
||||
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -59,6 +59,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
device_types.append('android')
|
||||
|
||||
formats = []
|
||||
error = None
|
||||
# TODO: extract f4m formats
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in device_types:
|
||||
@ -84,8 +85,8 @@ class RadioCanadaIE(InfoExtractor):
|
||||
if not v_url:
|
||||
continue
|
||||
if v_url == 'null':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
|
||||
error = xpath_text(v_data, 'message')
|
||||
continue
|
||||
ext = determine_ext(v_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@ -129,6 +130,9 @@ class RadioCanadaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
base_url + '/manifest.f4m', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
if not formats and error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
|
||||
media_type = media['type']
|
||||
if 'Audio' in media_type:
|
||||
relinker_info = {
|
||||
'formats': {
|
||||
'formats': [{
|
||||
'format_id': media.get('formatoAudio'),
|
||||
'url': media['audioUrl'],
|
||||
'ext': media.get('formatoAudio'),
|
||||
}
|
||||
}]
|
||||
}
|
||||
elif 'Video' in media_type:
|
||||
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
|
||||
|
@ -4,12 +4,14 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor):
|
||||
|
||||
base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
|
||||
|
||||
lecture_data = self._download_json(
|
||||
'%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
|
||||
lecture_id)['lecture'][0]
|
||||
try:
|
||||
lecture_data = self._download_json(
|
||||
'%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
|
||||
lecture_id)['lecture'][0]
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
msg = self._parse_json(
|
||||
e.cause.read().decode('utf-8'), lecture_id)
|
||||
raise ExtractorError(msg['detail'], expected=True)
|
||||
raise
|
||||
|
||||
lecture_info = {
|
||||
'id': lecture_id,
|
||||
|
@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
|
||||
from ..swfinterp import SWFInterpreter
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
return True
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
|
||||
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
|
||||
return super(YoutubeBaseInfoExtractor, self)._download_webpage(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
@ -1003,6 +1009,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'Skipping DASH manifest',
|
||||
],
|
||||
},
|
||||
{
|
||||
# The following content has been identified by the YouTube community
|
||||
# as inappropriate or offensive to some audiences.
|
||||
'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
|
||||
'info_dict': {
|
||||
'id': '6SJNVb0GnPI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Race Differences in Intelligence',
|
||||
'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
|
||||
'duration': 965,
|
||||
'upload_date': '20140124',
|
||||
'uploader': 'New Century Foundation',
|
||||
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
|
||||
'license': 'Standard YouTube License',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# itag 212
|
||||
'url': '1t24XAntNCY',
|
||||
@ -1437,9 +1464,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if dash_mpd and dash_mpd[0] not in dash_mpds:
|
||||
dash_mpds.append(dash_mpd[0])
|
||||
|
||||
is_live = None
|
||||
view_count = None
|
||||
|
||||
def extract_view_count(v_info):
|
||||
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||
|
||||
# Get video info
|
||||
embed_webpage = None
|
||||
is_live = None
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
@ -1509,6 +1541,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
continue
|
||||
get_video_info = compat_parse_qs(video_info_webpage)
|
||||
add_dash_mpd(get_video_info)
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(get_video_info)
|
||||
if not video_info:
|
||||
video_info = get_video_info
|
||||
if 'token' in get_video_info:
|
||||
@ -1592,10 +1626,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return self.playlist_result(entries, video_id, video_title, video_description)
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
if 'view_count' in video_info:
|
||||
view_count = int(video_info['view_count'][0])
|
||||
else:
|
||||
view_count = None
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(video_info)
|
||||
|
||||
# Check for "rental" videos
|
||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||
@ -1639,10 +1671,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||
r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
|
||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
||||
video_webpage, 'upload date', default=None)
|
||||
if upload_date:
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
video_license = self._html_search_regex(
|
||||
@ -2028,7 +2058,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
|
|
||||
(%(playlist_id)s)
|
||||
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
|
||||
IE_NAME = 'youtube:playlist'
|
||||
_TESTS = [{
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.08.23'
|
||||
__version__ = '2017.09.02'
|
||||
|
Loading…
Reference in New Issue
Block a user