mirror of https://github.com/l1ving/youtube-dl
synced 2025-01-24 11:43:21 +08:00

commit 7feaf65da7
Merge branch 'master' into BlenderCloud-issue-13282
.github/ISSUE_TEMPLATE.md (vendored): 6 changes
@@ -6,8 +6,8 @@

---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02**

### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.08.23
+[debug] youtube-dl version 2017.09.02
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
ChangeLog: 40 changes
@@ -1,3 +1,43 @@
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+  #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
++ [charlierose] Add support for episodes (#14062)
++ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
++ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
+version 2017.08.27.1
+
+Extractors
+
+* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
+
+
+version 2017.08.27
+
+Core
++ [extractor/common] Extract height and format id for HTML5 videos (#14034)
+* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
+  #8625, #9483)
+    * Simplify code and split into separate routines to facilitate maintaining
+    * Make retry mechanism work on errors during actual download not only
+      during connection establishment phase
+    * Retry on ECONNRESET and ETIMEDOUT during reading data from network
+    * Retry on content too short
+    * Show error description on retry
+
+Extractors
+* [generic] Lower preference for extraction from LD-JSON
+* [rai] Fix audio formats extraction (#14024)
+* [youtube] Fix controversy videos extraction (#14027, #14029)
+* [mixcloud] Fix extraction (#14015, #14020)
+
+
 version 2017.08.23
 
 Core
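The downloader bullets above are the heart of this release: retries now also cover errors raised while reading the response body, not only while connecting. A rough standalone illustration of that idea (my sketch, not the project's code; the real implementation is the youtube_dl/downloader/http.py diff further down):

import errno
import socket


def read_all_with_retries(open_stream, retries=3):
    # Re-open and re-read when the connection is reset or times out
    # mid-transfer, instead of failing the whole download outright.
    for _ in range(retries + 1):
        stream = open_stream()
        try:
            return stream.read()
        except socket.timeout:
            continue  # timed out during read: retry
        except socket.error as e:
            if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
                raise  # anything else is not retryable
            continue
    raise IOError('giving up after %d retries' % retries)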
Makefile: 4 changes
@@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
 	mkdir -p zip
 	for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
 	  mkdir -p zip/$$d ;\
-	  cp -a $$d/*.py zip/$$d/ ;\
+	  cp -pPR $$d/*.py zip/$$d/ ;\
 	done
 	touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
 	mv zip/youtube_dl/__main__.py zip/
-	cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+	cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
 	rm -rf zip
 	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
youtube_dl/downloader/common.py

@@ -304,11 +304,11 @@ class FileDownloader(object):
         """Report attempt to resume at given byte."""
         self.to_screen('[download] Resuming download at byte %s' % resume_len)
 
-    def report_retry(self, count, retries):
+    def report_retry(self, err, count, retries):
         """Report retry in case of HTTP error 5xx"""
         self.to_screen(
-            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
-            % (count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+            % (error_to_compat_str(err), count, self.format_retries(retries)))
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
youtube_dl/downloader/http.py

@@ -22,8 +22,16 @@ from ..utils import (
 class HttpFD(FileDownloader):
     def real_download(self, filename, info_dict):
         url = info_dict['url']
-        tmpfilename = self.temp_name(filename)
-        stream = None
+
+        class DownloadContext(dict):
+            __getattr__ = dict.get
+            __setattr__ = dict.__setitem__
+            __delattr__ = dict.__delitem__
+
+        ctx = DownloadContext()
+        ctx.filename = filename
+        ctx.tmpfilename = self.temp_name(filename)
+        ctx.stream = None
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
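The DownloadContext class added here is a small but load-bearing idiom: a dict whose items double as attributes. The nested helpers introduced later in this diff (establish_connection() and download()) must mutate shared download state, and Python 2, which youtube-dl still supported, has no nonlocal statement; rebinding attributes on one mutable object sidesteps that. A quick illustration of the idiom (my example, not from the codebase):

class AttrDict(dict):
    __getattr__ = dict.get          # missing keys read as None instead of raising
    __setattr__ = dict.__setitem__  # attribute writes become dict items
    __delattr__ = dict.__delitem__


ctx = AttrDict()
ctx.resume_len = 0          # equivalent to ctx['resume_len'] = 0


def bump():
    ctx.resume_len += 1024  # nested function mutates shared state, no nonlocal needed


bump()
print(ctx.resume_len)       # 1024
print(ctx.missing)          # None, courtesy of dict.get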
@@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
         if is_test:
             request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
 
-        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
-        else:
-            resume_len = 0
-
-        open_mode = 'wb'
-        if resume_len != 0:
-            if self.params.get('continuedl', True):
-                self.report_resuming_byte(resume_len)
-                request.add_header('Range', 'bytes=%d-' % resume_len)
-                open_mode = 'ab'
-            else:
-                resume_len = 0
+        ctx.open_mode = 'wb'
+        ctx.resume_len = 0
+
+        if self.params.get('continuedl', True):
+            # Establish possible resume length
+            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
 
         count = 0
         retries = self.params.get('retries', 0)
-        while count <= retries:
+
+        class SucceedDownload(Exception):
+            pass
+
+        class RetryDownload(Exception):
+            def __init__(self, source_error):
+                self.source_error = source_error
+
+        def establish_connection():
+            if ctx.resume_len != 0:
+                self.report_resuming_byte(ctx.resume_len)
+                request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
+                ctx.open_mode = 'ab'
             # Establish connection
             try:
-                data = self.ydl.urlopen(request)
+                ctx.data = self.ydl.urlopen(request)
                 # When trying to resume, Content-Range HTTP header of response has to be checked
                 # to match the value of requested Range HTTP header. This is due to a webservers
                 # that don't support resuming and serve a whole file with no Content-Range
                 # set in response despite of requested Range (see
                 # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
-                if resume_len > 0:
-                    content_range = data.headers.get('Content-Range')
+                if ctx.resume_len > 0:
+                    content_range = ctx.data.headers.get('Content-Range')
                     if content_range:
                         content_range_m = re.search(r'bytes (\d+)-', content_range)
                         # Content-Range is present and matches requested Range, resume is possible
-                        if content_range_m and resume_len == int(content_range_m.group(1)):
-                            break
+                        if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
+                            return
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
                     self.report_unable_to_resume()
-                    resume_len = 0
-                    open_mode = 'wb'
-                break
+                    ctx.resume_len = 0
+                    ctx.open_mode = 'wb'
+                return
             except (compat_urllib_error.HTTPError, ) as err:
                 if (err.code < 500 or err.code >= 600) and err.code != 416:
                     # Unexpected HTTP error
@@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
                     # Unable to resume (requested range not satisfiable)
                     try:
                         # Open the connection again without the range header
-                        data = self.ydl.urlopen(basic_request)
-                        content_length = data.info()['Content-Length']
+                        ctx.data = self.ydl.urlopen(basic_request)
+                        content_length = ctx.data.info()['Content-Length']
                     except (compat_urllib_error.HTTPError, ) as err:
                         if err.code < 500 or err.code >= 600:
                             raise
                     else:
                         # Examine the reported length
                         if (content_length is not None and
-                                (resume_len - 100 < int(content_length) < resume_len + 100)):
+                                (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
                             # The file had already been fully downloaded.
                             # Explanation to the above condition: in issue #175 it was revealed that
                             # YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -102,152 +115,184 @@ class HttpFD(FileDownloader):
                             # I decided to implement a suggested change and consider the file
                             # completely downloaded if the file size differs less than 100 bytes from
                             # the one in the hard drive.
-                            self.report_file_already_downloaded(filename)
-                            self.try_rename(tmpfilename, filename)
+                            self.report_file_already_downloaded(ctx.filename)
+                            self.try_rename(ctx.tmpfilename, ctx.filename)
                             self._hook_progress({
-                                'filename': filename,
+                                'filename': ctx.filename,
                                 'status': 'finished',
-                                'downloaded_bytes': resume_len,
-                                'total_bytes': resume_len,
+                                'downloaded_bytes': ctx.resume_len,
+                                'total_bytes': ctx.resume_len,
                             })
-                            return True
+                            raise SucceedDownload()
                         else:
                             # The length does not match, we start the download over
                             self.report_unable_to_resume()
-                            resume_len = 0
-                            open_mode = 'wb'
-                            break
-            except socket.error as e:
-                if e.errno != errno.ECONNRESET:
+                            ctx.resume_len = 0
+                            ctx.open_mode = 'wb'
+                            return
+                raise RetryDownload(err)
+            except socket.error as err:
+                if err.errno != errno.ECONNRESET:
                     # Connection reset is no problem, just retry
                     raise
+                raise RetryDownload(err)
 
-            # Retry
-            count += 1
-            if count <= retries:
-                self.report_retry(count, retries)
-
-        if count > retries:
-            self.report_error('giving up after %s retries' % retries)
-            return False
-
-        data_len = data.info().get('Content-length', None)
-
-        # Range HTTP header may be ignored/unsupported by a webserver
-        # (e.g. extractor/scivee.py, extractor/bambuser.py).
-        # However, for a test we still would like to download just a piece of a file.
-        # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
-        # block size when downloading a file.
-        if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
-            data_len = self._TEST_FILE_SIZE
-
-        if data_len is not None:
-            data_len = int(data_len) + resume_len
-            min_data_len = self.params.get('min_filesize')
-            max_data_len = self.params.get('max_filesize')
-            if min_data_len is not None and data_len < min_data_len:
-                self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
-                return False
-            if max_data_len is not None and data_len > max_data_len:
-                self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
-                return False
-
-        byte_counter = 0 + resume_len
-        block_size = self.params.get('buffersize', 1024)
-        start = time.time()
-
-        # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
-        now = None  # needed for slow_down() in the first loop run
-        before = start  # start measuring
-        while True:
-
-            # Download and write
-            data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
-            byte_counter += len(data_block)
-
-            # exit loop when download is finished
-            if len(data_block) == 0:
-                break
-
-            # Open destination file just in time
-            if stream is None:
-                try:
-                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
-                    assert stream is not None
-                    filename = self.undo_temp_name(tmpfilename)
-                    self.report_destination(filename)
-                except (OSError, IOError) as err:
-                    self.report_error('unable to open for writing: %s' % str(err))
-                    return False
-
-                if self.params.get('xattr_set_filesize', False) and data_len is not None:
-                    try:
-                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
-                    except (XAttrUnavailableError, XAttrMetadataError) as err:
-                        self.report_error('unable to set filesize xattr: %s' % str(err))
-
-            try:
-                stream.write(data_block)
-            except (IOError, OSError) as err:
-                self.to_stderr('\n')
-                self.report_error('unable to write data: %s' % str(err))
-                return False
-
-            # Apply rate limit
-            self.slow_down(start, now, byte_counter - resume_len)
-
-            # end measuring of one loop run
-            now = time.time()
-            after = now
-
-            # Adjust block size
-            if not self.params.get('noresizebuffer', False):
-                block_size = self.best_block_size(after - before, len(data_block))
-
-            before = after
-
-            # Progress message
-            speed = self.calc_speed(start, now, byte_counter - resume_len)
-            if data_len is None:
-                eta = None
-            else:
-                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-
-            self._hook_progress({
-                'status': 'downloading',
-                'downloaded_bytes': byte_counter,
-                'total_bytes': data_len,
-                'tmpfilename': tmpfilename,
-                'filename': filename,
-                'eta': eta,
-                'speed': speed,
-                'elapsed': now - start,
-            })
-
-            if is_test and byte_counter == data_len:
-                break
-
-        if stream is None:
-            self.to_stderr('\n')
-            self.report_error('Did not get any data blocks')
-            return False
-        if tmpfilename != '-':
-            stream.close()
-
-        if data_len is not None and byte_counter != data_len:
-            raise ContentTooShortError(byte_counter, int(data_len))
-        self.try_rename(tmpfilename, filename)
-
-        # Update file modification time
-        if self.params.get('updatetime', True):
-            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
-
-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': byte_counter,
-            'filename': filename,
-            'status': 'finished',
-            'elapsed': time.time() - start,
-        })
-
-        return True
+        def download():
+            data_len = ctx.data.info().get('Content-length', None)
+
+            # Range HTTP header may be ignored/unsupported by a webserver
+            # (e.g. extractor/scivee.py, extractor/bambuser.py).
+            # However, for a test we still would like to download just a piece of a file.
+            # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+            # block size when downloading a file.
+            if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+                data_len = self._TEST_FILE_SIZE
+
+            if data_len is not None:
+                data_len = int(data_len) + ctx.resume_len
+                min_data_len = self.params.get('min_filesize')
+                max_data_len = self.params.get('max_filesize')
+                if min_data_len is not None and data_len < min_data_len:
+                    self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                    return False
+                if max_data_len is not None and data_len > max_data_len:
+                    self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+                    return False
+
+            byte_counter = 0 + ctx.resume_len
+            block_size = self.params.get('buffersize', 1024)
+            start = time.time()
+
+            # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+            now = None  # needed for slow_down() in the first loop run
+            before = start  # start measuring
+
+            def retry(e):
+                if ctx.tmpfilename != '-':
+                    ctx.stream.close()
+                ctx.stream = None
+                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+                raise RetryDownload(e)
+
+            while True:
+                try:
+                    # Download and write
+                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+                # socket.timeout is a subclass of socket.error but may not have
+                # errno set
+                except socket.timeout as e:
+                    retry(e)
+                except socket.error as e:
+                    if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
+                        raise
+                    retry(e)
+
+                byte_counter += len(data_block)
+
+                # exit loop when download is finished
+                if len(data_block) == 0:
+                    break
+
+                # Open destination file just in time
+                if ctx.stream is None:
+                    try:
+                        ctx.stream, ctx.tmpfilename = sanitize_open(
+                            ctx.tmpfilename, ctx.open_mode)
+                        assert ctx.stream is not None
+                        ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+                        self.report_destination(ctx.filename)
+                    except (OSError, IOError) as err:
+                        self.report_error('unable to open for writing: %s' % str(err))
+                        return False
+
+                    if self.params.get('xattr_set_filesize', False) and data_len is not None:
+                        try:
+                            write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+                        except (XAttrUnavailableError, XAttrMetadataError) as err:
+                            self.report_error('unable to set filesize xattr: %s' % str(err))
+
+                try:
+                    ctx.stream.write(data_block)
+                except (IOError, OSError) as err:
+                    self.to_stderr('\n')
+                    self.report_error('unable to write data: %s' % str(err))
+                    return False
+
+                # Apply rate limit
+                self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+                # end measuring of one loop run
+                now = time.time()
+                after = now
+
+                # Adjust block size
+                if not self.params.get('noresizebuffer', False):
+                    block_size = self.best_block_size(after - before, len(data_block))
+
+                before = after
+
+                # Progress message
+                speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+                if data_len is None:
+                    eta = None
+                else:
+                    eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+                self._hook_progress({
+                    'status': 'downloading',
+                    'downloaded_bytes': byte_counter,
+                    'total_bytes': data_len,
+                    'tmpfilename': ctx.tmpfilename,
+                    'filename': ctx.filename,
+                    'eta': eta,
+                    'speed': speed,
+                    'elapsed': now - start,
+                })
+
+                if is_test and byte_counter == data_len:
+                    break
+
+            if ctx.stream is None:
+                self.to_stderr('\n')
+                self.report_error('Did not get any data blocks')
+                return False
+            if ctx.tmpfilename != '-':
+                ctx.stream.close()
+
+            if data_len is not None and byte_counter != data_len:
+                err = ContentTooShortError(byte_counter, int(data_len))
+                if count <= retries:
+                    retry(err)
+                raise err
+
+            self.try_rename(ctx.tmpfilename, ctx.filename)
+
+            # Update file modification time
+            if self.params.get('updatetime', True):
+                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
+
+            self._hook_progress({
+                'downloaded_bytes': byte_counter,
+                'total_bytes': byte_counter,
+                'filename': ctx.filename,
+                'status': 'finished',
+                'elapsed': time.time() - start,
+            })
+
+            return True
+
+        while count <= retries:
+            try:
+                establish_connection()
+                download()
+                return True
+            except RetryDownload as e:
+                count += 1
+                if count <= retries:
+                    self.report_retry(e.source_error, count, retries)
+                continue
+            except SucceedDownload:
+                return True
+
+        self.report_error('giving up after %s retries' % retries)
+        return False
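Taken as a whole, the rework above replaces one monolithic retry loop with two routines whose outcomes travel as exceptions: RetryDownload carries the triggering error back to the loop (so report_retry can show it), and SucceedDownload short-circuits when the file turns out to be already complete. Stripped of the HTTP details, the control flow is roughly this (a paraphrase for illustration; the diff above is authoritative):

class RetryDownload(Exception):
    def __init__(self, source_error):
        self.source_error = source_error


class SucceedDownload(Exception):
    pass


def run(establish_connection, download, report_retry, retries):
    count = 0
    while count <= retries:
        try:
            establish_connection()  # may raise RetryDownload or SucceedDownload
            download()              # may raise RetryDownload on read errors
            return True
        except RetryDownload as e:
            count += 1
            if count <= retries:
                report_retry(e.source_error, count, retries)
            continue
        except SucceedDownload:
            return True
    return False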
youtube_dl/extractor/bbc.py

@@ -29,7 +29,7 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pb][\da-z]{7}'
+    _ID_REGEX = r'[pbw][\da-z]{7}'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?bbc\.co\.uk/

@@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
     }, {
         'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
         'only_matching': True,
+    }, {
+        'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+        'only_matching': True,
     }]
 
     _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
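The widened character class is the whole fix: programme ids beginning with w, like the w3csv1y9 in the new test URL, now match alongside the existing p- and b-prefixed ones. A quick check (the b-prefixed id is a made-up sample):

import re

_ID_REGEX = r'[pbw][\da-z]{7}'
for video_id in ('p055bc55', 'b0123456', 'w3csv1y9'):
    assert re.match(_ID_REGEX, video_id), video_id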
youtube_dl/extractor/charlierose.py

@@ -5,7 +5,7 @@ from ..utils import remove_end
 
 
 class CharlieRoseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://charlierose.com/videos/27996',
         'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',

@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
     }, {
         'url': 'https://charlierose.com/videos/27996',
         'only_matching': True,
+    }, {
+        'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+        'only_matching': True,
     }]
 
     _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
youtube_dl/extractor/common.py

@@ -2184,6 +2184,12 @@ class InfoExtractor(object):
                         f = parse_content_type(source_attributes.get('type'))
                         is_plain_url, formats = _media_formats(src, media_type, f)
                         if is_plain_url:
+                            # res attribute is not standard but seen several times
+                            # in the wild
+                            f.update({
+                                'height': int_or_none(source_attributes.get('res')),
+                                'format_id': source_attributes.get('label'),
+                            })
                             f.update(formats[0])
                             media_info['formats'].append(f)
                         else:
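As the added comment says, res and label are not part of the HTML5 spec, but pages in the wild attach them to <source> tags, and they map cleanly onto height and format_id. A toy illustration of what the extractor can now pick up (hypothetical markup and simplified parsing, not the library's code):

import re

source_tag = '<source src="clip.mp4" type="video/mp4" res="720" label="720p">'
attrs = dict(re.findall(r'(\w+)="([^"]*)"', source_tag))
print(int(attrs['res']), attrs['label'])  # 720 720p  ->  height, format_id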
youtube_dl/extractor/generic.py

@@ -2871,12 +2871,6 @@ class GenericIE(InfoExtractor):
                 merged[k] = v
             return merged
 
-        # Looking for http://schema.org/VideoObject
-        json_ld = self._search_json_ld(
-            webpage, video_id, default={}, expected_type='VideoObject')
-        if json_ld.get('url'):
-            return merge_dicts(json_ld, info_dict)
-
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:

@@ -2895,6 +2889,12 @@ class GenericIE(InfoExtractor):
                 jwplayer_data, video_id, require_title=False, base_url=url)
             return merge_dicts(info, info_dict)
 
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default={}, expected_type='VideoObject')
+        if json_ld.get('url'):
+            return merge_dicts(json_ld, info_dict)
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
youtube_dl/extractor/googledrive.py

@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     lowercase_escape,

@@ -12,27 +13,53 @@ from ..utils import (
 
 
 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:docs|drive)\.google\.com/
+                            (?:
+                                (?:uc|open)\?.*?id=|
+                                file/d/
+                            )|
+                            video\.google\.com/get_player\?.*?docid=
+                        )
+                        (?P<id>[a-zA-Z0-9_-]{28,})
+                    '''
     _TESTS = [{
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
+        'md5': '5c602afbbf2c1db91831f5d82f678554',
         'info_dict': {
             'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
             'title': 'Big Buck Bunny.mp4',
             'duration': 45,
         }
+    }, {
+        # video can't be watched anonymously due to view count limit reached,
+        # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+        'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+        'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+        'info_dict': {
+            'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
+            'ext': 'mp4',
+            'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
+        }
     }, {
         # video id is longer than 28 characters
         'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
-        'md5': 'c230c67252874fddd8170e3fd1a45886',
         'info_dict': {
             'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
             'ext': 'mp4',
             'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
             'duration': 189,
         },
-        'only_matching': True
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
     }]
     _FORMATS_EXT = {
         '5': 'flv',

@@ -147,47 +174,84 @@ class GoogleDriveIE(InfoExtractor):
         webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id)
 
-        reason = self._search_regex(
-            r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
-        if reason:
-            raise ExtractorError(reason)
-
-        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+        title = self._search_regex(
+            r'"title"\s*,\s*"([^"]+)', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
         duration = int_or_none(self._search_regex(
             r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
             default=None))
-        fmt_stream_map = self._search_regex(
-            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
-            'fmt stream map').split(',')
-        fmt_list = self._search_regex(
-            r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
-
-        resolutions = {}
-        for fmt in fmt_list:
-            mobj = re.search(
-                r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
-            if mobj:
-                resolutions[mobj.group('format_id')] = (
-                    int(mobj.group('width')), int(mobj.group('height')))
 
         formats = []
-        for fmt_stream in fmt_stream_map:
-            fmt_stream_split = fmt_stream.split('|')
-            if len(fmt_stream_split) < 2:
-                continue
-            format_id, format_url = fmt_stream_split[:2]
-            f = {
-                'url': lowercase_escape(format_url),
-                'format_id': format_id,
-                'ext': self._FORMATS_EXT[format_id],
-            }
-            resolution = resolutions.get(format_id)
-            if resolution:
-                f.update({
-                    'width': resolution[0],
-                    'height': resolution[1],
-                })
-            formats.append(f)
+        fmt_stream_map = self._search_regex(
+            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
+            'fmt stream map', default='').split(',')
+        fmt_list = self._search_regex(
+            r'"fmt_list"\s*,\s*"([^"]+)', webpage,
+            'fmt_list', default='').split(',')
+        if fmt_stream_map and fmt_list:
+            resolutions = {}
+            for fmt in fmt_list:
+                mobj = re.search(
+                    r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
+                if mobj:
+                    resolutions[mobj.group('format_id')] = (
+                        int(mobj.group('width')), int(mobj.group('height')))
+
+            for fmt_stream in fmt_stream_map:
+                fmt_stream_split = fmt_stream.split('|')
+                if len(fmt_stream_split) < 2:
+                    continue
+                format_id, format_url = fmt_stream_split[:2]
+                f = {
+                    'url': lowercase_escape(format_url),
+                    'format_id': format_id,
+                    'ext': self._FORMATS_EXT[format_id],
+                }
+                resolution = resolutions.get(format_id)
+                if resolution:
+                    f.update({
+                        'width': resolution[0],
+                        'height': resolution[1],
+                    })
+                formats.append(f)
+
+        source_url = update_url_query(
+            'https://drive.google.com/uc', {
+                'id': video_id,
+                'export': 'download',
+            })
+        urlh = self._request_webpage(
+            source_url, video_id, note='Requesting source file',
+            errnote='Unable to request source file', fatal=False)
+        if urlh:
+            def add_source_format(src_url):
+                formats.append({
+                    'url': src_url,
+                    'ext': determine_ext(title, 'mp4').lower(),
+                    'format_id': 'source',
+                    'quality': 1,
+                })
+            if urlh.headers.get('Content-Disposition'):
+                add_source_format(source_url)
+            else:
+                confirmation_webpage = self._webpage_read_content(
+                    urlh, url, video_id, note='Downloading confirmation page',
+                    errnote='Unable to confirm download', fatal=False)
+                if confirmation_webpage:
+                    confirm = self._search_regex(
+                        r'confirm=([^&"\']+)', confirmation_webpage,
+                        'confirmation code', fatal=False)
+                    if confirm:
+                        add_source_format(update_url_query(source_url, {
+                            'confirm': confirm,
+                        }))
+
+        if not formats:
+            reason = self._search_regex(
+                r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+            if reason:
+                raise ExtractorError(reason, expected=True)
+
         self._sort_formats(formats)
 
         hl = self._search_regex(
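The new source-format branch handles files whose streaming quota is exhausted but which can still be fetched as the original upload: the uc?export=download endpoint either serves the file directly (signalled by a Content-Disposition header) or returns an interstitial warning page whose confirm= token must be echoed back on a second request. Reduced to its essence (a sketch with an invented helper name; see the diff above for the real logic):

import re


def source_download_url(video_id, interstitial_html=None):
    # Direct case: the first response already carries Content-Disposition.
    url = 'https://drive.google.com/uc?id=%s&export=download' % video_id
    if interstitial_html:
        # Interstitial case: fish the confirm token out of the warning page.
        m = re.search(r'confirm=([^&"\']+)', interstitial_html)
        if m:
            url += '&confirm=' + m.group(1)
    return url


print(source_download_url('0B-vUyvmDLdWDcEt4WjBqcmI2XzQ'))
print(source_download_url('0B-vUyvmDLdWDcEt4WjBqcmI2XzQ', 'href="/uc?confirm=AbCd&amp;id=..."'))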
youtube_dl/extractor/mixcloud.py

@@ -92,7 +92,7 @@ class MixcloudIE(InfoExtractor):
         js = self._download_webpage(js_url, track_id, fatal=False)
         if js:
             KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
-            for key_name in ('value', 'key_value', 'key_value_two'):
+            for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'):
                 key = self._search_regex(
                     KEY_RE_TEMPLATE % key_name, js, 'key',
                     default=None, group='key')
youtube_dl/extractor/pornhd.py

@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
 
         sources = self._parse_json(js_to_json(self._search_regex(
-            r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
+            r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
            webpage, 'sources', default='{}')), video_id)
 
         if not sources:

@@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
         view_count = int_or_none(self._html_search_regex(
             r'(\d+) views\s*<', webpage, 'view count', fatal=False))
         thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+            r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+            'thumbnail', fatal=False, group='url')
 
         return {
             'id': video_id,
youtube_dl/extractor/radiocanada.py

@@ -59,6 +59,7 @@ class RadioCanadaIE(InfoExtractor):
             device_types.append('android')
 
         formats = []
+        error = None
         # TODO: extract f4m formats
         # f4m formats can be extracted using flashhd device_type but they produce unplayable file
         for device_type in device_types:

@@ -84,8 +85,8 @@ class RadioCanadaIE(InfoExtractor):
                 if not v_url:
                     continue
                 if v_url == 'null':
-                    raise ExtractorError('%s said: %s' % (
-                        self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+                    error = xpath_text(v_data, 'message')
+                    continue
                 ext = determine_ext(v_url)
                 if ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(

@@ -129,6 +130,9 @@ class RadioCanadaIE(InfoExtractor):
                 formats.extend(self._extract_f4m_formats(
                     base_url + '/manifest.f4m', video_id,
                     f4m_id='hds', fatal=False))
+        if not formats and error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
         self._sort_formats(formats)
 
         subtitles = {}
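The three radiocanada.py hunks implement a deliberate shift from fail-fast to fail-last: a 'null' URL no longer aborts the whole extraction; its message is parked in error and raised only if, after trying every device_type, no format was collected. The pattern in isolation (illustrative only):

def collect_formats(candidates, extract):
    formats, error = [], None
    for candidate in candidates:
        result, err = extract(candidate)
        if err:
            error = err  # remember the failure, keep trying other variants
            continue
        formats.extend(result)
    if not formats and error:
        raise RuntimeError(error)  # fatal only when nothing at all worked
    return formats


# One device type fails, another succeeds: no error is raised.
probe = lambda d: ([], 'said: null') if d == 'flash' else (['hls'], None)
print(collect_formats(['flash', 'ipad'], probe))  # ['hls']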
youtube_dl/extractor/rai.py

@@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
         media_type = media['type']
         if 'Audio' in media_type:
             relinker_info = {
-                'formats': {
+                'formats': [{
                     'format_id': media.get('formatoAudio'),
                     'url': media['audioUrl'],
                     'ext': media.get('formatoAudio'),
-                }
+                }]
             }
         elif 'Video' in media_type:
             relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
youtube_dl/extractor/viidea.py

@@ -4,12 +4,14 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
+    compat_HTTPError,
     compat_str,
+    compat_urlparse,
 )
 from ..utils import (
-    parse_duration,
+    ExtractorError,
     js_to_json,
+    parse_duration,
     parse_iso8601,
 )
 

@@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor):
 
         base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
 
-        lecture_data = self._download_json(
-            '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
-            lecture_id)['lecture'][0]
+        try:
+            lecture_data = self._download_json(
+                '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
+                lecture_id)['lecture'][0]
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                msg = self._parse_json(
+                    e.cause.read().decode('utf-8'), lecture_id)
+                raise ExtractorError(msg['detail'], expected=True)
+            raise
 
         lecture_info = {
             'id': lecture_id,
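The try/except added here turns an opaque HTTP 403 into the API's own explanation: youtube-dl wraps transport failures in ExtractorError with the underlying exception preserved in .cause, so callers can inspect the status code and re-parse the response body; expected=True then prints the message without the usual bug-report plea. A self-contained imitation of the pattern (all names below are stand-ins, not the real youtube_dl classes):

import json


class ExtractorError(Exception):
    def __init__(self, msg, cause=None, expected=False):
        super(ExtractorError, self).__init__(msg)
        self.cause = cause
        self.expected = expected


class FakeHTTPError(Exception):
    # Stand-in for compat_HTTPError: carries a status code and a readable body
    def __init__(self, code, body):
        self.code = code
        self._body = body

    def read(self):
        return self._body


def fetch_lecture():
    # Simulate the Viidea API refusing an anonymous request
    raise ExtractorError(
        'Unable to download JSON metadata',
        cause=FakeHTTPError(403, b'{"detail": "Lecture is not public."}'))


try:
    fetch_lecture()
except ExtractorError as e:
    if isinstance(e.cause, FakeHTTPError) and e.cause.code == 403:
        print(json.loads(e.cause.read().decode('utf-8'))['detail'])
    else:
        raise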
youtube_dl/extractor/youtube.py

@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
     compat_chr,
+    compat_kwargs,
     compat_parse_qs,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,

@@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
         return True
 
+    def _download_webpage(self, *args, **kwargs):
+        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+        return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+            *args, **compat_kwargs(kwargs))
+
     def _real_initialize(self):
         if self._downloader is None:
             return
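This override is what "[youtube] Force old layout" means in practice: every page fetched by any YouTube extractor gains an extra query parameter steering YouTube to the legacy, non-Polymer HTML that the extraction regexes still understand; compat_kwargs re-encodes the keyword names so the **kwargs splat also works on Python 2 under unicode_literals. The effective per-call transformation (illustrative):

kwargs = {'note': 'Downloading webpage'}
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
print(kwargs)
# {'note': 'Downloading webpage', 'query': {'disable_polymer': 'true'}}
# i.e. every request URL gains ...&disable_polymer=true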
@@ -1003,6 +1009,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'Skipping DASH manifest',
             ],
         },
+        {
+            # The following content has been identified by the YouTube community
+            # as inappropriate or offensive to some audiences.
+            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+            'info_dict': {
+                'id': '6SJNVb0GnPI',
+                'ext': 'mp4',
+                'title': 'Race Differences in Intelligence',
+                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+                'duration': 965,
+                'upload_date': '20140124',
+                'uploader': 'New Century Foundation',
+                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+                'license': 'Standard YouTube License',
+                'view_count': int,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             # itag 212
             'url': '1t24XAntNCY',

@@ -1437,9 +1464,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if dash_mpd and dash_mpd[0] not in dash_mpds:
                 dash_mpds.append(dash_mpd[0])
 
+        is_live = None
+        view_count = None
+
+        def extract_view_count(v_info):
+            return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
         # Get video info
         embed_webpage = None
-        is_live = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}

@@ -1509,6 +1541,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         continue
                     get_video_info = compat_parse_qs(video_info_webpage)
                     add_dash_mpd(get_video_info)
+                    if view_count is None:
+                        view_count = extract_view_count(get_video_info)
                     if not video_info:
                         video_info = get_video_info
                     if 'token' in get_video_info:

@@ -1592,10 +1626,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 return self.playlist_result(entries, video_id, video_title, video_description)
             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 
-        if 'view_count' in video_info:
-            view_count = int(video_info['view_count'][0])
-        else:
-            view_count = None
+        if view_count is None:
+            view_count = extract_view_count(video_info)
 
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:

@@ -1639,10 +1671,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if not upload_date:
             upload_date = self._search_regex(
                 [r'(?s)id="eow-date.*?>(.*?)</span>',
-                 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
                 video_webpage, 'upload date', default=None)
-        if upload_date:
-            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
         upload_date = unified_strdate(upload_date)
 
         video_license = self._html_search_regex(

@@ -2028,7 +2058,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                      |
                         (%(playlist_id)s)
                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
     IE_NAME = 'youtube:playlist'
     _TESTS = [{
youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.08.23'
+__version__ = '2017.09.02'