Merge branch 'master' into BlenderCloud-issue-13282

2025-01-24 05:22:51 +08:00 · 2017-09-02 20:56:36 -05:00 · 2017-09-02 20:56:36 -05:00 · 7feaf65da7
commit 7feaf65da7
parent 000892ccc8 64f0e30b93
17 changed files with 431 additions and 226 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.08.23
+[debug] youtube-dl version 2017.09.02
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,43 @@
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+  #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
++ [charlierose] Add support for episodes (#14062)
++ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
++ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
+version 2017.08.27.1
+
+Extractors
+
+* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
+
+
+version 2017.08.27
+
+Core
++ [extractor/common] Extract height and format id for HTML5 videos (#14034)
+* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
+  #8625, #9483)
+    * Simplify code and split into separate routines to facilitate maintaining
+    * Make retry mechanism work on errors during actual download not only
+      during connection establishment phase
+    * Retry on ECONNRESET and ETIMEDOUT during reading data from network
+    * Retry on content too short
+    * Show error description on retry
+
+Extractors
+* [generic] Lower preference for extraction from LD-JSON
+* [rai] Fix audio formats extraction (#14024)
+* [youtube] Fix controversy videos extraction (#14027, #14029)
+* [mixcloud] Fix extraction (#14015, #14020)
+
+
 version 2017.08.23

 Core
--- a/Makefile
+++ b/Makefile
@@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
 	mkdir -p zip
 	for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
 	  mkdir -p zip/$$d ;\
-	  cp -a $$d/*.py zip/$$d/ ;\
+	  cp -pPR $$d/*.py zip/$$d/ ;\
 	done
 	touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
 	mv zip/youtube_dl/__main__.py zip/
-	cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+	cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
 	rm -rf zip
 	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -304,11 +304,11 @@ class FileDownloader(object):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

-    def report_retry(self, count, retries):
+    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(
-            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
-            % (count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+            % (error_to_compat_str(err), count, self.format_retries(retries)))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -22,8 +22,16 @@ from ..utils import (
 class HttpFD(FileDownloader):
    def real_download(self, filename, info_dict):
        url = info_dict['url']
-        tmpfilename = self.temp_name(filename)
-        stream = None
+
+        class DownloadContext(dict):
+            __getattr__ = dict.get
+            __setattr__ = dict.__setitem__
+            __delattr__ = dict.__delitem__
+
+        ctx = DownloadContext()
+        ctx.filename = filename
+        ctx.tmpfilename = self.temp_name(filename)
+        ctx.stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
@@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
        if is_test:
            request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))

-        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
-        else:
-            resume_len = 0
+        ctx.open_mode = 'wb'
+        ctx.resume_len = 0

-        open_mode = 'wb'
-        if resume_len != 0:
-            if self.params.get('continuedl', True):
-                self.report_resuming_byte(resume_len)
-                request.add_header('Range', 'bytes=%d-' % resume_len)
-                open_mode = 'ab'
-            else:
-                resume_len = 0
+        if self.params.get('continuedl', True):
+            # Establish possible resume length
+            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))

        count = 0
        retries = self.params.get('retries', 0)
-        while count <= retries:
+
+        class SucceedDownload(Exception):
+            pass
+
+        class RetryDownload(Exception):
+            def __init__(self, source_error):
+                self.source_error = source_error
+
+        def establish_connection():
+            if ctx.resume_len != 0:
+                self.report_resuming_byte(ctx.resume_len)
+                request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
+                ctx.open_mode = 'ab'
            # Establish connection
            try:
-                data = self.ydl.urlopen(request)
+                ctx.data = self.ydl.urlopen(request)
                # When trying to resume, Content-Range HTTP header of response has to be checked
                # to match the value of requested Range HTTP header. This is due to a webservers
                # that don't support resuming and serve a whole file with no Content-Range
                # set in response despite of requested Range (see
                # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
-                if resume_len > 0:
-                    content_range = data.headers.get('Content-Range')
+                if ctx.resume_len > 0:
+                    content_range = ctx.data.headers.get('Content-Range')
                    if content_range:
                        content_range_m = re.search(r'bytes (\d+)-', content_range)
                        # Content-Range is present and matches requested Range, resume is possible
-                        if content_range_m and resume_len == int(content_range_m.group(1)):
-                            break
+                        if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
+                            return
                    # Content-Range is either not present or invalid. Assuming remote webserver is
                    # trying to send the whole file, resume is not possible, so wiping the local file
                    # and performing entire redownload
                    self.report_unable_to_resume()
-                    resume_len = 0
-                    open_mode = 'wb'
-                break
+                    ctx.resume_len = 0
+                    ctx.open_mode = 'wb'
+                return
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
@@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
-                        data = self.ydl.urlopen(basic_request)
-                        content_length = data.info()['Content-Length']
+                        ctx.data = self.ydl.urlopen(basic_request)
+                        content_length = ctx.data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
-                                (resume_len - 100 < int(content_length) < resume_len + 100)):
+                                (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -102,152 +115,184 @@ class HttpFD(FileDownloader):
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
-                            self.report_file_already_downloaded(filename)
-                            self.try_rename(tmpfilename, filename)
+                            self.report_file_already_downloaded(ctx.filename)
+                            self.try_rename(ctx.tmpfilename, ctx.filename)
                            self._hook_progress({
-                                'filename': filename,
+                                'filename': ctx.filename,
                                'status': 'finished',
-                                'downloaded_bytes': resume_len,
-                                'total_bytes': resume_len,
+                                'downloaded_bytes': ctx.resume_len,
+                                'total_bytes': ctx.resume_len,
                            })
-                            return True
+                            raise SucceedDownload()
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
-                            resume_len = 0
-                            open_mode = 'wb'
-                            break
-            except socket.error as e:
-                if e.errno != errno.ECONNRESET:
+                            ctx.resume_len = 0
+                            ctx.open_mode = 'wb'
+                            return
+                raise RetryDownload(err)
+            except socket.error as err:
+                if err.errno != errno.ECONNRESET:
                    # Connection reset is no problem, just retry
                    raise
+                raise RetryDownload(err)

-            # Retry
-            count += 1
-            if count <= retries:
-                self.report_retry(count, retries)
+        def download():
+            data_len = ctx.data.info().get('Content-length', None)

-        if count > retries:
-            self.report_error('giving up after %s retries' % retries)
-            return False
+            # Range HTTP header may be ignored/unsupported by a webserver
+            # (e.g. extractor/scivee.py, extractor/bambuser.py).
+            # However, for a test we still would like to download just a piece of a file.
+            # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+            # block size when downloading a file.
+            if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+                data_len = self._TEST_FILE_SIZE

-        data_len = data.info().get('Content-length', None)
-
-        # Range HTTP header may be ignored/unsupported by a webserver
-        # (e.g. extractor/scivee.py, extractor/bambuser.py).
-        # However, for a test we still would like to download just a piece of a file.
-        # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
-        # block size when downloading a file.
-        if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
-            data_len = self._TEST_FILE_SIZE
-
-        if data_len is not None:
-            data_len = int(data_len) + resume_len
-            min_data_len = self.params.get('min_filesize')
-            max_data_len = self.params.get('max_filesize')
-            if min_data_len is not None and data_len < min_data_len:
-                self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
-                return False
-            if max_data_len is not None and data_len > max_data_len:
-                self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
-                return False
-
-        byte_counter = 0 + resume_len
-        block_size = self.params.get('buffersize', 1024)
-        start = time.time()
-
-        # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
-        now = None  # needed for slow_down() in the first loop run
-        before = start  # start measuring
-        while True:
-
-            # Download and write
-            data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
-            byte_counter += len(data_block)
-
-            # exit loop when download is finished
-            if len(data_block) == 0:
-                break
-
-            # Open destination file just in time
-            if stream is None:
-                try:
-                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
-                    assert stream is not None
-                    filename = self.undo_temp_name(tmpfilename)
-                    self.report_destination(filename)
-                except (OSError, IOError) as err:
-                    self.report_error('unable to open for writing: %s' % str(err))
+            if data_len is not None:
+                data_len = int(data_len) + ctx.resume_len
+                min_data_len = self.params.get('min_filesize')
+                max_data_len = self.params.get('max_filesize')
+                if min_data_len is not None and data_len < min_data_len:
+                    self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                    return False
+                if max_data_len is not None and data_len > max_data_len:
+                    self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                    return False

-                if self.params.get('xattr_set_filesize', False) and data_len is not None:
+            byte_counter = 0 + ctx.resume_len
+            block_size = self.params.get('buffersize', 1024)
+            start = time.time()
+
+            # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+            now = None  # needed for slow_down() in the first loop run
+            before = start  # start measuring
+
+            def retry(e):
+                if ctx.tmpfilename != '-':
+                    ctx.stream.close()
+                ctx.stream = None
+                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+                raise RetryDownload(e)
+
+            while True:
+                try:
+                    # Download and write
+                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+                # socket.timeout is a subclass of socket.error but may not have
+                # errno set
+                except socket.timeout as e:
+                    retry(e)
+                except socket.error as e:
+                    if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
+                        raise
+                    retry(e)
+
+                byte_counter += len(data_block)
+
+                # exit loop when download is finished
+                if len(data_block) == 0:
+                    break
+
+                # Open destination file just in time
+                if ctx.stream is None:
                    try:
-                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
-                    except (XAttrUnavailableError, XAttrMetadataError) as err:
-                        self.report_error('unable to set filesize xattr: %s' % str(err))
+                        ctx.stream, ctx.tmpfilename = sanitize_open(
+                            ctx.tmpfilename, ctx.open_mode)
+                        assert ctx.stream is not None
+                        ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+                        self.report_destination(ctx.filename)
+                    except (OSError, IOError) as err:
+                        self.report_error('unable to open for writing: %s' % str(err))
+                        return False

-            try:
-                stream.write(data_block)
-            except (IOError, OSError) as err:
+                    if self.params.get('xattr_set_filesize', False) and data_len is not None:
+                        try:
+                            write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+                        except (XAttrUnavailableError, XAttrMetadataError) as err:
+                            self.report_error('unable to set filesize xattr: %s' % str(err))
+
+                try:
+                    ctx.stream.write(data_block)
+                except (IOError, OSError) as err:
+                    self.to_stderr('\n')
+                    self.report_error('unable to write data: %s' % str(err))
+                    return False
+
+                # Apply rate limit
+                self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+                # end measuring of one loop run
+                now = time.time()
+                after = now
+
+                # Adjust block size
+                if not self.params.get('noresizebuffer', False):
+                    block_size = self.best_block_size(after - before, len(data_block))
+
+                before = after
+
+                # Progress message
+                speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+                if data_len is None:
+                    eta = None
+                else:
+                    eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+                self._hook_progress({
+                    'status': 'downloading',
+                    'downloaded_bytes': byte_counter,
+                    'total_bytes': data_len,
+                    'tmpfilename': ctx.tmpfilename,
+                    'filename': ctx.filename,
+                    'eta': eta,
+                    'speed': speed,
+                    'elapsed': now - start,
+                })
+
+                if is_test and byte_counter == data_len:
+                    break
+
+            if ctx.stream is None:
                self.to_stderr('\n')
-                self.report_error('unable to write data: %s' % str(err))
+                self.report_error('Did not get any data blocks')
                return False
+            if ctx.tmpfilename != '-':
+                ctx.stream.close()

-            # Apply rate limit
-            self.slow_down(start, now, byte_counter - resume_len)
+            if data_len is not None and byte_counter != data_len:
+                err = ContentTooShortError(byte_counter, int(data_len))
+                if count <= retries:
+                    retry(err)
+                raise err

-            # end measuring of one loop run
-            now = time.time()
-            after = now
+            self.try_rename(ctx.tmpfilename, ctx.filename)

-            # Adjust block size
-            if not self.params.get('noresizebuffer', False):
-                block_size = self.best_block_size(after - before, len(data_block))
-
-            before = after
-
-            # Progress message
-            speed = self.calc_speed(start, now, byte_counter - resume_len)
-            if data_len is None:
-                eta = None
-            else:
-                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+            # Update file modification time
+            if self.params.get('updatetime', True):
+                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))

            self._hook_progress({
-                'status': 'downloading',
                'downloaded_bytes': byte_counter,
-                'total_bytes': data_len,
-                'tmpfilename': tmpfilename,
-                'filename': filename,
-                'eta': eta,
-                'speed': speed,
-                'elapsed': now - start,
+                'total_bytes': byte_counter,
+                'filename': ctx.filename,
+                'status': 'finished',
+                'elapsed': time.time() - start,
            })

-            if is_test and byte_counter == data_len:
-                break
+            return True

-        if stream is None:
-            self.to_stderr('\n')
-            self.report_error('Did not get any data blocks')
-            return False
-        if tmpfilename != '-':
-            stream.close()
+        while count <= retries:
+            try:
+                establish_connection()
+                download()
+                return True
+            except RetryDownload as e:
+                count += 1
+                if count <= retries:
+                    self.report_retry(e.source_error, count, retries)
+                continue
+            except SucceedDownload:
+                return True

-        if data_len is not None and byte_counter != data_len:
-            raise ContentTooShortError(byte_counter, int(data_len))
-        self.try_rename(tmpfilename, filename)
-
-        # Update file modification time
-        if self.params.get('updatetime', True):
-            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
-
-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': byte_counter,
-            'filename': filename,
-            'status': 'finished',
-            'elapsed': time.time() - start,
-        })
-
-        return True
+        self.report_error('giving up after %s retries' % retries)
+        return False
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -29,7 +29,7 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pb][\da-z]{7}'
+    _ID_REGEX = r'[pbw][\da-z]{7}'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?bbc\.co\.uk/
@@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
        }, {
            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
            'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+            'only_matching': True,
        }]

    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
--- a/youtube_dl/extractor/charlierose.py
+++ b/youtube_dl/extractor/charlierose.py
@@ -5,7 +5,7 @@ from ..utils import remove_end


 class CharlieRoseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://charlierose.com/videos/27996',
        'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
    }, {
        'url': 'https://charlierose.com/videos/27996',
        'only_matching': True,
+    }, {
+        'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+        'only_matching': True,
    }]

    _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2184,6 +2184,12 @@ class InfoExtractor(object):
                    f = parse_content_type(source_attributes.get('type'))
                    is_plain_url, formats = _media_formats(src, media_type, f)
                    if is_plain_url:
+                        # res attribute is not standard but seen several times
+                        # in the wild
+                        f.update({
+                            'height': int_or_none(source_attributes.get('res')),
+                            'format_id': source_attributes.get('label'),
+                        })
                        f.update(formats[0])
                        media_info['formats'].append(f)
                    else:
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2871,12 +2871,6 @@ class GenericIE(InfoExtractor):
                    merged[k] = v
            return merged

-        # Looking for http://schema.org/VideoObject
-        json_ld = self._search_json_ld(
-            webpage, video_id, default={}, expected_type='VideoObject')
-        if json_ld.get('url'):
-            return merge_dicts(json_ld, info_dict)
-
        # Look for HTML5 media
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
        if entries:
@@ -2895,6 +2889,12 @@ class GenericIE(InfoExtractor):
                jwplayer_data, video_id, require_title=False, base_url=url)
            return merge_dicts(info, info_dict)

+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default={}, expected_type='VideoObject')
+        if json_ld.get('url'):
+            return merge_dicts(json_ld, info_dict)
+
        def check_video(vurl):
            if YoutubeIE.suitable(vurl):
                return True
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -4,6 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
    ExtractorError,
    int_or_none,
    lowercase_escape,
@@ -12,27 +13,53 @@ from ..utils import (


 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _VALID_URL = r'''(?x)
+                        https?://
+                            (?:
+                                (?:docs|drive)\.google\.com/
+                                (?:
+                                    (?:uc|open)\?.*?id=|
+                                    file/d/
+                                )|
+                                video\.google\.com/get_player\?.*?docid=
+                            )
+                            (?P<id>[a-zA-Z0-9_-]{28,})
+                    '''
    _TESTS = [{
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
+        'md5': '5c602afbbf2c1db91831f5d82f678554',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'duration': 45,
        }
+    }, {
+        # video can't be watched anonymously due to view count limit reached,
+        # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+        'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+        'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+        'info_dict': {
+            'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
+            'ext': 'mp4',
+            'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
+        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
-        'md5': 'c230c67252874fddd8170e3fd1a45886',
        'info_dict': {
            'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
            'ext': 'mp4',
            'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
            'duration': 189,
        },
-        'only_matching': True
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
    }]
    _FORMATS_EXT = {
        '5': 'flv',
@ -147,47 +174,84 @@ class GoogleDriveIE(InfoExtractor):
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id)

-        reason = self._search_regex(
-            r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
-        if reason:
-            raise ExtractorError(reason)
-
-        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+        title = self._search_regex(
+            r'"title"\s*,\s*"([^"]+)', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
        duration = int_or_none(self._search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
            default=None))
-        fmt_stream_map = self._search_regex(
-            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
-            'fmt stream map').split(',')
-        fmt_list = self._search_regex(
-            r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
-
-        resolutions = {}
-        for fmt in fmt_list:
-            mobj = re.search(
-                r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
-            if mobj:
-                resolutions[mobj.group('format_id')] = (
-                    int(mobj.group('width')), int(mobj.group('height')))

        formats = []
-        for fmt_stream in fmt_stream_map:
-            fmt_stream_split = fmt_stream.split('|')
-            if len(fmt_stream_split) < 2:
-                continue
-            format_id, format_url = fmt_stream_split[:2]
-            f = {
-                'url': lowercase_escape(format_url),
-                'format_id': format_id,
-                'ext': self._FORMATS_EXT[format_id],
-            }
-            resolution = resolutions.get(format_id)
-            if resolution:
-                f.update({
-                    'width': resolution[0],
-                    'height': resolution[1],
+        fmt_stream_map = self._search_regex(
+            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
+            'fmt stream map', default='').split(',')
+        fmt_list = self._search_regex(
+            r'"fmt_list"\s*,\s*"([^"]+)', webpage,
+            'fmt_list', default='').split(',')
+        if fmt_stream_map and fmt_list:
+            resolutions = {}
+            for fmt in fmt_list:
+                mobj = re.search(
+                    r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
+                if mobj:
+                    resolutions[mobj.group('format_id')] = (
+                        int(mobj.group('width')), int(mobj.group('height')))
+
+            for fmt_stream in fmt_stream_map:
+                fmt_stream_split = fmt_stream.split('|')
+                if len(fmt_stream_split) < 2:
+                    continue
+                format_id, format_url = fmt_stream_split[:2]
+                f = {
+                    'url': lowercase_escape(format_url),
+                    'format_id': format_id,
+                    'ext': self._FORMATS_EXT[format_id],
+                }
+                resolution = resolutions.get(format_id)
+                if resolution:
+                    f.update({
+                        'width': resolution[0],
+                        'height': resolution[1],
+                    })
+                formats.append(f)
+
+        source_url = update_url_query(
+            'https://drive.google.com/uc', {
+                'id': video_id,
+                'export': 'download',
+            })
+        urlh = self._request_webpage(
+            source_url, video_id, note='Requesting source file',
+            errnote='Unable to request source file', fatal=False)
+        if urlh:
+            def add_source_format(src_url):
+                formats.append({
+                    'url': src_url,
+                    'ext': determine_ext(title, 'mp4').lower(),
+                    'format_id': 'source',
+                    'quality': 1,
                })
-            formats.append(f)
+            if urlh.headers.get('Content-Disposition'):
+                add_source_format(source_url)
+            else:
+                confirmation_webpage = self._webpage_read_content(
+                    urlh, url, video_id, note='Downloading confirmation page',
+                    errnote='Unable to confirm download', fatal=False)
+                if confirmation_webpage:
+                    confirm = self._search_regex(
+                        r'confirm=([^&"\']+)', confirmation_webpage,
+                        'confirmation code', fatal=False)
+                    if confirm:
+                        add_source_format(update_url_query(source_url, {
+                            'confirm': confirm,
+                        }))
+
+        if not formats:
+            reason = self._search_regex(
+                r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+            if reason:
+                raise ExtractorError(reason, expected=True)
+
        self._sort_formats(formats)

        hl = self._search_regex(
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@ -92,7 +92,7 @@ class MixcloudIE(InfoExtractor):
                js = self._download_webpage(js_url, track_id, fatal=False)
                if js:
                    KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
-                    for key_name in ('value', 'key_value', 'key_value_two'):
+                    for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'):
                        key = self._search_regex(
                            KEY_RE_TEMPLATE % key_name, js, 'key',
                            default=None, group='key')
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')

        sources = self._parse_json(js_to_json(self._search_regex(
-            r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
+            r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
            webpage, 'sources', default='{}')), video_id)

        if not sources:
@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
        view_count = int_or_none(self._html_search_regex(
            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
        thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+            r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+            'thumbnail', fatal=False, group='url')

        return {
            'id': video_id,
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@ -59,6 +59,7 @@ class RadioCanadaIE(InfoExtractor):
            device_types.append('android')

        formats = []
+        error = None
        # TODO: extract f4m formats
        # f4m formats can be extracted using flashhd device_type but they produce unplayable file
        for device_type in device_types:
@ -84,8 +85,8 @@ class RadioCanadaIE(InfoExtractor):
            if not v_url:
                continue
            if v_url == 'null':
-                raise ExtractorError('%s said: %s' % (
-                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+                error = xpath_text(v_data, 'message')
+                continue
            ext = determine_ext(v_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
@ -129,6 +130,9 @@ class RadioCanadaIE(InfoExtractor):
                            formats.extend(self._extract_f4m_formats(
                                base_url + '/manifest.f4m', video_id,
                                f4m_id='hds', fatal=False))
+        if not formats and error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)

        subtitles = {}
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
        media_type = media['type']
        if 'Audio' in media_type:
            relinker_info = {
-                'formats': {
+                'formats': [{
                    'format_id': media.get('formatoAudio'),
                    'url': media['audioUrl'],
                    'ext': media.get('formatoAudio'),
-                }
+                }]
            }
        elif 'Video' in media_type:
            relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
--- a/youtube_dl/extractor/viidea.py
+++ b/youtube_dl/extractor/viidea.py
@ -4,12 +4,14 @@ import re

 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
+    compat_HTTPError,
    compat_str,
+    compat_urlparse,
 )
 from ..utils import (
-    parse_duration,
+    ExtractorError,
    js_to_json,
+    parse_duration,
    parse_iso8601,
 )

@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor):

        base_url = self._proto_relative_url(cfg['livepipe'], 'http:')

-        lecture_data = self._download_json(
-            '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
-            lecture_id)['lecture'][0]
+        try:
+            lecture_data = self._download_json(
+                '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
+                lecture_id)['lecture'][0]
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                msg = self._parse_json(
+                    e.cause.read().decode('utf-8'), lecture_id)
+                raise ExtractorError(msg['detail'], expected=True)
+            raise

        lecture_info = {
            'id': lecture_id,
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
    compat_chr,
+    compat_kwargs,
    compat_parse_qs,
    compat_urllib_parse_unquote,
    compat_urllib_parse_unquote_plus,
@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

        return True

+    def _download_webpage(self, *args, **kwargs):
+        # Override of the inherited downloader: unconditionally add
+        # disable_polymer=true to the request's query parameters, then
+        # delegate to the parent implementation with every other argument
+        # passed through unchanged.
+        # setdefault() reuses a caller-supplied 'query' dict when one is
+        # given, so that dict is modified in place.
+        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+        # NOTE(review): compat_kwargs presumably adapts keyword names for
+        # older Python versions -- confirm against ..compat.
+        return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+            *args, **compat_kwargs(kwargs))
+
    def _real_initialize(self):
        if self._downloader is None:
            return
@ -1003,6 +1009,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'Skipping DASH manifest',
            ],
        },
+        {
+            # The following content has been identified by the YouTube community
+            # as inappropriate or offensive to some audiences.
+            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+            'info_dict': {
+                'id': '6SJNVb0GnPI',
+                'ext': 'mp4',
+                'title': 'Race Differences in Intelligence',
+                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+                'duration': 965,
+                'upload_date': '20140124',
+                'uploader': 'New Century Foundation',
+                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+                'license': 'Standard YouTube License',
+                'view_count': int,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
        {
            # itag 212
            'url': '1t24XAntNCY',
@ -1437,9 +1464,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            if dash_mpd and dash_mpd[0] not in dash_mpds:
                dash_mpds.append(dash_mpd[0])

+        is_live = None
+        view_count = None
+
+        def extract_view_count(v_info):
+            # Read the first 'view_count' entry of a video info mapping and
+            # pass it through int_or_none. Used below on both the parsed
+            # get_video_info data and on video_info.
+            # NOTE(review): try_get presumably yields None when the key or
+            # index is missing, so this returns None instead of raising --
+            # confirm against ..utils.
+            return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
        # Get video info
        embed_webpage = None
-        is_live = None
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
@ -1509,6 +1541,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        continue
                    get_video_info = compat_parse_qs(video_info_webpage)
                    add_dash_mpd(get_video_info)
+                    if view_count is None:
+                        view_count = extract_view_count(get_video_info)
                    if not video_info:
                        video_info = get_video_info
                    if 'token' in get_video_info:
@ -1592,10 +1626,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                return self.playlist_result(entries, video_id, video_title, video_description)
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

-        if 'view_count' in video_info:
-            view_count = int(video_info['view_count'][0])
-        else:
-            view_count = None
+        if view_count is None:
+            view_count = extract_view_count(video_info)

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@ -1639,10 +1671,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        if not upload_date:
            upload_date = self._search_regex(
                [r'(?s)id="eow-date.*?>(.*?)</span>',
-                 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
                video_webpage, 'upload date', default=None)
-            if upload_date:
-                upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

        video_license = self._html_search_regex(
@ -2028,7 +2058,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2017.08.23'
+__version__ = '2017.09.02'