mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-03 20:05:35 +08:00
[generic] utf8 decode before re.match(), for Python 3
Otherwise we raise "TypeError: can't use a string pattern on a bytes-like object". This perhaps argues for putting it in is_html(), which already does this decoding. But of course plain whitespace isn't just HTML, so perhaps is_html() should be renamed? I don't know which is simpler. Let's start with this.
This commit is contained in:
parent
00bc75ca01
commit
a5d5a2c068
@ -1759,7 +1759,7 @@ class GenericIE(InfoExtractor):
|
|||||||
self._sort_formats(info_dict['formats'])
|
self._sort_formats(info_dict['formats'])
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
if re.match(r'^\s+$', first_bytes):
|
if re.match(r'^\s+$', first_bytes.decode('utf-8', 'replace')):
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'First block is just whitespace? Continuing...')
|
'First block is just whitespace? Continuing...')
|
||||||
elif not is_html(first_bytes):
|
elif not is_html(first_bytes):
|
||||||
|
Loading…
Reference in New Issue
Block a user