mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-08 18:22:52 +08:00
Fixed bug #15024 in extractor/twitter.py; see gist: https://gist.github.com/mwattsun/a5eeb63087c8281066242321f0511185
This commit is contained in:
parent
049c0486bb
commit
46b50f0784
@ -1,7 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
import string
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -400,6 +403,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
uploader = user.get('name')
|
uploader = user.get('name')
|
||||||
if uploader:
|
if uploader:
|
||||||
title = '%s - %s' % (uploader, title)
|
title = '%s - %s' % (uploader, title)
|
||||||
|
title = universal_filename(title)
|
||||||
uploader_id = user.get('screen_name')
|
uploader_id = user.get('screen_name')
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
@ -596,3 +600,75 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
|||||||
info['formats'] = self._extract_pscp_m3u8_formats(
|
info['formats'] = self._extract_pscp_m3u8_formats(
|
||||||
m3u8_url, broadcast_id, m3u8_id, state, width, height)
|
m3u8_url, broadcast_id, m3u8_id, state, width, height)
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
# Maximum number of characters kept from a sanitized title.
max_len = 140

# Number of random lowercase letters used for a fallback name.
_FALLBACK_NAME_LENGTH = 64

# Windows device names. They are reserved regardless of case, both on their
# own and when immediately followed by an extension (e.g. "NUL.txt").
_RESERVED_NAME_RE = re.compile(
    r'(?:COM[0-9]|LPT[0-9]|CLOCK\$|CON|PRN|AUX|NUL)(?:\.|$)', re.IGNORECASE)


def universal_filename(sutf8):
    """Turn a tweet-derived title into a short, ASCII-only, filename-safe string.

    Since Twitter raised the tweet length from 140 to 280 characters, titles
    built from the tweet text (emojis, many hashtags) can produce file names
    that are too long or contain characters the file system rejects.

    The input is processed as follows:

    - non-ASCII characters are dropped;
    - control characters (0x00-0x1F and 0x7F) are removed, which also joins
      multi-line text into a single line;
    - the characters ``/ < > : " | \\ ? *`` are removed;
    - runs of whitespace are collapsed to a single space;
    - leading and trailing spaces and dots are stripped;
    - the result is truncated to ``max_len`` characters.

    sutf8 -- the title as a text string (``str`` on Python 3, ``unicode`` on
             Python 2).

    Returns the sanitized string. When the input is not a text string, or
    nothing usable is left after sanitizing, or the result is a reserved
    Windows device name, a name made of 64 random lowercase ASCII letters is
    returned instead, so the result is never empty.
    """

    def make_filename():
        # Fallback used whenever no usable name can be derived from the input.
        return ''.join(
            random.choice(string.ascii_lowercase)
            for _ in range(_FALLBACK_NAME_LENGTH))

    # type(u'') is ``str`` on Python 3 and ``unicode`` on Python 2, so this
    # single check replaces a sys.version_info branch.
    if not isinstance(sutf8, type(u'')) or not sutf8:
        return make_filename()

    # 'ignore' drops every character outside ASCII; the remaining bytes are
    # pure ASCII, so decoding them back to text is lossless.
    try:
        udata = sutf8.encode('ascii', 'ignore').decode('ascii')
    except UnicodeError:
        return make_filename()

    # Control characters, including line breaks and DEL.
    udata = re.sub(r'[\x00-\x1f\x7f]', '', udata)
    # Characters that are illegal in file names on common file systems.
    udata = re.sub(r'[/<>:"|\\?*]', '', udata)
    udata = re.sub(r'\s{2,}', ' ', udata)
    # Strip only after the removals above: deleting a character can expose a
    # new leading or trailing space/dot.
    udata = udata.strip(' .')

    # Truncating can leave a trailing space or dot behind as well.
    udata = udata[:max_len].rstrip(' .')

    if not udata or _RESERVED_NAME_RE.match(udata):
        return make_filename()

    return udata
|
Loading…
Reference in New Issue
Block a user