mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-03 20:43:24 +08:00
Changed GenericIE so that all regex matches on a page are used to find video URLs
This commit is contained in:
parent
99d46e8c27
commit
0269764b07
66
youtube-dl
66
youtube-dl
@ -2170,27 +2170,15 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
matches = [mobj for mobj in re.finditer(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)]
|
||||||
if mobj is None:
|
|
||||||
|
if len(matches) == 0:
|
||||||
# Broaden the search a little bit
|
# Broaden the search a little bit
|
||||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
matches = [mobj for mobj in re.finditer(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)]
|
||||||
if mobj is None:
|
if len(matches) == 0:
|
||||||
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
||||||
return
|
return
|
||||||
|
|
||||||
# It's possible that one of the regexes
|
|
||||||
# matched, but returned an empty group:
|
|
||||||
if mobj.group(1) is None:
|
|
||||||
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
|
||||||
return
|
|
||||||
|
|
||||||
video_url = urllib.unquote(mobj.group(1))
|
|
||||||
video_id = os.path.basename(video_url)
|
|
||||||
|
|
||||||
# here's a fun little line of code for you:
|
|
||||||
video_extension = os.path.splitext(video_id)[1][1:]
|
|
||||||
video_id = os.path.splitext(video_id)[0]
|
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
# it's tempting to parse this further, but you would
|
||||||
# have to take into account all the variations like
|
# have to take into account all the variations like
|
||||||
# Video Title - Site Name
|
# Video Title - Site Name
|
||||||
@ -2212,21 +2200,35 @@ class GenericIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
video_uploader = mobj.group(1).decode('utf-8')
|
video_uploader = mobj.group(1).decode('utf-8')
|
||||||
|
|
||||||
try:
|
for mobj in matches:
|
||||||
# Process video information
|
# It's possible that one of the regexes
|
||||||
self._downloader.process_info({
|
# matched, but returned an empty group:
|
||||||
'id': video_id.decode('utf-8'),
|
if mobj.group(1) is None:
|
||||||
'url': video_url.decode('utf-8'),
|
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
||||||
'uploader': video_uploader,
|
continue
|
||||||
'upload_date': u'NA',
|
|
||||||
'title': video_title,
|
video_url = urllib.unquote(mobj.group(1))
|
||||||
'stitle': simple_title,
|
video_id = os.path.basename(video_url)
|
||||||
'ext': video_extension.decode('utf-8'),
|
|
||||||
'format': u'NA',
|
# here's a fun little line of code for you:
|
||||||
'player_url': None,
|
video_extension = os.path.splitext(video_id)[1][1:]
|
||||||
})
|
video_id = os.path.splitext(video_id)[0]
|
||||||
except UnavailableVideoError, err:
|
|
||||||
self._downloader.trouble(u'\nERROR: unable to download video')
|
try:
|
||||||
|
# Process video information
|
||||||
|
self._downloader.process_info({
|
||||||
|
'id': video_id.decode('utf-8'),
|
||||||
|
'url': video_url.decode('utf-8'),
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'upload_date': u'NA',
|
||||||
|
'title': video_title,
|
||||||
|
'stitle': simple_title,
|
||||||
|
'ext': video_extension.decode('utf-8'),
|
||||||
|
'format': u'NA',
|
||||||
|
'player_url': None,
|
||||||
|
})
|
||||||
|
except UnavailableVideoError, err:
|
||||||
|
self._downloader.trouble(u'\nERROR: unable to download video')
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchIE(InfoExtractor):
|
class YoutubeSearchIE(InfoExtractor):
|
||||||
|
Loading…
Reference in New Issue
Block a user