Changed GenericIE so that all regex matches on a page are used to find video URLs

2025-02-03 20:43:24 +08:00 · 2012-02-11 16:17:23 +11:00 · 2012-02-11 16:17:23 +11:00 · 0269764b07
commit 0269764b07
parent 99d46e8c27
1 changed files with 34 additions and 32 deletions
--- a/66
+++ b/66
@@ -2170,27 +2170,15 @@ class GenericIE(InfoExtractor):
 		self.report_extraction(video_id)
 		# Start with something easy: JW Player in SWFObject
-		mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
+		matches = [mobj for mobj in re.finditer(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)]
-		if mobj is None:
+
 		if len(matches) == 0:
 			# Broaden the search a little bit
-			mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+			matches = [mobj for mobj in re.finditer(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)]
-		if mobj is None:
+		if len(matches) == 0:
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return
 		# It's possible that one of the regexes
 		# matched, but returned an empty group:
 		if mobj.group(1) is None:
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return
 		video_url = urllib.unquote(mobj.group(1))
 		video_id = os.path.basename(video_url)
 		# here's a fun little line of code for you:
 		video_extension = os.path.splitext(video_id)[1][1:]
 		video_id = os.path.splitext(video_id)[0]
 		# it's tempting to parse this further, but you would
 		# have to take into account all the variations like
 		#   Video Title - Site Name
@@ -2212,21 +2200,35 @@ class GenericIE(InfoExtractor):
 			return
 		video_uploader = mobj.group(1).decode('utf-8')
-		try:
+		for mobj in matches:
-			# Process video information
+			# It's possible that one of the regexes
-			self._downloader.process_info({
+			# matched, but returned an empty group:
-				'id':		video_id.decode('utf-8'),
+			if mobj.group(1) is None:
-				'url':		video_url.decode('utf-8'),
+				self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
-				'uploader':	video_uploader,
+				continue
-				'upload_date':	u'NA',
+
-				'title':	video_title,
+			video_url = urllib.unquote(mobj.group(1))
-				'stitle':	simple_title,
+			video_id = os.path.basename(video_url)
-				'ext':		video_extension.decode('utf-8'),
+
-				'format':	u'NA',
+			# here's a fun little line of code for you:
-				'player_url':	None,
+			video_extension = os.path.splitext(video_id)[1][1:]
-			})
+			video_id = os.path.splitext(video_id)[0]
-		except UnavailableVideoError, err:
+
-			self._downloader.trouble(u'\nERROR: unable to download video')
+			try:
 				# Process video information
 				self._downloader.process_info({
 					'id':		video_id.decode('utf-8'),
 					'url':		video_url.decode('utf-8'),
 					'uploader':	video_uploader,
 					'upload_date':	u'NA',
 					'title':	video_title,
 					'stitle':	simple_title,
 					'ext':		video_extension.decode('utf-8'),
 					'format':	u'NA',
 					'player_url':	None,
 				})
 			except UnavailableVideoError, err:
 				self._downloader.trouble(u'\nERROR: unable to download video')
 class YoutubeSearchIE(InfoExtractor):