1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 13:05:34 +08:00

Regrouped most '_type: url' and '_type: playlist' results into a single multidimensional playlist (entries_for_batch) for batch processing of webpages

This commit is contained in:
Amram Oren Titane 2017-06-18 18:48:19 -04:00
parent 560050669b
commit 33f25ad94d

View File

@ -1827,11 +1827,13 @@ class GenericIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
entries_for_batch = [];
if url.startswith('//'): if url.startswith('//'):
return { entries_for_batch.append({
'_type': 'url', '_type': 'url',
'url': self.http_scheme() + url, 'url': self.http_scheme() + url,
} })
parsed_url = compat_urlparse.urlparse(url) parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme: if not parsed_url.scheme:
@ -1842,7 +1844,7 @@ class GenericIE(InfoExtractor):
if default_search in ('auto', 'auto_warning', 'fixup_error'): if default_search in ('auto', 'auto_warning', 'fixup_error'):
if '/' in url: if '/' in url:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url) entries_for_batch.append(self.url_result('http://' + url))
elif default_search != 'fixup_error': elif default_search != 'fixup_error':
if default_search == 'auto_warning': if default_search == 'auto_warning':
if re.match(r'^(?:url|URL)$', url): if re.match(r'^(?:url|URL)$', url):
@ -1852,7 +1854,7 @@ class GenericIE(InfoExtractor):
else: else:
self._downloader.report_warning( self._downloader.report_warning(
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url) entries_for_batch.append(self.url_result('ytsearch:' + url))
if default_search in ('error', 'fixup_error'): if default_search in ('error', 'fixup_error'):
raise ExtractorError( raise ExtractorError(
@ -1862,7 +1864,7 @@ class GenericIE(InfoExtractor):
else: else:
if ':' not in default_search: if ':' not in default_search:
default_search += ':' default_search += ':'
return self.url_result(default_search + url) entries_for_batch.append(self.url_result(default_search + url))
url, smuggled_data = unsmuggle_url(url) url, smuggled_data = unsmuggle_url(url)
force_videoid = None force_videoid = None
@ -1889,7 +1891,7 @@ class GenericIE(InfoExtractor):
if force_videoid: if force_videoid:
new_url = smuggle_url( new_url = smuggle_url(
new_url, {'force_videoid': force_videoid}) new_url, {'force_videoid': force_videoid})
return self.url_result(new_url) entries_for_batch.append(self.url_result(new_url))
full_response = None full_response = None
if head_response is False: if head_response is False:
@ -1970,7 +1972,7 @@ class GenericIE(InfoExtractor):
try: try:
doc = compat_etree_fromstring(webpage.encode('utf-8')) doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss': if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc) entries_for_batch.append(self._extract_rss(url, video_id, doc))
elif doc.tag == 'SmoothStreamingMedia': elif doc.tag == 'SmoothStreamingMedia':
info_dict['formats'] = self._parse_ism_formats(doc, url) info_dict['formats'] = self._parse_ism_formats(doc, url)
self._sort_formats(info_dict['formats']) self._sort_formats(info_dict['formats'])
@ -1980,7 +1982,7 @@ class GenericIE(InfoExtractor):
self._sort_formats(smil['formats']) self._sort_formats(smil['formats'])
return smil return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist': elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id) entries_for_batch.append(self.playlist_result(self._parse_xspf(doc, video_id), video_id))
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats( info_dict['formats'] = self._parse_mpd_formats(
doc, video_id, doc, video_id,
@ -1998,7 +2000,7 @@ class GenericIE(InfoExtractor):
# Is it a Camtasia project? # Is it a Camtasia project?
camtasia_res = self._extract_camtasia(url, video_id, webpage) camtasia_res = self._extract_camtasia(url, video_id, webpage)
if camtasia_res is not None: if camtasia_res is not None:
return camtasia_res entries_for_batch.append(camtasia_res)
# Sometimes embedded video player is hidden behind percent encoding # Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/rg3/youtube-dl/issues/2448) # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
@ -2042,44 +2044,44 @@ class GenericIE(InfoExtractor):
'ie_key': 'BrightcoveLegacy' 'ie_key': 'BrightcoveLegacy'
} for bc_url in bc_urls] } for bc_url in bc_urls]
return { entries_for_batch.append({
'_type': 'playlist', '_type': 'playlist',
'title': video_title, 'title': video_title,
'id': video_id, 'id': video_id,
'entries': entries, 'entries': entries,
} })
# Look for Brightcove New Studio embeds # Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(self, webpage) bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
if bc_urls: if bc_urls:
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') entries_for_batch.append(self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew'))
# Look for ThePlatform embeds # Look for ThePlatform embeds
tp_urls = ThePlatformIE._extract_urls(webpage) tp_urls = ThePlatformIE._extract_urls(webpage)
if tp_urls: if tp_urls:
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') entries_for_batch.append(self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform'))
# Look for Vessel embeds # Look for Vessel embeds
vessel_urls = VesselIE._extract_urls(webpage) vessel_urls = VesselIE._extract_urls(webpage)
if vessel_urls: if vessel_urls:
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key()))
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
webpage) webpage)
if matches: if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl') entries_for_batch.append(self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl'))
vimeo_urls = VimeoIE._extract_urls(url, webpage) vimeo_urls = VimeoIE._extract_urls(url, webpage)
if vimeo_urls: if vimeo_urls:
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()))
vid_me_embed_url = self._search_regex( vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
webpage, 'vid.me embed', default=None) webpage, 'vid.me embed', default=None)
if vid_me_embed_url is not None: if vid_me_embed_url is not None:
return self.url_result(vid_me_embed_url, 'Vidme') entries_for_batch.append(self.url_result(vid_me_embed_url, 'Vidme'))
# Look for embedded YouTube player # Look for embedded YouTube player
matches = re.findall(r'''(?x) matches = re.findall(r'''(?x)
@ -2096,6 +2098,7 @@ class GenericIE(InfoExtractor):
(?:embed|v|p)/.+?) (?:embed|v|p)/.+?)
\1''', webpage) \1''', webpage)
if matches: if matches:
# NOTE: this return can't be removed; removing it causes errors
return self.playlist_from_matches( return self.playlist_from_matches(
matches, video_id, video_title, lambda m: unescapeHTML(m[1])) matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
@ -2103,18 +2106,18 @@ class GenericIE(InfoExtractor):
matches = re.findall( matches = re.findall(
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage) r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
if matches: if matches:
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)) entries_for_batch.append(self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)))
# Look for Wordpress "YouTube Video Importer" plugin # Look for Wordpress "YouTube Video Importer" plugin
matches = re.findall(r'''(?x)<div[^>]+ matches = re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches: if matches:
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]) entries_for_batch.append(self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]))
matches = DailymotionIE._extract_urls(webpage) matches = DailymotionIE._extract_urls(webpage)
if matches: if matches:
return self.playlist_from_matches(matches, video_id, video_title) entries_for_batch.append(self.playlist_from_matches(matches, video_id, video_title))
# Look for embedded Dailymotion playlist player (#3822) # Look for embedded Dailymotion playlist player (#3822)
m = re.search( m = re.search(
@ -2123,56 +2126,56 @@ class GenericIE(InfoExtractor):
playlists = re.findall( playlists = re.findall(
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url'))) r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
if playlists: if playlists:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p))
# Look for embedded Wistia player # Look for embedded Wistia player
wistia_url = WistiaIE._extract_url(webpage) wistia_url = WistiaIE._extract_url(webpage)
if wistia_url: if wistia_url:
return { entries_for_batch.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': self._proto_relative_url(wistia_url), 'url': self._proto_relative_url(wistia_url),
'ie_key': WistiaIE.ie_key(), 'ie_key': WistiaIE.ie_key(),
'uploader': video_uploader, 'uploader': video_uploader,
} })
# Look for SVT player # Look for SVT player
svt_url = SVTIE._extract_url(webpage) svt_url = SVTIE._extract_url(webpage)
if svt_url: if svt_url:
return self.url_result(svt_url, 'SVT') entries_for_batch.append(self.url_result(svt_url, 'SVT'))
# Look for Bandcamp pages with custom domain # Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None: if mobj is not None:
burl = unescapeHTML(mobj.group(1)) burl = unescapeHTML(mobj.group(1))
# Don't set the extractor because it can be a track url or an album # Don't set the extractor because it can be a track url or an album
return self.url_result(burl) entries_for_batch.append(self.url_result(burl))
# Look for embedded Vevo player # Look for embedded Vevo player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for embedded Viddler player # Look for embedded Viddler player
mobj = re.search( mobj = re.search(
r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1', r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for NYTimes player # Look for NYTimes player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>', r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for Libsyn player # Look for Libsyn player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage) r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for Ooyala videos # Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
@ -2194,19 +2197,19 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
embeds = self._parse_json(mobj.group(1), video_id, fatal=False) embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds: if embeds:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
embeds, video_id, video_title, embeds, video_id, video_title,
getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala') getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala'))
# Look for Aparat videos # Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group(1), 'Aparat') entries_for_batch.append(self.url_result(mobj.group(1), 'Aparat'))
# Look for MPORA videos # Look for MPORA videos
mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage) mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora') entries_for_batch.append(self.url_result(mobj.group(1), 'Mpora'))
# Look for embedded NovaMov-based player # Look for embedded NovaMov-based player
mobj = re.search( mobj = re.search(
@ -2219,155 +2222,155 @@ class GenericIE(InfoExtractor):
divxstage\.(?:eu|net|ch|co|at|ag)) divxstage\.(?:eu|net|ch|co|at|ag))
/embed\.php.+?)\1''', webpage) /embed\.php.+?)\1''', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for embedded Facebook player # Look for embedded Facebook player
facebook_url = FacebookIE._extract_url(webpage) facebook_url = FacebookIE._extract_url(webpage)
if facebook_url is not None: if facebook_url is not None:
return self.url_result(facebook_url, 'Facebook') entries_for_batch.append(self.url_result(facebook_url, 'Facebook'))
# Look for embedded VK player # Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'VK') entries_for_batch.append(self.url_result(mobj.group('url'), 'VK'))
# Look for embedded Odnoklassniki player # Look for embedded Odnoklassniki player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Odnoklassniki') entries_for_batch.append(self.url_result(mobj.group('url'), 'Odnoklassniki'))
# Look for embedded ivi player # Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Ivi') entries_for_batch.append(self.url_result(mobj.group('url'), 'Ivi'))
# Look for embedded Huffington Post player # Look for embedded Huffington Post player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'HuffPost') entries_for_batch.append(self.url_result(mobj.group('url'), 'HuffPost'))
# Look for embed.ly # Look for embed.ly
mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage) mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage) mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(compat_urllib_parse_unquote(mobj.group('url'))) entries_for_batch.append(self.url_result(compat_urllib_parse_unquote(mobj.group('url'))))
# Look for funnyordie embed # Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
if matches: if matches:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie') matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie'))
# Look for BBC iPlayer embed # Look for BBC iPlayer embed
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage) matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
if matches: if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk') entries_for_batch.append(self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk'))
# Look for embedded RUTV player # Look for embedded RUTV player
rutv_url = RUTVIE._extract_url(webpage) rutv_url = RUTVIE._extract_url(webpage)
if rutv_url: if rutv_url:
return self.url_result(rutv_url, 'RUTV') entries_for_batch.append(self.url_result(rutv_url, 'RUTV'))
# Look for embedded TVC player # Look for embedded TVC player
tvc_url = TVCIE._extract_url(webpage) tvc_url = TVCIE._extract_url(webpage)
if tvc_url: if tvc_url:
return self.url_result(tvc_url, 'TVC') entries_for_batch.append(self.url_result(tvc_url, 'TVC'))
# Look for embedded SportBox player # Look for embedded SportBox player
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
if sportbox_urls: if sportbox_urls:
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') entries_for_batch.append(self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed'))
# Look for embedded XHamster player # Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls: if xhamster_urls:
return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed') entries_for_batch.append(self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed'))
# Look for embedded TNAFlixNetwork player # Look for embedded TNAFlixNetwork player
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage) tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
if tnaflix_urls: if tnaflix_urls:
return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key()))
# Look for embedded PornHub player # Look for embedded PornHub player
pornhub_urls = PornHubIE._extract_urls(webpage) pornhub_urls = PornHubIE._extract_urls(webpage)
if pornhub_urls: if pornhub_urls:
return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key()))
# Look for embedded DrTuber player # Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage) drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls: if drtuber_urls:
return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key()))
# Look for embedded RedTube player # Look for embedded RedTube player
redtube_urls = RedTubeIE._extract_urls(webpage) redtube_urls = RedTubeIE._extract_urls(webpage)
if redtube_urls: if redtube_urls:
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key()))
# Look for embedded Tvigle player # Look for embedded Tvigle player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Tvigle') entries_for_batch.append(self.url_result(mobj.group('url'), 'Tvigle'))
# Look for embedded TED player # Look for embedded TED player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'TED') entries_for_batch.append(self.url_result(mobj.group('url'), 'TED'))
# Look for embedded Ustream videos # Look for embedded Ustream videos
ustream_url = UstreamIE._extract_url(webpage) ustream_url = UstreamIE._extract_url(webpage)
if ustream_url: if ustream_url:
return self.url_result(ustream_url, UstreamIE.ie_key()) entries_for_batch.append(self.url_result(ustream_url, UstreamIE.ie_key()))
# Look for embedded arte.tv player # Look for embedded arte.tv player
mobj = re.search( mobj = re.search(
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"', r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed') entries_for_batch.append(self.url_result(mobj.group('url'), 'ArteTVEmbed'))
# Look for embedded francetv player # Look for embedded francetv player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for embedded smotri.com player # Look for embedded smotri.com player
smotri_url = SmotriIE._extract_url(webpage) smotri_url = SmotriIE._extract_url(webpage)
if smotri_url: if smotri_url:
return self.url_result(smotri_url, 'Smotri') entries_for_batch.append(self.url_result(smotri_url, 'Smotri'))
# Look for embedded Myvi.ru player # Look for embedded Myvi.ru player
myvi_url = MyviIE._extract_url(webpage) myvi_url = MyviIE._extract_url(webpage)
if myvi_url: if myvi_url:
return self.url_result(myvi_url) entries_for_batch.append(self.url_result(myvi_url))
# Look for embedded soundcloud player # Look for embedded soundcloud player
soundcloud_urls = SoundcloudIE._extract_urls(webpage) soundcloud_urls = SoundcloudIE._extract_urls(webpage)
if soundcloud_urls: if soundcloud_urls:
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()))
# Look for tunein player # Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage) tunein_urls = TuneInBaseIE._extract_urls(webpage)
if tunein_urls: if tunein_urls:
return self.playlist_from_matches(tunein_urls, video_id, video_title) entries_for_batch.append(self.playlist_from_matches(tunein_urls, video_id, video_title))
# Look for embedded mtvservices player # Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
if mtvservices_url: if mtvservices_url:
return self.url_result(mtvservices_url, ie='MTVServicesEmbedded') entries_for_batch.append(self.url_result(mtvservices_url, ie='MTVServicesEmbedded'))
# Look for embedded yahoo player # Look for embedded yahoo player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1', r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Yahoo') entries_for_batch.append(self.url_result(mobj.group('url'), 'Yahoo'))
# Look for embedded sbs.com.au player # Look for embedded sbs.com.au player
mobj = re.search( mobj = re.search(
@ -2379,14 +2382,14 @@ class GenericIE(InfoExtractor):
(["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''', (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS') entries_for_batch.append(self.url_result(mobj.group('url'), 'SBS'))
# Look for embedded Cinchcast player # Look for embedded Cinchcast player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Cinchcast') entries_for_batch.append(self.url_result(mobj.group('url'), 'Cinchcast'))
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
@ -2396,174 +2399,174 @@ class GenericIE(InfoExtractor):
r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)', r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'MLB') entries_for_batch.append(self.url_result(mobj.group('url'), 'MLB'))
mobj = re.search( mobj = re.search(
r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') entries_for_batch.append(self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast'))
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"', r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream') entries_for_batch.append(self.url_result(mobj.group('url'), 'Livestream'))
# Look for Zapiks embed # Look for Zapiks embed
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Zapiks') entries_for_batch.append(self.url_result(mobj.group('url'), 'Zapiks'))
# Look for Kaltura embeds # Look for Kaltura embeds
kaltura_url = KalturaIE._extract_url(webpage) kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url: if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) entries_for_batch.append(self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()))
# Look for Eagle.Platform embeds # Look for Eagle.Platform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage) eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url: if eagleplatform_url:
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) entries_for_batch.append(self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()))
# Look for ClipYou (uses Eagle.Platform) embeds # Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage) r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
if mobj is not None: if mobj is not None:
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform') entries_for_batch.append(self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform'))
# Look for Pladform embeds # Look for Pladform embeds
pladform_url = PladformIE._extract_url(webpage) pladform_url = PladformIE._extract_url(webpage)
if pladform_url: if pladform_url:
return self.url_result(pladform_url) entries_for_batch.append(self.url_result(pladform_url))
# Look for Videomore embeds # Look for Videomore embeds
videomore_url = VideomoreIE._extract_url(webpage) videomore_url = VideomoreIE._extract_url(webpage)
if videomore_url: if videomore_url:
return self.url_result(videomore_url) entries_for_batch.append(self.url_result(videomore_url))
# Look for Webcaster embeds # Look for Webcaster embeds
webcaster_url = WebcasterFeedIE._extract_url(self, webpage) webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
if webcaster_url: if webcaster_url:
return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key()) entries_for_batch.append(self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key()))
# Look for Playwire embeds # Look for Playwire embeds
mobj = re.search( mobj = re.search(
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for 5min embeds # Look for 5min embeds
mobj = re.search( mobj = re.search(
r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage) r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
if mobj is not None: if mobj is not None:
return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') entries_for_batch.append(self.url_result('5min:%s' % mobj.group('id'), 'FiveMin'))
# Look for Crooks and Liars embeds # Look for Crooks and Liars embeds
mobj = re.search( mobj = re.search(
r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage) r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url')) entries_for_batch.append(self.url_result(mobj.group('url')))
# Look for NBC Sports VPlayer embeds # Look for NBC Sports VPlayer embeds
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
if nbc_sports_url: if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') entries_for_batch.append(self.url_result(nbc_sports_url, 'NBCSportsVPlayer'))
# Look for NBC News embeds # Look for NBC News embeds
nbc_news_embed_url = re.search( nbc_news_embed_url = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage) r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
if nbc_news_embed_url: if nbc_news_embed_url:
return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews') entries_for_batch.append(self.url_result(nbc_news_embed_url.group('url'), 'NBCNews'))
# Look for Google Drive embeds # Look for Google Drive embeds
google_drive_url = GoogleDriveIE._extract_url(webpage) google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url: if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive') entries_for_batch.append(self.url_result(google_drive_url, 'GoogleDrive'))
# Look for UDN embeds # Look for UDN embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None: if mobj is not None:
return self.url_result( entries_for_batch.append(self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed'))
# Look for Senate ISVP iframe # Look for Senate ISVP iframe
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
if senate_isvp_url: if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP') entries_for_batch.append(self.url_result(senate_isvp_url, 'SenateISVP'))
# Look for Dailymotion Cloud videos # Look for Dailymotion Cloud videos
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
if dmcloud_url: if dmcloud_url:
return self.url_result(dmcloud_url, 'DailymotionCloud') entries_for_batch.append(self.url_result(dmcloud_url, 'DailymotionCloud'))
# Look for OnionStudios embeds # Look for OnionStudios embeds
onionstudios_url = OnionStudiosIE._extract_url(webpage) onionstudios_url = OnionStudiosIE._extract_url(webpage)
if onionstudios_url: if onionstudios_url:
return self.url_result(onionstudios_url) entries_for_batch.append(self.url_result(onionstudios_url))
# Look for ViewLift embeds # Look for ViewLift embeds
viewlift_url = ViewLiftEmbedIE._extract_url(webpage) viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if viewlift_url: if viewlift_url:
return self.url_result(viewlift_url) entries_for_batch.append(self.url_result(viewlift_url))
# Look for JWPlatform embeds # Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage) jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url: if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform') entries_for_batch.append(self.url_result(jwplatform_url, 'JWPlatform'))
# Look for Digiteka embeds # Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage) digiteka_url = DigitekaIE._extract_url(webpage)
if digiteka_url: if digiteka_url:
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key()) entries_for_batch.append(self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key()))
# Look for Arkena embeds # Look for Arkena embeds
arkena_url = ArkenaIE._extract_url(webpage) arkena_url = ArkenaIE._extract_url(webpage)
if arkena_url: if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key()) entries_for_batch.append(self.url_result(arkena_url, ArkenaIE.ie_key()))
# Look for Piksel embeds # Look for Piksel embeds
piksel_url = PikselIE._extract_url(webpage) piksel_url = PikselIE._extract_url(webpage)
if piksel_url: if piksel_url:
return self.url_result(piksel_url, PikselIE.ie_key()) entries_for_batch.append(self.url_result(piksel_url, PikselIE.ie_key()))
# Look for Limelight embeds # Look for Limelight embeds
limelight_urls = LimelightBaseIE._extract_urls(webpage, url) limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
if limelight_urls: if limelight_urls:
return self.playlist_result( entries_for_batch.append(self.playlist_result(
limelight_urls, video_id, video_title, video_description) limelight_urls, video_id, video_title, video_description))
# Look for Anvato embeds # Look for Anvato embeds
anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id) anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
if anvato_urls: if anvato_urls:
return self.playlist_result( entries_for_batch.append(self.playlist_result(
anvato_urls, video_id, video_title, video_description) anvato_urls, video_id, video_title, video_description))
# Look for AdobeTVVideo embeds # Look for AdobeTVVideo embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result( entries_for_batch.append(self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group(1))), self._proto_relative_url(unescapeHTML(mobj.group(1))),
'AdobeTVVideo') 'AdobeTVVideo'))
# Look for Vine embeds # Look for Vine embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))', r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result( entries_for_batch.append(self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine') self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine'))
# Look for VODPlatform embeds # Look for VODPlatform embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1', r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result( entries_for_batch.append(self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform'))
# Look for Mangomolo embeds # Look for Mangomolo embeds
mobj = re.search( mobj = re.search(
@ -2597,18 +2600,18 @@ class GenericIE(InfoExtractor):
# Look for Instagram embeds # Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage) instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if instagram_embed_url is not None: if instagram_embed_url is not None:
return self.url_result( entries_for_batch.append(self.url_result(
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()))
# Look for LiveLeak embeds # Look for LiveLeak embeds
liveleak_url = LiveLeakIE._extract_url(webpage) liveleak_url = LiveLeakIE._extract_url(webpage)
if liveleak_url: if liveleak_url:
return self.url_result(liveleak_url, 'LiveLeak') entries_for_batch.append(self.url_result(liveleak_url, 'LiveLeak'))
# Look for 3Q SDN embeds # Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage) threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url: if threeqsdn_url:
return { entries_for_batch.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': ThreeQSDNIE.ie_key(), 'ie_key': ThreeQSDNIE.ie_key(),
'url': self._proto_relative_url(threeqsdn_url), 'url': self._proto_relative_url(threeqsdn_url),
@ -2616,58 +2619,58 @@ class GenericIE(InfoExtractor):
'description': video_description, 'description': video_description,
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'uploader': video_uploader, 'uploader': video_uploader,
} })
# Look for VBOX7 embeds # Look for VBOX7 embeds
vbox7_url = Vbox7IE._extract_url(webpage) vbox7_url = Vbox7IE._extract_url(webpage)
if vbox7_url: if vbox7_url:
return self.url_result(vbox7_url, Vbox7IE.ie_key()) entries_for_batch.append(self.url_result(vbox7_url, Vbox7IE.ie_key()))
# Look for DBTV embeds # Look for DBTV embeds
dbtv_urls = DBTVIE._extract_urls(webpage) dbtv_urls = DBTVIE._extract_urls(webpage)
if dbtv_urls: if dbtv_urls:
return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key()))
# Look for Videa embeds # Look for Videa embeds
videa_urls = VideaIE._extract_urls(webpage) videa_urls = VideaIE._extract_urls(webpage)
if videa_urls: if videa_urls:
return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key()) entries_for_batch.append(self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key()))
# Look for 20 minuten embeds # Look for 20 minuten embeds
twentymin_urls = TwentyMinutenIE._extract_urls(webpage) twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
if twentymin_urls: if twentymin_urls:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()))
# Look for Openload embeds # Look for Openload embeds
openload_urls = OpenloadIE._extract_urls(webpage) openload_urls = OpenloadIE._extract_urls(webpage)
if openload_urls: if openload_urls:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()))
# Look for VideoPress embeds # Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage) videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls: if videopress_urls:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()))
# Look for Rutube embeds # Look for Rutube embeds
rutube_urls = RutubeIE._extract_urls(webpage) rutube_urls = RutubeIE._extract_urls(webpage)
if rutube_urls: if rutube_urls:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
rutube_urls, ie=RutubeIE.ie_key()) rutube_urls, ie=RutubeIE.ie_key()))
# Look for WashingtonPost embeds # Look for WashingtonPost embeds
wapo_urls = WashingtonPostIE._extract_urls(webpage) wapo_urls = WashingtonPostIE._extract_urls(webpage)
if wapo_urls: if wapo_urls:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()))
# Look for Mediaset embeds # Look for Mediaset embeds
mediaset_urls = MediasetIE._extract_urls(webpage) mediaset_urls = MediasetIE._extract_urls(webpage)
if mediaset_urls: if mediaset_urls:
return self.playlist_from_matches( entries_for_batch.append(self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()))
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
@ -2691,7 +2694,7 @@ class GenericIE(InfoExtractor):
'title': video_title, 'title': video_title,
}) })
self._sort_formats(entry['formats']) self._sort_formats(entry['formats'])
return self.playlist_result(entries) entries_for_batch.append(self.playlist_result(entries))
jwplayer_data = self._find_jwplayer_data( jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json) webpage, video_id, transform_source=js_to_json)
@ -2777,10 +2780,10 @@ class GenericIE(InfoExtractor):
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url: if new_url != url:
self.report_following_redirect(new_url) self.report_following_redirect(new_url)
return { entries_for_batch.append({
'_type': 'url', '_type': 'url',
'url': new_url, 'url': new_url,
} })
else: else:
found = None found = None
@ -2790,7 +2793,7 @@ class GenericIE(InfoExtractor):
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser) # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
embed_url = self._html_search_meta('twitter:player', webpage, default=None) embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url: if embed_url:
return self.url_result(embed_url) entries_for_batch.append(self.url_result(embed_url))
if not found: if not found:
raise UnsupportedError(url) raise UnsupportedError(url)
@ -2830,7 +2833,7 @@ class GenericIE(InfoExtractor):
if ext == 'smil': if ext == 'smil':
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id) entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
elif ext == 'xspf': elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) entries_for_batch.append(self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id))
elif ext == 'm3u8': elif ext == 'm3u8':
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4') entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
elif ext == 'mpd': elif ext == 'mpd':
@ -2861,13 +2864,18 @@ class GenericIE(InfoExtractor):
entries.append(entry_info_dict) entries.append(entry_info_dict)
if len(entries) == 1: if len(entries) == 1:
return entries[0] entries_for_batch.append(entries[0])
else: else:
for num, e in enumerate(entries, start=1): for num, e in enumerate(entries, start=1):
# 'url' results don't have a title # 'url' results don't have a title
if e.get('title') is not None: if e.get('title') is not None:
e['title'] = '%s (%d)' % (e['title'], num) e['title'] = '%s (%d)' % (e['title'], num)
return { entries_for_batch.append({
'_type': 'playlist', '_type': 'playlist',
'entries': entries, 'entries': entries,
})
return {
'_type': 'playlist',
'entries': entries_for_batch,
} }