1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-10 05:47:24 +08:00

Fix ESPN extractor

Fixes the ESPN extractor to extract clips from the site.
This commit is contained in:
xela722 2019-08-17 16:01:22 -04:00
parent 5e1e3aa387
commit 2c75db360c

View File

@@ -51,20 +51,6 @@ class ESPNIE(OnceIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
'info_dict': {
'id': '18910086',
'ext': 'mp4',
'title': 'Kyrie spins around defender for two',
'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
'timestamp': 1489539155,
'upload_date': '20170315',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
'only_matching': True, 'only_matching': True,
@@ -103,27 +89,29 @@ class ESPNIE(OnceIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
clip = self._download_json( webpage = self._download_webpage(url, video_id)
'http://api-app.espn.com/v1/video/clips/%s' % video_id,
video_id)['videos'][0]
title = clip['headline'] data_id=self._search_regex(r'data-cerebro-id="(.*?)"',webpage,'data_id',group=1)
request_url = 'https://watch.auth.api.espn.com/video/auth/getclip/%s?apikey=5p8m6dw513q716wt2os04mec3' % data_id
clip = self._download_xml(
request_url,
video_id).findall('clip')[0]
title = clip.findall('headline')[0].text
format_urls = set() format_urls = set()
formats = [] formats = []
def traverse_source(source, base_source_id=None):
for source_id, source in source.items():
if source_id == 'alert':
continue
elif isinstance(source, compat_str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
source,
'%s-%s' % (base_source_id, source_id)
if base_source_id else source_id)
def traverse_source(source):
for element in source.iter():
if element.tag == 'transcodes':
continue
extract_source(element.text, element.tag)
def extract_source(source_url, source_id=None): def extract_source(source_url, source_id=None):
if source_url in format_urls: if source_url in format_urls:
return return
@@ -134,9 +122,6 @@ class ESPNIE(OnceIE):
elif ext == 'smil': elif ext == 'smil':
formats.extend(self._extract_smil_formats( formats.extend(self._extract_smil_formats(
source_url, video_id, fatal=False)) source_url, video_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, video_id, f4m_id=source_id, fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native', source_url, video_id, 'mp4', entry_protocol='m3u8_native',
@@ -157,21 +142,18 @@ class ESPNIE(OnceIE):
f['preference'] = 1 f['preference'] = 1
formats.append(f) formats.append(f)
links = clip.get('links', {}) links = clip.findall('transcodes')[0]
traverse_source(links.get('source', {})) traverse_source(links)
traverse_source(links.get('mobile', {}))
self._sort_formats(formats) self._sort_formats(formats)
description = clip.get('caption') or clip.get('description') description = clip.findall('caption')[0].text
thumbnail = clip.get('thumbnail') duration = int_or_none(clip.findall('durationSeconds')[0].text)
duration = int_or_none(clip.get('duration')) timestamp = unified_timestamp(clip.findall('publishDate')[0].text)
timestamp = unified_timestamp(clip.get('originalPublishDate'))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp, 'timestamp': timestamp,
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,