From 39f03c440943e85698ad3c21995cea3876771fe9 Mon Sep 17 00:00:00 2001 From: xela722 Date: Sat, 17 Aug 2019 15:55:33 -0400 Subject: [PATCH 1/4] Fix ESPN Extractor Reworked the extractor that would previously give bad request errors. --- youtube_dl/extractor/espn.py | 62 +++++++++++++----------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 6cf05e6da..e703a28d9 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -51,20 +51,6 @@ class ESPNIE(OnceIE): 'params': { 'skip_download': True, }, - }, { - 'url': 'https://broadband.espn.go.com/video/clip?id=18910086', - 'info_dict': { - 'id': '18910086', - 'ext': 'mp4', - 'title': 'Kyrie spins around defender for two', - 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', - 'timestamp': 1489539155, - 'upload_date': '20170315', - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', 'only_matching': True, @@ -103,27 +89,29 @@ class ESPNIE(OnceIE): def _real_extract(self, url): video_id = self._match_id(url) - clip = self._download_json( - 'http://api-app.espn.com/v1/video/clips/%s' % video_id, - video_id)['videos'][0] + webpage = self._download_webpage(url, video_id) - title = clip['headline'] + data_id=self._search_regex(r'data-cerebro-id="(.*?)"',webpage,'data_id',group=1) + + request_url = 'https://watch.auth.api.espn.com/video/auth/getclip/%s?apikey=5p8m6dw513q716wt2os04mec3' % data_id + + clip = self._download_xml( + request_url, + video_id).findall('clip')[0] + + + title = clip.findall('headline')[0].text format_urls = set() formats = [] - def traverse_source(source, base_source_id=None): - for source_id, source in source.items(): - if source_id == 'alert': + + def traverse_source(source): + for element in source.iter(): + if element.tag == 'transcodes': continue - elif 
isinstance(source, compat_str): - extract_source(source, base_source_id) - elif isinstance(source, dict): - traverse_source( - source, - '%s-%s' % (base_source_id, source_id) - if base_source_id else source_id) - + + extract_source(element.text, element.tag) def extract_source(source_url, source_id=None): if source_url in format_urls: return @@ -134,9 +122,6 @@ class ESPNIE(OnceIE): elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, video_id, f4m_id=source_id, fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', @@ -157,21 +142,18 @@ class ESPNIE(OnceIE): f['preference'] = 1 formats.append(f) - links = clip.get('links', {}) - traverse_source(links.get('source', {})) - traverse_source(links.get('mobile', {})) + links = clip.findall('transcodes')[0] + traverse_source(links) self._sort_formats(formats) - description = clip.get('caption') or clip.get('description') - thumbnail = clip.get('thumbnail') - duration = int_or_none(clip.get('duration')) - timestamp = unified_timestamp(clip.get('originalPublishDate')) + description = clip.findall('caption')[0].text + duration = int_or_none(clip.findall('durationSeconds')[0].text) + timestamp = unified_timestamp(clip.findall('publishDate')[0].text) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, 'formats': formats, From 5e1e3aa387e0bb98193692f21881342becaefdb0 Mon Sep 17 00:00:00 2001 From: xela722 Date: Sat, 17 Aug 2019 15:58:06 -0400 Subject: [PATCH 2/4] Revert "Fix ESPN Extractor" This reverts commit 39f03c440943e85698ad3c21995cea3876771fe9. 
--- youtube_dl/extractor/espn.py | 62 +++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e703a28d9..6cf05e6da 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -51,6 +51,20 @@ class ESPNIE(OnceIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://broadband.espn.go.com/video/clip?id=18910086', + 'info_dict': { + 'id': '18910086', + 'ext': 'mp4', + 'title': 'Kyrie spins around defender for two', + 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', + 'timestamp': 1489539155, + 'upload_date': '20170315', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', 'only_matching': True, @@ -89,29 +103,27 @@ class ESPNIE(OnceIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + clip = self._download_json( + 'http://api-app.espn.com/v1/video/clips/%s' % video_id, + video_id)['videos'][0] - data_id=self._search_regex(r'data-cerebro-id="(.*?)"',webpage,'data_id',group=1) - - request_url = 'https://watch.auth.api.espn.com/video/auth/getclip/%s?apikey=5p8m6dw513q716wt2os04mec3' % data_id - - clip = self._download_xml( - request_url, - video_id).findall('clip')[0] - - - title = clip.findall('headline')[0].text + title = clip['headline'] format_urls = set() formats = [] - - def traverse_source(source): - for element in source.iter(): - if element.tag == 'transcodes': + def traverse_source(source, base_source_id=None): + for source_id, source in source.items(): + if source_id == 'alert': continue - - extract_source(element.text, element.tag) + elif isinstance(source, compat_str): + extract_source(source, base_source_id) + elif isinstance(source, dict): + traverse_source( + source, + '%s-%s' % (base_source_id, source_id) + if 
base_source_id else source_id) + def extract_source(source_url, source_id=None): if source_url in format_urls: return @@ -122,6 +134,9 @@ class ESPNIE(OnceIE): elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, video_id, f4m_id=source_id, fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', @@ -142,18 +157,21 @@ class ESPNIE(OnceIE): f['preference'] = 1 formats.append(f) - links = clip.findall('transcodes')[0] - traverse_source(links) + links = clip.get('links', {}) + traverse_source(links.get('source', {})) + traverse_source(links.get('mobile', {})) self._sort_formats(formats) - description = clip.findall('caption')[0].text - duration = int_or_none(clip.findall('durationSeconds')[0].text) - timestamp = unified_timestamp(clip.findall('publishDate')[0].text) + description = clip.get('caption') or clip.get('description') + thumbnail = clip.get('thumbnail') + duration = int_or_none(clip.get('duration')) + timestamp = unified_timestamp(clip.get('originalPublishDate')) return { 'id': video_id, 'title': title, 'description': description, + 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, 'formats': formats, From 2c75db360c0286d00116fb5ec81f2965a44f515b Mon Sep 17 00:00:00 2001 From: xela722 Date: Sat, 17 Aug 2019 16:01:22 -0400 Subject: [PATCH 3/4] Fix ESPN extractor Fixes the ESPN extractor to extract clips from the site. 
--- youtube_dl/extractor/espn.py | 62 +++++++++++++----------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 6cf05e6da..e703a28d9 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -51,20 +51,6 @@ class ESPNIE(OnceIE): 'params': { 'skip_download': True, }, - }, { - 'url': 'https://broadband.espn.go.com/video/clip?id=18910086', - 'info_dict': { - 'id': '18910086', - 'ext': 'mp4', - 'title': 'Kyrie spins around defender for two', - 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', - 'timestamp': 1489539155, - 'upload_date': '20170315', - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', 'only_matching': True, @@ -103,27 +89,29 @@ class ESPNIE(OnceIE): def _real_extract(self, url): video_id = self._match_id(url) - clip = self._download_json( - 'http://api-app.espn.com/v1/video/clips/%s' % video_id, - video_id)['videos'][0] + webpage = self._download_webpage(url, video_id) - title = clip['headline'] + data_id=self._search_regex(r'data-cerebro-id="(.*?)"',webpage,'data_id',group=1) + + request_url = 'https://watch.auth.api.espn.com/video/auth/getclip/%s?apikey=5p8m6dw513q716wt2os04mec3' % data_id + + clip = self._download_xml( + request_url, + video_id).findall('clip')[0] + + + title = clip.findall('headline')[0].text format_urls = set() formats = [] - def traverse_source(source, base_source_id=None): - for source_id, source in source.items(): - if source_id == 'alert': + + def traverse_source(source): + for element in source.iter(): + if element.tag == 'transcodes': continue - elif isinstance(source, compat_str): - extract_source(source, base_source_id) - elif isinstance(source, dict): - traverse_source( - source, - '%s-%s' % (base_source_id, source_id) - if base_source_id else source_id) - + + 
extract_source(element.text, element.tag) def extract_source(source_url, source_id=None): if source_url in format_urls: return @@ -134,9 +122,6 @@ class ESPNIE(OnceIE): elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, video_id, f4m_id=source_id, fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', @@ -157,21 +142,18 @@ class ESPNIE(OnceIE): f['preference'] = 1 formats.append(f) - links = clip.get('links', {}) - traverse_source(links.get('source', {})) - traverse_source(links.get('mobile', {})) + links = clip.findall('transcodes')[0] + traverse_source(links) self._sort_formats(formats) - description = clip.get('caption') or clip.get('description') - thumbnail = clip.get('thumbnail') - duration = int_or_none(clip.get('duration')) - timestamp = unified_timestamp(clip.get('originalPublishDate')) + description = clip.findall('caption')[0].text + duration = int_or_none(clip.findall('durationSeconds')[0].text) + timestamp = unified_timestamp(clip.findall('publishDate')[0].text) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, 'formats': formats, From be2911be031e250ed7887573821b58b32d5dd96e Mon Sep 17 00:00:00 2001 From: xela722 Date: Sat, 17 Aug 2019 16:07:58 -0400 Subject: [PATCH 4/4] Style to fit Flake8 --- youtube_dl/extractor/espn.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e703a28d9..95d54d5d7 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -4,7 +4,6 @@ import re from .common import InfoExtractor from .once import OnceIE -from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, @@ -91,27 +90,25 @@ class 
ESPNIE(OnceIE): webpage = self._download_webpage(url, video_id) - data_id=self._search_regex(r'data-cerebro-id="(.*?)"',webpage,'data_id',group=1) + data_id = self._search_regex(r'data-cerebro-id="(.*?)"', webpage, 'data_id', group=1) request_url = 'https://watch.auth.api.espn.com/video/auth/getclip/%s?apikey=5p8m6dw513q716wt2os04mec3' % data_id clip = self._download_xml( - request_url, + request_url, video_id).findall('clip')[0] - title = clip.findall('headline')[0].text format_urls = set() formats = [] - def traverse_source(source): for element in source.iter(): if element.tag == 'transcodes': continue - extract_source(element.text, element.tag) + def extract_source(source_url, source_id=None): if source_url in format_urls: return