From 2107c2b0aa1e7cd9c28a7da56912fdde924f7dd0 Mon Sep 17 00:00:00 2001 From: Mark Lee Date: Sun, 27 Nov 2016 15:33:26 -0800 Subject: [PATCH 1/4] [spike] Extract MGID via custom mobile URL If the conventional MGID detection fails, fall back to searching for the mobile app's custom URL format. --- youtube_dl/extractor/mtv.py | 4 ++-- youtube_dl/extractor/spike.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 74a3a035e..ff0c6149f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -201,7 +201,7 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage): + def _extract_mgid(self, webpage, fatal=True): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -221,7 +221,7 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', fatal=fatal) return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 218785ee4..d1f7bb17a 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor @@ -32,3 +34,12 @@ class SpikeIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') + + def _extract_mgid(self, webpage): + mgid = super(SpikeIE, self)._extract_mgid(webpage, fatal=False) + if mgid is None: + url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') + video_type, episode_id = url_parts.split('/', 1) + mgid = 'mgid:arc:{}:spike.com:{}'.format(video_type, episode_id) + return mgid From 53ebfd701f8915b99a54323107d4c84d3f2ad47f Mon Sep 17 00:00:00 2001 From: Mark Lee Date: Sun, 27 Nov 2016 18:43:23 -0800 Subject: [PATCH 2/4] Use Python 2.6-compatible format syntax --- youtube_dl/extractor/spike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index d1f7bb17a..9d8ef6040 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -41,5 +41,5 @@ class SpikeIE(MTVServicesInfoExtractor): if mgid is None: url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') video_type, episode_id = url_parts.split('/', 1) - mgid = 'mgid:arc:{}:spike.com:{}'.format(video_type, episode_id) + mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id) return mgid From d4b9a720dc839e0cba87519a21d00fcb7321246c Mon Sep 17 00:00:00 2001 From: Mark Lee Date: Mon, 28 Nov 2016 08:24:42 -0800 Subject: [PATCH 3/4] Add test URL --- youtube_dl/extractor/spike.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 9d8ef6040..4a1113cf3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -18,6 +18,16 @@ class SpikeIE(MTVServicesInfoExtractor): 'timestamp': 1388120400, 'upload_date': '20131227', }, + }, { + 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209', + 'md5': 'b25c6f16418aefb9ad5a6cae2559321f', + 'expected_warnings': ['unable to extract .*mgid'], + 'info_dict': { + 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', + 'ext': 'mp4', + 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1', + 'description': 'md5:a739ca8f978a7802f67f8016d27ce114', + }, }, { 'url': 'http://www.spike.com/video-clips/lhtu8m/', 'only_matching': True, From 9d60d1851a221f9369db40292f71cb56c12116d6 Mon Sep 17 00:00:00 2001 From: Mark Lee Date: Mon, 28 Nov 2016 09:27:32 -0800 Subject: [PATCH 4/4] Use default instead of fatal --- youtube_dl/extractor/mtv.py | 5 +++-- youtube_dl/extractor/spike.py | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ff0c6149f..03351917e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,6 +13,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, + NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, @@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage, fatal=True): + def _extract_mgid(self, webpage, default=NO_DEFAULT): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', fatal=fatal) + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 4a1113cf3..abfee3ece 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -21,7 +21,6 @@ class SpikeIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209', 'md5': 'b25c6f16418aefb9ad5a6cae2559321f', - 'expected_warnings': ['unable to extract .*mgid'], 'info_dict': { 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', 'ext': 'mp4', @@ -47,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') def _extract_mgid(self, webpage): - mgid = super(SpikeIE, self)._extract_mgid(webpage, fatal=False) + mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) if mgid is None: url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') video_type, episode_id = url_parts.split('/', 1)