From 5feef03206ca7af34b51010ce352012aa1a696b8 Mon Sep 17 00:00:00 2001 From: desseim Date: Mon, 23 Nov 2015 22:14:47 +0100 Subject: [PATCH] Add MTV81 extractor Support for videos from http://www.mtv81.com/ . --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/comedycentral.py | 2 +- youtube_dl/extractor/gametrailers.py | 2 +- youtube_dl/extractor/mtv.py | 66 ++++++++++++++++++++++----- youtube_dl/extractor/southpark.py | 8 ++-- youtube_dl/extractor/spike.py | 2 +- 6 files changed, 62 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 947b83683..f8b0fb083 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -353,6 +353,7 @@ from .movshare import MovShareIE from .mtv import ( MTVIE, MTVServicesEmbeddedIE, + MTV81IE, MTVIggyIE, MTVDEIE, ) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 3e4bd10b6..ec4336a91 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -18,7 +18,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ (video-clips|episodes|cc-studios|video-collections|full-episodes) /(?P<title>.*)''' - _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + _FEED_BASE_URL = 'http://comedycentral.com/feeds/mrss/' _TEST = { 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index a6ab795ae..06ade714c 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -16,4 +16,4 @@ class GametrailersIE(MTVServicesInfoExtractor): }, } - _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' + _FEED_BASE_URL = 'http://www.gametrailers.com/feeds/mrss' diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index d887583e6..e7c12e9a9 100644 --- 
a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -4,10 +4,13 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_str, ) from ..utils import ( + compat_urlparse, + compat_urllib_parse, + compat_urllib_parse_urlparse, + compat_parse_qs, ExtractorError, find_xpath_attr, fix_xml_ampersands, @@ -40,8 +43,16 @@ class MTVServicesInfoExtractor(InfoExtractor): base = 'http://viacommtvstrmfs.fplive.net/' return base + m.group('finalid') + def _get_feed_base_url(self, uri): + return self._FEED_BASE_URL + def _get_feed_url(self, uri): - return self._FEED_URL + data = compat_urllib_parse.urlencode({'uri': uri}) + feed_url = self._get_feed_base_url(uri) + '?' + if self._LANG: + feed_url += 'lang=%s&' % self._LANG + feed_url += data + return feed_url def _get_thumbnail_url(self, uri, itemdoc): search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) @@ -170,12 +181,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) - data = compat_urllib_parse.urlencode({'uri': uri}) - info_url = feed_url + '?' 
- if self._LANG: - info_url += 'lang=%s&' % self._LANG - info_url += data - return self._get_videos_info_from_url(info_url, video_id) + return self._get_videos_info_from_url(feed_url, video_id) def _get_videos_info_from_url(self, url, video_id): idoc = self._download_xml( @@ -199,7 +205,7 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid is None or ':' not in mgid: mgid = self._search_regex( - [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], + [r'data-mgid="(.*?)"', r'(?:swfobject\.embedSWF|getTheVideo)\(".*?(mgid:.*?)"'], webpage, 'mgid', default=None) if not mgid: @@ -235,7 +241,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): if mobj: return mobj.group('url') - def _get_feed_url(self, uri): + def _get_feed_base_url(self, uri): video_id = self._id_from_uri(uri) site_id = uri.replace(video_id, '') config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/' @@ -251,12 +257,48 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): return self._get_videos_info(mgid) +class MTV81IE(MTVServicesInfoExtractor): + IE_NAME = 'mtv81.com' + _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/' + + _TEST = { + 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', + 'md5': 'a253c454fa662955b95d47d998ab1119', + 'info_dict': { + 'id': '1105285', + 'ext': 'mp4', + 'title': 'Artist to Watch - KRUSH - Segment 1', + 'description': '<i>Artist to Watch - KRUSH - Segment 1</i><br/><br/>' + }, + } + + def _get_feed_url(self, uri): + video_id = self._id_from_uri(uri) + site_id = uri.replace(video_id, '') + config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/' + 'config.xml'.format(site_id)) + config_doc = self._download_xml(config_url, video_id) + feed_node = config_doc.find('.//feed') + feed_original_url = feed_node.text.strip() + # we need to keep 'version' query parameter and add 'uri' one + feed_original_url_parts = compat_urllib_parse_urlparse(feed_original_url) + 
feed_original_query_parts = compat_parse_qs(feed_original_url_parts.query) + try: + feed_query_parts = {'version': feed_original_query_parts['version']} + except KeyError: + feed_query_parts = {} + feed_query_parts['uri'] = uri + feed_query = compat_urllib_parse.urlencode(feed_query_parts, doseq=True) + feed_url = compat_urlparse.urlunparse((feed_original_url_parts.scheme, feed_original_url_parts.netloc, feed_original_url_parts.path, feed_original_url_parts.params, feed_query, feed_original_url_parts.fragment)) + return feed_url + + class MTVIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' - _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' + _FEED_BASE_URL = 'http://www.mtv.com/player/embed/AS3/rss/' _TESTS = [ { @@ -304,7 +346,7 @@ class MTVIggyIE(MTVServicesInfoExtractor): 'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet', } } - _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/' + _FEED_BASE_URL = 'http://all.mtvworldverticals.com/feed-xml/' class MTVDEIE(MTVServicesInfoExtractor): diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 87b650468..7f638820e 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -8,7 +8,7 @@ class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' + _FEED_BASE_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _TESTS = [{ 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', @@ -35,7 +35,7 @@ class SouthParkEsIE(SouthParkIE): class SouthParkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = 
r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' + _FEED_BASE_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _TESTS = [{ 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', @@ -59,7 +59,7 @@ class SouthParkDeIE(SouthParkIE): class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' + _FEED_BASE_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' _TESTS = [{ 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', @@ -70,7 +70,7 @@ class SouthParkNlIE(SouthParkIE): class SouthParkDkIE(SouthParkIE): IE_NAME = 'southparkstudios.dk' _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' + _FEED_BASE_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' _TESTS = [{ 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 182f286df..d3445d805 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -19,7 +19,7 @@ class SpikeIE(MTVServicesInfoExtractor): }, } - _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _FEED_BASE_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' def _real_extract(self, url):