From be6751d4caa9451ce46af5d1517a5f2e6714314e Mon Sep 17 00:00:00 2001
From: felix
Date: Sat, 15 Aug 2015 00:46:19 +0200
Subject: [PATCH] [msnbc] new extractor

---
Review notes (this section is not part of the commit message):
  * _VALID_URL: restored the named 'id' group required by _match_id().
  * Missing GUID, empty feed or empty media list now raise ExtractorError.
  * timestamp is computed with integer scaling via int_or_none().
  * The blob id on the msnbc.py index line below predates these revisions.

 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/msnbc.py    | 65 ++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 youtube_dl/extractor/msnbc.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 83d21bd15..f2af451e2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -335,6 +335,7 @@ from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
 from .movshare import MovShareIE
+from .msnbc import MSNBCIE
 from .mtv import (
     MTVIE,
     MTVServicesEmbeddedIE,
diff --git a/youtube_dl/extractor/msnbc.py b/youtube_dl/extractor/msnbc.py
new file mode 100644
index 000000000..5c9b6c8a6
--- /dev/null
+++ b/youtube_dl/extractor/msnbc.py
@@ -0,0 +1,65 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class MSNBCIE(InfoExtractor):
+    """Extractor for msnbc.com "watch" pages; playback is delegated to ThePlatform."""
+
+    # Only the slug after /watch/ is captured; _match_id() needs a group named 'id'.
+    _VALID_URL = r'http://www\.msnbc\.com/[a-z0-9-]+/watch/(?P<id>[a-z0-9-]+)'
+
+    _TESTS = [{
+        'url': 'http://www.msnbc.com/morning-joe/watch/american-trains-iraqis-in-fight-against-isis-465258051578',
+        'info_dict': {
+            'id': 'n_mj_vandyke_150616_647133',
+            'title': 'American trains Iraqis in fight against ISIS',
+            'description': 'md5:6432ea377a7f0bc6981d4c4fc48d4c4e',
+            'timestamp': 1434451583,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Look up the page's GUID in the feed and return a url_transparent result."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The GUID is read from the nv:videoId meta tag; without it the feed cannot be queried.
+        guid = self._html_search_meta('nv:videoId', webpage, 'guid', fatal=True)
+
+        playlist_json = self._download_json(
+            'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&byGuid=%s' % guid,
+            guid)
+
+        entries = playlist_json.get('entries')
+        if not entries:
+            raise ExtractorError('No feed entry found for GUID %s' % guid)
+        entry = entries[0]
+
+        contents = entry.get('media$content')
+        if not contents:
+            raise ExtractorError('No media content found for GUID %s' % guid)
+
+        thumbnails = [{
+            'url': thumb['plfile$url'],
+            'width': int_or_none(thumb.get('plfile$width')),
+            'height': int_or_none(thumb.get('plfile$height')),
+        } for thumb in entry.get('media$thumbnails') or []]
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'id': guid,
+            'title': entry['title'],
+            'description': entry.get('description'),
+            # media$availableDate is scaled down by 1000 (presumably milliseconds);
+            # integer scaling keeps the result an int on Python 3 as well.
+            'timestamp': int_or_none(entry.get('media$availableDate'), scale=1000),
+            'thumbnails': thumbnails,
+            'url': contents[0]['plfile$url'],
+        }