From e23c8a0bb1f04f6f31072857c0af4ced9ac4c805 Mon Sep 17 00:00:00 2001 From: David Howell Date: Wed, 30 Dec 2015 14:25:02 -0700 Subject: [PATCH 1/2] CultureUnplugged CultureUnplugged --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cultureunplugged.py | 48 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/cultureunplugged.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb7151443..f622b2dfd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -127,6 +127,7 @@ from .crunchyroll import ( ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE +from .cultureunplugged import CultureUnpluggedIE from .dailymotion import ( DailymotionIE, DailymotionPlaylistIE, diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py new file mode 100644 index 000000000..384852d7e --- /dev/null +++ b/youtube_dl/extractor/cultureunplugged.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CultureUnpluggedIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?cultureunplugged\.com/documentary\/watch-online\/play\/(?P<id>\d+)\/(?P<display_id>[^/]+)' + _TEST = { + 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West', + 'md5': 'ac6c093b089f7d05e79934dcb3d228fc', + 'info_dict': { + 'id': '53662', + 'display_id': 'The-Next--Best-West', + 'description': 'The Next, Best West explores our changing relationship with the land that sustains us. It tells the story of how the conventional American concept of progress has steered our exploitation of the Western landscape, and takes you to three places – Colorado’s San Luis Valley, the high plains of eastern Montana and the Elwha River on Washington’s Olympic Peninsula – where a vibrant new understanding of progress presages a better future. 
The Next, Best West shows how our interpretation of progress has shaped the singular landscape of the American West, and how a new understanding of progress may be our best hope for a bright and healthy future. The West is a place of pure beauty that has provided us so much, yet we have cared for it too little. But that is beginning to change.', + 'ext': 'mp4', + 'title': 'The Next, Best West', + 'thumbnail': 'http://cdn.cultureunplugged.com/thumbnails_16_9/lg/53662.jpg', + } + } + + def _real_extract(self, url): + + video_id = self._match_id(url) + display_id = re.match(self._VALID_URL, url).group('display_id') + json_url = 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % (video_id) + json_output = self._download_json(json_url, video_id) + + title = json_output['title'] + description = json_output['synopsis'] + creator = json_output['producer'] + thumbnail = json_output['large_thumb'] + + formats = [{ + 'url': json_output['url'], + 'format': 'mp4' + }] + + return { + 'id': video_id, + 'title': title, + 'display_id': display_id, + 'description': description, + 'creator': creator, + 'thumbnail': thumbnail, + 'formats': formats + } From adfce1f544e3a28bd1a8a210f5cdb4bdf2fa35bf Mon Sep 17 00:00:00 2001 From: David Howell Date: Sat, 2 Jan 2016 03:21:25 -0700 Subject: [PATCH 2/2] Regex + JSON get Added back https variable and removed backslashes. Changed to .get() safe search function. Ran test regarding null values and returned successfully. 
--- youtube_dl/extractor/cultureunplugged.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py index 384852d7e..98f748db0 100644 --- a/youtube_dl/extractor/cultureunplugged.py +++ b/youtube_dl/extractor/cultureunplugged.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class CultureUnpluggedIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?cultureunplugged\.com/documentary\/watch-online\/play\/(?P<id>\d+)\/(?P<display_id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)/(?P<display_id>[^/]+)' _TEST = { 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West', 'md5': 'ac6c093b089f7d05e79934dcb3d228fc', @@ -27,10 +27,10 @@ class CultureUnpluggedIE(InfoExtractor): json_url = 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % (video_id) json_output = self._download_json(json_url, video_id) - title = json_output['title'] - description = json_output['synopsis'] - creator = json_output['producer'] - thumbnail = json_output['large_thumb'] + title = json_output.get('title') + description = json_output.get('synopsis') + creator = json_output.get('producer') + thumbnail = json_output.get('large_thumb') formats = [{ 'url': json_output['url'],