From 60d3a2e0f8b383c4f648d7bf346f156d6603b14a Mon Sep 17 00:00:00 2001 From: AGSPhoenix Date: Mon, 24 Feb 2014 21:29:37 -0500 Subject: [PATCH 0001/2348] Fix incorrect format codes Corrects the descriptions for the DASH video format codes 264 and 138 (1440p and 2160p, respectively). --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f65052a89..b40a45384 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -199,9 +199,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40}, '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40}, '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, - '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, + '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40}, '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, - '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, From 4d756a9cc01ed2e85b2ea540b70c78dee10fc118 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 25 Feb 2014 10:43:34 +0100 Subject: [PATCH 0002/2348] [testurl] Fix case when only one IE matches --- youtube_dl/extractor/testurl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/testurl.py b/youtube_dl/extractor/testurl.py index bdc6e2064..c7d559315 100644 --- a/youtube_dl/extractor/testurl.py +++ b/youtube_dl/extractor/testurl.py @@ -39,6 +39,8 @@ class TestURLIE(InfoExtractor): ('Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors)), expected=True) + else: + extractor = matching_extractors[0] num_str = mobj.group('num') num = int(num_str) if num_str else 0 From 27579b9e4c4adf5411faeacbbf45dae97a7df5e9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 25 Feb 2014 11:06:47 +0100 Subject: [PATCH 0003/2348] [vevo] Add suppot for v3 SMIL URLs (Fixes #2409) --- youtube_dl/extractor/vevo.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index e458ac961..dd87158db 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -24,9 +24,10 @@ class VevoIE(InfoExtractor): (?P[^&?#]+)''' _TESTS = [{ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - 'file': 'GB1101300280.mp4', "md5": "06bea460acb744eab74a9d7dcb4bfd61", 'info_dict': { + 'id': 'GB1101300280', + 'ext': 'mp4', "upload_date": "20130624", "uploader": "Hurts", "title": "Somebody to Die For", @@ -34,6 +35,18 @@ class VevoIE(InfoExtractor): "width": 1920, "height": 1080, } + }, { + 'note': 'v3 SMIL format', + 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', + 'md5': '893ec0e0d4426a1d96c01de8f2bdff58', + 'info_dict': { + 'id': 'USUV71302923', + 'ext': 'mp4', + 'upload_date': '20140219', + 'uploader': 'Cassadee Pope', + 'title': 'I Wish I Could Break Your Heart', + 'duration': 226.101, + } }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' @@ -105,9 +118,23 @@ class VevoIE(InfoExtractor): video_info = self._download_json(json_url, video_id)['video'] formats = self._formats_from_json(video_info) + + # Download SMIL + smil_blocks = sorted(( + f for f in video_info['videoVersions'] + if f['sourceType'] == 13), + key=lambda f: f['version']) + + smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( + self._SMIL_BASE_URL, video_id, video_id.lower()) + if smil_blocks: + smil_url_m = self._search_regex( + r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', + fatal=False) + if smil_url_m is not None: + smil_url = smil_url_m + try: - smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( - self._SMIL_BASE_URL, video_id, video_id.lower()) smil_xml = self._download_webpage(smil_url, video_id, 'Downloading SMIL info') formats.extend(self._formats_from_smil(smil_xml)) From 6cadf8c858be4cdaa1fd8da2b4e8bee53434b03b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 25 Feb 2014 11:15:34 +0100 Subject: [PATCH 0004/2348] [vevo] Add age_limit support --- youtube_dl/extractor/vevo.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index dd87158db..fa147a575 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -46,6 +46,21 @@ class VevoIE(InfoExtractor): 'uploader': 'Cassadee Pope', 'title': 'I Wish I Could Break Your Heart', 'duration': 226.101, + 'age_limit': 0, + } + }, { + 'note': 'Age-limited video', + 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', + 'info_dict': { + 'id': 'USRV81300282', + 'ext': 'mp4', + 'age_limit': 18, + 'title': 'Tunnel Vision (Explicit)', + 'uploader': 'Justin Timberlake', + 'upload_date': '20130704', + }, + 'params': { + 'skip_download': 'true', } }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' @@ -119,6 +134,14 @@ class VevoIE(InfoExtractor): formats = self._formats_from_json(video_info) + is_explicit = video_info.get('isExplicit') + if is_explicit is True: + age_limit = 18 + elif is_explicit is False: + age_limit = 0 + else: + age_limit = None + # Download SMIL smil_blocks = sorted(( f for f in video_info['videoVersions'] @@ -155,4 +178,5 @@ class VevoIE(InfoExtractor): 'upload_date': upload_date.strftime('%Y%m%d'), 'uploader': video_info['mainArtists'][0]['artistName'], 'duration': video_info['duration'], + 'age_limit': age_limit, } From 3c7fd0bdb24e9eef7d18d197b8939bd460652aa1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 25 Feb 2014 11:15:55 +0100 Subject: [PATCH 0005/2348] release 2014.02.25.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2b1eee34d..e5d5960e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.02.25' +__version__ = '2014.02.25.1' From ea5a0be811b89530f26d71286703a31563cef80b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 25 Feb 2014 14:11:01 +0100 Subject: [PATCH 0006/2348] Skip youtube toptracks test All the playlists return 500 errors. --- test/test_youtube_lists.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 38ac989ce..5eccc11ff 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -118,6 +118,8 @@ class TestYoutubeLists(unittest.TestCase): self.assertEqual(original_video['id'], 'rjFaenf1T-Y') def test_youtube_toptracks(self): + print('Skipping: The playlist page gives error 500') + return dl = FakeYDL() ie = YoutubePlaylistIE(dl) result = ie.extract('https://www.youtube.com/playlist?list=MCUS') From 344400951ccddaaa477738957bdbdb86a704c55b Mon Sep 17 00:00:00 2001 From: Sergey M Date: Tue, 25 Feb 2014 20:26:11 +0700 Subject: [PATCH 0007/2348] [crunchyroll] Tidy and modernize --- youtube_dl/extractor/crunchyroll.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 920728e01..a20b88f02 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -1,7 +1,10 @@ # encoding: utf-8 from __future__ import unicode_literals -import re, base64, zlib +import re +import base64 +import zlib + from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor @@ -19,13 +22,15 @@ from ..aes import ( inc, ) + class CrunchyrollIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' - _TESTS = [{ + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' + _TEST = { 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', - 'file': '645513.flv', #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', 'info_dict': { + 'id': '645513', + 'ext': 'flv', 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', 'description': 'md5:2d17137920c64f2f49981a7797d275ef', 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', @@ -36,7 +41,7 @@ class CrunchyrollIE(InfoExtractor): # rtmp 'skip_download': True, }, - }] + } _FORMAT_IDS = { '360': ('60', '106'), @@ -68,7 +73,7 @@ class CrunchyrollIE(InfoExtractor): shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) # Extend 160 Bit hash to 256 Bit return shaHash + [0] * 12 - + key = obfuscate_key(id) class Counter: __value = iv @@ -80,7 +85,7 @@ class CrunchyrollIE(InfoExtractor): return zlib.decompress(decrypted_data) def _convert_subtitles_to_srt(self, subtitles): - i=1 + i = 1 output = '' for start, end, text in re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): start = start.replace('.', ',') @@ -90,7 +95,7 @@ class CrunchyrollIE(InfoExtractor): if not text: continue output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) - i+=1 + i += 1 return output def _real_extract(self,url): @@ -123,7 +128,7 @@ class CrunchyrollIE(InfoExtractor): playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') - + stream_id = self._search_regex(r'([^<]+)', playerdata, 'stream_id') video_thumbnail = self._search_regex(r'([^<]+)', playerdata, 'thumbnail', fatal=False) @@ -175,4 +180,4 @@ class CrunchyrollIE(InfoExtractor): 'upload_date': video_upload_date, 'subtitles': subtitles, 'formats': formats, - } + } \ No newline at end of file From 70cb73922b532c20925e725aa0243305c4caaac6 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Tue, 25 Feb 2014 20:27:25 +0700 Subject: [PATCH 0008/2348] [crunchyroll] Fix subtitle lang code extraction --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index a20b88f02..44d78fe5e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -166,7 +166,7 @@ class CrunchyrollIE(InfoExtractor): data = base64.b64decode(data) subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') - lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) + lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) From 1d430674c7ad8341d2025f43f93c4f582d1f92ea Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Tue, 25 Feb 2014 20:29:16 +0700 Subject: [PATCH 0009/2348] [crunchyroll] Handle error message --- youtube_dl/extractor/crunchyroll.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 44d78fe5e..5587ade12 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import json import base64 import zlib @@ -113,6 +114,12 @@ class CrunchyrollIE(InfoExtractor): if note_m: raise ExtractorError(note_m) + mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P{.+?})\]\)', webpage) + if mobj: + msg = json.loads(mobj.group('msg')) + if msg.get('type') == 'error': + raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) + video_title = self._html_search_regex(r']*>(.+?)', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') From d0a72674c6eab914be41b637ac11627485111313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 25 Feb 2014 20:51:51 +0100 Subject: [PATCH 0010/2348] [crunchyroll] Use `enumerate` --- youtube_dl/extractor/crunchyroll.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 5587ade12..026a9177e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -86,9 +86,8 @@ class CrunchyrollIE(InfoExtractor): return zlib.decompress(decrypted_data) def _convert_subtitles_to_srt(self, subtitles): - i = 1 output = '' - for start, end, text in re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): + for i, (start, end, text) in enumerate(re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1): start = start.replace('.', ',') end = end.replace('.', ',') text = clean_html(text) @@ -96,7 +95,6 @@ class CrunchyrollIE(InfoExtractor): if not text: continue output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) - i += 1 return output def _real_extract(self,url): @@ -187,4 +185,4 @@ class CrunchyrollIE(InfoExtractor): 'upload_date': video_upload_date, 'subtitles': subtitles, 'formats': formats, - } \ No newline at end of file + } From 76df418cbac1ed02c8919924d92bd1bc7ddc70d9 Mon Sep 17 00:00:00 2001 From: ruuk Date: Tue, 25 Feb 2014 12:04:44 -0800 Subject: [PATCH 0011/2348] Add thumbnail for metacafe --- youtube_dl/extractor/metacafe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 99d3c83a5..364ba993b 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -166,6 +166,7 @@ class MetacafeIE(InfoExtractor): video_title = self._html_search_regex(r'(?im)(.*) - Video', webpage, u'title') description = self._og_search_description(webpage) + thumbnail = self._html_search_regex(r' Date: Tue, 25 Feb 2014 14:44:34 -0600 Subject: [PATCH 0012/2348] Add support for ocw.mit.edu video lectures --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/mit.py | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8eff3df41..ee081b1f3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -137,7 +137,7 @@ from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE -from .mit import TechTVMITIE, MITIE +from .mit import TechTVMITIE, MITIE, OCWMITIE from .mixcloud import MixcloudIE from .mpora import MporaIE from .mofosex import MofosexIE diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 76b717fe5..7c40cb8bd 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -3,6 +3,7 @@ import json from .common import InfoExtractor from ..utils import ( + compat_urlparse, clean_html, get_element_by_id, ) @@ -81,3 +82,56 @@ class MITIE(TechTVMITIE): embed_url = self._search_regex(r'', start_page, 'xml root') + xml_name = self._html_search_regex(r'', start_page, 'xml root') - xml_name = self._html_search_regex(r'', start_page, 'xml filename', None, False) + if xml_name is None: + # Fallback to the older format + xml_name = self._html_search_regex(r'', start_page, 'xml root', None, False) - xml_root = self._html_search_regex(r'', start_page, 'xml root') + + self.report_extraction(video_id) xml_name = self._html_search_regex(r'', start_page, 'xml filename') xml_decription_url = xml_root + 'xml/' + xml_name - xml_description = self._download_xml(xml_decription_url, video_id) video_title = xml_description.find('./metadata/title').text - video_details = { 'id': video_id, 'title': video_title, } - video_formats = [] - mp4_video = xml_description.find('./metadata/mp4video') - if mp4_video is not None: - mobj = re.match(r'(?Phttps?://.*?/).*', mp4_video.text) - video_root = mobj.group('root') - formats = xml_description.findall('./metadata/MBRVideos/MBRVideo') - for format in formats: - mobj = re.match(r'mp4\:(?P.*)', format.find('streamName').text) - url = video_root + mobj.group('path') - vbr = format.find('bitrate').text - video_formats.append({ - 'url': url, - 'vbr': int(vbr), - }) - video_details['formats'] = video_formats - else: - # Fallback to flv - akami_url = xml_description.find('./metadata/akamaiHost').text - slide_video_path = xml_description.find('./metadata/slideVideo').text - video_formats.append({ - 'url': 'rtmp://' + akami_url + '/' + slide_video_path, - 'format_note': 'slide deck video', - 'quality': -2, - 'preference': -2, - 'format_id': 'slides', - }) - speaker_video_path = xml_description.find('./metadata/speakerVideo').text - video_formats.append({ - 'url': 'rtmp://' + akami_url + '/' + speaker_video_path, - 'format_note': 'speaker video', - 'quality': -1, - 'preference': -1, - 'format_id': 'speaker', - }) + video_formats = self._parse_mp4(xml_description) + if video_formats is None: + video_formats = self._parse_flv(xml_description) return [{ 'id': video_id, From 4cf9654693f4b191b45dea3f0de7a6fb0e3dd08d Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Thu, 27 Feb 2014 17:44:05 +0700 Subject: [PATCH 0036/2348] Add one more format to unified_strdate --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0c482631a..02b8f7c45 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -772,6 +772,7 @@ def unified_strdate(date_str): '%B %d %Y', '%b %d %Y', '%Y-%m-%d', + '%d.%m.%Y', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', From 0c7214c40432a634f86a9da4ee1ef0c83d62dc5e Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Thu, 27 Feb 2014 17:44:29 +0700 Subject: [PATCH 0037/2348] [prosiebensat1] Add support for ProSiebenSat.1 Digital sites (Closes #2346 #2469) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/prosiebensat1.py | 296 ++++++++++++++++++++++++++ 2 files changed, 297 insertions(+) create mode 100644 youtube_dl/extractor/prosiebensat1.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e16bbd969..8549a1a1e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -175,6 +175,7 @@ from .podomatic import PodomaticIE from .pornhd import PornHdIE from .pornhub import PornHubIE from .pornotube import PornotubeIE +from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .radiofrance import RadioFranceIE from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py new file mode 100644 index 000000000..ec31ee7d1 --- /dev/null +++ b/youtube_dl/extractor/prosiebensat1.py @@ -0,0 +1,296 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from hashlib import sha1 +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + unified_strdate, + clean_html, + RegexNotFoundError, +) + + +class ProSiebenSat1IE(InfoExtractor): + IE_NAME = 'prosiebensat1' + IE_DESC = 'ProSiebenSat.1 Digital' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P.+)' + + _TESTS = [ + { + 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', + 'info_dict': { + 'id': '2104602', + 'ext': 'mp4', + 'title': 'Staffel 2, Episode 18 - Jahresrückblick', + 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', + 'upload_date': '20131231', + 'duration': 5845.04, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', + 'info_dict': { + 'id': '2570327', + 'ext': 'mp4', + 'title': 'Lady-Umstyling für Audrina', + 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', + 'upload_date': '20131014', + 'duration': 606.76, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Seems to be broken', + }, + { + 'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge', + 'info_dict': { + 'id': '2437108', + 'ext': 'mp4', + 'title': 'Folge 48: Gold Rogers Heimat', + 'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.', + 'upload_date': '20140226', + 'duration': 1401.48, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', + 'info_dict': { + 'id': '2904997', + 'ext': 'mp4', + 'title': 'Sexy laufen in Ugg Boots', + 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', + 'upload_date': '20140122', + 'duration': 245.32, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', + 'info_dict': { + 'id': '2906572', + 'ext': 'mp4', + 'title': 'Im Interview: Kai Wiesinger', + 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', + 'upload_date': '20140225', + 'duration': 522.56, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', + 'info_dict': { + 'id': '2992323', + 'ext': 'mp4', + 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', + 'description': 'md5:2669cde3febe9bce13904f701e774eb6', + 'upload_date': '20140225', + 'duration': 2410.44, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', + 'info_dict': { + 'id': '3004256', + 'ext': 'mp4', + 'title': 'Schalke: Tönnies möchte Raul zurück', + 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', + 'upload_date': '20140226', + 'duration': 228.96, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', + 'info_dict': { + 'id': '2572814', + 'ext': 'mp4', + 'title': 'Andreas Kümmert: Rocket Man', + 'description': 'md5:6ddb02b0781c6adf778afea606652e38', + 'upload_date': '20131017', + 'duration': 469.88, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', + 'info_dict': { + 'id': '2156342', + 'ext': 'mp4', + 'title': 'Kurztrips zum Valentinstag', + 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528', + 'upload_date': '20130206', + 'duration': 307.24, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + ] + + _CLIPID_REGEXES = [ + r'"clip_id"\s*:\s+"(\d+)"', + r'clipid: "(\d+)"', + ] + _TITLE_REGEXES = [ + r'

\s*(.+?)

', + r'
\s*

(.+?)

', + r'\s*

(.+?)

', + r'
\s*

(.+?)

', + ] + _DESCRIPTION_REGEXES = [ + r'

\s*(.+?)

', + r'
\s*

Beschreibung: (.+?)

', + r'
\s*
\s*
\s*(.+?)\s*