diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eac50eda5..59280a2b4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -649,6 +649,7 @@ from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE from .telebruxelles import TeleBruxellesIE +from .tele5 import Tele5IE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py new file mode 100644 index 000000000..26a477481 --- /dev/null +++ b/youtube_dl/extractor/tele5.py @@ -0,0 +1,76 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor + + +class Tele5IE(InfoExtractor): + _VALID_URL = r'http://www.tele5.de/(?Pre-play/filme|a-z)/(?P.*?).html' + + _TESTS = [{ + 'url': 'http://www.tele5.de/re-play/filme/flying-swords-of-dragon-gate.html', + 'info_dict': { + 'ext': 'mp4', + 'id': 'flying-swords-of-dragon-gate', + 'title': 'Flying Swords of Dragon Gate', + }, + }] + + def _get_video_url(self, thumbnail, entity_id, video_js_url): + video_js = self._download_webpage(video_js_url, None) + flavor_id = re.compile(r'''"id\\":\\"(.*?)\\"''').findall(video_js)[-1] + return thumbnail.split('thumbnail/')[0] + 'playManifest/entryId/{}/flavorId/{}/format/url/protocol/http/a.mp4'.format(entity_id, flavor_id) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + series = mobj.group('type') == 'a-z' + webpage = self._download_webpage(url, video_id) + + if series: + title = self._html_search_regex(r'(.*?)', webpage, 'title').split('-')[0].strip() + partner_data =self._html_search_regex( + r'', + webpage, 'video js').split('/') + uiconf_id = partner_data[-3] + partner_id = partner_data[-1] + + entries = [] + for player in re.compile(r'''
(.*?)', webpage, 'title') + video_js_url =self._html_search_regex( + r'', + webpage, 'video js') + thumbnail = self._html_search_regex( + r'', + webpage, "thumbnail url", fatal=False) + entity_id = thumbnail.split('entry_id/')[1].split('/')[0] + video_url = self._get_video_url(thumbnail, entity_id, video_js_url) + + return { + 'ext': 'mp4', + 'id': video_id, + 'thumbnail': thumbnail, + 'title': title, + 'url': video_url, + } +