From 5008e175357e681584c9d7dcb4b909998645c72b Mon Sep 17 00:00:00 2001 From: j Date: Sun, 20 Dec 2015 15:35:05 +0100 Subject: [PATCH 1/2] [tele5] Add new extractor (closes #7805) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tele5.py | 76 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/tele5.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eac50eda5..59280a2b4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -649,6 +649,7 @@ from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE from .telebruxelles import TeleBruxellesIE +from .tele5 import Tele5IE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py new file mode 100644 index 000000000..26a477481 --- /dev/null +++ b/youtube_dl/extractor/tele5.py @@ -0,0 +1,76 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor + + +class Tele5IE(InfoExtractor): + _VALID_URL = r'http://www.tele5.de/(?Pre-play/filme|a-z)/(?P.*?).html' + + _TESTS = [{ + 'url': 'http://www.tele5.de/re-play/filme/flying-swords-of-dragon-gate.html', + 'info_dict': { + 'ext': 'mp4', + 'id': 'flying-swords-of-dragon-gate', + 'title': 'Flying Swords of Dragon Gate', + }, + }] + + def _get_video_url(self, thumbnail, entity_id, video_js_url): + video_js = self._download_webpage(video_js_url, None) + flavor_id = re.compile(r'''"id\\":\\"(.*?)\\"''').findall(video_js)[-1] + return thumbnail.split('thumbnail/')[0] + 'playManifest/entryId/{}/flavorId/{}/format/url/protocol/http/a.mp4'.format(entity_id, flavor_id) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + series = mobj.group('type') == 'a-z' + webpage = self._download_webpage(url, video_id) + + if series: + title = self._html_search_regex(r'(.*?)', webpage, 'title').split('-')[0].strip() + partner_data =self._html_search_regex( + r'', + webpage, 'video js').split('/') + uiconf_id = partner_data[-3] + partner_id = partner_data[-1] + + entries = [] + for player in re.compile(r'''
(.*?)', webpage, 'title') + video_js_url =self._html_search_regex( + r'', + webpage, 'video js') + thumbnail = self._html_search_regex( + r'', + webpage, "thumbnail url", fatal=False) + entity_id = thumbnail.split('entry_id/')[1].split('/')[0] + video_url = self._get_video_url(thumbnail, entity_id, video_js_url) + + return { + 'ext': 'mp4', + 'id': video_id, + 'thumbnail': thumbnail, + 'title': title, + 'url': video_url, + } + From b2ae7e2b5b69170e5413d0d1603bea496ffd7fcd Mon Sep 17 00:00:00 2001 From: tetzank Date: Fri, 25 Dec 2015 19:22:01 +0100 Subject: [PATCH 2/2] [tele5] added support for different formats --- youtube_dl/extractor/tele5.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index 26a477481..606b61e9d 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -18,8 +18,20 @@ class Tele5IE(InfoExtractor): def _get_video_url(self, thumbnail, entity_id, video_js_url): video_js = self._download_webpage(video_js_url, None) - flavor_id = re.compile(r'''"id\\":\\"(.*?)\\"''').findall(video_js)[-1] - return thumbnail.split('thumbnail/')[0] + 'playManifest/entryId/{}/flavorId/{}/format/url/protocol/http/a.mp4'.format(entity_id, flavor_id) + match = re.compile(r'''\\"flavorParamsId\\":(\d),\\"width\\":(\d+),\\"height\\":(\d+),\\"bitrate\\":(\d+),\\"frameRate\\":(\d+),.+?,\\"id\\":\\"(\w+)\\",.+?,\\"size\\":(\d+),''').findall(video_js) + formats = [] + for m in match: + # assumes matches are already in the right order, from worst to best quality + formats.append({ + 'url': thumbnail.split('thumbnail/')[0] + 'playManifest/entryId/{}/flavorId/{}/format/url/protocol/http/a.mp4'.format(entity_id, m[5]), + 'format_id': m[0], # needs to be string otherwise cmp fails + 'width': int(m[1]), + 'height': int(m[2]), + 'tbr': int(m[3]), # needs to be an integer + 'frameRate': int(m[4]), + 'filesize': int(m[6]) + }) + return formats def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -47,11 +59,11 @@ class Tele5IE(InfoExtractor): 'uiconf_id': uiconf_id, 'entity_id': entity_id, }) - video_url = self._get_video_url(thumbnail, entity_id, video_js_url) + formats = self._get_video_url(thumbnail, entity_id, video_js_url) entries.append({ 'id': entity_id, 'title': data['name'], - 'url': video_url, + 'formats': formats, 'thumbnail': data['thumbnail'] }) return self.playlist_result(entries, video_id, title)