From c73c24420384912de1eca4d1a9a63cd7f27daa9f Mon Sep 17 00:00:00 2001
From: Martin Hartkorn
Date: Sat, 16 Sep 2017 13:50:49 +0200
Subject: [PATCH] Added extractor for pietsmiet.de

---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/pietsmiet.py  | 103 +++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 youtube_dl/extractor/pietsmiet.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ecb33bc9e..b1ed1a088 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -793,6 +793,7 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .pietsmiet import PietsmietIE
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py
new file mode 100644
index 000000000..f39bef0d5
--- /dev/null
+++ b/youtube_dl/extractor/pietsmiet.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .once import OnceIE
+from ..compat import (
+    compat_urllib_parse_unquote,
+)
+from ..utils import (
+    unescapeHTML,
+    js_to_json,
+    int_or_none,
+)
+
+
+class PietsmietIE(OnceIE):
+    """Extract videos from the pietsmiet.de gallery.
+
+    The player configuration is embedded in the page as a JavaScript object
+    literal ("var config=...").  Its first source is parsed as an HLS
+    manifest; a second source, when present, is offered as a direct HTTP
+    download.
+    """
+
+    # NOTE(review): nothing in this class calls an OnceIE-specific helper, so
+    # a more general base class is probably sufficient -- confirm.
+    _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/categories/[\w-]+/(?P<id>\d+)-.*/?'
+    _TEST = {
+        'url': 'http://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912',
+        'info_dict': {
+            'id': '29844',
+            'ext': 'mp4',
+            # The expected title has a line-breaking character between its two
+            # parts; match that gap loosely instead of pinning the code point.
+            'title': r're:(?s)^Was würdet ihr die Maus fragen\?.{0,3}🎮 Frag PietSmiet #912$',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 downloads
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the gallery page at *url*."""
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        # The player setup is a JS object literal rather than strict JSON,
+        # hence the js_to_json() pass before parsing.
+        data_video_config = self._search_regex(
+            r'var config=(.*?);var', webpage, 'video config')
+        data_video = self._parse_json(
+            js_to_json(unescapeHTML(data_video_config)), page_id)
+        sources = data_video['sources']
+
+        formats = self._extract_m3u8_formats(
+            sources[0]['file'], page_id, 'mp4', 'm3u8_native',
+            m3u8_id='hls')
+
+        # Name HLS formats after their height (e.g. "hls-720p") so that the
+        # ids are reproducible; keep the generated id if no height is known.
+        for f in formats:
+            if f.get('height'):
+                f['format_id'] = 'hls-%sp' % f['height']
+
+        if len(sources) > 1:
+            formats.append(self._extract_http_format(sources[1]))
+
+        self._sort_formats(formats)
+
+        image = data_video.get('image')
+        return {
+            'id': page_id,
+            'display_id': page_id,
+            'title': compat_urllib_parse_unquote(data_video['abouttext']),
+            'formats': formats,
+            # Only build a thumbnail URL when the config actually names one.
+            'thumbnail': 'http://www.pietsmiet.de/%s' % image if image else None,
+        }
+
+    def _extract_http_format(self, http_video):
+        """Build the format dict for the direct HTTP source *http_video*."""
+        label = http_video.get('label') or ''
+        # Labels are expected to look like "720p".  Fall back to 720, which
+        # is what the site is expected to serve for this source.
+        height = int_or_none(self._search_regex(
+            r'([0-9]+)p', label, 'http video height', default=720))
+
+        video_url = http_video['file']
+        if video_url.startswith('//'):
+            # Protocol-relative URL: prepend the scheme.
+            video_url = 'https:' + video_url
+
+        return {
+            'url': video_url,
+            'ext': http_video.get('type'),
+            'format_id': 'http-%s' % label if label else 'http',
+            # Integer arithmetic on purpose: "16 / 9" evaluates to 1 on
+            # Python 2, which would make the width equal to the height.
+            'width': height * 16 // 9 if height else None,
+            'height': height,
+            # NOTE(review): fixed value, not read from the config -- confirm.
+            'fps': 30.0,
+        }