puls4 Add new extractor

2024-12-31 00:22:53 +08:00 · 2015-02-27 15:41:58 +01:00 · 2015-02-27 15:41:58 +01:00 · 7862ad88b7
commit 7862ad88b7
parent f3bff94cf9
2 changed files with 62 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -373,6 +373,7 @@ from .pornotube import PornotubeIE
 from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
 from .r7 import R7IE
--- a/youtube_dl/extractor/puls4.py
+++ b/youtube_dl/extractor/puls4.py
@@ -0,0 +1,61 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 from .common import InfoExtractor
 import re
 class Puls4IE(InfoExtractor):
    """Extractor for video pages on www.puls4.com.

    Accepts URLs of the form
    ``http(s)://www.puls4.com/video/<show>/play/<numeric id>`` and returns
    an info dict carrying the video id, its title and the available
    formats (always 'sd', plus 'hd' when the page offers one).
    """

    # The dots of the host name are escaped so that each one only matches a
    # literal '.'.  Unescaped, '.' matches any character and the pattern
    # would also claim look-alike hosts such as 'wwwXpuls4Ycom'.
    _VALID_URL = r'https?://www\.puls4\.com/video/.+?/play/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
            'md5': '49f6a6629747eeec43cef6a46b5df81d',
            'info_dict': {
                'id': '2716816',
                'ext': 'mp4',
                'title': 'Pro und Contra vom 23.02.2015',
            },
        },
        {
            'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
            'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
            'info_dict': {
                'id': '1298106',
                'ext': 'mp4',
                'title': 'Lucky Fritz',
            },
        },
    ]

    def _real_extract(self, url):
        """Scrape title and media URLs from a puls4.com video page.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with the keys ``id``, ``title``, ``formats`` and
            ``ext``.  ``formats`` lists the 'sd' entry first, followed by
            'hd' when one is present in the page.

        The title and the SD URL are looked up without a default, so a
        page lacking them is handled by the base-class search helper
        (NOTE(review): presumably it raises -- confirm against
        InfoExtractor).  The interstitial link and the HD URL are optional
        (``default=None``).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Some pages only show a link carrying the "fsk-button" marker
        # instead of the player (presumably an FSK age-rating gate --
        # confirm against the site).  When it is there, follow its href and
        # scrape that page instead.
        real_url = self._html_search_regex(
            r'\"fsk-button\".+?href=\"([^"]+)',
            webpage, 'fsk_button', default=None)
        if real_url:
            webpage = self._download_webpage(real_url, video_id)

        # DOTALL lets '.+?' span the line breaks between the container div
        # and the <h1> holding the title.
        title = self._html_search_regex(
            r'<div id="bg_brandableContent">.+?<h1>(.+?)</h1>',
            webpage, 'title', flags=re.DOTALL)

        # The media URLs sit in an inline '{"url":"...","hd":<bool>' blob.
        # Backslashes are stripped from the captured value (presumably
        # JSON-escaped '\/' separators -- verify against a live page).
        sd_url = self._html_search_regex(
            r'{\"url\":\"([^"]+?)\",\"hd\":false',
            webpage, 'sd_url').replace('\\', '')
        formats = [{'format_id': 'sd', 'url': sd_url, 'quality': -2}]

        # The HD variant is optional; it is given a higher 'quality' value
        # than SD and appended after it.
        hd_url = self._html_search_regex(
            r'{\"url\":\"([^"]+?)\",\"hd\":true',
            webpage, 'hd_url', default=None)
        if hd_url:
            hd_url = hd_url.replace('\\', '')
            formats.append({'format_id': 'hd', 'url': hd_url, 'quality': -1})

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'ext': 'mp4',
        }