diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06de556b7..de05038c6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1517,3 +1517,5 @@ from .zattoo import ( from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE from .zype import ZypeIE + +from .visir import VisirIE diff --git a/youtube_dl/extractor/visir.py b/youtube_dl/extractor/visir.py new file mode 100644 index 000000000..2fecedfa7 --- /dev/null +++ b/youtube_dl/extractor/visir.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + determine_ext, + RegexNotFoundError) + + +class VisirIE(InfoExtractor): + IE_NAME = 'visir' + _VALID_URL = r'https?://(?:www.)visir.is/(?:.*)/(?P.*)' + # _VALID_URL = r'https?://(?:www.)visir.is/(k|g)/(?P.*)' + _TEST = { + 'url': 'https://www.visir.is/k/clp4238', + 'md5': '30fe8b7cb54ac47115f45d8a1be7438e', + 'info_dict': { + 'id': 'clp4238', + 'ext': 'mp4', + 'title': 'Eldgos í Grímsvötnum - Vísir', + 'description': 'Egill Aðalsteinsson kvikmyndatökumaður flaug yfir gosstöðvarnar í kvöld. ' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + thumbnails = [] + description = self._og_search_description(webpage) + if ('/article/' in url) or ('/g/' in url): + try: + article_vid = self._html_search_regex(r"(?Phttps\:\/\/www\.visir\.is\/player\/.*?)\".", webpage, + 'url') + except RegexNotFoundError as e: + article_vid = self._html_search_regex( + r"(?:src=)\"(?Phttps?://(www.)?visir.is/section/media/\?template=iplayer.*?)\"", webpage, + 'url') + webpage = self._download_webpage(article_vid, video_id) + + title = self._html_search_regex(r"Title:.'(.*?)'", webpage, 'title') + filename = self._html_search_regex(r"File:.'(.*?)'", webpage, 'filenme') + hostname = self._html_search_regex(r"Host:.'(.*?)'", webpage, 'hostname') + thumb_url = self._html_search_regex(r"image:.'(.*?)'", webpage, 'thumb_url') + thumbnails.append({'url': thumb_url}) + media_url = ''.join(['https://', hostname, filename]) + ext = determine_ext(media_url) + + formats = [{'url': media_url}] + if ext == 'm3u8': + m3u8_formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', fatal=False) + formats.extend(m3u8_formats) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'thumbnails': thumbnails + # TODO more properties (see youtube_dl/extractor/common.py) + }