1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 18:40:01 +08:00

[visir] Add new information extractor

This commit is contained in:
Alex Seiler 2017-01-27 15:57:03 +01:00
parent b51a4ebed4
commit 40daaef417
3 changed files with 126 additions and 0 deletions

View File

@ -1119,6 +1119,7 @@ from .viki import (
VikiIE, VikiIE,
VikiChannelIE, VikiChannelIE,
) )
from .visir import VisirMediaIE
from .viu import ( from .viu import (
ViuIE, ViuIE,
ViuPlaylistIE, ViuPlaylistIE,

View File

@ -81,6 +81,7 @@ from .videa import VideaIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .openload import OpenloadIE from .openload import OpenloadIE
from .visir import VisirMediaIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1473,6 +1474,20 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [TwentyMinutenIE.ie_key()], 'add_ie': [TwentyMinutenIE.ie_key()],
},
{
# Visir embed
'url': 'http://www.visir.is/-viljum-hjalpa-theim-ad-hjalpa-sjalfum-ser-/article/2017170129096',
'info_dict': {
'id': 'VTV8CE25BB4-9132-48AD-A2EE-00AF0BAA02A0',
'ext': 'mp4',
'title': 'H\u00f3pur nemenda s\u00f6fnu\u00f0u pening fyrir Ge\u00f0hj\u00e1lp',
'description': None,
},
'params': {
'skip_download': True,
},
'add_ie': [VisirMediaIE.ie_key()],
} }
# { # {
# # TODO: find another test # # TODO: find another test
@ -2438,6 +2453,12 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches( return _playlist_from_matches(
openload_urls, ie=OpenloadIE.ie_key()) openload_urls, ie=OpenloadIE.ie_key())
# Look for Visir embeds
visir_urls = VisirMediaIE._extract_urls(webpage)
if visir_urls:
return _playlist_from_matches(
visir_urls, ie=VisirMediaIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')

View File

@ -0,0 +1,104 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
base_url,
remove_start,
urljoin,
)
class VisirMediaIE(InfoExtractor):
    """Information extractor for audio/video player pages on visir.is.

    Handles ``visir.is/section/...fileid=<id>`` URLs directly and offers
    ``_extract_urls`` so that callers can discover Visir players embedded
    in another page's HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?visir\.is/section(?:/media)?/.+?fileid=(?P<id>[^/]+)$'
    _TESTS = [{
        'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP51729',
        'md5': '1486324696d1b9f30fcea985a7922f2c',
        'info_dict': {
            'id': 'CLP51729',
            'display_id': 'CLP51729',
            'ext': 'mp4',
            'title': 'Gu\u00f0j\u00f3n: Mj\u00f6g j\u00e1kv\u00e6\u00f0ur \u00e1 framhaldi\u00f0',
            'description': None,
            'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/51729_3.jpg',
        },
    }, {
        'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP45905',
        'info_dict': {
            'id': 'CLP45905',
            'display_id': 'CLP45905',
            'ext': 'mp4',
            'title': 'Eva Laufey - Nau\u00f0synlegt a\u00f0 b\u00f6rn f\u00e1i a\u00f0 koma n\u00e1l\u00e6gt matarger\u00f0',
            'description': 'md5:24422433a08d270a3690d149edf113b8',
            'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/45905_3.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the player page URLs of all Visir embeds in *webpage*.

        An embed is recognised as an ``App.Player.Init({...})`` call whose
        options contain, in this order, a ``Type`` of ``audio`` or
        ``video``, a ``FileId`` and the host ``visirvod.365cdn.is``.  Each
        ``FileId`` is substituted into the iplayer URL template below.

        Returns a list of URL strings, empty when nothing matches.
        """
        media_base_url = 'http://www.visir.is/section/media/?template=iplayer&fileid=%s'
        return [
            media_base_url % mobj.group('id')
            for mobj in re.finditer(
                r'App\.Player\.Init\(\{[^\}]*Type:\s*\'(?:audio|video)\'[^\}]+FileId:\s*\'(?P<id>.+?)\'[^\}]+Host:\s*\'visirvod\.365cdn\.is\'',
                webpage)]

    def _extract_formats(self, filename, video_id, media_type):
        """Return the sorted formats for *filename*.

        The playlist URL is built from *filename* and handed to
        ``_extract_wowza_formats``.  ``dash`` is always skipped; for any
        *media_type* other than ``'video'`` the ``f4m`` and ``m3u8``
        protocols are skipped as well.
        """
        playlist_url = 'http://visirvod.365cdn.is/hls-vod/_definst_/mp4:%s/playlist.m3u8' % filename
        skip_protocols = ['dash']
        if media_type != 'video':
            skip_protocols += ['f4m', 'm3u8']
        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=skip_protocols)
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Download the player page at *url* and build its info dict.

        The media id, file name and media type are read from the options
        passed to ``App.Player.Init``; all three are looked up without a
        default.  Title and thumbnail fall back to the page's Open Graph
        metadata when the player options do not provide them.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Captures the first single-quoted value that follows ``<key>:``
        # inside the options object of the App.Player.Init() call.
        regex_pattern = r'App\.Player\.Init\s*\(\s*\{[^\}]*%s:[^\}]*?\'(.+?)\''

        def player_option(key, name, **kwargs):
            # One lookup of a single player option in the downloaded page.
            return self._search_regex(
                regex_pattern % key, webpage, name, **kwargs)

        video_id = player_option('FileId', 'video id')
        filename = player_option('File', 'filename')
        media_type = player_option('Type', 'media type')

        formats = self._extract_formats(filename, video_id, media_type)

        title = player_option('Title', 'video title', default=None)
        if not title:
            title = self._og_search_title(webpage)
            # NOTE(review): the listing this was restored from had lost its
            # indentation; the prefix cleanup is assumed to apply only to
            # the og:title fallback -- confirm against upstream.
            if title:
                title = remove_start(title, 'Vísir -').strip()

        description = self._og_search_description(webpage, default=None)

        # The field name used to read 'video title' here (copy-paste slip).
        thumbnail = player_option('(?:I|i)mage', 'thumbnail', default=None)
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage, default=None)
        elif thumbnail.startswith('/'):
            # Resolve host-relative image paths against the page URL.
            thumbnail = urljoin(base_url(url), thumbnail)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }