mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-13 19:57:19 +08:00
[visir] Add new information extractor
This commit is contained in:
parent
b51a4ebed4
commit
40daaef417
@ -1119,6 +1119,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .visir import VisirMediaIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
|
@ -81,6 +81,7 @@ from .videa import VideaIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .ustream import UstreamIE
|
||||
from .openload import OpenloadIE
|
||||
from .visir import VisirMediaIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -1473,6 +1474,20 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# Visir embed
|
||||
'url': 'http://www.visir.is/-viljum-hjalpa-theim-ad-hjalpa-sjalfum-ser-/article/2017170129096',
|
||||
'info_dict': {
|
||||
'id': 'VTV8CE25BB4-9132-48AD-A2EE-00AF0BAA02A0',
|
||||
'ext': 'mp4',
|
||||
'title': 'H\u00f3pur nemenda s\u00f6fnu\u00f0u pening fyrir Ge\u00f0hj\u00e1lp',
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [VisirMediaIE.ie_key()],
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
@ -2438,6 +2453,12 @@ class GenericIE(InfoExtractor):
|
||||
return _playlist_from_matches(
|
||||
openload_urls, ie=OpenloadIE.ie_key())
|
||||
|
||||
# Look for Visir embeds
|
||||
visir_urls = VisirMediaIE._extract_urls(webpage)
|
||||
if visir_urls:
|
||||
return _playlist_from_matches(
|
||||
visir_urls, ie=VisirMediaIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
|
104
youtube_dl/extractor/visir.py
Normal file
104
youtube_dl/extractor/visir.py
Normal file
@ -0,0 +1,104 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
base_url,
|
||||
remove_start,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class VisirMediaIE(InfoExtractor):
    """Information extractor for media player pages on visir.is.

    Handles ``visir.is/section/...fileid=<id>`` URLs and also exposes
    ``_extract_urls`` so that other extractors can discover player embeds
    inside arbitrary web pages.
    """

    _VALID_URL = r'https?://(?:www\.)?visir\.is/section(?:/media)?/.+?fileid=(?P<id>[^/]+)$'
    _TESTS = [{
        'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP51729',
        'md5': '1486324696d1b9f30fcea985a7922f2c',
        'info_dict': {
            'id': 'CLP51729',
            'display_id': 'CLP51729',
            'ext': 'mp4',
            'title': 'Gu\u00f0j\u00f3n: Mj\u00f6g j\u00e1kv\u00e6\u00f0ur \u00e1 framhaldi\u00f0',
            'description': None,
            'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/51729_3.jpg',
        },
    }, {
        'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP45905',
        'info_dict': {
            'id': 'CLP45905',
            'display_id': 'CLP45905',
            'ext': 'mp4',
            'title': 'Eva Laufey - Nau\u00f0synlegt a\u00f0 b\u00f6rn f\u00e1i a\u00f0 koma n\u00e1l\u00e6gt matarger\u00f0',
            'description': 'md5:24422433a08d270a3690d149edf113b8',
            'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/45905_3.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return player page URLs for every Visir embed found in *webpage*.

        An embed is an ``App.Player.Init({...})`` call whose options contain,
        in this order, an audio/video ``Type``, a ``FileId`` and the
        ``visirvod.365cdn.is`` ``Host``. Each ``FileId`` is substituted into
        the iplayer URL template. The result is an empty list when nothing
        matches.
        """
        media_base_url = 'http://www.visir.is/section/media/?template=iplayer&fileid=%s'
        embed_re = r'App\.Player\.Init\(\{[^\}]*Type:\s*\'(?:audio|video)\'[^\}]+FileId:\s*\'(?P<id>.+?)\'[^\}]+Host:\s*\'visirvod\.365cdn\.is\''
        return [
            media_base_url % mobj.group('id')
            for mobj in re.finditer(embed_re, webpage)]

    def _extract_formats(self, filename, video_id, media_type):
        """Build the sorted list of formats for *filename*.

        The HLS playlist URL on ``visirvod.365cdn.is`` is handed to
        ``_extract_wowza_formats``. For ``media_type == 'video'`` only the
        ``dash`` protocol is skipped; for any other media type ``f4m`` and
        ``m3u8`` are skipped as well.
        """
        playlist_url = 'http://visirvod.365cdn.is/hls-vod/_definst_/mp4:%s/playlist.m3u8' % filename
        skip_protocols = ['dash']
        if media_type != 'video':
            skip_protocols += ['f4m', 'm3u8']
        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=skip_protocols)
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract metadata and formats for a single Visir media page.

        All player settings are read from the ``App.Player.Init({...})``
        call embedded in the page. ``FileId``, ``File`` and ``Type`` are
        mandatory; title and thumbnail fall back to the page's Open Graph
        metadata when the player options do not provide them.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Template capturing the first quoted value that follows the given
        # option name inside the App.Player.Init({...}) argument object.
        option_re = r'App\.Player\.Init\s*\(\s*\{[^\}]*%s:[^\}]*?\'(.+?)\''

        video_id = self._search_regex(
            option_re % 'FileId', webpage, 'video id')
        filename = self._search_regex(
            option_re % 'File', webpage, 'filename')
        media_type = self._search_regex(
            option_re % 'Type', webpage, 'media type')

        formats = self._extract_formats(filename, video_id, media_type)

        title = self._search_regex(
            option_re % 'Title', webpage, 'video title', default=None)
        if not title:
            # Fall back to the Open Graph title, dropping the site prefix.
            title = self._og_search_title(webpage)
            if title:
                title = remove_start(title, 'Vísir -').strip()

        description = self._og_search_description(webpage, default=None)

        # The field label used to be 'video title' (copy-paste slip); it now
        # names the field that is actually being searched for.
        thumbnail = self._search_regex(
            option_re % '(?:I|i)mage', webpage, 'thumbnail', default=None)
        if thumbnail:
            # Site-relative paths are resolved against the page's base URL.
            if thumbnail.startswith('/'):
                thumbnail = urljoin(base_url(url), thumbnail)
        else:
            thumbnail = self._og_search_thumbnail(webpage, default=None)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
Loading…
x
Reference in New Issue
Block a user