1
0
mirror of https://github.com/l1ving/youtube-dl synced 2024-11-20 18:23:03 +08:00

[viqeo] Add extractor (closes #17066)

This commit is contained in:
Sergey M․ 2018-07-30 03:05:36 +07:00
parent 5484828418
commit 9d1b213845
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
3 changed files with 115 additions and 0 deletions

View File

@ -1291,6 +1291,7 @@ from .viki import (
VikiIE, VikiIE,
VikiChannelIE, VikiChannelIE,
) )
from .viqeo import ViqeoIE
from .viu import ( from .viu import (
ViuIE, ViuIE,
ViuPlaylistIE, ViuPlaylistIE,

View File

@ -113,6 +113,7 @@ from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE from .indavideo import IndavideoEmbedIE
from .apa import APAIE from .apa import APAIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2060,6 +2061,15 @@ class GenericIE(InfoExtractor):
}, },
'skip': 'TODO: fix nested playlists processing in tests', 'skip': 'TODO: fix nested playlists processing in tests',
}, },
{
# Viqeo embeds
'url': 'https://viqeo.tv/',
'info_dict': {
'id': 'viqeo',
'title': 'All-new video platform',
},
'playlist_count': 6,
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -3094,6 +3104,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
sharevideos_urls, video_id, video_title) sharevideos_urls, video_id, video_title)
viqeo_urls = ViqeoIE._extract_urls(webpage)
if viqeo_urls:
return self.playlist_from_matches(
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries:

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
url_or_none,
)
class ViqeoIE(InfoExtractor):
    """Extractor for Viqeo (viqeo.tv) videos.

    Accepts the internal ``viqeo:<id>`` scheme, cdn.viqeo.tv embed player
    URLs carrying a ``vid`` query parameter, and api.viqeo.tv startup URLs
    carrying a ``video[]`` query parameter (raw or percent-encoded
    brackets).  The video id is a run of hexadecimal characters.
    """

    # NOTE: the '?' that opens the query string of the API startup URL is
    # escaped so that it is matched literally; left unescaped it would only
    # make the preceding 'p' optional.
    _VALID_URL = r'''(?x)
                        (?:
                            viqeo:|
                            https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
                            https?://api\.viqeo\.tv/v\d+/data/startup\?.*?\bvideo(?:%5B%5D|\[\])=
                        )
                        (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
        'md5': 'a169dd1a6426b350dca4296226f21e76',
        'info_dict': {
            'id': 'cde96f09d25f39bee837',
            'ext': 'mp4',
            'title': 'cde96f09d25f39bee837',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 76,
        },
    }, {
        'url': 'viqeo:cde96f09d25f39bee837',
        'only_matching': True,
    }, {
        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every Viqeo embed ``<iframe>`` in *webpage*.

        Only iframes whose ``src`` points at the cdn.viqeo.tv embed player
        and contains a hexadecimal ``vid`` parameter are reported.  The
        returned URLs are taken verbatim from the markup, so they may be
        protocol-relative (start with ``//``).
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the Viqeo video referenced by *url*.

        Whatever form *url* has, the canonical embed page for the video id
        is downloaded and the JSON object assigned to ``SLOT_DATA`` on that
        page is parsed.  Each entry of its ``mediaFiles`` list becomes a
        thumbnail (``image/*``) or a format (``video/*`` / ``audio/*``);
        entries of any other type are ignored.

        Returns a dict with ``id``, ``title`` (the id itself), ``duration``,
        ``thumbnails`` and ``formats``.  A missing ``mediaFiles`` key raises
        ``KeyError``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)

        data = self._parse_json(
            self._search_regex(
                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
            video_id)

        formats = []
        thumbnails = []
        for media_file in data['mediaFiles']:
            # Skip anything that is not a well-formed media description.
            if not isinstance(media_file, dict):
                continue
            media_url = url_or_none(media_file.get('url'))
            # Only http(s) and protocol-relative URLs are kept.
            if not media_url or not media_url.startswith(('http', '//')):
                continue
            media_type = str_or_none(media_file.get('type'))
            if not media_type:
                continue
            # Top-level part of the type string, e.g. 'video' in 'video/mp4'.
            media_kind = media_type.split('/')[0].lower()
            f = {
                'url': media_url,
                'width': int_or_none(media_file.get('width')),
                'height': int_or_none(media_file.get('height')),
            }
            format_id = str_or_none(media_file.get('quality'))
            if media_kind == 'image':
                # Thumbnails use 'id' where formats use 'format_id'.
                f['id'] = format_id
                thumbnails.append(f)
            elif media_kind in ('video', 'audio'):
                is_audio = media_kind == 'audio'
                f.update({
                    'format_id': 'audio' if is_audio else format_id,
                    'fps': int_or_none(media_file.get('fps')),
                    # 'none' marks an audio-only format; None leaves the
                    # video codec unspecified.
                    'vcodec': 'none' if is_audio else None,
                })
                formats.append(f)
        self._sort_formats(formats)

        duration = int_or_none(data.get('duration'))

        return {
            'id': video_id,
            # The title is set to the video id.
            'title': video_id,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }