1
0
mirror of https://github.com/l1ving/youtube-dl synced 2024-11-21 15:32:51 +08:00
youtube-dl/youtube_dl/extractor/hitbox.py

204 lines
6.8 KiB
Python
Raw Normal View History

2014-12-29 01:07:32 +08:00
# coding: utf-8
from __future__ import unicode_literals
2014-12-30 05:22:07 +08:00
2014-12-30 04:10:59 +08:00
import re
2014-12-29 01:07:32 +08:00
from .common import InfoExtractor
from ..utils import (
2014-12-30 05:22:07 +08:00
clean_html,
parse_iso8601,
float_or_none,
int_or_none,
compat_str,
determine_ext,
2014-12-29 01:07:32 +08:00
)
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos at hitbox.tv/video/<numeric id>."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            # Raw literal: '\.' is not a valid escape in a plain string.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Download the media JSON and map it onto an info dict.

        ``url`` is the API endpoint prefix; the JSON document is requested
        from ``'<url>/<video_id>'``.

        Returns a dict with the keys id, title, alt_title, description, ext,
        thumbnails, duration, uploader, view_count, timestamp and categories.
        It does not contain 'formats'.

        Raises IndexError when the JSON has no entry under the selected
        media type key ('video' or 'livestream').
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id,
            'Downloading metadata JSON')

        # The top-level 'media_type' decides both which list holds the
        # entry and which field carries its date.
        if metadata.get('media_type') == 'video':
            media_type, date = 'video', 'media_date_added'
        else:
            media_type, date = 'livestream', 'media_live_since'
        video_meta = metadata.get(media_type, [])[0]

        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # A missing category yields no categories instead of [None].
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Thumbnail paths are relative to thumb_base. An absent path is
        # skipped rather than concatenated (str + None raises TypeError).
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a video URL."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # The 'Auto' entry is skipped, as are entries without a URL.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # Only absolute http(s) playlist URLs are kept.
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)
        metadata['formats'] = formats

        return metadata
class HitboxLiveIE(HitboxIE):
    """Extractor for live channels at hitbox.tv/<channel>."""

    IE_NAME = 'hitbox:live'
    # NOTE(review): the (?!video) lookahead also rejects any channel whose
    # name merely starts with "video", and '.+' captures everything after
    # the host (including any query string) as the id -- confirm intended.
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a live channel.

        Formats are collected from every distinct CDN listed in the player
        config. A CDN without a 'netConnectionUrl', or one whose URL was
        already processed, contributes nothing.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        # 'cdns' and 'bitrates' may be absent; treat them as empty lists
        # instead of iterating over None.
        for cdn in player_config.get('cdns') or []:
            base_url = cdn.get('netConnectionUrl')
            if not base_url or base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # Last two dot-separated labels of the host part, used as the
            # format note; None when the URL does not have that shape.
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
            host = host_match.group(1) if host_match else None

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                # The 'Auto' entry is skipped, as are entries without a URL.
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Only absolute http(s) playlist URLs are kept.
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                else:
                    # Non-HLS stream URLs are joined onto the CDN's
                    # connection URL.
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata