From dd15a13346d9ebbcc100f65b733219a183940a58 Mon Sep 17 00:00:00 2001 From: f4r Date: Fri, 19 May 2017 15:59:43 +1200 Subject: [PATCH] [smashcast] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/smashcast.py | 206 +++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 youtube_dl/extractor/smashcast.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ed603eb29..aab8d2ff4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -899,6 +899,7 @@ from .skynewsarabia import ( from .skysports import SkySportsIE from .slideshare import SlideshareIE from .slutload import SlutloadIE +from .smashcast import SmashcastIE, SmashcastLiveIE from .smotri import ( SmotriIE, SmotriCommunityIE, diff --git a/youtube_dl/extractor/smashcast.py b/youtube_dl/extractor/smashcast.py new file mode 100644 index 000000000..6a0c86405 --- /dev/null +++ b/youtube_dl/extractor/smashcast.py @@ -0,0 +1,206 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + parse_iso8601, + float_or_none, + int_or_none, + compat_str, + determine_ext, +) + + +class SmashcastIE(InfoExtractor): + IE_NAME = 'smashcast' + _VALID_URL = r'https?://(?:www\.)?smashcast\.tv/(?:[\w\d\-]+/)?videos?/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.smashcast.tv/video/203213', + 'info_dict': { + 'id': '203213', + 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy', + 'alt_title': 'hitboxlive - Aug 9th #6', + 'description': '', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 215.1666, + 'resolution': None, #Quick bodge, originally 'HD 720p' + 'uploader': 'hitboxlive', + 'view_count': int, + 'timestamp': 1407576133, + 'upload_date': '20140809', + 'categories': ['Live Show'], + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _extract_metadata(self, url, video_id): + thumb_base = 'https://edge.sf.hitbox.tv' + metadata = self._download_json( + '%s/%s' % (url, video_id), video_id, + 'Downloading metadata JSON') + + date = 'media_live_since' + media_type = 'livestream' + if metadata.get('media_type') == 'video': + media_type = 'video' + date = 'media_date_added' + + video_meta = metadata.get(media_type, [])[0] + title = video_meta.get('media_status') + alt_title = video_meta.get('media_title') + description = clean_html( + video_meta.get('media_description') or + video_meta.get('media_description_md')) + duration = float_or_none(video_meta.get('media_duration')) + uploader = video_meta.get('media_user_name') + views = int_or_none(video_meta.get('media_views')) + timestamp = parse_iso8601(video_meta.get(date), ' ') + categories = [video_meta.get('category_name')] + thumbs = [ + {'url': thumb_base + video_meta.get('media_thumbnail'), + 'width': 320, + 'height': 180}, + {'url': thumb_base + video_meta.get('media_thumbnail_large'), + 'width': 768, + 'height': 432}, + ] + + return { + 'id': video_id, + 'title': title, + 'alt_title': alt_title, + 'description': description, + 'ext': 'mp4', + 'thumbnails': thumbs, + 'duration': duration, + 'uploader': uploader, + 'view_count': views, + 'timestamp': timestamp, + 'categories': categories, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_config = self._download_json( + 'https://www.smashcast.tv/api/player/config/video/%s' % video_id, + video_id, 'Downloading video JSON') + + formats = [] + for video in player_config['clip']['bitrates']: + label = video.get('label') + if label == 'Auto': + continue + video_url = video.get('url') + if not video_url: + continue + bitrate = int_or_none(video.get('bitrate')) + if determine_ext(video_url) == 'm3u8': + if not video_url.startswith('http'): + continue + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'tbr': bitrate, + 'format_note': label, + 'protocol': 'm3u8_native', + }) + else: + formats.append({ + 'url': video_url, + 'tbr': bitrate, + 'format_note': label, + }) + self._sort_formats(formats) + + metadata = self._extract_metadata( + 'https://www.smashcast.tv/api/media/video', + video_id) + metadata['formats'] = formats + + return metadata + + +class SmashcastLiveIE(SmashcastIE): + IE_NAME = 'smashcast:live' + _VALID_URL = r'https?://(?:www\.)?smashcast\.tv/(?!video)(?P.+)' + _TEST = { + 'url': 'http://www.smashcast.tv/dimak', + 'info_dict': { + 'id': 'dimak', + 'ext': 'mp4', + 'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e', + 'timestamp': int, + 'upload_date': compat_str, + 'title': compat_str, + 'uploader': 'Dimak', + }, + 'params': { + # live + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_config = self._download_json( + 'https://www.smashcast.tv/api/player/config/live/%s' % video_id, + video_id) + + formats = [] + cdns = player_config.get('cdns') + servers = [] + for cdn in cdns: + # Subscribe URLs are not playable + if cdn.get('rtmpSubscribe') is True: + continue + base_url = cdn.get('netConnectionUrl') + host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) + if base_url not in servers: + servers.append(base_url) + for stream in cdn.get('bitrates'): + label = stream.get('label') + if label == 'Auto': + continue + stream_url = stream.get('url') + if not stream_url: + continue + bitrate = int_or_none(stream.get('bitrate')) + if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8': + if not stream_url.startswith('http'): + continue + formats.append({ + 'url': stream_url, + 'ext': 'mp4', + 'tbr': bitrate, + 'format_note': label, + 'rtmp_live': True, + }) + else: + formats.append({ + 'url': '%s/%s' % (base_url, stream_url), + 'ext': 'mp4', + 'tbr': bitrate, + 'rtmp_live': True, + 'format_note': host, + 'page_url': url, + 'player_url': 'http://www.smashcast.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf', + }) + self._sort_formats(formats) + + metadata = self._extract_metadata( + 'https://www.smashcast.tv/api/media/live', + video_id) + metadata['formats'] = formats + metadata['is_live'] = True + metadata['title'] = self._live_title(metadata.get('title')) + + return metadata