From 7d27abcaaa4a57d84648c1c42bd4449d0b8e5a7b Mon Sep 17 00:00:00 2001
From: Philip Langdale
Date: Sun, 3 Feb 2019 10:42:50 -0800
Subject: [PATCH] [willowtv] Add new extractor

willow.tv is a Cricket TV channel and Internet service. On the Internet,
they offer live streaming, full replays, and highlight videos.

This initial commit handles downloading of replays. Note that replays
are only accessible with an active subscription, and youtube-dl must
be passed the cookies for an active login session. That means that
it's not possible to run a test case in general, and I think not
possible at all, as I don't think there's a way to pass cookies when
running the tests.
---
Review note: the willowtv.py hunk below was revised after this patch was
generated, so the blob id on its index line is stale.

 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/willowtv.py   | 126 +++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 youtube_dl/extractor/willowtv.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 06de556b7..701aa8b96 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1411,6 +1411,7 @@ from .weibo import (
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .willowtv import WillowTvReplayIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
diff --git a/youtube_dl/extractor/willowtv.py b/youtube_dl/extractor/willowtv.py
new file mode 100644
index 000000000..5d9b1f7f8
--- /dev/null
+++ b/youtube_dl/extractor/willowtv.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    strip_jsonp,
+    try_get,
+)
+
+
+class WillowTvReplayIE(InfoExtractor):
+    """Extract the replay videos of a willow.tv match.
+
+    The replay API only reports success for a logged-in session, so the
+    cookies of an active willow.tv login must be passed with --cookies.
+    """
+
+    IE_NAME = 'willowtv:replay'
+    # The match slug is the single path segment after /watch-live/ and must
+    # be followed by a further path component such as /replay.
+    _VALID_URL = r'https?://(?:www\.)?willow\.tv/watch-live/(?P<id>[^/?#]+)/'
+    _TESTS = [{
+        'url': 'https://www.willow.tv/watch-live/iu-vs-kk-streaming-online-eliminator-1-pakistan-super-league-2019/replay',
+        'info_dict': {
+            'id': 'iu-vs-kk-streaming-online-eliminator-1-pakistan-super-league-2019',
+            'ext': 'mp4',
+        },
+        'skip': 'There are no free examples. An account and cookies are required',
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist with one entry per replay part of the match.
+
+        Raises an expected ExtractorError when the match metadata has no
+        results, the replay API does not report success, no replays are
+        listed, or the reported video format is not HLS.
+        """
+        meta_format = 'https://willowfeedsv2.willow.tv/willowds/mspecific/%s.json'
+        replay_format = 'https://www.willow.tv/match_replay_data_by_id?matchid=%s'
+
+        slug = self._match_id(url)
+
+        # Both endpoints are passed through strip_jsonp before JSON parsing.
+        meta = self._download_json(
+            meta_format % slug,
+            slug,
+            transform_source=strip_jsonp)
+
+        results = try_get(meta, lambda x: x['result'], list) or []
+        if not results:
+            raise ExtractorError(
+                'No results present for this match. Is it finished yet?',
+                expected=True)
+
+        entries = []
+        for result in results:
+            matchid = result['Id']
+
+            name = try_get(result, lambda x: x['Name'], compat_str)
+            series = try_get(result, lambda x: x['SeriesName'], compat_str)
+            match_type = try_get(result, lambda x: x['Type'], compat_str)
+
+            data = self._download_json(
+                replay_format % matchid,
+                matchid,
+                transform_source=strip_jsonp)
+
+            status = try_get(data, lambda x: x['status'], compat_str)
+            if status != 'success':
+                raise ExtractorError(
+                    'You must pass the cookies for a logged in willow.tv '
+                    'session with --cookies to download replays.',
+                    expected=True)
+
+            replays = try_get(
+                data, lambda x: x['result']['replay'], list) or []
+            if not replays:
+                raise ExtractorError(
+                    'No replays present for this match. '
+                    'They may not have been uploaded yet.',
+                    expected=True)
+
+            vid_format = try_get(
+                data, lambda x: x['result']['vidFormat'], compat_str)
+            if vid_format != 'HLS':
+                raise ExtractorError(
+                    'Unsupported video format "%s".' % vid_format,
+                    expected=True)
+
+            # 'replay' is iterated as nested lists; each inner item is a part.
+            for outer in replays:
+                for inner in outer:
+                    part = inner['priority']
+                    part_id = compat_str(part)
+                    # The bare part number repeats for every match, so
+                    # prefix the match id to keep entry ids distinct.
+                    video_id = '%s-%s' % (matchid, part_id)
+                    # Named m3u8_url so the url argument is not shadowed.
+                    m3u8_url = inner['secureurl']
+
+                    formats = self._extract_m3u8_formats(
+                        m3u8_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=True)
+                    self._sort_formats(formats)
+
+                    title = try_get(
+                        inner, lambda x: x['title'], compat_str) or name or slug
+
+                    entries.append({
+                        'id': video_id,
+                        'title': title,
+                        'series': series,
+                        'episode': name,
+                        'episode_name': name,
+                        'episode_id': matchid,
+                        'chapter_number': part,
+                        'chapter_id': part_id,
+                        'match_type': match_type,
+                        'formats': formats,
+                    })
+
+        return self.playlist_result(entries, slug)