From 4666796c6c678429672267b2635e2fa35ed6303a Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Fri, 23 Dec 2016 23:04:53 +0300 Subject: [PATCH 01/16] [StreamMe] add a new extractor --- youtube_dl/extractor/extractors.py | 5 + youtube_dl/extractor/streamme.py | 160 +++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 youtube_dl/extractor/streamme.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fcfe87f6f..08c67c235 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1221,3 +1221,8 @@ from .youtube import ( from .zapiks import ZapiksIE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE +from .streamme import ( + StreamMeIE, + StreamMeLiveIE, + StreamMeArchiveIE, +) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py new file mode 100644 index 000000000..c7e396f6e --- /dev/null +++ b/youtube_dl/extractor/streamme.py @@ -0,0 +1,160 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + ExtractorError, +) + +class StreamMeIE(InfoExtractor): + IE_NAME = 'StreamMe:video' + _API_CHANNEL = 'https://www.stream.me/api-user/v1//channel' + _API_ARCHIVE = 'https://www.stream.me/api-vod/v1//archives' + _VALID_URL_BASE = r'https?://(video-cdn|www).stream.me' + _VALID_URL = r'%s/archive/(?P[^/]+)/[^/]+/(?P[^/?]+)' % _VALID_URL_BASE + _TEST = { + 'url': 'https://www.stream.me/archive/kombatcup/kombat-cup-week-8-sunday-open/pDlXAj6mYb', + 'md5': 'b32af6fad972d0bcf5854a416b5b3b01', + 'info_dict': { + 'id': 'pDlXAj6mYb', + 'ext': 'mp4', + 'title': 'Kombat Cup Week #8 - Sunday Open', + 'uploader': 'KombatCup', + 'uploader_id': 'kombatcup', + 'timestamp': 1481512102000, + 'age_limit': 13, + } + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = self._match_id(url) + apiurl = self._API_ARCHIVE.replace('', m.group('channel_id')) + + # webpage = self._download_webpage(url, video_id) + data = json.loads(self._download_webpage(apiurl, video_id)) + + for vod in data.get('_embedded').get('vod'): + vod_info = [] + if vod.get('urlId') == video_id: + vod_info = vod + break + + manifest_json = self._download_json(vod_info + .get('_links') + .get('manifest') + .get('href'), video_id) + + formats = self._extract_formats(manifest_json.get('formats')) + + self._sort_formats(formats, 'vbr') + info = self._extract_info(vod_info) + info['formats'] = formats + return info + + def _extract_info(self, info): + return { + 'id': info.get('urlId') or 'live', + # 'formats': self.formats, + 'title': info.get('title'), + 'age_limit': int_or_none(info.get('ageRating')), + 'description': info.get('description') or None, + 'dislike_count': int_or_none(info.get('stats').get('raw').get('dislikes')), + 'display_id': info.get('titleSlug') or None, + 'duration': int_or_none(info.get('duration')), + 'like_count': int_or_none(info.get('stats').get('raw').get('likes')), + 'thumbnail': info.get('_links').get('thumbnail').get('href') or None, + 'timestamp': info.get('whenCreated') or None, + 'uploader': info.get('username') or None, + 'uploader_id': info.get('userSlug') or None, + 'view_count': int_or_none(info.get('stats').get('raw').get('views')), + 'is_live': True if info.get('active') else False, + } + + def _extract_formats(self, fmts): + formats = [] + for fmt_tag, d in fmts.items(): + # skip websocket and mjpeg we can't handle them anyway + if fmt_tag in ('mjpeg-lodef', 'mp4-ws',): continue + for fmt_info in d.get('encodings'): + formats.append({ + 'url': fmt_info.get('location'), + 'width': fmt_info.get('videoWidth'), + 'height': fmt_info.get('videoHeight'), + 'vbr': fmt_info.get('videoKbps'), + 'abr': fmt_info.get('audioKbps'), + 'acodec': d.get('audioCodec'), + 'vcodec': d.get('videoCodec'), + 'format_id': "%s%sp" % (fmt_tag, fmt_info.get('videoHeight')), + 'ext': 'flv' if fmt_tag.split('-')[1] == 'rtmp' else 'mp4', + # I don't know all the possible protocols yet. + # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' + }) + return formats + +class StreamMeLiveIE(StreamMeIE): + IE_NAME = 'StreamIE:live' + _VALID_URL = r'%s/(?P[^\#]+$)' % StreamMeIE._VALID_URL_BASE + + def _real_extract(self, url): + channel_id = self._match_id(url) + apiurl = StreamMeIE._API_CHANNEL.replace('', channel_id) + + data = json.loads(self._download_webpage(apiurl, channel_id)) + stream_info = [] + # search for a live stream... + for stream in data.get('_embedded').get('streams'): + stream_info = stream + break # TODO: add to a list (multi-streams?) + + if not stream_info.get('active'): + raise ExtractorError('%s is offline' % channel_id, expected=True) + + manifest_json = self._download_json(stream_info + .get('_links') + .get('manifest') + .get('href'), channel_id) + formats = self._extract_formats(manifest_json.get('formats')) + + self._sort_formats(formats, 'vbr') + info = self._extract_info(stream_info) + info['formats'] = formats + return info + +class StreamMeArchiveIE(StreamMeIE): + IE_NAME = 'StreamMe:archives' + _VALID_URL = r'%s/(?P[^\#]+(?P\#archives)$)' % StreamMeIE._VALID_URL_BASE + _PLAYLIST_TYPE = 'past broadcasts' + _PLAYLIST_LIMIT = 128 + _TEST = { + 'url': 'https://www.stream.me/kombatcup#archives', + 'info_dict': { + 'id': 'kombatcup', + 'title': 'KombatCup', + }, + 'playlist_mincount': 25, + } + + def _real_extract(self, url): + channel_id = self._match_id(url).split('#')[0] + apiurl = StreamMeIE._API_ARCHIVE.replace('', channel_id) + # TODO: implement paginated downloading + data = json.loads(self._download_webpage(apiurl+'?limit=%d&offset=0' % self._PLAYLIST_LIMIT, channel_id)) + playlist = [] + + for vod in data.get('_embedded').get('vod'): + manifest_json = self._download_json(vod + .get('_links') + .get('manifest') + .get('href'), vod.get('urlId')) + formats = self._extract_formats(manifest_json.get('formats')) + self._sort_formats(formats, 'vbr') + info = self._extract_info(vod) + info['formats'] = formats + playlist.append(info) + + return self.playlist_result(playlist, channel_id, info.get('uploader')) From 45a80c1b16172313c452a675cd8d651e3e3db9a6 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Sun, 25 Dec 2016 04:40:44 +0300 Subject: [PATCH 02/16] [StreamMe]: flake8 checked --- youtube_dl/extractor/streamme.py | 67 +++++++++++++++++--------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index c7e396f6e..414f89bff 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, ) + class StreamMeIE(InfoExtractor): IE_NAME = 'StreamMe:video' _API_CHANNEL = 'https://www.stream.me/api-user/v1//channel' @@ -35,7 +36,6 @@ class StreamMeIE(InfoExtractor): video_id = self._match_id(url) apiurl = self._API_ARCHIVE.replace('', m.group('channel_id')) - # webpage = self._download_webpage(url, video_id) data = json.loads(self._download_webpage(apiurl, video_id)) for vod in data.get('_embedded').get('vod'): @@ -43,14 +43,13 @@ class StreamMeIE(InfoExtractor): if vod.get('urlId') == video_id: vod_info = vod break - + manifest_json = self._download_json(vod_info - .get('_links') - .get('manifest') - .get('href'), video_id) + .get('_links') + .get('manifest') + .get('href'), video_id) formats = self._extract_formats(manifest_json.get('formats')) - self._sort_formats(formats, 'vbr') info = self._extract_info(vod_info) info['formats'] = formats @@ -79,23 +78,25 @@ class StreamMeIE(InfoExtractor): formats = [] for fmt_tag, d in fmts.items(): # skip websocket and mjpeg we can't handle them anyway - if fmt_tag in ('mjpeg-lodef', 'mp4-ws',): continue + if fmt_tag in ('mjpeg-lodef', 'mp4-ws',): + continue for fmt_info in d.get('encodings'): formats.append({ - 'url': fmt_info.get('location'), - 'width': fmt_info.get('videoWidth'), - 'height': fmt_info.get('videoHeight'), - 'vbr': fmt_info.get('videoKbps'), - 'abr': fmt_info.get('audioKbps'), - 'acodec': d.get('audioCodec'), - 'vcodec': d.get('videoCodec'), + 'url': fmt_info.get('location'), + 'width': fmt_info.get('videoWidth'), + 'height': fmt_info.get('videoHeight'), + 'vbr': fmt_info.get('videoKbps'), + 'abr': fmt_info.get('audioKbps'), + 'acodec': d.get('audioCodec'), + 'vcodec': d.get('videoCodec'), 'format_id': "%s%sp" % (fmt_tag, fmt_info.get('videoHeight')), 'ext': 'flv' if fmt_tag.split('-')[1] == 'rtmp' else 'mp4', # I don't know all the possible protocols yet. # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' - }) + }) return formats + class StreamMeLiveIE(StreamMeIE): IE_NAME = 'StreamIE:live' _VALID_URL = r'%s/(?P[^\#]+$)' % StreamMeIE._VALID_URL_BASE @@ -106,44 +107,48 @@ class StreamMeLiveIE(StreamMeIE): data = json.loads(self._download_webpage(apiurl, channel_id)) stream_info = [] - # search for a live stream... + # search for a live stream... for stream in data.get('_embedded').get('streams'): stream_info = stream - break # TODO: add to a list (multi-streams?) - + break # TODO: add to a list (multi-streams?) + if not stream_info.get('active'): raise ExtractorError('%s is offline' % channel_id, expected=True) - - manifest_json = self._download_json(stream_info - .get('_links') - .get('manifest') - .get('href'), channel_id) - formats = self._extract_formats(manifest_json.get('formats')) + manifest_json = self._download_json(stream_info + .get('_links') + .get('manifest') + .get('href'), channel_id) + + formats = self._extract_formats(manifest_json.get('formats')) self._sort_formats(formats, 'vbr') info = self._extract_info(stream_info) info['formats'] = formats return info + class StreamMeArchiveIE(StreamMeIE): IE_NAME = 'StreamMe:archives' _VALID_URL = r'%s/(?P[^\#]+(?P\#archives)$)' % StreamMeIE._VALID_URL_BASE _PLAYLIST_TYPE = 'past broadcasts' _PLAYLIST_LIMIT = 128 _TEST = { - 'url': 'https://www.stream.me/kombatcup#archives', - 'info_dict': { - 'id': 'kombatcup', - 'title': 'KombatCup', - }, - 'playlist_mincount': 25, + 'url': 'https://www.stream.me/kombatcup#archives', + 'info_dict': { + 'id': 'kombatcup', + 'title': 'KombatCup', + }, + 'playlist_mincount': 25, + 'params': { + 'skip_download': True, + } } def _real_extract(self, url): channel_id = self._match_id(url).split('#')[0] apiurl = StreamMeIE._API_ARCHIVE.replace('', channel_id) # TODO: implement paginated downloading - data = json.loads(self._download_webpage(apiurl+'?limit=%d&offset=0' % self._PLAYLIST_LIMIT, channel_id)) + data = json.loads(self._download_webpage(apiurl + '?limit=%d&offset=0' % self._PLAYLIST_LIMIT, channel_id)) playlist = [] for vod in data.get('_embedded').get('vod'): From d7dd51a67d44e203006a4ce4eccf711f9def8519 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Tue, 27 Dec 2016 01:51:18 +0300 Subject: [PATCH 03/16] [StreamMe] fix URL regexes (on site change) --- youtube_dl/extractor/streamme.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 414f89bff..52c5b661e 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -16,7 +16,7 @@ class StreamMeIE(InfoExtractor): _API_CHANNEL = 'https://www.stream.me/api-user/v1//channel' _API_ARCHIVE = 'https://www.stream.me/api-vod/v1//archives' _VALID_URL_BASE = r'https?://(video-cdn|www).stream.me' - _VALID_URL = r'%s/archive/(?P[^/]+)/[^/]+/(?P[^/?]+)' % _VALID_URL_BASE + _VALID_URL = r'%s\/archive\/(?P[^\#\/]+)\/[^\/]+\/(?P[^\/]+)' % _VALID_URL_BASE _TEST = { 'url': 'https://www.stream.me/archive/kombatcup/kombat-cup-week-8-sunday-open/pDlXAj6mYb', 'md5': 'b32af6fad972d0bcf5854a416b5b3b01', @@ -129,11 +129,11 @@ class StreamMeLiveIE(StreamMeIE): class StreamMeArchiveIE(StreamMeIE): IE_NAME = 'StreamMe:archives' - _VALID_URL = r'%s/(?P[^\#]+(?P\#archives)$)' % StreamMeIE._VALID_URL_BASE + _VALID_URL = r'%s/(?P[^\#]+)(\#archive)$' % StreamMeIE._VALID_URL_BASE _PLAYLIST_TYPE = 'past broadcasts' _PLAYLIST_LIMIT = 128 _TEST = { - 'url': 'https://www.stream.me/kombatcup#archives', + 'url': 'https://www.stream.me/kombatcup#archive', 'info_dict': { 'id': 'kombatcup', 'title': 'KombatCup', From f08af07ccac2519cf69893b899b582312a1ebb57 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Thu, 29 Dec 2016 05:01:08 +0300 Subject: [PATCH 04/16] [StreamMe] live stream test added --- youtube_dl/extractor/streamme.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 52c5b661e..7a0562f9d 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -99,7 +99,25 @@ class StreamMeIE(InfoExtractor): class StreamMeLiveIE(StreamMeIE): IE_NAME = 'StreamIE:live' - _VALID_URL = r'%s/(?P[^\#]+$)' % StreamMeIE._VALID_URL_BASE + _VALID_URL = r'%s\/(?P[^\#\/]+$)' % StreamMeIE._VALID_URL_BASE + _TEST = { + 'url': 'https://www.stream.me/kombatcup', + 'info_dict': { + 'id': 'live', # see: StreamMeIE._extract_info() + 'ext': 'mp4', + 'title': 'KombatCup\'s Live Stream', + 'age_limit': 13, + 'uploader_id': 'kombatcup', + 'uploader': 'KombatCup', + 'like_count': int, + 'dislike_count': int, + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } def _real_extract(self, url): channel_id = self._match_id(url) From 86138f5d5a3492398dd34f7d69f5e94ad9e28fc4 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Fri, 30 Dec 2016 04:55:16 +0300 Subject: [PATCH 05/16] [StreamMe] add "Source" to formats list --- youtube_dl/extractor/streamme.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 7a0562f9d..b3eb1cc65 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -94,6 +94,16 @@ class StreamMeIE(InfoExtractor): # I don't know all the possible protocols yet. # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' }) + if d.get('origin') is not None: + fmt_tag = d.get('origin').get('location').split(':')[0] + formats.append({ + 'url': d.get('origin').get('location'), + 'acodec': d.get('origin').get('audioCodec'), + 'vcodec': d.get('origin').get('videoCodec'), + 'format_id': 'Source-' + fmt_tag, + 'ext': 'flv' if fmt_tag == 'rtmp' else 'mp4', + 'source_preference': 1, + }) return formats From a04c072888491cf5f50b08f19c5c3b87576600ff Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Sat, 31 Dec 2016 21:04:31 +0300 Subject: [PATCH 06/16] [StreamMe] some code style fixes --- youtube_dl/extractor/streamme.py | 63 ++++++++++++++------------------ 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index b3eb1cc65..72ddd34ef 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -13,10 +13,10 @@ from ..utils import ( class StreamMeIE(InfoExtractor): IE_NAME = 'StreamMe:video' - _API_CHANNEL = 'https://www.stream.me/api-user/v1//channel' - _API_ARCHIVE = 'https://www.stream.me/api-vod/v1//archives' + _API_CHANNEL = 'https://www.stream.me/api-user/v1/%s/channel' + _API_ARCHIVE = 'https://www.stream.me/api-vod/v1/%s/archives' _VALID_URL_BASE = r'https?://(video-cdn|www).stream.me' - _VALID_URL = r'%s\/archive\/(?P[^\#\/]+)\/[^\/]+\/(?P[^\/]+)' % _VALID_URL_BASE + _VALID_URL = r'%s/archive\/(?P[^\#/]+)/[^\/]+/(?P[^/]+)' % _VALID_URL_BASE _TEST = { 'url': 'https://www.stream.me/archive/kombatcup/kombat-cup-week-8-sunday-open/pDlXAj6mYb', 'md5': 'b32af6fad972d0bcf5854a416b5b3b01', @@ -34,20 +34,18 @@ class StreamMeIE(InfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = self._match_id(url) - apiurl = self._API_ARCHIVE.replace('', m.group('channel_id')) + apiurl = self._API_ARCHIVE % m.group('channel_id') - data = json.loads(self._download_webpage(apiurl, video_id)) + data = self._download_json(apiurl, video_id) - for vod in data.get('_embedded').get('vod'): + for vod in data['_embedded']['vod']: vod_info = [] if vod.get('urlId') == video_id: vod_info = vod break - manifest_json = self._download_json(vod_info - .get('_links') - .get('manifest') - .get('href'), video_id) + manifest_json = self._download_json(vod_info['_links']['manifest']['href'], + video_id, note='Downloading video manifest') formats = self._extract_formats(manifest_json.get('formats')) self._sort_formats(formats, 'vbr') @@ -57,19 +55,19 @@ class StreamMeIE(InfoExtractor): def _extract_info(self, info): return { - 'id': info.get('urlId') or 'live', + 'id': info.get('urlId') or info.get('publicId'), # 'formats': self.formats, - 'title': info.get('title'), + 'title': info.get('title') or 'Untitled Broadcast', 'age_limit': int_or_none(info.get('ageRating')), - 'description': info.get('description') or None, + 'description': info.get('description'), 'dislike_count': int_or_none(info.get('stats').get('raw').get('dislikes')), - 'display_id': info.get('titleSlug') or None, + 'display_id': info.get('titleSlug'), 'duration': int_or_none(info.get('duration')), 'like_count': int_or_none(info.get('stats').get('raw').get('likes')), - 'thumbnail': info.get('_links').get('thumbnail').get('href') or None, - 'timestamp': info.get('whenCreated') or None, - 'uploader': info.get('username') or None, - 'uploader_id': info.get('userSlug') or None, + 'thumbnail': info.get('_links').get('thumbnail').get('href'), + 'timestamp': info.get('whenCreated'), + 'uploader': info.get('username'), + 'uploader_id': info.get('userSlug'), 'view_count': int_or_none(info.get('stats').get('raw').get('views')), 'is_live': True if info.get('active') else False, } @@ -83,10 +81,10 @@ class StreamMeIE(InfoExtractor): for fmt_info in d.get('encodings'): formats.append({ 'url': fmt_info.get('location'), - 'width': fmt_info.get('videoWidth'), - 'height': fmt_info.get('videoHeight'), - 'vbr': fmt_info.get('videoKbps'), - 'abr': fmt_info.get('audioKbps'), + 'width': int_or_none(fmt_info.get('videoWidth')), + 'height': int_or_none(fmt_info.get('videoHeight')), + 'vbr': int_or_none(fmt_info.get('videoKbps')), + 'abr': int_or_none(fmt_info.get('audioKbps')), 'acodec': d.get('audioCodec'), 'vcodec': d.get('videoCodec'), 'format_id': "%s%sp" % (fmt_tag, fmt_info.get('videoHeight')), @@ -113,7 +111,7 @@ class StreamMeLiveIE(StreamMeIE): _TEST = { 'url': 'https://www.stream.me/kombatcup', 'info_dict': { - 'id': 'live', # see: StreamMeIE._extract_info() + 'id': '1246a915-eebe-4ffe-b12e-e4f5332abc4d', 'ext': 'mp4', 'title': 'KombatCup\'s Live Stream', 'age_limit': 13, @@ -131,7 +129,7 @@ class StreamMeLiveIE(StreamMeIE): def _real_extract(self, url): channel_id = self._match_id(url) - apiurl = StreamMeIE._API_CHANNEL.replace('', channel_id) + apiurl = StreamMeIE._API_CHANNEL % channel_id data = json.loads(self._download_webpage(apiurl, channel_id)) stream_info = [] @@ -143,10 +141,8 @@ class StreamMeLiveIE(StreamMeIE): if not stream_info.get('active'): raise ExtractorError('%s is offline' % channel_id, expected=True) - manifest_json = self._download_json(stream_info - .get('_links') - .get('manifest') - .get('href'), channel_id) + manifest_json = self._download_json(stream_info['_links']['manifest']['href'], + channel_id, 'Download video manifest') formats = self._extract_formats(manifest_json.get('formats')) self._sort_formats(formats, 'vbr') @@ -174,16 +170,13 @@ class StreamMeArchiveIE(StreamMeIE): def _real_extract(self, url): channel_id = self._match_id(url).split('#')[0] - apiurl = StreamMeIE._API_ARCHIVE.replace('', channel_id) + apiurl = StreamMeIE._API_ARCHIVE % channel_id # TODO: implement paginated downloading - data = json.loads(self._download_webpage(apiurl + '?limit=%d&offset=0' % self._PLAYLIST_LIMIT, channel_id)) + data = self._download_json(apiurl, channel_id, query={'limit': self._PLAYLIST_LIMIT, 'offset': 0}) playlist = [] - for vod in data.get('_embedded').get('vod'): - manifest_json = self._download_json(vod - .get('_links') - .get('manifest') - .get('href'), vod.get('urlId')) + for vod in data['_embedded']['vod']: + manifest_json = self._download_json(vod['_links']['manifest']['href'], vod.get('urlId')) formats = self._extract_formats(manifest_json.get('formats')) self._sort_formats(formats, 'vbr') info = self._extract_info(vod) From 3010df8aa75602c3aca2e40ba7605df0edfb4a79 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Sat, 31 Dec 2016 21:26:17 +0300 Subject: [PATCH 07/16] [StreamMe] skip live stream test if channell is offline --- youtube_dl/extractor/streamme.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 72ddd34ef..18b7620c0 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -16,7 +16,7 @@ class StreamMeIE(InfoExtractor): _API_CHANNEL = 'https://www.stream.me/api-user/v1/%s/channel' _API_ARCHIVE = 'https://www.stream.me/api-vod/v1/%s/archives' _VALID_URL_BASE = r'https?://(video-cdn|www).stream.me' - _VALID_URL = r'%s/archive\/(?P[^\#/]+)/[^\/]+/(?P[^/]+)' % _VALID_URL_BASE + _VALID_URL = r'%s/archive/(?P[^\#/]+)/[^/]+/(?P[^/]+)' % _VALID_URL_BASE _TEST = { 'url': 'https://www.stream.me/archive/kombatcup/kombat-cup-week-8-sunday-open/pDlXAj6mYb', 'md5': 'b32af6fad972d0bcf5854a416b5b3b01', @@ -107,7 +107,7 @@ class StreamMeIE(InfoExtractor): class StreamMeLiveIE(StreamMeIE): IE_NAME = 'StreamIE:live' - _VALID_URL = r'%s\/(?P[^\#\/]+$)' % StreamMeIE._VALID_URL_BASE + _VALID_URL = r'%s/(?P[^\#/]+$)' % StreamMeIE._VALID_URL_BASE _TEST = { 'url': 'https://www.stream.me/kombatcup', 'info_dict': { @@ -121,6 +121,7 @@ class StreamMeLiveIE(StreamMeIE): 'dislike_count': int, 'is_live': True, }, + 'skip': 'kombatcup is offline', 'params': { # m3u8 download 'skip_download': True, From d7262cb4a176be4f425ae88741ade41236dbb53b Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Sun, 1 Jan 2017 04:57:34 +0300 Subject: [PATCH 08/16] [StreamMe] even more unnecessary .get()s removed --- youtube_dl/extractor/streamme.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 18b7620c0..b90c33b6a 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -47,7 +46,7 @@ class StreamMeIE(InfoExtractor): manifest_json = self._download_json(vod_info['_links']['manifest']['href'], video_id, note='Downloading video manifest') - formats = self._extract_formats(manifest_json.get('formats')) + formats = self._extract_formats(manifest_json['formats']) self._sort_formats(formats, 'vbr') info = self._extract_info(vod_info) info['formats'] = formats @@ -78,7 +77,7 @@ class StreamMeIE(InfoExtractor): # skip websocket and mjpeg we can't handle them anyway if fmt_tag in ('mjpeg-lodef', 'mp4-ws',): continue - for fmt_info in d.get('encodings'): + for fmt_info in d['encodings']: formats.append({ 'url': fmt_info.get('location'), 'width': int_or_none(fmt_info.get('videoWidth')), @@ -93,7 +92,7 @@ class StreamMeIE(InfoExtractor): # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' }) if d.get('origin') is not None: - fmt_tag = d.get('origin').get('location').split(':')[0] + fmt_tag = d['origin']['location'].split(':')[0] formats.append({ 'url': d.get('origin').get('location'), 'acodec': d.get('origin').get('audioCodec'), @@ -132,10 +131,10 @@ class StreamMeLiveIE(StreamMeIE): channel_id = self._match_id(url) apiurl = StreamMeIE._API_CHANNEL % channel_id - data = json.loads(self._download_webpage(apiurl, channel_id)) + data = self._download_json(apiurl, channel_id) stream_info = [] # search for a live stream... - for stream in data.get('_embedded').get('streams'): + for stream in data['_embedded']['streams']: stream_info = stream break # TODO: add to a list (multi-streams?) @@ -143,9 +142,9 @@ class StreamMeLiveIE(StreamMeIE): raise ExtractorError('%s is offline' % channel_id, expected=True) manifest_json = self._download_json(stream_info['_links']['manifest']['href'], - channel_id, 'Download video manifest') + channel_id, 'Downloading video manifest') - formats = self._extract_formats(manifest_json.get('formats')) + formats = self._extract_formats(manifest_json['formats']) self._sort_formats(formats, 'vbr') info = self._extract_info(stream_info) info['formats'] = formats @@ -177,8 +176,9 @@ class StreamMeArchiveIE(StreamMeIE): playlist = [] for vod in data['_embedded']['vod']: - manifest_json = self._download_json(vod['_links']['manifest']['href'], vod.get('urlId')) - formats = self._extract_formats(manifest_json.get('formats')) + manifest_json = self._download_json(vod['_links']['manifest']['href'], + vod['urlId'], note='Downloading video manifest') + formats = self._extract_formats(manifest_json['formats']) self._sort_formats(formats, 'vbr') info = self._extract_info(vod) info['formats'] = formats From 62c0e83bf555ab6e536fb369ae5c887e2771f69e Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Mon, 2 Jan 2017 01:36:26 +0300 Subject: [PATCH 09/16] [StreamMe] simpify VOD extractor a bit. A new API endpoint has been introduced on site --- youtube_dl/extractor/streamme.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index b90c33b6a..0bed2d0fd 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, @@ -14,8 +12,9 @@ class StreamMeIE(InfoExtractor): IE_NAME = 'StreamMe:video' _API_CHANNEL = 'https://www.stream.me/api-user/v1/%s/channel' _API_ARCHIVE = 'https://www.stream.me/api-vod/v1/%s/archives' - _VALID_URL_BASE = r'https?://(video-cdn|www).stream.me' - _VALID_URL = r'%s/archive/(?P[^\#/]+)/[^/]+/(?P[^/]+)' % _VALID_URL_BASE + _API_VOD = 'https://www.stream.me/api-vod/v1/vod/%s' + _VALID_URL_BASE = r'https?://www.stream.me' + _VALID_URL = r'%s/archive/(?P[^#/]+)/[^/]+/(?P[^/]+)' % _VALID_URL_BASE _TEST = { 'url': 'https://www.stream.me/archive/kombatcup/kombat-cup-week-8-sunday-open/pDlXAj6mYb', 'md5': 'b32af6fad972d0bcf5854a416b5b3b01', @@ -31,17 +30,13 @@ class StreamMeIE(InfoExtractor): } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) video_id = self._match_id(url) - apiurl = self._API_ARCHIVE % m.group('channel_id') + data = self._download_json(self._API_VOD % video_id, video_id) - data = self._download_json(apiurl, video_id) - - for vod in data['_embedded']['vod']: - vod_info = [] - if vod.get('urlId') == video_id: - vod_info = vod - break + if len(data['_embedded']['streams']) > 0: + vod_info = data['_embedded']['streams'][0] + else: + raise ExtractorError('Video "%s" not found' % video_id, expected=True) manifest_json = self._download_json(vod_info['_links']['manifest']['href'], video_id, note='Downloading video manifest') @@ -64,7 +59,7 @@ class StreamMeIE(InfoExtractor): 'duration': int_or_none(info.get('duration')), 'like_count': int_or_none(info.get('stats').get('raw').get('likes')), 'thumbnail': info.get('_links').get('thumbnail').get('href'), - 'timestamp': info.get('whenCreated'), + 'timestamp': int_or_none(info.get('whenCreated')), 'uploader': info.get('username'), 'uploader_id': info.get('userSlug'), 'view_count': int_or_none(info.get('stats').get('raw').get('views')), @@ -91,7 +86,7 @@ class StreamMeIE(InfoExtractor): # I don't know all the possible protocols yet. # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' }) - if d.get('origin') is not None: + if d.get('origin') is not None and d.get('origin').get('location') is not None: fmt_tag = d['origin']['location'].split(':')[0] formats.append({ 'url': d.get('origin').get('location'), @@ -106,7 +101,7 @@ class StreamMeIE(InfoExtractor): class StreamMeLiveIE(StreamMeIE): IE_NAME = 'StreamIE:live' - _VALID_URL = r'%s/(?P[^\#/]+$)' % StreamMeIE._VALID_URL_BASE + _VALID_URL = r'%s/(?P[^#/]+$)' % StreamMeIE._VALID_URL_BASE _TEST = { 'url': 'https://www.stream.me/kombatcup', 'info_dict': { @@ -153,7 +148,7 @@ class StreamMeLiveIE(StreamMeIE): class StreamMeArchiveIE(StreamMeIE): IE_NAME = 'StreamMe:archives' - _VALID_URL = r'%s/(?P[^\#]+)(\#archive)$' % StreamMeIE._VALID_URL_BASE + _VALID_URL = r'%s/(?P[^#]+)#archive$' % StreamMeIE._VALID_URL_BASE _PLAYLIST_TYPE = 'past broadcasts' _PLAYLIST_LIMIT = 128 _TEST = { From 20910f945b1363eebe36b183046b4243ea9cb18c Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Mon, 2 Jan 2017 04:12:28 +0300 Subject: [PATCH 10/16] [StreamMe] put some more checks & thumbnail tests --- youtube_dl/extractor/streamme.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 0bed2d0fd..2506159a4 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -25,6 +25,7 @@ class StreamMeIE(InfoExtractor): 'uploader': 'KombatCup', 'uploader_id': 'kombatcup', 'timestamp': 1481512102000, + 'thumbnail': 're:https?://.*.jpg$', 'age_limit': 13, } } @@ -48,23 +49,30 @@ class StreamMeIE(InfoExtractor): return info def _extract_info(self, info): - return { - 'id': info.get('urlId') or info.get('publicId'), + data = { + 'id': info.get('urlId') or info['publicId'], # 'formats': self.formats, 'title': info.get('title') or 'Untitled Broadcast', 'age_limit': int_or_none(info.get('ageRating')), 'description': info.get('description'), - 'dislike_count': int_or_none(info.get('stats').get('raw').get('dislikes')), 'display_id': info.get('titleSlug'), 'duration': int_or_none(info.get('duration')), - 'like_count': int_or_none(info.get('stats').get('raw').get('likes')), - 'thumbnail': info.get('_links').get('thumbnail').get('href'), 'timestamp': int_or_none(info.get('whenCreated')), 'uploader': info.get('username'), 'uploader_id': info.get('userSlug'), - 'view_count': int_or_none(info.get('stats').get('raw').get('views')), 'is_live': True if info.get('active') else False, } + if info.get('stats') and info['stats'].get('raw'): + stats = info['stats']['raw'] + data.update({ + 'like_count': int_or_none(stats.get('likes')), + 'dislike_count': int_or_none(stats.get('dislikes')), + 'view_count': int_or_none(stats.get('views')), + }) + if info.get('_links') and info['_links'].get('thumbnail'): + if info['_links']['thumbnail'].get('href'): + data['thumbnail'] = info['_links']['thumbnail']['href'] + return data def _extract_formats(self, fmts): formats = [] @@ -86,12 +94,12 @@ class StreamMeIE(InfoExtractor): # I don't know all the possible protocols yet. # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' }) - if d.get('origin') is not None and d.get('origin').get('location') is not None: + if d.get('origin') and d['origin'].get('location'): fmt_tag = d['origin']['location'].split(':')[0] formats.append({ - 'url': d.get('origin').get('location'), - 'acodec': d.get('origin').get('audioCodec'), - 'vcodec': d.get('origin').get('videoCodec'), + 'url': d['origin']['location'], + 'acodec': d['origin'].get('audioCodec'), + 'vcodec': d['origin'].get('videoCodec'), 'format_id': 'Source-' + fmt_tag, 'ext': 'flv' if fmt_tag == 'rtmp' else 'mp4', 'source_preference': 1, @@ -113,6 +121,7 @@ class StreamMeLiveIE(StreamMeIE): 'uploader': 'KombatCup', 'like_count': int, 'dislike_count': int, + 'thumbnail': 're:https?://.*.jpg$', 'is_live': True, }, 'skip': 'kombatcup is offline', From fcc3b98dd46974d837a7f6dfce0d42ee9ee99f6e Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Mon, 2 Jan 2017 21:05:33 +0300 Subject: [PATCH 11/16] [StreamMe] Fix "invalid escape sequences" error on Python 3.6 as in #11581 --- youtube_dl/extractor/streamme.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 2506159a4..72acb690a 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -25,7 +25,7 @@ class StreamMeIE(InfoExtractor): 'uploader': 'KombatCup', 'uploader_id': 'kombatcup', 'timestamp': 1481512102000, - 'thumbnail': 're:https?://.*.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 13, } } @@ -115,13 +115,13 @@ class StreamMeLiveIE(StreamMeIE): 'info_dict': { 'id': '1246a915-eebe-4ffe-b12e-e4f5332abc4d', 'ext': 'mp4', - 'title': 'KombatCup\'s Live Stream', + 'title': "KombatCup's Live Stream", 'age_limit': 13, 'uploader_id': 'kombatcup', 'uploader': 'KombatCup', 'like_count': int, 'dislike_count': int, - 'thumbnail': 're:https?://.*.jpg$', + 'thumbnail': r're:https?://.*\.jpg$', 'is_live': True, }, 'skip': 'kombatcup is offline', From ca25624b1ae04081b0479d16c73d34da5d316733 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Wed, 4 Jan 2017 04:43:39 +0300 Subject: [PATCH 12/16] [StreamMe] make sure "id" and "title" always present --- youtube_dl/extractor/streamme.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 72acb690a..60b837ba4 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + compat_str, ExtractorError, ) @@ -50,9 +51,9 @@ class StreamMeIE(InfoExtractor): def _extract_info(self, info): data = { - 'id': info.get('urlId') or info['publicId'], + 'id': info.get('urlId') or info.get('publicId'), # 'formats': self.formats, - 'title': info.get('title') or 'Untitled Broadcast', + 'title': info.get('title'), 'age_limit': int_or_none(info.get('ageRating')), 'description': info.get('description'), 'display_id': info.get('titleSlug'), @@ -152,6 +153,11 @@ class StreamMeLiveIE(StreamMeIE): self._sort_formats(formats, 'vbr') info = self._extract_info(stream_info) info['formats'] = formats + if not info.get('title'): + info['title'] = self._live_title(data.get('displayName') or channel_id) + if not info.get('id'): + info['id'] = compat_str(abs(hash('%s/%s' % (channel_id, formats[0]))) % (10 ** 6)) + return info From 78a46105335c96e079a54b9af90d8c5ccac31d40 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Thu, 5 Jan 2017 20:30:27 +0300 Subject: [PATCH 13/16] [StreamMe] handle empty archives properly. Some more checks added --- youtube_dl/extractor/streamme.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 60b837ba4..b0fe970f0 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -35,13 +35,24 @@ class StreamMeIE(InfoExtractor): video_id = self._match_id(url) data = self._download_json(self._API_VOD % video_id, video_id) - if len(data['_embedded']['streams']) > 0: + if not data and data.get('_embedded'): + raise ExtractorError( + '{0} returns no data or data is incorrect'.format(video_id), expected=True) + + if len(data['_embedded'].get('streams')) > 0: vod_info = data['_embedded']['streams'][0] else: - raise ExtractorError('Video "%s" not found' % video_id, expected=True) + raise ExtractorError('Video "{0}" not found'.format(video_id), expected=True) - manifest_json = self._download_json(vod_info['_links']['manifest']['href'], - video_id, note='Downloading video manifest') + if vod_info.get('_links') and vod_info['_links'].get('manifest'): + if vod_info['_links']['manifest'].get('href'): + manifest_json = self._download_json( + vod_info['_links']['manifest'].get('href'), + video_id, note='Downloading video manifest') + else: + raise ExtractorError('JSON has unexpected format', expected=True) + if not manifest_json or not manifest_json.get('formats'): + raise ExtractorError('Video manifest has no formats information', expected=True) formats = self._extract_formats(manifest_json['formats']) self._sort_formats(formats, 'vbr') @@ -170,7 +181,6 @@ class StreamMeArchiveIE(StreamMeIE): 'url': 'https://www.stream.me/kombatcup#archive', 'info_dict': { 'id': 'kombatcup', - 'title': 'KombatCup', }, 'playlist_mincount': 25, 'params': { @@ -183,8 +193,10 @@ class StreamMeArchiveIE(StreamMeIE): apiurl = StreamMeIE._API_ARCHIVE % channel_id # TODO: implement paginated downloading data = self._download_json(apiurl, channel_id, query={'limit': self._PLAYLIST_LIMIT, 'offset': 0}) - playlist = [] + if not data: + raise ExtractorError('{0} returns empty data. Try again later'.format(channel_id), expected=True) + playlist = [] for vod in data['_embedded']['vod']: manifest_json = self._download_json(vod['_links']['manifest']['href'], vod['urlId'], note='Downloading video manifest') @@ -194,4 +206,6 @@ class StreamMeArchiveIE(StreamMeIE): info['formats'] = formats playlist.append(info) - return self.playlist_result(playlist, channel_id, info.get('uploader')) + return self.playlist_result( + playlist, channel_id, + data.get('displayName') if data else 'Archived Videos') From ead38e7351c15ca32f77f1a27c7f6d08379ab2e7 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Tue, 10 Jan 2017 18:37:17 +0300 Subject: [PATCH 14/16] [StreamMe] fix timestamp extraction, test added --- youtube_dl/extractor/streamme.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index b0fe970f0..530b350ae 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -25,7 +25,8 @@ class StreamMeIE(InfoExtractor): 'title': 'Kombat Cup Week #8 - Sunday Open', 'uploader': 'KombatCup', 'uploader_id': 'kombatcup', - 'timestamp': 1481512102000, + 'timestamp': 1481512102, + 'upload_date': '20161212', 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 13, } @@ -69,7 +70,7 @@ class StreamMeIE(InfoExtractor): 'description': info.get('description'), 'display_id': info.get('titleSlug'), 'duration': int_or_none(info.get('duration')), - 'timestamp': int_or_none(info.get('whenCreated')), + 'timestamp': int_or_none(info.get('whenCreated'), scale=1000), 'uploader': info.get('username'), 'uploader_id': info.get('userSlug'), 'is_live': True if info.get('active') else False, From b952ce639588b7a92bfcadf0710926b5ea241505 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Tue, 10 Jan 2017 19:19:26 +0300 Subject: [PATCH 15/16] [StreamMe] simplify extractor (adopted some tricks from twitch.py) --- youtube_dl/extractor/streamme.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 530b350ae..4ceaedd45 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - int_or_none, - compat_str, ExtractorError, + compat_str, + int_or_none, + str_or_none, + try_get, ) @@ -68,23 +70,17 @@ class StreamMeIE(InfoExtractor): 'title': info.get('title'), 'age_limit': int_or_none(info.get('ageRating')), 'description': info.get('description'), + 'dislike_count': int_or_none(info.get('stats', {}).get('raw', {}).get('dislikes')), 'display_id': info.get('titleSlug'), 'duration': int_or_none(info.get('duration')), + 'is_live': True if info.get('active') else False, + 'like_count': int_or_none(info.get('stats', {}).get('raw', {}).get('likes')), + 'thumbnail': info.get('_links', {}).get('thumbnail', {}).get('href'), 'timestamp': int_or_none(info.get('whenCreated'), scale=1000), 'uploader': info.get('username'), 'uploader_id': info.get('userSlug'), - 'is_live': True if info.get('active') else False, + 'view_count': int_or_none(info.get('stats', {}).get('raw', {}).get('views')), } - if info.get('stats') and info['stats'].get('raw'): - stats = info['stats']['raw'] - data.update({ - 'like_count': int_or_none(stats.get('likes')), - 'dislike_count': int_or_none(stats.get('dislikes')), - 'view_count': int_or_none(stats.get('views')), - }) - if info.get('_links') and info['_links'].get('thumbnail'): - if info['_links']['thumbnail'].get('href'): - data['thumbnail'] = info['_links']['thumbnail']['href'] return data def _extract_formats(self, fmts): @@ -107,10 +103,12 @@ class StreamMeIE(InfoExtractor): # I don't know all the possible protocols yet. # 'protocol': 'm3u8_native' if fmt_tag == 'mp4-hls' else 'http' }) - if d.get('origin') and d['origin'].get('location'): - fmt_tag = d['origin']['location'].split(':')[0] + + video_url = str_or_none(try_get(d, lambda x: x['origin']['location'], compat_str), '') + if ':' in video_url: + fmt_tag = video_url.split(':')[0] formats.append({ - 'url': d['origin']['location'], + 'url': video_url, 'acodec': d['origin'].get('audioCodec'), 'vcodec': d['origin'].get('videoCodec'), 'format_id': 'Source-' + fmt_tag, From bc20584f3379a4277db2d78380a65454faade8dd Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Wed, 11 Jan 2017 02:45:27 +0300 Subject: [PATCH 16/16] [StreamMe] download archive page by page --- youtube_dl/extractor/streamme.py | 36 +++++++++++++++++++------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/streamme.py b/youtube_dl/extractor/streamme.py index 4ceaedd45..9490730e8 100644 --- a/youtube_dl/extractor/streamme.py +++ b/youtube_dl/extractor/streamme.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -175,7 +176,6 @@ class StreamMeArchiveIE(StreamMeIE): IE_NAME = 'StreamMe:archives' _VALID_URL = r'%s/(?P[^#]+)#archive$' % StreamMeIE._VALID_URL_BASE _PLAYLIST_TYPE = 'past broadcasts' - _PLAYLIST_LIMIT = 128 _TEST = { 'url': 'https://www.stream.me/kombatcup#archive', 'info_dict': { @@ -190,21 +190,29 @@ class StreamMeArchiveIE(StreamMeIE): def _real_extract(self, url): channel_id = self._match_id(url).split('#')[0] apiurl = StreamMeIE._API_ARCHIVE % channel_id - # TODO: implement paginated downloading - data = self._download_json(apiurl, channel_id, query={'limit': self._PLAYLIST_LIMIT, 'offset': 0}) - if not data: + page = self._download_json(apiurl, channel_id) + if not page: raise ExtractorError('{0} returns empty data. Try again later'.format(channel_id), expected=True) - + total = int_or_none(page.get('total'), default=0) playlist = [] - for vod in data['_embedded']['vod']: - manifest_json = self._download_json(vod['_links']['manifest']['href'], - vod['urlId'], note='Downloading video manifest') - formats = self._extract_formats(manifest_json['formats']) - self._sort_formats(formats, 'vbr') - info = self._extract_info(vod) - info['formats'] = formats - playlist.append(info) + count = 0 + for page_count in itertools.count(1): + if count >= total or apiurl is None: + break + for vod in page['_embedded']['vod']: + manifest_json = self._download_json(vod['_links']['manifest']['href'], + vod['urlId'], note='Downloading video manifest') + formats = self._extract_formats(manifest_json['formats']) + self._sort_formats(formats, 'vbr') + info = self._extract_info(vod) + info['formats'] = formats + playlist.append(info) + count += 1 + apiurl = try_get(page, lambda x: x['_links']['next'], compat_str) + if apiurl is not None: + page = self._download_json(apiurl, channel_id, + note='Downloading JSON page {0}'.format(page_count + 1)) return self.playlist_result( playlist, channel_id, - data.get('displayName') if data else 'Archived Videos') + page.get('displayName') if page else 'Archived Videos')