1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-02-03 20:05:35 +08:00

[StreamMe] download archive page by page

This commit is contained in:
sh!zeeg 2017-01-11 02:45:27 +03:00
parent b952ce6395
commit bc20584f33

View File

@@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@@ -175,7 +176,6 @@ class StreamMeArchiveIE(StreamMeIE):
IE_NAME = 'StreamMe:archives' IE_NAME = 'StreamMe:archives'
_VALID_URL = r'%s/(?P<id>[^#]+)#archive$' % StreamMeIE._VALID_URL_BASE _VALID_URL = r'%s/(?P<id>[^#]+)#archive$' % StreamMeIE._VALID_URL_BASE
_PLAYLIST_TYPE = 'past broadcasts' _PLAYLIST_TYPE = 'past broadcasts'
_PLAYLIST_LIMIT = 128
_TEST = { _TEST = {
'url': 'https://www.stream.me/kombatcup#archive', 'url': 'https://www.stream.me/kombatcup#archive',
'info_dict': { 'info_dict': {
@@ -190,21 +190,29 @@ class StreamMeArchiveIE(StreamMeIE):
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url).split('#')[0] channel_id = self._match_id(url).split('#')[0]
apiurl = StreamMeIE._API_ARCHIVE % channel_id apiurl = StreamMeIE._API_ARCHIVE % channel_id
# TODO: implement paginated downloading page = self._download_json(apiurl, channel_id)
data = self._download_json(apiurl, channel_id, query={'limit': self._PLAYLIST_LIMIT, 'offset': 0}) if not page:
if not data:
raise ExtractorError('{0} returns empty data. Try again later'.format(channel_id), expected=True) raise ExtractorError('{0} returns empty data. Try again later'.format(channel_id), expected=True)
total = int_or_none(page.get('total'), default=0)
playlist = [] playlist = []
for vod in data['_embedded']['vod']: count = 0
manifest_json = self._download_json(vod['_links']['manifest']['href'], for page_count in itertools.count(1):
vod['urlId'], note='Downloading video manifest') if count >= total or apiurl is None:
formats = self._extract_formats(manifest_json['formats']) break
self._sort_formats(formats, 'vbr') for vod in page['_embedded']['vod']:
info = self._extract_info(vod) manifest_json = self._download_json(vod['_links']['manifest']['href'],
info['formats'] = formats vod['urlId'], note='Downloading video manifest')
playlist.append(info) formats = self._extract_formats(manifest_json['formats'])
self._sort_formats(formats, 'vbr')
info = self._extract_info(vod)
info['formats'] = formats
playlist.append(info)
count += 1
apiurl = try_get(page, lambda x: x['_links']['next'], compat_str)
if apiurl is not None:
page = self._download_json(apiurl, channel_id,
note='Downloading JSON page {0}'.format(page_count + 1))
return self.playlist_result( return self.playlist_result(
playlist, channel_id, playlist, channel_id,
data.get('displayName') if data else 'Archived Videos') page.get('displayName') if page else 'Archived Videos')