def _extract_partner_id(self, video_id, webpage, default=NO_DEFAULT):
    """Return the Kaltura partner id referenced by *webpage*.

    The id is taken from the ``/p/<id>`` or ``/partner_id/<id>`` path
    component of a ``kaltura.com`` script URL embedded in the page.

    ``video_id`` is currently unused by the lookup.  ``default`` is
    passed through unchanged to ``_search_regex``.
    """
    # NOTE(review): the pattern arrived with its leading ``<script[^>``
    # stripped (it began with ``]+src=``, which can only match a literal
    # ``]``).  The tag prefix is restored here -- confirm against the page
    # markup.
    return self._search_regex(
        r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
        webpage, 'kaltura partner id', default=default)
class AZMedienLiveIE(AZMedienBaseIE):
    """Extractor for the ``/live`` pages of telezueri.ch, telebaern.tv
    and telem1.ch.

    The partner id is read from the page itself; the entry id of the live
    stream is searched for in the hashed javascript files the page links
    to.  The result is handed over to the Kaltura extractor through
    ``_kaltura_video``.
    """
    IE_DESC = 'AZ Medien Live TV'
    # The named group must be called ``id`` because ``_match_id`` is used
    # on this pattern in ``_real_extract``.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?P<id>
                            (?:
                                telezueri\.ch|
                                telebaern\.tv|
                                telem1\.ch
                            )/
                            live
                        )
                    '''

    _TEST = {
        'url': 'http://www.telezueri.ch/live',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Resolve a live page URL to a ``kaltura:<partner>:<entry>`` result.

        Raises ExtractorError when none of the linked javascript files
        contains a ``kalturaLiveVideo(...)`` call.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        partner_id = self._extract_partner_id(video_id, webpage)

        # Candidate scripts are those whose file name is a hex string.
        # The group is named ``url`` because it is read back via
        # ``group('url')``.
        # NOTE(review): the leading ``<script[^>`` was missing from the
        # pattern as received and is restored here -- confirm against the
        # page markup.
        script_urls = [
            urljoin(base_url(url), mobj.group('url'))
            for mobj in re.finditer(
                r'<script[^>]+type=["\']text/javascript["\'][^>]+src=["\'](?P<url>.*/[0-9a-f]+\.js)["\']',
                webpage)]

        entry_id = None
        # A dedicated loop variable keeps the ``url`` argument intact.
        for script_url in script_urls:
            # One unreachable script should not abort the search; skip it
            # and try the next candidate.
            js = self._download_webpage(
                script_url, video_id,
                note='Downloading javascript file %s' % script_url,
                fatal=False)
            if not js:
                continue
            # ``[^/]{2}`` rejects calls directly preceded by ``//``
            # (presumably commented-out code -- TODO confirm).
            entry_id = self._search_regex(
                r'[^/]{2}\s*kalturaLiveVideo\(\s*["\'](.+?)["\'].+\)',
                js, 'kaltura entry id', default=None)
            if entry_id:
                break

        if not entry_id:
            raise ExtractorError('Cannot extract Kaltura entry id for live broadcast.')

        return self._kaltura_video(partner_id, entry_id)
'title': 'TeleZüri LIVE Stream', + 'upload_date': '20150624', + 'uploader_id': 'webit', + 'thumbnail': 're:^https?://.*/thumbnail/.*', + 'timestamp': 1435129674, + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } } ] @@ -263,49 +280,62 @@ class KalturaIE(InfoExtractor): unsigned_url += '?referrer=%s' % referrer return unsigned_url - data_url = info['dataUrl'] - if '/flvclipper/' in data_url: - data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url) - formats = [] - for f in flavor_assets: - # Continue if asset is not ready - if f.get('status') != 2: - continue - # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g) - # skip for now. - if f.get('fileExt') == 'chun': - continue - if not f.get('fileExt'): - # QT indicates QuickTime; some videos have broken fileExt - if f.get('containerFormat') == 'qt': - f['fileExt'] = 'mov' - else: - f['fileExt'] = 'mp4' - video_url = sign_url( - '%s/flavorId/%s' % (data_url, f['id'])) - # audio-only has no videoCodecId (e.g. 
kaltura:1926081:0_c03e1b5g - # -f mp4-56) - vcodec = 'none' if 'videoCodecId' not in f and f.get( - 'frameRate') == 0 else f.get('videoCodecId') - formats.append({ - 'format_id': '%(fileExt)s-%(bitrate)s' % f, - 'ext': f.get('fileExt'), - 'tbr': int_or_none(f['bitrate']), - 'fps': int_or_none(f.get('frameRate')), - 'filesize_approx': int_or_none(f.get('size'), invscale=1024), - 'container': f.get('containerFormat'), - 'vcodec': vcodec, - 'height': int_or_none(f.get('height')), - 'width': int_or_none(f.get('width')), - 'url': video_url, - }) - if '/playManifest/' in data_url: - m3u8_url = sign_url(data_url.replace( - 'format/url', 'format/applehttp')) - formats.extend(self._extract_m3u8_formats( - m3u8_url, entry_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) + is_live = False + if info.get('objectType') == 'KalturaLiveStreamEntry': + is_live = True + for f in info.get('liveStreamConfigurations'): + if f.get('protocol') == 'hds': + formats.extend(self._extract_f4m_formats(f.get('url'), entry_id)) + elif f.get('protocol') == 'hls' or f.get('protocol') == 'applehttp': + formats.extend(self._extract_m3u8_formats(f.get('url'), entry_id)) + elif f.get('protocol') == 'sl': + formats.extend(self._extract_ism_formats(f.get('url'), entry_id)) + elif f.get('protocol') == 'mpegdash': + formats.extend(self._extract_mpd_formats(f.get('url'), entry_id)) + else: + data_url = info['dataUrl'] + if '/flvclipper/' in data_url: + data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url) + + for f in flavor_assets: + # Continue if asset is not ready + if f.get('status') != 2: + continue + # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g) + # skip for now. 
+ if f.get('fileExt') == 'chun': + continue + if not f.get('fileExt'): + # QT indicates QuickTime; some videos have broken fileExt + if f.get('containerFormat') == 'qt': + f['fileExt'] = 'mov' + else: + f['fileExt'] = 'mp4' + video_url = sign_url( + '%s/flavorId/%s' % (data_url, f['id'])) + # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g + # -f mp4-56) + vcodec = 'none' if 'videoCodecId' not in f and f.get( + 'frameRate') == 0 else f.get('videoCodecId') + formats.append({ + 'format_id': '%(fileExt)s-%(bitrate)s' % f, + 'ext': f.get('fileExt'), + 'tbr': int_or_none(f['bitrate']), + 'fps': int_or_none(f.get('frameRate')), + 'filesize_approx': int_or_none(f.get('size'), invscale=1024), + 'container': f.get('containerFormat'), + 'vcodec': vcodec, + 'height': int_or_none(f.get('height')), + 'width': int_or_none(f.get('width')), + 'url': video_url, + }) + if '/playManifest/' in data_url: + m3u8_url = sign_url(data_url.replace( + 'format/url', 'format/applehttp')) + formats.extend(self._extract_m3u8_formats( + m3u8_url, entry_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) self._sort_formats(formats) @@ -334,4 +364,5 @@ class KalturaIE(InfoExtractor): 'timestamp': info.get('createdAt'), 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, 'view_count': info.get('plays'), + 'is_live': is_live, }