From 5324aece983265f9c40ecf286f57c89d453a0347 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Mon, 20 Feb 2017 18:40:12 +0100
Subject: [PATCH] [kaltura][azmedien-live] Add support for live streams

---
 youtube_dl/extractor/azmedien.py   |  59 +++++++++++++--
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/kaltura.py    | 115 ++++++++++++++++++-----------
 3 files changed, 128 insertions(+), 47 deletions(-)
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
index f4e07d901..6e871771c 100644
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@@ -7,13 +7,21 @@ from .common import InfoExtractor
 from .kaltura import KalturaIE
 from ..utils import (
     get_element_by_class,
+    base_url,
+    ExtractorError,
     get_element_by_id,
+    NO_DEFAULT,
     strip_or_none,
     urljoin,
 )
 
 
 class AZMedienBaseIE(InfoExtractor):
+    def _extract_partner_id(self, video_id, webpage, default=NO_DEFAULT):
+        # The partner id is read from the src of a kaltura.com <script> tag (/p/N or /partner_id/N).
+        pattern = r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)'
+        return self._search_regex(pattern, webpage, 'kaltura partner id', default=default)
+
     def _kaltura_video(self, partner_id, entry_id):
         return self.url_result(
             'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
@@ -73,12 +81,8 @@ class AZMedienIE(AZMedienBaseIE):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
-
-        partner_id = self._search_regex(
-            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
-            webpage, 'kaltura partner id')
+        partner_id = self._extract_partner_id(video_id, webpage)
         entry_id = self._html_search_regex(
             r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
             % re.escape(video_id), webpage, 'kaltura entry id', group='id')
@@ -211,3 +215,48 @@ class AZMedienShowPlaylistIE(AZMedienBaseIE):
         title = self._og_search_title(webpage, fatal=False)
         description = self._og_search_description(webpage)
         return self.playlist_result(entries, playlist_id, title, description)
+
+
+class AZMedienLiveIE(AZMedienBaseIE):
+    IE_DESC = 'AZ Medien Live TV'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?P<id>
+                            (?:
+                                telezueri\.ch|
+                                telebaern\.tv|
+                                telem1\.ch
+                            )/
+                            live
+                        )
+                    '''
+
+    _TEST = {
+        'url': 'http://www.telezueri.ch/live',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        # Resolve the live page to a 'kaltura:<partner id>:<entry id>' URL result.
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        partner_id = self._extract_partner_id(video_id, webpage)
+        script_urls = [urljoin(
+            base_url(url), m.group('url')) for m in re.finditer(
+                r'<script[^>]+type=["\']text/javascript["\'][^>]+src=["\'](?P<url>.*/[0-9a-f]+\.js)["\']',
+                webpage)]
+        # Try each hex-named script until one contains a kalturaLiveVideo(...) call.
+        for script_url in script_urls:
+            js = self._download_webpage(
+                script_url, video_id, note='Downloading javascript file %s' % script_url)
+            entry_id = self._search_regex(
+                r'[^/]{2}\s*kalturaLiveVideo\(\s*["\'](.+?)["\'].+\)',
+                js, 'kaltura entry id', default=None)
+            if entry_id:
+                break
+        else:
+            raise ExtractorError('Cannot extract Kaltura entry id for live broadcast.')
+
+        return self._kaltura_video(partner_id, entry_id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b1613a9d3..773ab0024 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -82,6 +82,7 @@ from .awaan import (
 )
 from .azmedien import (
     AZMedienIE,
+    AZMedienLiveIE,
     AZMedienPlaylistIE,
     AZMedienShowPlaylistIE,
 )
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 54374ea76..1eba3d5f5 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -102,6 +102,23 @@ class KalturaIE(InfoExtractor):
         {
             'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
             'only_matching': True,
+        },
+        {
+            # Kaltura live stream
+            'url': 'kaltura:1719221:1_hoislpiz',
+            'info_dict': {
+                'id': '1_hoislpiz',
+                'ext': 'm3u8',
+                'title': 'TeleZüri LIVE Stream',
+                'upload_date': '20150624',
+                'uploader_id': 'webit',
+                'thumbnail': 're:^https?://.*/thumbnail/.*',
+                'timestamp': 1435129674,
+                'is_live': True,
+            },
+            'params': {
+                'skip_download': True,
+            }
         }
     ]
 
@@ -263,49 +280,62 @@ class KalturaIE(InfoExtractor):
                 unsigned_url += '?referrer=%s' % referrer
             return unsigned_url
 
-        data_url = info['dataUrl']
-        if '/flvclipper/' in data_url:
-            data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
-
         formats = []
-        for f in flavor_assets:
-            # Continue if asset is not ready
-            if f.get('status') != 2:
-                continue
-            # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g)
-            # skip for now.
-            if f.get('fileExt') == 'chun':
-                continue
-            if not f.get('fileExt'):
-                # QT indicates QuickTime; some videos have broken fileExt
-                if f.get('containerFormat') == 'qt':
-                    f['fileExt'] = 'mov'
-                else:
-                    f['fileExt'] = 'mp4'
-            video_url = sign_url(
-                '%s/flavorId/%s' % (data_url, f['id']))
-            # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
-            # -f mp4-56)
-            vcodec = 'none' if 'videoCodecId' not in f and f.get(
-                'frameRate') == 0 else f.get('videoCodecId')
-            formats.append({
-                'format_id': '%(fileExt)s-%(bitrate)s' % f,
-                'ext': f.get('fileExt'),
-                'tbr': int_or_none(f['bitrate']),
-                'fps': int_or_none(f.get('frameRate')),
-                'filesize_approx': int_or_none(f.get('size'), invscale=1024),
-                'container': f.get('containerFormat'),
-                'vcodec': vcodec,
-                'height': int_or_none(f.get('height')),
-                'width': int_or_none(f.get('width')),
-                'url': video_url,
-            })
-        if '/playManifest/' in data_url:
-            m3u8_url = sign_url(data_url.replace(
-                'format/url', 'format/applehttp'))
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, entry_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False))
+        is_live = info.get('objectType') == 'KalturaLiveStreamEntry'
+        if is_live:
+            # info.get() yields None when the key is absent or null; fall back to an empty list.
+            for f in info.get('liveStreamConfigurations') or []:
+                if f.get('protocol') == 'hds':
+                    formats.extend(self._extract_f4m_formats(f.get('url'), entry_id))
+                elif f.get('protocol') in ('hls', 'applehttp'):
+                    formats.extend(self._extract_m3u8_formats(f.get('url'), entry_id))
+                elif f.get('protocol') == 'sl':
+                    formats.extend(self._extract_ism_formats(f.get('url'), entry_id))
+                elif f.get('protocol') == 'mpegdash':
+                    formats.extend(self._extract_mpd_formats(f.get('url'), entry_id))
+        else:
+            data_url = info['dataUrl']
+            if '/flvclipper/' in data_url:
+                data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
+            for f in flavor_assets:
+                # Continue if asset is not ready
+                if f.get('status') != 2:
+                    continue
+                # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g)
+                # skip for now.
+                if f.get('fileExt') == 'chun':
+                    continue
+                if not f.get('fileExt'):
+                    # QT indicates QuickTime; some videos have broken fileExt
+                    if f.get('containerFormat') == 'qt':
+                        f['fileExt'] = 'mov'
+                    else:
+                        f['fileExt'] = 'mp4'
+                video_url = sign_url(
+                    '%s/flavorId/%s' % (data_url, f['id']))
+                # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
+                # -f mp4-56)
+                vcodec = 'none' if 'videoCodecId' not in f and f.get(
+                    'frameRate') == 0 else f.get('videoCodecId')
+                formats.append({
+                    'format_id': '%(fileExt)s-%(bitrate)s' % f,
+                    'ext': f.get('fileExt'),
+                    'tbr': int_or_none(f['bitrate']),
+                    'fps': int_or_none(f.get('frameRate')),
+                    'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+                    'container': f.get('containerFormat'),
+                    'vcodec': vcodec,
+                    'height': int_or_none(f.get('height')),
+                    'width': int_or_none(f.get('width')),
+                    'url': video_url,
+                })
+            if '/playManifest/' in data_url:
+                m3u8_url = sign_url(data_url.replace(
+                    'format/url', 'format/applehttp'))
+                formats.extend(self._extract_m3u8_formats(
+                    m3u8_url, entry_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
 
         self._sort_formats(formats)
 
@@ -334,4 +364,5 @@ class KalturaIE(InfoExtractor):
             'timestamp': info.get('createdAt'),
             'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
             'view_count': info.get('plays'),
+            'is_live': is_live,
         }