From 91d21e0a8464f102e55a6711d69367a06c531184 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Mon, 6 Feb 2017 17:01:34 +0100
Subject: [PATCH 1/3] [telebasel] [simplex] Add new information extractors

---
 youtube_dl/extractor/extractors.py |   8 +
 youtube_dl/extractor/simplex.py    | 233 +++++++++++++++++++++++++++++
 youtube_dl/extractor/telebasel.py  | 125 ++++++++++++++++
 3 files changed, 366 insertions(+)
 create mode 100644 youtube_dl/extractor/simplex.py
 create mode 100644 youtube_dl/extractor/telebasel.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 12cda36cc..caac397ef 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -849,6 +849,10 @@ from .shared import (
     VivoIE,
 )
 from .showroomlive import ShowRoomLiveIE
+from .simplex import (
+    SimplexIE,
+    SimplexHostsIE,
+)
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .skynewsarabia import (
@@ -931,6 +935,10 @@ from .teamfourstar import TeamFourStarIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tele13 import Tele13IE
+from .telebasel import (
+    TelebaselMediathekIE,
+    TelebaselArticleIE,
+)
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
diff --git a/youtube_dl/extractor/simplex.py b/youtube_dl/extractor/simplex.py
new file mode 100644
index 000000000..2f0ad013f
--- /dev/null
+++ b/youtube_dl/extractor/simplex.py
@@ -0,0 +1,233 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    str_or_none,
+    try_get,
+    urljoin,
+)
+
+
+class SimplexIE(InfoExtractor):
+    IE_DESC = 'Simplex Player'
+    _VALID_URL = r'''(?x)
+                simplex:
+                (?P<server_url>https?://(?:www\.)?.+):
+                (?P<customer_id>\d+):
+                (?P<author_id>\d+):
+                (?P<project_id>\d+)
+                '''
+
+    _TEST = {
+        'url': 'simplex:http://video.telebasel.ch:4062:4063:62349',
+        'only_matching': True,
+    }
+
+    @staticmethod
+    def _extract_width_height(resolution):
+        try:
+            w, h = resolution.split('x')
+            w = int_or_none(w)
+            h = int_or_none(h)
+            return w, h
+        except (AttributeError, ValueError):
+            return None, None
+
+    def _known_simplex_format(self, simplex_formats, fid):
+        for sf in simplex_formats:
+            if type(sf['id']) == str and sf['id'] == fid:
+                return sf
+            elif type(sf['id']) == list and fid in sf['id']:
+                return sf
+        return None
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        server_url = mobj.group('server_url')
+        customer_id = mobj.group('customer_id')
+        author_id = mobj.group('author_id')
+        project_id = mobj.group('project_id')
+        video_id = '%s-%s-%s' % (customer_id, author_id, project_id)
+
+        content_url = urljoin(
+            server_url,
+            'content/%s/%s/%s/' % (customer_id, author_id, project_id))
+
+        player_data = self._download_json(
+            urljoin(content_url, 'data.sid'),
+            video_id,
+            note='Downloading player data JSON',
+            errnote='Unable to download player data JSON')
+        video_data = self._download_json(
+            urljoin(content_url, 'pl01.sid'),
+            video_id,
+            note='Downloading video data JSON',
+            errnote='Unable to download video data JSON',
+            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+        title = str_or_none(player_data['title'])
+        description = str_or_none(player_data.get('description'))
+        timestamp = int_or_none(player_data.get('createDate'))
+        language = str_or_none(player_data.get('language'))
+        duration = float_or_none(player_data.get('duration'), scale=10)
+
+        file_information = try_get(video_data, lambda x: x['data'], dict)
+        if not file_information:
+            raise ExtractorError('Cannot extract file information data.')
+
+        filename = str_or_none(file_information.get('filename'))
+        thumbname = str_or_none(file_information.get('thumb'))
+        thumbnail = urljoin(content_url, thumbname + '.jpg') if thumbname else None
+
+        qualities = try_get(player_data, lambda x: x['qualities'], list)
+        if not qualities:
+            raise ExtractorError('Cannot find available formats.')
+
+        # simplex_formats is the list of known simplex player formats.
+        # There might be more format ids, but it is unclear what they do:
+        # id 400: It was indicated to be for Apple TV.
+        # id 500: No additional information found.
+        simplex_formats = [
+            {'id': '20', 'filename': filename + '.flv', 'method': 'url'},
+            {'id': '40', 'filename': filename + '_40.flv', 'method': 'url'},
+            {'id': '200', 'filename': filename + '.mp4', 'method': 'url'},
+            {'id': ['300', '350', '355', '360'], 'filename': 'index.m3u8', 'method': 'm3u8'},
+        ]
+
+        formats = []
+
+        m3u8_done = False
+        format_infos = []
+        for quali in qualities:
+            fid = str_or_none(quali.get('id'))
+
+            vbr = int_or_none(quali.get('b'))
+            resolution = str_or_none(quali.get('s'))
+            width, height = SimplexIE._extract_width_height(resolution)
+            form_info = {
+                'resolution': resolution,
+                'width': width,
+                'height': height,
+                'vbr': vbr,
+                'abr': int_or_none(quali.get('ab')),
+                'asr': int_or_none(quali.get('ar')),
+                'fps': int_or_none(quali.get('r')),
+                'language': language,
+                'format_id': 'hls-%s' % str_or_none(vbr)
+            }
+            format_infos.append(form_info)
+
+            simplex_format = self._known_simplex_format(simplex_formats, fid)
+            if simplex_format:
+                format_url = urljoin(content_url, simplex_format['filename'])
+                if simplex_format['method'] == 'url':
+                    form = {
+                        'url': format_url
+                    }
+                    form.update(form_info)
+                    formats.append(form)
+                elif simplex_format['method'] == 'm3u8' and not m3u8_done:
+                    forms = self._extract_m3u8_formats(
+                        format_url,
+                        video_id,
+                        ext='mp4',
+                        entry_protocol='m3u8_native')
+                    formats.extend(forms)
+                    m3u8_done = True
+
+        # Try to add additional information to the formats extracted by _extract_m3u8_formats:
+        for form in formats:
+            if form['url'].endswith('.m3u8'):
+                vbr = int_or_none(
+                    self._search_regex(r'(\d+)kb.m3u8', form['url'], 'm3u8 vbr', default=None))
+                if vbr:
+                    try:
+                        form_info = next(f for f in format_infos if f['vbr'] == vbr)
+                        form.update(form_info)
+                    except StopIteration:
+                        pass
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
+
+
+class SimplexHostsIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                (?P<server_url>https?://(?:www\.)?
+                    (?:
+                        video\.telebasel\.ch|
+                        media10\.simplex\.tv
+                    )
+                )
+                /content/
+                (?P<customer_id>\d+)/
+                (?P<author_id>\d+)/
+                (?P<project_id>\d+)
+                '''
+
+    _TESTS = [{
+        'url': 'http://media10.simplex.tv/content/906/907/76997/',
+        'md5': 'e6b8ebefac5aeae4a6790fec18382ca0',
+        'info_dict': {
+            'id': '906-907-76997',
+            'ext': 'flv',
+            'title': '03.02.17: Der Trailer zum Rückrunden-Start',
+            'description': None,
+            'duration': 44.0,
+            'timestamp': 1486135964,
+            'upload_date': '20170203',
+            'url': 'http://media10.simplex.tv/content/906/907/76997/simvid_1_40.flv',
+            'thumbnail': 'http://media10.simplex.tv/content/906/907/76997/simvid_1.jpg',
+            'language': 'de',
+            'width': 1280,
+            'height': 720,
+            'vbr': 2304,
+            'abr': 160,
+            'fps': 25,
+            'asr': 44100,
+            'resolution': '1280x720'
+        }
+    }, {
+        'url': 'https://video.telebasel.ch/content/4062/4063/77067',
+        'info_dict': {
+            'id': '4062-4063-77067',
+            'ext': 'flv',
+            'title': 'News vom 05.02.2017',
+            'description': 'md5:23fb960068621263d5d4418996387674',
+            'timestamp': 1486314961,
+            'upload_date': '20170205',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        server_url = mobj.group('server_url')
+        customer_id = mobj.group('customer_id')
+        author_id = mobj.group('author_id')
+        project_id = mobj.group('project_id')
+
+        video_id = '%s-%s-%s' % (customer_id, author_id, project_id)
+        simplex_url = 'simplex:%s:%s:%s:%s' % (server_url, customer_id, author_id, project_id)
+
+        return self.url_result(
+            simplex_url,
+            ie=SimplexIE.ie_key(),
+            video_id=video_id)
diff --git a/youtube_dl/extractor/telebasel.py b/youtube_dl/extractor/telebasel.py
new file mode 100644
index 000000000..2498b2480
--- /dev/null
+++ b/youtube_dl/extractor/telebasel.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .simplex import SimplexIE
+from ..utils import (
+    ExtractorError,
+    str_or_none,
+    strip_or_none,
+    remove_end,
+    try_get,
+    urljoin,
+)
+
+
+class TelebaselBaseIE(InfoExtractor):
+    _SERVER_URL = 'https://video.telebasel.ch/'
+    _CUSTOMER_ID = '4062'
+    _AUTHOR_ID = '4063'
+
+
+class TelebaselMediathekIE(TelebaselBaseIE):
+    IE_DESC = 'telebasel.ch Mediathek'
+    _VALID_URL = r'''(?x)
+                https?://
+                    (?:www\.)?
+                    telebasel\.ch/
+                    (?!telebasel-archiv)
+                    (?!\d+)
+                    (?P<show_name>[^/]+)
+                    (?:
+                        /.*pid=(?P<pid>\d+).*
+                    )?
+                '''
+
+    _TESTS = [{
+        'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881',
+        'only_matching': True,
+    }, {
+        'url': 'https://telebasel.ch/telebasel-reihe-8',
+        'only_matching': True,
+    }, {
+        'url': 'https://telebasel.ch/telebasel-talk/?channel=15881',
+        'only_matching': True,
+    }]
+
+    def _extract_video_id(self, url, show_name):
+        webpage = self._download_webpage(url, show_name)
+        channel_id = self._html_search_regex(
+            r'<div[^>]+class=["\']tb-mediathek-videos["\'][^>]+data-channels=["\'](\d+)["\']',
+            webpage, 'channel id')
+
+        episodes_url = urljoin(
+            self._SERVER_URL,
+            'multichannel/%s/%s/.ofdd/json' % (self._CUSTOMER_ID, channel_id))
+        episodes = self._download_json(
+            episodes_url,
+            channel_id,
+            note='Downloading episodes JSON',
+            errnote='Unable to download episodes JSON',
+            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+        video_id = str_or_none(
+            try_get(episodes, lambda x: x['projects'][0]['projectId'], int))
+        if not video_id:
+            raise ExtractorError('Could not extract video id from the webpage.')
+
+        return video_id
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_name = mobj.group('show_name')
+        video_id = mobj.group('pid')
+
+        if not video_id:
+            video_id = self._extract_video_id(url, show_name)
+
+        return self.url_result(
+            'simplex:%s:%s:%s:%s' % (
+                self._SERVER_URL, self._CUSTOMER_ID,
+                self._AUTHOR_ID, video_id),
+            ie=SimplexIE.ie_key())
+
+
+class TelebaselArticleIE(TelebaselBaseIE):
+    IE_DESC = 'telebasel.ch articles'
+    _VALID_URL = r'https?://(?:www\.)?telebasel\.ch/(?P<id>\d{4}/\d{2}/\d{2}/[^/]+)/?'
+
+    _TEST = {
+        'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
+        'info_dict': {
+            'id': '2017/02/01/report-usr-iii-einfach-erklaert',
+            'title': 'Report: USR III einfach erklärt',
+            'description': 'md5:2cb2b94ac023a6a9517cffc58d500c7e',
+        },
+        'playlist_count': 3,
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        search_url = urljoin(
+            self._SERVER_URL,
+            r'content/%s/%s/(?P<pid>\d+)' % (self._CUSTOMER_ID, self._AUTHOR_ID))
+        embed_regex = r'<iframe[^>]+src=["\']%s.+["\']' % search_url
+        entries = [
+            self.url_result(
+                'simplex:%s:%s:%s:%s' % (
+                    self._SERVER_URL, self._CUSTOMER_ID,
+                    self._AUTHOR_ID, m.group('pid')),
+                ie=SimplexIE.ie_key())
+            for m in re.finditer(embed_regex, webpage)]
+
+        title = strip_or_none(
+            remove_end(self._og_search_title(webpage), '- Telebasel'))
+        description = self._og_search_description(webpage)
+
+        return self.playlist_result(
+            entries,
+            playlist_id=display_id,
+            playlist_title=title,
+            playlist_description=description)

From 36144cfe1b47b03a0241a906fc8d5737abe317c8 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Mon, 6 Feb 2017 18:13:05 +0100
Subject: [PATCH 2/3] [telebasel] [simplex] Handle Telebasel articles in the
 generic information extractor.

---
 youtube_dl/extractor/extractors.py |  5 +--
 youtube_dl/extractor/generic.py    | 18 +++++++++-
 youtube_dl/extractor/simplex.py    |  7 ++++
 youtube_dl/extractor/telebasel.py  | 54 +++---------------------------
 4 files changed, 29 insertions(+), 55 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index caac397ef..3c15f2678 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -935,10 +935,7 @@ from .teamfourstar import TeamFourStarIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tele13 import Tele13IE
-from .telebasel import (
-    TelebaselMediathekIE,
-    TelebaselArticleIE,
-)
+from .telebasel import TelebaselIE
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 1c233f038..7e9345a4e 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -83,6 +83,7 @@ from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
 from .openload import OpenloadIE
 from .videopress import VideoPressIE
+from .simplex import SimplexIE
 
 
 class GenericIE(InfoExtractor):
@@ -1499,10 +1500,19 @@ class GenericIE(InfoExtractor):
                 'timestamp': 1435711927,
                 'upload_date': '20150701',
             },
+            'add_ie': [VideoPressIE.ie_key()],
+        },
+        {
+            # Simplex embed
+            'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
+            'info_dict': {
+                'id': '?channel=105100',
+                'title': 'Report: USR III einfach erklärt - Telebasel',
+            },
             'params': {
                 'skip_download': True,
             },
-            'add_ie': [VideoPressIE.ie_key()],
+            'playlist_count': 3,
         }
         # {
         #     # TODO: find another test
@@ -2474,6 +2484,12 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 videopress_urls, ie=VideoPressIE.ie_key())
 
+        # Look for Simplex embeds
+        simplex_urls = SimplexIE._extract_urls(webpage)
+        if simplex_urls:
+            return _playlist_from_matches(
+                simplex_urls, ie=SimplexIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')
diff --git a/youtube_dl/extractor/simplex.py b/youtube_dl/extractor/simplex.py
index 2f0ad013f..7c5c67a48 100644
--- a/youtube_dl/extractor/simplex.py
+++ b/youtube_dl/extractor/simplex.py
@@ -29,6 +29,13 @@ class SimplexIE(InfoExtractor):
         'only_matching': True,
     }
 
+    @staticmethod
+    def _extract_urls(webpage):
+        """Return a list of 'simplex:' URLs, one per Simplex <iframe> embed found in webpage."""
+        # (?x) is hoisted to the front (Python 3.11+ rejects mid-pattern global flags); [^"\']* keeps the match inside src.
+        embed_re = r'(?x)<iframe[^>]+src=["\']%s[^"\']*["\']' % SimplexHostsIE._VALID_URL.replace('(?x)', '', 1)
+        return ['simplex:%s:%s:%s:%s' % m.group('server_url', 'customer_id', 'author_id', 'project_id') for m in re.finditer(embed_re, webpage)]
+
     @staticmethod
     def _extract_width_height(resolution):
         try:
diff --git a/youtube_dl/extractor/telebasel.py b/youtube_dl/extractor/telebasel.py
index 2498b2480..f9d12b780 100644
--- a/youtube_dl/extractor/telebasel.py
+++ b/youtube_dl/extractor/telebasel.py
@@ -8,20 +8,12 @@ from .simplex import SimplexIE
 from ..utils import (
     ExtractorError,
     str_or_none,
-    strip_or_none,
-    remove_end,
     try_get,
     urljoin,
 )
 
 
-class TelebaselBaseIE(InfoExtractor):
-    _SERVER_URL = 'https://video.telebasel.ch/'
-    _CUSTOMER_ID = '4062'
-    _AUTHOR_ID = '4063'
-
-
-class TelebaselMediathekIE(TelebaselBaseIE):
+class TelebaselIE(InfoExtractor):
     IE_DESC = 'telebasel.ch Mediathek'
     _VALID_URL = r'''(?x)
                 https?://
@@ -34,6 +26,9 @@ class TelebaselMediathekIE(TelebaselBaseIE):
                         /.*pid=(?P<pid>\d+).*
                     )?
                 '''
+    _SERVER_URL = 'https://video.telebasel.ch/'
+    _CUSTOMER_ID = '4062'
+    _AUTHOR_ID = '4063'
 
     _TESTS = [{
         'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881',
@@ -82,44 +77,3 @@ class TelebaselMediathekIE(TelebaselBaseIE):
                 self._SERVER_URL, self._CUSTOMER_ID,
                 self._AUTHOR_ID, video_id),
             ie=SimplexIE.ie_key())
-
-
-class TelebaselArticleIE(TelebaselBaseIE):
-    IE_DESC = 'telebasel.ch articles'
-    _VALID_URL = r'https?://(?:www\.)?telebasel\.ch/(?P<id>\d{4}/\d{2}/\d{2}/[^/]+)/?'
-
-    _TEST = {
-        'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
-        'info_dict': {
-            'id': '2017/02/01/report-usr-iii-einfach-erklaert',
-            'title': 'Report: USR III einfach erklärt',
-            'description': 'md5:2cb2b94ac023a6a9517cffc58d500c7e',
-        },
-        'playlist_count': 3,
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        search_url = urljoin(
-            self._SERVER_URL,
-            r'content/%s/%s/(?P<pid>\d+)' % (self._CUSTOMER_ID, self._AUTHOR_ID))
-        embed_regex = r'<iframe[^>]+src=["\']%s.+["\']' % search_url
-        entries = [
-            self.url_result(
-                'simplex:%s:%s:%s:%s' % (
-                    self._SERVER_URL, self._CUSTOMER_ID,
-                    self._AUTHOR_ID, m.group('pid')),
-                ie=SimplexIE.ie_key())
-            for m in re.finditer(embed_regex, webpage)]
-
-        title = strip_or_none(
-            remove_end(self._og_search_title(webpage), '- Telebasel'))
-        description = self._og_search_description(webpage)
-
-        return self.playlist_result(
-            entries,
-            playlist_id=display_id,
-            playlist_title=title,
-            playlist_description=description)

From 788fcd4585432663480a23a83905ff6f8d91c866 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Tue, 7 Feb 2017 05:00:33 +0100
Subject: [PATCH 3/3] [simplex] Fix format extraction for python 2.6 and 2.7

---
 youtube_dl/extractor/simplex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/simplex.py b/youtube_dl/extractor/simplex.py
index 7c5c67a48..dd5e0e101 100644
--- a/youtube_dl/extractor/simplex.py
+++ b/youtube_dl/extractor/simplex.py
@@ -48,7 +48,7 @@ class SimplexIE(InfoExtractor):
 
     def _known_simplex_format(self, simplex_formats, fid):
         for sf in simplex_formats:
-            if type(sf['id']) == str and sf['id'] == fid:
+            if sf['id'] == fid:
                 return sf
             elif type(sf['id']) == list and fid in sf['id']:
                 return sf