From 38d14cd94d948ee71221a56380d24b09179bf58b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 06:39:46 +0100
Subject: [PATCH 01/15] rework tvp.pl extractor

---
 youtube_dl/extractor/__init__.py |   2 +-
 youtube_dl/extractor/tvp.py      | 277 ++++++++++++++++++++-----------
 2 files changed, 181 insertions(+), 98 deletions(-)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 625b0bf16..9e92dfb31 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -742,7 +742,7 @@ from .tvc import (
 )
 from .tvigle import TvigleIE
 from .tvland import TVLandIE
-from .tvp import TvpIE, TvpSeriesIE
+from .tvp import TvpIE, TvpLegacyIE
 from .tvplay import TVPlayIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index f57d609d4..4b3bd9982 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -1,12 +1,181 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
+from .common import InfoExtractor, ExtractorError
 
-import re
+VIDEO_LISTING_URL = ('http://www.api.v3.tvp.pl/shared/listing.php'
+                     '?dump=json&direct=true&count=-1&parent_id={id}')
+META_URL = 'http://www.tvp.pl/shared/video_data.php?dump=json&video_id={id}'
+TOKENIZER_URL = 'http://www.tvp.pl/shared/cdn/tokenizer_v2.php?object_id={id}'
+IGNORED_MIMETYPES = 'application/vnd.ms-ss', 'application/x-mpegurl'
 
-from .common import InfoExtractor
+
+class TvpApi:
+
+    def __init__(self, ie):
+        """:type ie: InfoExtractor"""
+        self.ie = ie
+
+    def listing(self, id):
+        json = self._get_json(VIDEO_LISTING_URL, id)
+        return json
+
+    def meta(self, id):
+        json = self._get_json(META_URL, id)
+        return json
+
+    def context(self, id):
+        meta = self.meta(id)
+        return meta['context']
+
+    def formats(self, id):
+        json = self._get_json(TOKENIZER_URL, id)
+        status = json['status']
+        if status == 'NOT_PLAYABLE':
+            raise ExtractorError("video is not playable")
+        if status != 'OK':
+            raise ExtractorError("unknown status: %s", status)
+        return json['formats']
+
+    def _get_json(self, url, id):
+        id = int(id)
+        formatted_url = url.format(id=id)
+        return self.ie._download_json(formatted_url, id)
 
 
 class TvpIE(InfoExtractor):
+    IE_NAME = 'tvp.pl'
+    _VALID_URL = r'https?://(?:vod\.|www\.)?tvp\.pl/(?P<id>\d+)/.*'
+
+    _TESTS = [{
+        'url': 'http://vod.tvp.pl/4278035/odc-2',
+        'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
+        'info_dict': {
+            'id': '4278035',
+            'ext': 'wmv',
+            'title': 'Ogniem i mieczem, odc. 2',
+            'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.'
+        },
+    }, {
+        'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
+        'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
+        'info_dict': {
+            'id': '194536',
+            'ext': 'mp4',
+            'title': 'Czas honoru, I seria – odc. 13',
+            'description': 'Czesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna?'
+        },
+    }, {
+        'url': 'http://vod.tvp.pl/17834272/odc-39',
+        'md5': 'dafdadb130a45e79bab64aed94b73661',
+        'info_dict': {
+            'id': '17834272',
+            'ext': 'mp4',
+            'title': 'Na sygnale, odc. 39',
+            'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
+        },
+    }, {
+        'url': 'http://vod.tvp.pl/4278026/ogniem-i-mieczem',
+        'info_dict': {
+            'title': 'Ogniem i mieczem',
+            'id': '4278026',
+            'description': 'Romans z historią w tle',
+        },
+        'playlist_count': 4,
+    }, {
+        'url': 'http://vod.tvp.pl/9329207/',
+        'info_dict': {
+            'title': 'Boso przez świat',
+            'id': '9329207',
+            'description': 'Docieramy do plemion w zapomnianych regionach naszej planety. Poznajemy ich kulturę, wierzenia i zwyczaje. Na ile są podobne do naszych? Wojciech Cejrowski jest naszym przewodnikiem po najbardziej dzikich zakątkach globu.',
+        },
+        'playlist_count': 86,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return super(TvpIE, cls).suitable(url)
+
+    def _real_initialize(self):
+        self.api = TvpApi(self)
+
+    @staticmethod
+    def _format_formats(formats, video_id):
+
+        mime_ext = {
+            'video/x-ms-wmv': 'wmv',
+            'video/mp4': 'mp4'
+        }
+
+        viable_formats = []
+        for f in formats:
+            if f['mimeType'] in IGNORED_MIMETYPES:
+                continue
+
+            elif f['mimeType'].startswith('video/'):
+                viable_formats.append(
+                    {'url': f['url'],
+                     'ext': mime_ext.get(f['mimeType'], None),
+                     'vbr': f['totalBitrate']})
+
+        return viable_formats
+
+    def _get_video(self, context):
+        id = context['material_id']
+        if context['title_root']:
+            title = context['title_root']
+        elif not context['website_title']:
+            title = context['title']
+        else:
+            title = ', '.join([context['website_title'], context['title']])
+        url = context['url']
+        description = context['description_root']
+
+        try:
+            formats = self._format_formats(self.api.formats(id), id)
+        except ExtractorError as e:
+            self.to_screen("%s: %s" % (title, e))
+            raise
+
+        self._sort_formats(formats)
+
+        return {
+            'id': str(id),
+            'url': url,
+            'title': title,
+            'description': description,
+            'formats': formats,
+        }
+
+    def _get_playlist_videos(self, playlist_id):
+        ids = [playlist_id]
+
+        while ids:
+            item_id = ids.pop()
+            listing = self.api.listing(item_id)
+            for item in listing['items']:
+                if 'directory_video' in item['types']:
+                    ids.append(item['_id'])
+                if 'video' in item['types'] and item['is_released']:
+                    meta = self.api.context(item['_id'])
+                    try:
+                        yield self._get_video(meta)
+                    except ExtractorError:
+                        pass
+
+    def _get_playlist(self, context):
+        id = context['material_id']
+        title = context['title']
+        description = context['lead_root']
+
+        return self.playlist_result(self._get_playlist_videos(id),
+                                    str(id), title, description)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        ctx = self.api.context(video_id)
+        is_playlist = ctx['format_id'] == 0
+        return self._get_playlist(ctx) if is_playlist else self._get_video(ctx)
+
+
+class TvpLegacyIE(TvpIE):
     IE_NAME = 'tvp.pl'
     _VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
 
@@ -17,6 +186,7 @@ class TvpIE(InfoExtractor):
             'id': '4278035',
             'ext': 'wmv',
             'title': 'Ogniem i mieczem, odc. 2',
+            'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
@@ -25,10 +195,11 @@ class TvpIE(InfoExtractor):
             'id': '194536',
             'ext': 'mp4',
             'title': 'Czas honoru, I seria – odc. 13',
+            'description': 'Czesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna?',
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
-        'md5': 'c3b15ed1af288131115ff17a17c19dda',
+        'md5': 'b0005b542e5b4de643a9690326ab1257',
         'info_dict': {
             'id': '17916176',
             'ext': 'mp4',
@@ -36,104 +207,16 @@ class TvpIE(InfoExtractor):
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
-        'md5': 'c3b15ed1af288131115ff17a17c19dda',
+        'md5': 'dafdadb130a45e79bab64aed94b73661',
         'info_dict': {
             'id': '17834272',
             'ext': 'mp4',
             'title': 'Na sygnale, odc. 39',
+            'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
         },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        webpage = self._download_webpage(
-            'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
-
-        title = self._search_regex(
-            r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
-            webpage, 'title', group='title')
-        series_title = self._search_regex(
-            r'name\s*:\s*([\'"])SeriesTitle\1\s*,\s*value\s*:\s*\1(?P<series>.+?)\1',
-            webpage, 'series', group='series', default=None)
-        if series_title:
-            title = '%s, %s' % (series_title, title)
-
-        thumbnail = self._search_regex(
-            r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
-
-        video_url = self._search_regex(
-            r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
-        if not video_url:
-            video_url = self._download_json(
-                'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
-                video_id)['video_url']
-
-        ext = video_url.rsplit('.', 1)[-1]
-        if ext != 'ism/manifest':
-            if '/' in ext:
-                ext = 'mp4'
-            formats = [{
-                'format_id': 'direct',
-                'url': video_url,
-                'ext': ext,
-            }]
-        else:
-            m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
-            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
-
-
-class TvpSeriesIE(InfoExtractor):
-    IE_NAME = 'tvp.pl:Series'
-    _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
-
-    _TESTS = [{
-        'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem',
-        'info_dict': {
-            'title': 'Ogniem i mieczem',
-            'id': '4278026',
-        },
-        'playlist_count': 4,
-    }, {
-        'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat',
-        'info_dict': {
-            'title': 'Boso przez świat',
-            'id': '9329207',
-        },
-        'playlist_count': 86,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id, tries=5)
-
-        title = self._html_search_regex(
-            r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series')
-        playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id')
-        playlist = self._download_webpage(
-            'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend'
-            'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5,
-            note='Downloading playlist')
-
-        videos_paths = re.findall(
-            '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
-        entries = [
-            self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
-            for v_path in videos_paths]
-
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'display_id': display_id,
-            'title': title,
-            'entries': entries,
-        }
+        context = self.api.context(video_id)
+        return self._get_video(context)

From 35a5e04066b5a10d58e698c8da072503d3f3a7a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 06:46:34 +0100
Subject: [PATCH 02/15] unnecessary method

---
 youtube_dl/extractor/tvp.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 4b3bd9982..f43b8b614 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -89,10 +89,6 @@ class TvpIE(InfoExtractor):
         'playlist_count': 86,
     }]
 
-    @classmethod
-    def suitable(cls, url):
-        return super(TvpIE, cls).suitable(url)
-
     def _real_initialize(self):
         self.api = TvpApi(self)
 

From 20f9a56e2eb70c2eef4f95e1395574a8667f71dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 07:20:40 +0100
Subject: [PATCH 03/15] match more tvp.pl subdomains and follow copy_of_object_id

---
 youtube_dl/extractor/tvp.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index f43b8b614..60181d4ef 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -4,6 +4,7 @@ VIDEO_LISTING_URL = ('http://www.api.v3.tvp.pl/shared/listing.php'
                      '?dump=json&direct=true&count=-1&parent_id={id}')
 META_URL = 'http://www.tvp.pl/shared/video_data.php?dump=json&video_id={id}'
 TOKENIZER_URL = 'http://www.tvp.pl/shared/cdn/tokenizer_v2.php?object_id={id}'
+FILE_INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id={id}'
 IGNORED_MIMETYPES = 'application/vnd.ms-ss', 'application/x-mpegurl'
 
 
@@ -21,6 +22,10 @@ class TvpApi:
         json = self._get_json(META_URL, id)
         return json
 
+    def info(self, id):
+        json = self._get_json(FILE_INFO_URL, id)
+        return json
+
     def context(self, id):
         meta = self.meta(id)
         return meta['context']
@@ -42,7 +47,7 @@ class TvpApi:
 
 class TvpIE(InfoExtractor):
     IE_NAME = 'tvp.pl'
-    _VALID_URL = r'https?://(?:vod\.|www\.)?tvp\.pl/(?P<id>\d+)/.*'
+    _VALID_URL = r'https?://(?:vod|www)\.(\w+\.)?tvp\.pl/(?P<id>\d+)/.*'
 
     _TESTS = [{
         'url': 'http://vod.tvp.pl/4278035/odc-2',
@@ -53,6 +58,15 @@ class TvpIE(InfoExtractor):
             'title': 'Ogniem i mieczem, odc. 2',
             'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.'
         },
+    }, {
+        'url': 'http://www.rodzinka.tvp.pl/22729075/odc-169',
+        'md5': '4dc102e0883555d31b120e8328c02022',
+        'info_dict': {
+            'id': '22353810',
+            'ext': 'mp4',
+            'title': 'rodzinka.pl, odc. 169',
+            'description': 'Natalia szykuje dla Marii paczkę z ubrankami dla dziecka,\nale ciężko jej się z nimi rozstać – wiążę się z tym zbyt wiele wspomnień. Kacper chce wymusić od Ludwika pieniądze opowiadając o wróżce zębuszcze. A czy zna tak zwanego „Skrzata Dlatata”?',
+            },
     }, {
         'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
         'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
@@ -165,8 +179,14 @@ class TvpIE(InfoExtractor):
                                     str(id), title, description)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        ctx = self.api.context(video_id)
+        id = self._match_id(url)
+        ctx = self.api.context(id)
+        if ctx['format_id'] == 0:
+            file_info = self.api.info(id)
+            original_id = file_info.get('copy_of_object_id')
+            if original_id:
+                ctx = self.api.context(original_id)
+
         is_playlist = ctx['format_id'] == 0
         return self._get_playlist(ctx) if is_playlist else self._get_video(ctx)
 

From 6ebaca4df3eadd532c7dc067cd6a8f4e6b54f94d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 07:28:02 +0100
Subject: [PATCH 04/15] encoding for py2

---
 youtube_dl/extractor/tvp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 60181d4ef..3bc0b8f0a 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
 from .common import InfoExtractor, ExtractorError
 
 VIDEO_LISTING_URL = ('http://www.api.v3.tvp.pl/shared/listing.php'

From dcb069d69cf869b98e3620cc4f6df71dcd723c6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 07:37:10 +0100
Subject: [PATCH 05/15] accept any tvp.pl subdomain in _VALID_URL

---
 youtube_dl/extractor/tvp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 3bc0b8f0a..e912647b8 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -48,7 +48,7 @@ class TvpApi:
 
 class TvpIE(InfoExtractor):
     IE_NAME = 'tvp.pl'
-    _VALID_URL = r'https?://(?:vod|www)\.(\w+\.)?tvp\.pl/(?P<id>\d+)/.*'
+    _VALID_URL = r'https?://(\w+\.)+tvp\.pl/(?P<id>\d+)/.*'
 
     _TESTS = [{
         'url': 'http://vod.tvp.pl/4278035/odc-2',

From eaef7bdf99c06c0ddc78c85fa609f92ced45b2f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 07:43:31 +0100
Subject: [PATCH 06/15] more tests

---
 youtube_dl/extractor/tvp.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index e912647b8..02db4b3a5 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -59,6 +59,14 @@ class TvpIE(InfoExtractor):
             'title': 'Ogniem i mieczem, odc. 2',
             'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.'
         },
+        }, {
+        'url': 'http://warszawa.tvp.pl/23433721/03012016',
+        'md5': '8740c6e0532f37e836104f3fb38921d9',
+        'info_dict': {
+            'id': '23433721',
+            'ext': 'mp4',
+            'title': 'Echa tygodnia – kraj, 03.01.2016',
+        },
     }, {
         'url': 'http://www.rodzinka.tvp.pl/22729075/odc-169',
         'md5': '4dc102e0883555d31b120e8328c02022',

From a507d7aebd5846b505157f28672ee171b3ac07fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 09:58:42 +0100
Subject: [PATCH 07/15] drop try/except wrappers; mark NOT_PLAYABLE as expected

---
 youtube_dl/extractor/tvp.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 02db4b3a5..e40244ad4 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -35,7 +35,7 @@ class TvpApi:
         json = self._get_json(TOKENIZER_URL, id)
         status = json['status']
         if status == 'NOT_PLAYABLE':
-            raise ExtractorError("video is not playable")
+            raise ExtractorError("video is not playable", expected=True)
         if status != 'OK':
             raise ExtractorError("unknown status: %s", status)
         return json['formats']
@@ -147,11 +147,7 @@ class TvpIE(InfoExtractor):
         url = context['url']
         description = context['description_root']
 
-        try:
-            formats = self._format_formats(self.api.formats(id), id)
-        except ExtractorError as e:
-            self.to_screen("%s: %s" % (title, e))
-            raise
+        formats = self._format_formats(self.api.formats(id), id)
 
         self._sort_formats(formats)
 
@@ -174,10 +170,7 @@ class TvpIE(InfoExtractor):
                     ids.append(item['_id'])
                 if 'video' in item['types'] and item['is_released']:
                     meta = self.api.context(item['_id'])
-                    try:
-                        yield self._get_video(meta)
-                    except ExtractorError:
-                        pass
+                    yield self._get_video(meta)
 
     def _get_playlist(self, context):
         id = context['material_id']

From c213f8cc3384e8319568bd2f3cea5ae5ad2cfdf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Fri, 8 Jan 2016 10:00:33 +0100
Subject: [PATCH 08/15] squashing action

don't rely on optional key

single quotes

remove unnecessary override

remove api class

raise ExtractorError with title and video_id for non-OK tokenizer statuses

make _format_formats not static

refactor guessing title to a method

fix old api calls

use string ids consistently

don't extract videos in a playlist
---
 youtube_dl/extractor/tvp.py | 128 ++++++++++++++----------------------
 1 file changed, 51 insertions(+), 77 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index e40244ad4..fe79704e8 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -1,55 +1,18 @@
 # -*- encoding: utf-8 -*-
 from .common import InfoExtractor, ExtractorError
 
-VIDEO_LISTING_URL = ('http://www.api.v3.tvp.pl/shared/listing.php'
-                     '?dump=json&direct=true&count=-1&parent_id={id}')
-META_URL = 'http://www.tvp.pl/shared/video_data.php?dump=json&video_id={id}'
-TOKENIZER_URL = 'http://www.tvp.pl/shared/cdn/tokenizer_v2.php?object_id={id}'
-FILE_INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id={id}'
-IGNORED_MIMETYPES = 'application/vnd.ms-ss', 'application/x-mpegurl'
-
-
-class TvpApi:
-
-    def __init__(self, ie):
-        """:type ie: InfoExtractor"""
-        self.ie = ie
-
-    def listing(self, id):
-        json = self._get_json(VIDEO_LISTING_URL, id)
-        return json
-
-    def meta(self, id):
-        json = self._get_json(META_URL, id)
-        return json
-
-    def info(self, id):
-        json = self._get_json(FILE_INFO_URL, id)
-        return json
-
-    def context(self, id):
-        meta = self.meta(id)
-        return meta['context']
-
-    def formats(self, id):
-        json = self._get_json(TOKENIZER_URL, id)
-        status = json['status']
-        if status == 'NOT_PLAYABLE':
-            raise ExtractorError("video is not playable", expected=True)
-        if status != 'OK':
-            raise ExtractorError("unknown status: %s", status)
-        return json['formats']
-
-    def _get_json(self, url, id):
-        id = int(id)
-        formatted_url = url.format(id=id)
-        return self.ie._download_json(formatted_url, id)
-
 
 class TvpIE(InfoExtractor):
     IE_NAME = 'tvp.pl'
     _VALID_URL = r'https?://(\w+\.)+tvp\.pl/(?P<id>\d+)/.*'
 
+    _VIDEO_LISTING_URL = ('http://www.api.v3.tvp.pl/shared/listing.php'
+                          '?dump=json&direct=true&count=-1&parent_id={id}')
+    _META_URL = 'http://www.tvp.pl/shared/video_data.php?dump=json&video_id={id}'
+    _TOKENIZER_URL = 'http://www.tvp.pl/shared/cdn/tokenizer_v2.php?object_id={id}'
+    _FILE_INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id={id}'
+    _IGNORED_MIMETYPES = 'application/vnd.ms-ss', 'application/x-mpegurl'
+
     _TESTS = [{
         'url': 'http://vod.tvp.pl/4278035/odc-2',
         'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
@@ -112,11 +75,11 @@ class TvpIE(InfoExtractor):
         'playlist_count': 86,
     }]
 
-    def _real_initialize(self):
-        self.api = TvpApi(self)
+    def _get_json(self, url, entry_id):
+        formatted_url = url.format(id=int(entry_id))
+        return self._download_json(formatted_url, entry_id)
 
-    @staticmethod
-    def _format_formats(formats, video_id):
+    def _format_formats(self, formats, video_id):
 
         mime_ext = {
             'video/x-ms-wmv': 'wmv',
@@ -125,7 +88,7 @@ class TvpIE(InfoExtractor):
 
         viable_formats = []
         for f in formats:
-            if f['mimeType'] in IGNORED_MIMETYPES:
+            if f['mimeType'] in self._IGNORED_MIMETYPES:
                 continue
 
             elif f['mimeType'].startswith('video/'):
@@ -136,23 +99,37 @@ class TvpIE(InfoExtractor):
 
         return viable_formats
 
-    def _get_video(self, context):
-        id = context['material_id']
-        if context['title_root']:
-            title = context['title_root']
-        elif not context['website_title']:
-            title = context['title']
-        else:
-            title = ', '.join([context['website_title'], context['title']])
-        url = context['url']
-        description = context['description_root']
+    @staticmethod
+    def _guess_title(item):
+        title_root = item.get('title_root')
+        title = item.get('title')
+        website_title = item.get('website_title')
+        if title_root:
+            return item['title_root']
+        if title and website_title:
+            return '{}, {}'.format(website_title, title)
+        return title
 
-        formats = self._format_formats(self.api.formats(id), id)
+    def _get_video(self, context):
+        video_id = str(context['material_id'])
+        title = self._guess_title(context)
+        url = context['url']
+        description = context.get('description_root')
+
+        formats_req = self._get_json(self._TOKENIZER_URL, video_id)
+        req_status = formats_req['status']
+        if req_status == 'NOT_PLAYABLE':
+            raise ExtractorError('(%s) is not playable' % title,
+                                 expected=True, video_id=video_id)
+        elif req_status != 'OK':
+            raise ExtractorError('(%s) unknown status: %s' % (title, req_status),
+                                 video_id=video_id)
+        formats = self._format_formats(formats_req['formats'], video_id)
 
         self._sort_formats(formats)
 
         return {
-            'id': str(id),
+            'id': video_id,
             'url': url,
             'title': title,
             'description': description,
@@ -164,30 +141,32 @@ class TvpIE(InfoExtractor):
 
         while ids:
             item_id = ids.pop()
-            listing = self.api.listing(item_id)
+            listing = self._get_json(self._VIDEO_LISTING_URL, item_id)
             for item in listing['items']:
                 if 'directory_video' in item['types']:
                     ids.append(item['_id'])
                 if 'video' in item['types'] and item['is_released']:
-                    meta = self.api.context(item['_id'])
-                    yield self._get_video(meta)
+                    yield {
+                        '_type': 'url',
+                        'title': self._guess_title(item),
+                        'url': item['url']}
 
     def _get_playlist(self, context):
-        id = context['material_id']
+        pls_id = str(context['material_id'])
         title = context['title']
-        description = context['lead_root']
+        description = context.get('lead_root')
 
-        return self.playlist_result(self._get_playlist_videos(id),
-                                    str(id), title, description)
+        return self.playlist_result(self._get_playlist_videos(pls_id),
+                                    pls_id, title, description)
 
     def _real_extract(self, url):
-        id = self._match_id(url)
-        ctx = self.api.context(id)
+        entry_id = self._match_id(url)
+        ctx = self._get_json(self._META_URL, entry_id)['context']
         if ctx['format_id'] == 0:
-            file_info = self.api.info(id)
+            file_info = self._get_json(self._FILE_INFO_URL, entry_id)
             original_id = file_info.get('copy_of_object_id')
             if original_id:
-                ctx = self.api.context(original_id)
+                ctx = self._get_json(self._META_URL, original_id)['context']
 
         is_playlist = ctx['format_id'] == 0
         return self._get_playlist(ctx) if is_playlist else self._get_video(ctx)
@@ -233,8 +212,3 @@ class TvpLegacyIE(TvpIE):
             'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
         },
     }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        context = self.api.context(video_id)
-        return self._get_video(context)

From 77d024527b529beb7651179487399764a9c67a34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sat, 9 Jan 2016 05:13:04 +0100
Subject: [PATCH 09/15] no need for None in get

---
 youtube_dl/extractor/tvp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index fe79704e8..273908418 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -94,7 +94,7 @@ class TvpIE(InfoExtractor):
             elif f['mimeType'].startswith('video/'):
                 viable_formats.append(
                     {'url': f['url'],
-                     'ext': mime_ext.get(f['mimeType'], None),
+                     'ext': mime_ext.get(f['mimeType']),
                      'vbr': f['totalBitrate']})
 
         return viable_formats

From bc05fc757552acd46e23eb62d398019ac916e856 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sat, 9 Jan 2016 05:18:58 +0100
Subject: [PATCH 10/15] increase chance for a title

---
 youtube_dl/extractor/tvp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 273908418..1aa2146b9 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -153,7 +153,7 @@ class TvpIE(InfoExtractor):
 
     def _get_playlist(self, context):
         pls_id = str(context['material_id'])
-        title = context['title']
+        title = self._guess_title(context)
         description = context.get('lead_root')
 
         return self.playlist_result(self._get_playlist_videos(pls_id),

From 8bee77a175a7724fde513b0e468e16f0cdaedc88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sat, 9 Jan 2016 05:41:16 +0100
Subject: [PATCH 11/15] expect less from format json

---
 youtube_dl/extractor/tvp.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 1aa2146b9..42d318c5e 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -88,14 +88,15 @@ class TvpIE(InfoExtractor):
 
         viable_formats = []
         for f in formats:
-            if f['mimeType'] in self._IGNORED_MIMETYPES:
+            if f.get('mimeType') in self._IGNORED_MIMETYPES:
+                continue
+            if 'url' not in f:
                 continue
 
-            elif f['mimeType'].startswith('video/'):
-                viable_formats.append(
-                    {'url': f['url'],
-                     'ext': mime_ext.get(f['mimeType']),
-                     'vbr': f['totalBitrate']})
+            viable_formats.append(
+                {'url': f['url'],
+                 'ext': mime_ext.get(f.get('mimeType')),
+                 'vbr': f.get('totalBitrate')})
 
         return viable_formats
 

From e8784b67879dd498583ea50498a6ef171ac99ede Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sat, 9 Jan 2016 05:52:43 +0100
Subject: [PATCH 12/15] file info is not a vital part of the api, make it non-fatal

---
 youtube_dl/extractor/tvp.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 42d318c5e..2446d4404 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -75,9 +75,10 @@ class TvpIE(InfoExtractor):
         'playlist_count': 86,
     }]
 
-    def _get_json(self, url, entry_id):
+    def _get_json(self, url, entry_id, fatal=True):
         formatted_url = url.format(id=int(entry_id))
-        return self._download_json(formatted_url, entry_id)
+        json = self._download_json(formatted_url, entry_id, fatal=fatal)
+        return {} if json is None else json
 
     def _format_formats(self, formats, video_id):
 
@@ -164,7 +165,7 @@ class TvpIE(InfoExtractor):
         entry_id = self._match_id(url)
         ctx = self._get_json(self._META_URL, entry_id)['context']
         if ctx['format_id'] == 0:
-            file_info = self._get_json(self._FILE_INFO_URL, entry_id)
+            file_info = self._get_json(self._FILE_INFO_URL, entry_id, fatal=False)
             original_id = file_info.get('copy_of_object_id')
             if original_id:
                 ctx = self._get_json(self._META_URL, original_id)['context']

From 3fb7f85dd40459e56c9b360484f77a14fa5263af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sun, 10 Jan 2016 05:47:57 +0100
Subject: [PATCH 13/15] unicode_literals

---
 youtube_dl/extractor/tvp.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 2446d4404..66cf07d32 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -1,4 +1,6 @@
 # -*- encoding: utf-8 -*-
+from __future__ import unicode_literals
+
 from .common import InfoExtractor, ExtractorError
 
 

From e3225fc155eaa895b386e4e6b1b90368405c2f77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sun, 10 Jan 2016 05:48:59 +0100
Subject: [PATCH 14/15] change dict to url_result

---
 youtube_dl/extractor/tvp.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 66cf07d32..364047992 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -150,10 +150,9 @@ class TvpIE(InfoExtractor):
                 if 'directory_video' in item['types']:
                     ids.append(item['_id'])
                 if 'video' in item['types'] and item['is_released']:
-                    yield {
-                        '_type': 'url',
-                        'title': self._guess_title(item),
-                        'url': item['url']}
+                    yield self.url_result(item['url'],
+                                          video_id=item['_id'],
+                                          video_title=self._guess_title(item))
 
     def _get_playlist(self, context):
         pls_id = str(context['material_id'])

From e8b7c3220e77f27b55576e80c8654c44198b8f81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?dzikie=20dro=C5=BCd=C5=BCe?= <daz@hackerspace.pl>
Date: Sun, 10 Jan 2016 06:32:54 +0100
Subject: [PATCH 15/15] also match tvp.info urls

---
 youtube_dl/extractor/tvp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 364047992..a14d87792 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor, ExtractorError
 
 class TvpIE(InfoExtractor):
     IE_NAME = 'tvp.pl'
-    _VALID_URL = r'https?://(\w+\.)+tvp\.pl/(?P<id>\d+)/.*'
+    _VALID_URL = r'https?://(\w+\.)+tvp\.(?:pl|info)/(?P<id>\d+)/.*'
 
     _VIDEO_LISTING_URL = ('http://www.api.v3.tvp.pl/shared/listing.php'
                           '?dump=json&direct=true&count=-1&parent_id={id}')