From 95b8a52327dd2e71e6540e43cfa06942cc696a07 Mon Sep 17 00:00:00 2001
From: Michal Duda <github@vookimedlo.cz>
Date: Sun, 7 Oct 2018 21:33:26 +0200
Subject: [PATCH 1/3] [MallTv] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/malltv.py     | 75 ++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 youtube_dl/extractor/malltv.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 464c8d690..175824fdf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -595,6 +595,7 @@ from .mailru import (
     MailRuMusicSearchIE,
 )
 from .makertv import MakerTVIE
+from .malltv import MallTvIE
 from .mangomolo import (
     MangomoloVideoIE,
     MangomoloLiveIE,
diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py
new file mode 100644
index 000000000..3351d5e8f
--- /dev/null
+++ b/youtube_dl/extractor/malltv.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    NO_DEFAULT,
+)
+
+
+class MallTvIE(InfoExtractor):
+    _VALID_URL = r'https://mall.tv/(?P<id>[^/#?]+)'
+    _TEST = {
+        'url': 'https://mall.tv/tajemstvi-nejkrupavejsich-kurecich-kridylek',
+        'info_dict': {
+            'id': 'tajemstvi-nejkrupavejsich-kurecich-kridylek',
+            'ext': 'mp4',
+            'title': 'Tajemství nejkřupavějších kuřecích křidýlek',
+            'description': 'md5:f77cbb85d08745bfc85a2768fa34b57d',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 58.0,
+            'upload_date': '20180912',
+            'timestamp': 1536781320,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    # MAll.tv has malformed type atribute (i.e. missing quotes)
+    #
+    JSON_LD_RE_MALLTV_MALFORMED = r'(?is)<script[^>]+type=application/ld\+json[^>]*>(?P<json_ld>.+?)</script>'
+
+    def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
+        json_ld = self._search_regex(
+            self.JSON_LD_RE_MALLTV_MALFORMED, html, 'JSON-LD', group='json_ld', **kwargs)
+        default = kwargs.get('default', NO_DEFAULT)
+        if not json_ld:
+            return default if default is not NO_DEFAULT else {}
+        # JSON-LD may be malformed and thus `fatal` should be respected.
+        # At the same time `default` may be passed that assumes `fatal=False`
+        # for _search_regex. Let's simulate the same behavior here as well.
+        fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+        return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage, default=None)
+        description = self._og_search_description(webpage, default=None)
+
+        ldjson = self._search_json_ld(webpage, video_id, default=None)
+
+        # Again, the malform attribute
+        #
+        source = self._search_regex(re.compile(r'<source\s+src=([^ \t]+)'), webpage, None, default=None)
+
+        format_url = source + '.m3u8'
+        formats = self._extract_m3u8_formats(format_url, video_id)
+        for fmt in formats:  # force the extension on every extracted HLS format
+            fmt['ext'] = 'mp4'
+        ldjson = ldjson or {}  # JSON-LD is searched with default=None and may be absent
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': ldjson.get('duration'),
+            'timestamp': ldjson.get('timestamp'),
+            'thumbnail': ldjson.get('thumbnail'),
+            'formats': formats,
+        }

From 9b0aa8acfe35a5c0a94a4fe38720a7bbfdfd1014 Mon Sep 17 00:00:00 2001
From: Michal Duda <github@vookimedlo.cz>
Date: Mon, 8 Oct 2018 18:34:59 +0200
Subject: [PATCH 2/3] [MallTv] Add new extractor - removed comments about
 malformed attrib values

---
 youtube_dl/extractor/malltv.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py
index 3351d5e8f..d0557b9c3 100644
--- a/youtube_dl/extractor/malltv.py
+++ b/youtube_dl/extractor/malltv.py
@@ -29,13 +29,11 @@ class MallTvIE(InfoExtractor):
         },
     }
 
-    # MAll.tv has malformed type atribute (i.e. missing quotes)
-    #
-    JSON_LD_RE_MALLTV_MALFORMED = r'(?is)<script[^>]+type=application/ld\+json[^>]*>(?P<json_ld>.+?)</script>'
+    JSON_LD_RE_UNQUOTED_ATTRIB = r'(?is)<script[^>]+type=application/ld\+json[^>]*>(?P<json_ld>.+?)</script>'
 
     def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
         json_ld = self._search_regex(
-            self.JSON_LD_RE_MALLTV_MALFORMED, html, 'JSON-LD', group='json_ld', **kwargs)
+            self.JSON_LD_RE_UNQUOTED_ATTRIB, html, 'JSON-LD', group='json_ld', **kwargs)
         default = kwargs.get('default', NO_DEFAULT)
         if not json_ld:
             return default if default is not NO_DEFAULT else {}
@@ -47,7 +45,6 @@ class MallTvIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage, default=None)
@@ -55,8 +52,6 @@ class MallTvIE(InfoExtractor):
 
         ldjson = self._search_json_ld(webpage, video_id, default=None)
 
-        # Again, the malform attribute
-        #
         source = self._search_regex(re.compile(r'<source\s+src=([^ \t]+)'), webpage, None, default=None)
 
         format_url = source + '.m3u8'

From 5ea8c87e0c03e5a3a8500c52158b85d2aac441ea Mon Sep 17 00:00:00 2001
From: Michal Duda <github@vookimedlo.cz>
Date: Mon, 8 Oct 2018 20:21:50 +0200
Subject: [PATCH 3/3] [MallTv] Add new extractor - added optional www subdomain

---
 youtube_dl/extractor/malltv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py
index d0557b9c3..48d7a7986 100644
--- a/youtube_dl/extractor/malltv.py
+++ b/youtube_dl/extractor/malltv.py
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class MallTvIE(InfoExtractor):
-    _VALID_URL = r'https://mall.tv/(?P<id>[^/#?]+)'
+    _VALID_URL = r'https://(?:www\.)?mall\.tv/(?P<id>[^/#?]+)'
     _TEST = {
-        'url': 'https://mall.tv/tajemstvi-nejkrupavejsich-kurecich-kridylek',
+        'url': 'https://www.mall.tv/tajemstvi-nejkrupavejsich-kurecich-kridylek',
         'info_dict': {
             'id': 'tajemstvi-nejkrupavejsich-kurecich-kridylek',
             'ext': 'mp4',