From b853edb7d97a90f3d66c578383fad56cefdbeea7 Mon Sep 17 00:00:00 2001 From: Jiri Palecek Date: Thu, 4 May 2017 16:20:24 +0200 Subject: [PATCH 1/5] [ceskatelevize] Extend the extractor for ceskatelevize.cz Download videos from: - articles on the news site (http://ceskatelevize.cz/ct24) - sports articles - sports videos and live videos, incl. streams exclusive for the internet --- test/test_subtitles.py | 8 ++ youtube_dl/extractor/ceskatelevize.py | 187 +++++++++++++++++++++++--- 2 files changed, 180 insertions(+), 15 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 1b8de822a..df1bf2369 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -190,6 +190,14 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' IE = CeskaTelevizeIE + #get the subtitles from the first video, if need be + def getInfoDict(self): + idict = super(TestCeskaTelevizeSubtitles, self).getInfoDict() + if not idict.get('requested_subtitles') and idict.get('entries') and len(idict['entries']) > 0 and \ + idict['entries'][0].get('requested_subtitles'): + idict['requested_subtitles'] = idict['entries'][0].get('requested_subtitles') + return idict + def test_allsubtitles(self): self.DL.expect_warning('Automatic Captions not supported by this server') self.DL.params['writesubtitles'] = True diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e250de18c..4ecc1a4d1 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals import re +import time from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, + compat_urlparse, ) from ..utils import ( ExtractorError, @@ -15,6 +19,8 @@ from ..utils import ( unescapeHTML, urlencode_postdata, 
USER_AGENTS, + RegexNotFoundError, + compat_str, ) @@ -52,7 +58,7 @@ class CeskaTelevizeIE(InfoExtractor): # live stream 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'info_dict': { - 'id': 402, + 'id': '402', 'ext': 'mp4', 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, @@ -67,14 +73,31 @@ class CeskaTelevizeIE(InfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): + def _real_extract(self, url, retries=0): playlist_id = self._match_id(url) + if playlist_id == 'iFramePlayer.php': + parsed = compat_urlparse.urlparse(url) + qs_dict = compat_urlparse.parse_qs(parsed.query) + if qs_dict.get('videoID'): + playlist_id = qs_dict['videoID'][0] + elif qs_dict.get('IDEC'): + playlist_id = qs_dict['IDEC'][0] + else: + self.report_warning("Could not extract ID from iFramePlayer URL %s" % url) + webpage = self._download_webpage(url, playlist_id) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s

' % NOT_AVAILABLE_STRING in webpage: raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + if 'Neplatný kód pro videopřehrávač' in webpage: + if retries < 1: + self._report_warning('Invalid code on the page, retrying...') + time.sleep(15) + return self._real_extract(url, retries + 1) + else: + raise ExtractorError('Invalid code supplied for player') type_ = None episode_id = None @@ -129,7 +152,7 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) + playlist_title = self._og_search_title(webpage, default=None) or unescapeHTML(self._search_regex(r']*>(.*) 1: + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + else: + return entries[0] def _get_subtitles(self, episode_id, subs): original_subtitles = self._download_webpage( @@ -236,27 +262,26 @@ class CeskaTelevizeIE(InfoExtractor): class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?!ivysilani)[^?#&]*/(?:(?P\d+)-[^/?#]*|zive-vysilani(?:/[^?#]*)?)/?(?:[?#]|$)' _TESTS = [{ # video with 18+ caution trailer 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', + 'id': '215 562 21090/0007', + 'title': r're:Queer: Bogotart.*', }, 'playlist': [{ 'info_dict': { 'id': '61924494876844842', 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', + 'title': r're:Queer: Bogotart .*\(Varování 18\+\)', 'duration': 10.2, }, }, { 'info_dict': { 'id': '61924494877068022', 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', + 'title': r're:Queer: Bogotart .*\(Queer\)', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 1558.3, }, 
@@ -265,6 +290,88 @@ class CeskaTelevizePoradyIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'expected_warnings': [r'.*unable to extract.*OpenGraph description.*|.*retrying.*'], + }, { + 'url': 'http://www.ceskatelevize.cz/sport/zive-vysilani/', + 'info_dict': { + 'title': r're:ČT Sport živě.*', + 'id': '402', + 'ext': 'mp4', + 'is_live': True + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': [r'.*unable to extract.*OpenGraph description.*|.*retrying.*'], + }, { + 'url': 'http://www.ceskatelevize.cz/ct24/domaci/2101064-line-reky-se-plni-na-nekterych-mistech-plati-pohotovost', + 'info_dict': { + 'id': '2101064', + 'title': 'Řeky se plnily. V Teplicích nad Bečvou byl vyhlášen stav ohrožení', + 'description': 'Kvůli silnému dešti platí v některých regionech Česka výstraha před povodněmi. Na několika místech Moravskoslezského, Zlínského a Olomouckého kraje platí druhý povodňový stupeň, stav pohotovosti. Třetí stupeň povodňové aktivity znamenající ohrožení byl vyhlášen v Teplicích nad Bečvou a na říčce Polančici na Ostravsku. 
Počasí sledujte zde.', + }, + 'playlist': [{ + 'info_dict': { + 'id': "61924494877291243", + 'ext': 'mp4', + 'title': r're:Události.*', + }, + }, { + 'info_dict': { + "id": "61924494877291060", + 'ext': 'mp4', + 'title': r're:Studio ČT24.*', + }, + }, { + 'info_dict': { + 'ext': 'mp4', + "id": "61924494877291027", + 'title': 'startswith:', + }, + }, { + 'info_dict': { + 'ext': 'mp4', + "id": "61924494877291070", + 'title': 'startswith:', + }, + }, { + 'info_dict': { + 'ext': 'mp4', + "id": "61924494877291208", + 'title': 'startswith:', + }, + }, + ], + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': [r'.*unable to extract.*OpenGraph description.*|.*retrying.*'], + }, { + 'url': 'http://www.ceskatelevize.cz/sport/nejlepsi-videa/353066-ogier-i-meeke-meli-v-argentine-nehodu-v-cele-je-evans/', + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'info_dict': { + 'id': "61924494877291497", + "ext": "mp4", + 'title': r're:Ogier i Meeke měli v Argentině nehodu, v čele je Evans.*', + }, + 'expected_warnings': [r'.*unable to extract.*OpenGraph description.*|.*retrying.*'], + }, { + 'url': 'http://www.ceskatelevize.cz/sport/fotbal/1-liga/352926-fotbal-extra-jaroslav-starka-s-pribrami-na-vecne-casy-a-nikdy-jinak/', + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'info_dict': { + "id": "61924494877290816", + 'title': r're:Starka: S negativní publicitou jsem se naučil žít.*', + "ext": "mp4", + }, + 'expected_warnings': [r'.*unable to extract.*OpenGraph description.*|.*retrying.*'], }] def _real_extract(self, url): @@ -272,8 +379,58 @@ class CeskaTelevizePoradyIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - data_url = unescapeHTML(self._search_regex( - r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'iframe player url', group='url')) + hash_if_any = self._search_regex( + r'media_ivysilani:{hash:"(?P\w+)', + webpage, 'hash for iVysilani', group='hash', default=None) - return 
self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) + def fixup_hash(data_url): + if re.search(r'[&?]hash=', data_url) is None and hash_if_any: + return data_url + "&hash=" + hash_if_any + else: + return data_url + + # This would be so much easier with XPath + webpage_nolive = re.sub(r']*\bid=[\'"]live.*?', '', webpage, flags=re.S) + + matches = [compat_urlparse.urljoin('http://www.ceskatelevize.cz', fixup_hash(unescapeHTML(m.group('url')))) for m in + re.finditer(r'(?:]*\bdata-url=|]*\bsrc=)(["\'])(?P[^"\']*)["\']', + webpage_nolive) + if "/ivysilani/" in m.group('url') + ] + + ajaxUrl = self._search_regex(r'CT_VideoPlayer.config.ajaxUrl\s*=\s*\'([^\']*)\'', + webpage, 'video player ajax URL', default='/sport/ajax') + + def processMatch(href): + match1 = re.search(r'\bq=\'([^\']*)\'', href) + if not match1: + return '' + json = self._download_json( + compat_urllib_request.Request(compat_urlparse.urljoin('http://www.ceskatelevize.cz', ajaxUrl), + compat_urllib_parse.urlencode([('cmd', 'getVideoPlayerUrl'), ('q', match1.group(1)), ('autoStart', 'true')]), headers={'Content-Type': 'application/x-www-form-urlencoded'}), + video_id) + return compat_urlparse.urljoin('http://www.ceskatelevize.cz', json['videoPlayerUrl']) + + matches2 = [processMatch(unescapeHTML(m.group('href'))) for m in + re.finditer(r'<(?:[^>]*?\b(?:id=["\'](?P[^"\']*)["\']|href=(["\'])(?P(?:(?!\1).)*)["\']))*', + webpage) + if m.group('id') and "videoItem" in m.group('id') and m.group('href') + ] + + matches = matches + [m for m in matches2 if m] + if not matches: + raise RegexNotFoundError('Unable to extract iframe player URL') + + title = self._og_search_title(webpage) + ret = self.playlist_from_matches(matches, video_id=video_id, video_title=title, ie=CeskaTelevizeIE.ie_key()) + if len(ret['entries']) == 1: + ret = ret['entries'][0] + + def set_if_any(info, key, data): + if data: + info[key] = data + + set_if_any(ret, 'thumbnail', self._og_search_thumbnail(webpage)) + set_if_any(ret, 
'description', self._og_search_description(webpage)) + + return ret From e2603c57e73ba851fdbaab763358953aec5d7456 Mon Sep 17 00:00:00 2001 From: Jiri Palecek Date: Fri, 5 May 2017 01:03:23 +0200 Subject: [PATCH 2/5] [ceskatelevize] fix error with Python 2.6 --- youtube_dl/extractor/ceskatelevize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 4ecc1a4d1..d69191713 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -390,7 +390,7 @@ class CeskaTelevizePoradyIE(InfoExtractor): return data_url # This would be so much easier with XPath - webpage_nolive = re.sub(r']*\bid=[\'"]live.*?', '', webpage, flags=re.S) + webpage_nolive = re.sub(r'(?s)]*\bid=[\'"]live.*?', '', webpage) matches = [compat_urlparse.urljoin('http://www.ceskatelevize.cz', fixup_hash(unescapeHTML(m.group('url')))) for m in re.finditer(r'(?:]*\bdata-url=|]*\bsrc=)(["\'])(?P[^"\']*)["\']', From 4bdf3c40661912c1dc941984925ab305701af1c9 Mon Sep 17 00:00:00 2001 From: Jiri Palecek Date: Fri, 5 May 2017 05:00:32 +0200 Subject: [PATCH 3/5] Actually implement getting videos on mshokej.ceskatelevize... 
--- youtube_dl/extractor/ceskatelevize.py | 94 ++++++++++++++++++++------- 1 file changed, 71 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index d69191713..f3cb6629b 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -25,7 +25,7 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P[^/#?]+)' + _VALID_URL = r'https?://(?:(?:www\.)?ceskatelevize\.cz/ivysilani/|mshokej\.ceskatelevize\.cz/)(?:[^/?#&]+/)*(?P[^/#?]+)' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { @@ -71,6 +71,30 @@ class CeskaTelevizeIE(InfoExtractor): }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 'only_matching': True, + }, { + 'url': 'http://mshokej.ceskatelevize.cz/mshokej/zpravy/353352--pastrnak-jsem-rad-ze-jsem-se-rozhodl-prijet-reprezentovat-je-pro-me-cest', + 'info_dict': { + 'id': '61924494877293706', + 'ext': 'mp4', + 'title': 'Článek - MS hokej 2017', + 'duration': 68.8, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://mshokej.ceskatelevize.cz/videoarchiv/rozhovory-a-reportaze/353090--chystany-special-pro-ms-spousta-novinek-a-prime-prenosy-vsech-zapasu', + 'info_dict': { + 'id': '61924494877291670', + 'ext': 'mp4', + 'title': 'videoarchiv - MS hokej 2017', + 'duration': 243.4, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }] def _real_extract(self, url, retries=0): @@ -101,35 +125,59 @@ class CeskaTelevizeIE(InfoExtractor): type_ = None episode_id = None + data = [] - playlist = self._parse_json( - self._search_regex( - r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist', - default='{}'), playlist_id) - if 
playlist: - type_ = playlist.get('type') - episode_id = playlist.get('id') + is_mshokej = re.match(r'^https?://mshokej\..*', url) + if is_mshokej: + ids = [unescapeHTML(m.group('id')) for m in re.finditer(r'<(?:[^>]*?\b(?:class=["\'](?P[^"\']*)["\']|data-(?:videoarchive_autoplay|id)=["\'](?P[^"\']*)["\']|data-type=["\'](?P[^"\']*)["\']))*', webpage) + if ((m.group('dataType') and m.group('dataType') == 'media') or + m.group('class') and "video-archive__video" in m.group('class')) and + m.group('id') + ] + o = set() + for id in ids: + if id not in o: + data.append({ + 'playlist[0][type]': 'ct24', + 'playlist[0][id]': id, + 'requestUrl': url, + 'requestSource': 'sport', + 'type': 'dash' + }) + o.add(id) + if not data: + raise ExtractorError('Couldn\'t find any video ids') + else: + playlist = self._parse_json( + self._search_regex( + r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist', + default='{}'), playlist_id) + if playlist: + type_ = playlist.get('type') + episode_id = playlist.get('id') - if not type_: - type_ = self._html_search_regex( - r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', - webpage, 'type') - if not episode_id: - episode_id = self._html_search_regex( - r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', - webpage, 'episode_id') + if not type_: + type_ = self._html_search_regex( + r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', + webpage, 'type') + if not episode_id: + episode_id = self._html_search_regex( + r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', + webpage, 'episode_id') - data = { - 'playlist[0][type]': type_, - 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, - 'requestSource': 'iVysilani', - } + data = [{ + 'playlist[0][type]': type_, + 'playlist[0][id]': episode_id, + 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestSource': 'iVysilani', + }] entries = [] - for user_agent in (None, USER_AGENTS['Safari']): + for data in data: + for user_agent in (None, 
USER_AGENTS['Safari']): req = sanitized_Request( + 'http://mshokej.ceskatelevize.cz/get-client-playlist' if is_mshokej else 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', data=urlencode_postdata(data)) From 3faf92847543ebdb69d5eef9f366ca64783103de Mon Sep 17 00:00:00 2001 From: Jiri Palecek Date: Fri, 5 May 2017 14:21:00 +0200 Subject: [PATCH 4/5] [ceskatelevize] fix a small error in error handling code Duh. InfoExtractor._report_warning isn't a thing. --- youtube_dl/extractor/ceskatelevize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index f3cb6629b..9561e2483 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor): elif qs_dict.get('IDEC'): playlist_id = qs_dict['IDEC'][0] else: - self.report_warning("Could not extract ID from iFramePlayer URL %s" % url) + self._downloader.report_warning("Could not extract ID from iFramePlayer URL %s" % url) webpage = self._download_webpage(url, playlist_id) @@ -117,7 +117,7 @@ class CeskaTelevizeIE(InfoExtractor): raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) if 'Neplatný kód pro videopřehrávač' in webpage: if retries < 1: - self._report_warning('Invalid code on the page, retrying...') + self._downloader._report_warning('Invalid code on the page, retrying...') time.sleep(15) return self._real_extract(url, retries + 1) else: From e8222af8cd20f0025acbc661c11f781ede805842 Mon Sep 17 00:00:00 2001 From: Jiri Palecek Date: Fri, 5 May 2017 22:28:39 +0200 Subject: [PATCH 5/5] [ceskatelevize] Support for live video streams of the 2017 IIHF World Ice Hockey Championship To get the streams use URL http://mshokej.ceskatelevize.cz. NOTE: This is without tests (so far), because I don't know how to make tests of something that changes in the course of hours. 
--- youtube_dl/extractor/ceskatelevize.py | 55 ++++++++++++++++++--------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 9561e2483..999251260 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -25,7 +25,7 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?ceskatelevize\.cz/ivysilani/|mshokej\.ceskatelevize\.cz/)(?:[^/?#&]+/)*(?P[^/#?]+)' + _VALID_URL = r'https?://(?:(?:www\.)?ceskatelevize\.cz/ivysilani/|mshokej\.ceskatelevize\.cz/)(?:[^/?#&]+/)*(?P[^/#?]+)|https?://mshokej\.ceskatelevize\.cz/' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { @@ -129,19 +129,19 @@ class CeskaTelevizeIE(InfoExtractor): is_mshokej = re.match(r'^https?://mshokej\..*', url) if is_mshokej: - ids = [unescapeHTML(m.group('id')) for m in re.finditer(r'<(?:[^>]*?\b(?:class=["\'](?P[^"\']*)["\']|data-(?:videoarchive_autoplay|id)=["\'](?P[^"\']*)["\']|data-type=["\'](?P[^"\']*)["\']))*', webpage) + ids = [(unescapeHTML(m.group('id')), "broadcast" in m.group('class')) for m in re.finditer(r'<(?:[^>]*?\b(?:class=["\'](?P[^"\']*)["\']|data-(?:videoarchive_autoplay|id|live_channel)=["\'](?P[^"\']*)["\']|data-type=["\'](?P[^"\']*)["\']))*', webpage) if ((m.group('dataType') and m.group('dataType') == 'media') or - m.group('class') and "video-archive__video" in m.group('class')) and + m.group('class') and re.search(r'\b(?:video-archive__video|broadcast)\b', m.group('class'))) and m.group('id') ] o = set() - for id in ids: + for id, is_broadcast in ids: if id not in o: data.append({ 'playlist[0][type]': 'ct24', 'playlist[0][id]': id, 'requestUrl': url, - 'requestSource': 'sport', + 'requestSource': 'sport' if not is_broadcast else 'mshokej-live', 'type': 'dash' }) o.add(id) @@ -176,12 +176,20 @@ class 
CeskaTelevizeIE(InfoExtractor): for data in data: for user_agent in (None, USER_AGENTS['Safari']): - req = sanitized_Request( - 'http://mshokej.ceskatelevize.cz/get-client-playlist' if is_mshokej else - 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - data=urlencode_postdata(data)) + req = None + if data['requestSource'] == 'mshokej-live': + req = sanitized_Request( + 'http://playlist.ceskatelevize.cz/get-live-playlist/flash/%s' % data['playlist[0][id]'] + ) + else: + + req = sanitized_Request( + 'http://mshokej.ceskatelevize.cz/get-client-playlist' if is_mshokej else + 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + data=urlencode_postdata(data)) + + req.add_header('Content-type', 'application/x-www-form-urlencoded') - req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') req.add_header('X-Requested-With', 'XMLHttpRequest') if user_agent: @@ -193,20 +201,31 @@ class CeskaTelevizeIE(InfoExtractor): if not playlistpage: continue - playlist_url = playlistpage['url'] - if playlist_url == 'error_region': - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + playlist = None + playlist_title = None + playlist_description = None - req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) - req.add_header('Referer', url) + if playlistpage.get('url'): + playlist_url = playlistpage['url'] + if playlist_url == 'error_region': + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - playlist_title = self._og_search_title(webpage, default=None) or unescapeHTML(self._search_regex(r']*>(.*)]*>(.*)