From 40daaef417d0da5d32933c721596c44c4ea0d616 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Fri, 27 Jan 2017 15:57:03 +0100 Subject: [PATCH 1/5] [visir] Add new information extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 21 ++++++ youtube_dl/extractor/visir.py | 104 +++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 youtube_dl/extractor/visir.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 81366f933..984c26b06 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1119,6 +1119,7 @@ from .viki import ( VikiIE, VikiChannelIE, ) +from .visir import VisirMediaIE from .viu import ( ViuIE, ViuPlaylistIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a23486620..15f0b04ae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -81,6 +81,7 @@ from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE +from .visir import VisirMediaIE class GenericIE(InfoExtractor): @@ -1473,6 +1474,20 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TwentyMinutenIE.ie_key()], + }, + { + # Visir embed + 'url': 'http://www.visir.is/-viljum-hjalpa-theim-ad-hjalpa-sjalfum-ser-/article/2017170129096', + 'info_dict': { + 'id': 'VTV8CE25BB4-9132-48AD-A2EE-00AF0BAA02A0', + 'ext': 'mp4', + 'title': 'H\u00f3pur nemenda s\u00f6fnu\u00f0u pening fyrir Ge\u00f0hj\u00e1lp', + 'description': None, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [VisirMediaIE.ie_key()], } # { # # TODO: find another test @@ -2438,6 +2453,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( openload_urls, ie=OpenloadIE.ie_key()) + # Look for Visir embeds + visir_urls = VisirMediaIE._extract_urls(webpage) + if visir_urls: + return _playlist_from_matches( + visir_urls, 
ie=VisirMediaIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/visir.py b/youtube_dl/extractor/visir.py new file mode 100644 index 000000000..a83928cb4 --- /dev/null +++ b/youtube_dl/extractor/visir.py @@ -0,0 +1,104 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + base_url, + remove_start, + urljoin, +) + + +class VisirMediaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?visir\.is/section(?:/media)?/.+?fileid=(?P[^/]+)$' + _TESTS = [{ + 'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP51729', + 'md5': '1486324696d1b9f30fcea985a7922f2c', + 'info_dict': { + 'id': 'CLP51729', + 'display_id': 'CLP51729', + 'ext': 'mp4', + 'title': 'Gu\u00f0j\u00f3n: Mj\u00f6g j\u00e1kv\u00e6\u00f0ur \u00e1 framhaldi\u00f0', + 'description': None, + 'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/51729_3.jpg' + }, + }, { + 'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP45905', + 'info_dict': { + 'id': 'CLP45905', + 'display_id': 'CLP45905', + 'ext': 'mp4', + 'title': 'Eva Laufey - Nau\u00f0synlegt a\u00f0 b\u00f6rn f\u00e1i a\u00f0 koma n\u00e1l\u00e6gt matarger\u00f0', + 'description': 'md5:24422433a08d270a3690d149edf113b8', + 'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/45905_3.jpg', + }, + 'params': { + 'skip_download': True, + }, + }] + + @staticmethod + def _extract_urls(webpage): + media_base_url = 'http://www.visir.is/section/media/?template=iplayer&fileid=%s' + video_ids = [media_base_url % m.group('id') for m in re.finditer( + r'App\.Player\.Init\(\{[^\}]*Type:\s*\'(?:audio|video)\'[^\}]+FileId:\s*\'(?P.+?)\'[^\}]+Host:\s*\'visirvod\.365cdn\.is\'', + webpage)] + return video_ids + + def _extract_formats(self, filename, video_id, media_type): + playlist_url = 
'http://visirvod.365cdn.is/hls-vod/_definst_/mp4:%s/playlist.m3u8' % filename + if media_type == 'video': + formats = self._extract_wowza_formats( + playlist_url, video_id, skip_protocols=['dash']) + else: + formats = self._extract_wowza_formats( + playlist_url, video_id, skip_protocols=['dash', 'f4m', 'm3u8']) + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + regex_pattern = r'App\.Player\.Init\s*\(\s*\{[^\}]*%s:[^\}]*?\'(.+?)\'' + video_id = self._search_regex( + regex_pattern % 'FileId', + webpage, 'video id') + filename = self._search_regex( + regex_pattern % 'File', + webpage, 'filename') + media_type = self._search_regex( + regex_pattern % 'Type', + webpage, 'media type') + + formats = self._extract_formats(filename, video_id, media_type) + + title = self._search_regex( + regex_pattern % 'Title', + webpage, 'video title', default=None) + if not title: + title = self._og_search_title(webpage) + if title: + title = remove_start(title, 'Vísir -').strip() + + description = self._og_search_description(webpage, default=None) + + thumbnail = self._search_regex( + regex_pattern % '(?:I|i)mage', + webpage, 'video title', default=None) + if thumbnail: + if thumbnail.startswith('/'): + thumbnail = urljoin(base_url(url), thumbnail) + else: + thumbnail = self._og_search_thumbnail(webpage, default=None) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } From c9ae209e2c6c4fa1fb953fe31ece81817da37068 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 30 Jan 2017 12:57:15 +0100 Subject: [PATCH 2/5] [visir] Complete rewrite of the visir information extractor. - Articles are handled by VisirArticleIE, the media sites by VisirMediaIE. - The video m3u8 playlist is retrieved from a JSON playlist, but unfortunately this does not always work, e.g. 
http://www.visir.is/kaup-thriggja-risaskipa-styrkja-tengsl-islands-og-graenlands/article/2017170129275 - A better way to do it is to mimic the JavaScript code from the visir embedded video site, e.g. http://www.visir.is/section/media/?template=iplayer&fileid=SRC38BFD200-8465-4A46-9F2A-342E613568E1 --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/generic.py | 21 ---- youtube_dl/extractor/visir.py | 161 ++++++++++++++++++----------- 3 files changed, 106 insertions(+), 82 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 984c26b06..1d79860d0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1119,7 +1119,11 @@ from .viki import ( VikiIE, VikiChannelIE, ) -from .visir import VisirMediaIE +from .visir import ( + VisirBaseIE, + VisirMediaIE, + VisirArticleIE, +) from .viu import ( ViuIE, ViuPlaylistIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 15f0b04ae..a23486620 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -81,7 +81,6 @@ from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE -from .visir import VisirMediaIE class GenericIE(InfoExtractor): @@ -1474,20 +1473,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TwentyMinutenIE.ie_key()], - }, - { - # Visir embed - 'url': 'http://www.visir.is/-viljum-hjalpa-theim-ad-hjalpa-sjalfum-ser-/article/2017170129096', - 'info_dict': { - 'id': 'VTV8CE25BB4-9132-48AD-A2EE-00AF0BAA02A0', - 'ext': 'mp4', - 'title': 'H\u00f3pur nemenda s\u00f6fnu\u00f0u pening fyrir Ge\u00f0hj\u00e1lp', - 'description': None, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [VisirMediaIE.ie_key()], } # { # # TODO: find another test @@ -2453,12 +2438,6 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( openload_urls, 
ie=OpenloadIE.ie_key()) - # Look for Visir embeds - visir_urls = VisirMediaIE._extract_urls(webpage) - if visir_urls: - return _playlist_from_matches( - visir_urls, ie=VisirMediaIE.ie_key()) - # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/visir.py b/youtube_dl/extractor/visir.py index a83928cb4..7a67034ea 100644 --- a/youtube_dl/extractor/visir.py +++ b/youtube_dl/extractor/visir.py @@ -5,100 +5,141 @@ import re from .common import InfoExtractor from ..utils import ( - base_url, + NO_DEFAULT, + js_to_json, remove_start, urljoin, ) -class VisirMediaIE(InfoExtractor): +class VisirBaseIE(InfoExtractor): + _VALID_URL = r'visir:(?P[^:]+):(?P(?:audio|video)):(?P\d+):(?P\d+)' + _BASE_URL = 'http://www.visir.is' + + def _extract_player_info(self, video_id, webpage, default=NO_DEFAULT): + field_names = ('FileId', 'Categoryid', 'Subcategoryid', 'Type', 'File') + player_info_regex = r'App\.Player\.Init\s*\(\s*(.+?)\)' + player_info_script = self._search_regex( + player_info_regex, webpage, 'player info', default=default) + if not player_info_script: + return len(field_names) * [None] + player_info_dict = self._parse_json( + player_info_script, video_id, transform_source=js_to_json) + return (player_info_dict.get(name) for name in field_names) + + def _extract_fields_from_media_list(self, video_id, category, subcategory, media_type): + url = 'http://www.visir.is/section/MEDIA?template=related_json&kat=%s&subkat=%s' % (category, subcategory) + if media_type == 'audio': + url += '&type=audio' + media_collection = self._download_json(url, video_id) + field_names = ('link', 'file', 'title', 'image') + return next( + (e.get(field) for field in field_names) for e in media_collection if e.get('mediaid') == video_id) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, media_type, category_id, subcategory_id = mobj.group( + 'id', 
'type', 'category', 'subcategory') + media_link, _, _, _ = self._extract_fields_from_media_list( + video_id, category_id, subcategory_id, media_type) + return self.url_result( + urljoin(self._BASE_URL, media_link), ie=VisirMediaIE.ie_key()) + + +class VisirMediaIE(VisirBaseIE): _VALID_URL = r'https?://(?:www\.)?visir\.is/section(?:/media)?/.+?fileid=(?P[^/]+)$' _TESTS = [{ 'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP51729', 'md5': '1486324696d1b9f30fcea985a7922f2c', 'info_dict': { 'id': 'CLP51729', - 'display_id': 'CLP51729', 'ext': 'mp4', - 'title': 'Gu\u00f0j\u00f3n: Mj\u00f6g j\u00e1kv\u00e6\u00f0ur \u00e1 framhaldi\u00f0', + 'title': u'Guðjón: Mjög jákvæður á framhaldið', 'description': None, - 'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/51729_3.jpg' + 'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/ExternalData/IsBolti_clips/51729_3.jpg' }, }, { - 'url': 'http://www.visir.is/section/MEDIA99&fileid=CLP45905', + 'url': 'http://www.visir.is/section/MEDIA98&fileid=CLP49923', 'info_dict': { - 'id': 'CLP45905', - 'display_id': 'CLP45905', - 'ext': 'mp4', - 'title': 'Eva Laufey - Nau\u00f0synlegt a\u00f0 b\u00f6rn f\u00e1i a\u00f0 koma n\u00e1l\u00e6gt matarger\u00f0', - 'description': 'md5:24422433a08d270a3690d149edf113b8', - 'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/clips/45905_3.jpg', + 'id': 'CLP49923', + 'ext': 'mp3', + 'title': u'Ósk Gunnars - Sigga Soffía og dansverkið FUBAR', + 'description': u'Ósk Gunnars alla virka daga á FM957 frá 13-17', }, 'params': { 'skip_download': True, }, }] - @staticmethod - def _extract_urls(webpage): - media_base_url = 'http://www.visir.is/section/media/?template=iplayer&fileid=%s' - video_ids = [media_base_url % m.group('id') for m in re.finditer( - r'App\.Player\.Init\(\{[^\}]*Type:\s*\'(?:audio|video)\'[^\}]+FileId:\s*\'(?P.+?)\'[^\}]+Host:\s*\'visirvod\.365cdn\.is\'', - webpage)] - return video_ids - - def _extract_formats(self, filename, video_id, media_type): 
- playlist_url = 'http://visirvod.365cdn.is/hls-vod/_definst_/mp4:%s/playlist.m3u8' % filename - if media_type == 'video': - formats = self._extract_wowza_formats( - playlist_url, video_id, skip_protocols=['dash']) - else: - formats = self._extract_wowza_formats( - playlist_url, video_id, skip_protocols=['dash', 'f4m', 'm3u8']) + def _extract_formats(self, video_id, playlist_url, filepath): + formats = self._extract_wowza_formats( + playlist_url, video_id, skip_protocols=['dash']) + formats.append( + {'url': urljoin('http://static.visir.is/', filepath)}) self._sort_formats(formats) return formats def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - regex_pattern = r'App\.Player\.Init\s*\(\s*\{[^\}]*%s:[^\}]*?\'(.+?)\'' - video_id = self._search_regex( - regex_pattern % 'FileId', - webpage, 'video id') - filename = self._search_regex( - regex_pattern % 'File', - webpage, 'filename') - media_type = self._search_regex( - regex_pattern % 'Type', - webpage, 'media type') - - formats = self._extract_formats(filename, video_id, media_type) - - title = self._search_regex( - regex_pattern % 'Title', - webpage, 'video title', default=None) - if not title: - title = self._og_search_title(webpage) - if title: - title = remove_start(title, 'Vísir -').strip() + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) description = self._og_search_description(webpage, default=None) - thumbnail = self._search_regex( - regex_pattern % '(?:I|i)mage', - webpage, 'video title', default=None) - if thumbnail: - if thumbnail.startswith('/'): - thumbnail = urljoin(base_url(url), thumbnail) - else: - thumbnail = self._og_search_thumbnail(webpage, default=None) + _, category_id, subcategory_id, media_type, filepath = self._extract_player_info( + video_id, webpage) + + _, playlist_url, title, thumbnail = self._extract_fields_from_media_list( + video_id, category_id, subcategory_id, media_type) + + 
formats = self._extract_formats( + video_id, playlist_url, filepath) return { 'id': video_id, - 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'formats': formats, } + +class VisirArticleIE(VisirBaseIE): + _VALID_URL = r'https?://(?:www\.)?visir\.is/.+/article/(?P\d+)$' + + _TEST = { + 'url': 'http://www.visir.is/landsmenn-minntust-birnu-brjansdottur/article/2017170128825', + 'info_dict': { + 'id': '2017170128825', + 'title': u'Landsmenn minntust Birnu Brjánsdóttur', + 'description': u'Hundruð kerta voru tendruð á Arnarhóli í ljósaskiptunum í dag.' + }, + 'playlist_count': 2, + } + + def _real_extract(self, url): + article_id = self._match_id(url) + webpage = self._download_webpage(url, article_id) + + title = remove_start(self._og_search_title(webpage), u'Vísir -').strip() + description = self._og_search_description(webpage, default=None) + + entries = [] + + # Try to find the main video of the article: + video_id, category_id, subcategory_id, media_type, _= self._extract_player_info( + article_id, webpage, default=None) # TODO: default? + if video_id and category_id and subcategory_id and media_type in ('video', 'audio'): + entries.append(self.url_result( + 'visir:%s:%s:%s:%s' % (video_id, media_type, category_id, subcategory_id), + ie=VisirBaseIE.ie_key())) + + # Try to find embedded visir videos: + video_urls = [m.group('url') for m in re.finditer( + r']+src=(["\'])(?Phttp://www\.visir\.is/section/.+?)\1', webpage)] + for url in video_urls: + entries.append(self.url_result(url, ie=VisirMediaIE.ie_key())) + + return self.playlist_result( + entries, + playlist_id=article_id, + playlist_title=title, + playlist_description=description) From 90c7aa1a590bf0d642356a3d741a4b6534bb71d0 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 30 Jan 2017 17:07:21 +0100 Subject: [PATCH 3/5] [visir] Major rewrite of the visir information extractor. 
--- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/visir.py | 118 +++++++++++++++-------------- 2 files changed, 63 insertions(+), 56 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1d79860d0..ca2117fca 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1120,7 +1120,6 @@ from .viki import ( VikiChannelIE, ) from .visir import ( - VisirBaseIE, VisirMediaIE, VisirArticleIE, ) diff --git a/youtube_dl/extractor/visir.py b/youtube_dl/extractor/visir.py index 7a67034ea..85f9a461f 100644 --- a/youtube_dl/extractor/visir.py +++ b/youtube_dl/extractor/visir.py @@ -13,37 +13,69 @@ from ..utils import ( class VisirBaseIE(InfoExtractor): - _VALID_URL = r'visir:(?P[^:]+):(?P(?:audio|video)):(?P\d+):(?P\d+)' - _BASE_URL = 'http://www.visir.is' + _URL_BASE = 'http://www.visir.is' - def _extract_player_info(self, video_id, webpage, default=NO_DEFAULT): - field_names = ('FileId', 'Categoryid', 'Subcategoryid', 'Type', 'File') + def _extract_player_info_dict(self, webpage, display_id, default=NO_DEFAULT): player_info_regex = r'App\.Player\.Init\s*\(\s*(.+?)\)' player_info_script = self._search_regex( player_info_regex, webpage, 'player info', default=default) - if not player_info_script: - return len(field_names) * [None] - player_info_dict = self._parse_json( - player_info_script, video_id, transform_source=js_to_json) - return (player_info_dict.get(name) for name in field_names) + if player_info_script: + return self._parse_json( + player_info_script, display_id, transform_source=js_to_json) + return default - def _extract_fields_from_media_list(self, video_id, category, subcategory, media_type): - url = 'http://www.visir.is/section/MEDIA?template=related_json&kat=%s&subkat=%s' % (category, subcategory) + def _extract_playlist_dict(self, media_id, category_id, subcategory_id, media_type): + url = urljoin( + self._URL_BASE, 
'/section/MEDIA?template=related_json&kat=%s&subkat=%s' % (category_id, subcategory_id)) if media_type == 'audio': url += '&type=audio' - media_collection = self._download_json(url, video_id) - field_names = ('link', 'file', 'title', 'image') + media_collection = self._download_json(url, media_id) return next( - (e.get(field) for field in field_names) for e in media_collection if e.get('mediaid') == video_id) + media_entry for media_entry in media_collection if media_entry.get('mediaid') == media_id) - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id, media_type, category_id, subcategory_id = mobj.group( - 'id', 'type', 'category', 'subcategory') - media_link, _, _, _ = self._extract_fields_from_media_list( - video_id, category_id, subcategory_id, media_type) - return self.url_result( - urljoin(self._BASE_URL, media_link), ie=VisirMediaIE.ie_key()) + def _extract_formats(self, media_id, playlist_url, filepath): + formats = [] + if playlist_url: + formats = self._extract_wowza_formats( + playlist_url, media_id, skip_protocols=['dash']) + formats.append( + {'url': urljoin('http://static.visir.is/', filepath)}) + self._sort_formats(formats) + return formats + + def _extract_media(self, player_info_dict, media_id, description=None): + category_id = player_info_dict.get('Categoryid') + subcategory_id = player_info_dict.get('Subcategoryid') + media_type = player_info_dict.get('Type') + filepath = player_info_dict.get('File') + + try: + playlist_dict = self._extract_playlist_dict(media_id, category_id, subcategory_id, media_type) + title = playlist_dict.get('title') + thumbnail = playlist_dict.get('image') + playlist_url = playlist_dict.get('file') + + except StopIteration: + # Fallback if video is not found in playlist_dict: + title = player_info_dict.get('Title') + thumbnail = player_info_dict.get('image') + if media_type == 'video': + geoblock = player_info_dict.get('GeoBlock') + host = player_info_dict.get('Host') + geo = '-geo/' if 
geoblock else '/' + playlist_url = 'http://' + host + '/hls-vod' + geo + '_definst_/mp4:' + filepath + '/playlist.m3u8' + else: + playlist_url = None + + formats = self._extract_formats(media_id, playlist_url, filepath) + + return { + 'id': media_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats + } class VisirMediaIE(VisirBaseIE): @@ -71,40 +103,18 @@ class VisirMediaIE(VisirBaseIE): }, }] - def _extract_formats(self, video_id, playlist_url, filepath): - formats = self._extract_wowza_formats( - playlist_url, video_id, skip_protocols=['dash']) - formats.append( - {'url': urljoin('http://static.visir.is/', filepath)}) - self._sort_formats(formats) - return formats - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) + + player_info_dict = self._extract_player_info_dict(webpage, media_id) description = self._og_search_description(webpage, default=None) + return self._extract_media(player_info_dict, media_id, description=description) - _, category_id, subcategory_id, media_type, filepath = self._extract_player_info( - video_id, webpage) - - _, playlist_url, title, thumbnail = self._extract_fields_from_media_list( - video_id, category_id, subcategory_id, media_type) - - formats = self._extract_formats( - video_id, playlist_url, filepath) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats, - } class VisirArticleIE(VisirBaseIE): _VALID_URL = r'https?://(?:www\.)?visir\.is/.+/article/(?P\d+)$' - _TEST = { 'url': 'http://www.visir.is/landsmenn-minntust-birnu-brjansdottur/article/2017170128825', 'info_dict': { @@ -125,12 +135,10 @@ class VisirArticleIE(VisirBaseIE): entries = [] # Try to find the main video of the article: - video_id, category_id, subcategory_id, media_type, _= self._extract_player_info( - 
article_id, webpage, default=None) # TODO: default? - if video_id and category_id and subcategory_id and media_type in ('video', 'audio'): - entries.append(self.url_result( - 'visir:%s:%s:%s:%s' % (video_id, media_type, category_id, subcategory_id), - ie=VisirBaseIE.ie_key())) + player_info_dict = self._extract_player_info_dict(webpage, article_id, default=None) + if player_info_dict: + media_id = player_info_dict.get('FileId') + entries.append(self._extract_media(player_info_dict, media_id)) # Try to find embedded visir videos: video_urls = [m.group('url') for m in re.finditer( From 4318e9e5df132c3998f18e85d96a8e8429e770df Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Tue, 31 Jan 2017 15:58:12 +0100 Subject: [PATCH 4/5] [visir] Add rtmp and rtsp to skip_protocols --- youtube_dl/extractor/visir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/visir.py b/youtube_dl/extractor/visir.py index 85f9a461f..98d5dfa4c 100644 --- a/youtube_dl/extractor/visir.py +++ b/youtube_dl/extractor/visir.py @@ -37,7 +37,7 @@ class VisirBaseIE(InfoExtractor): formats = [] if playlist_url: formats = self._extract_wowza_formats( - playlist_url, media_id, skip_protocols=['dash']) + playlist_url, media_id, skip_protocols=['dash', 'rtmp', 'rtsp']) formats.append( {'url': urljoin('http://static.visir.is/', filepath)}) self._sort_formats(formats) @@ -74,7 +74,7 @@ class VisirBaseIE(InfoExtractor): 'title': title, 'description': description, 'thumbnail': thumbnail, - 'formats': formats + 'formats': formats, } From 000133ddcced13bd3106807ae3ab59e3a180dbd4 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Fri, 3 Feb 2017 14:29:51 +0100 Subject: [PATCH 5/5] [visir] Fix python 3.2 syntax errors. 
--- youtube_dl/extractor/visir.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/visir.py b/youtube_dl/extractor/visir.py index 98d5dfa4c..6b6fac666 100644 --- a/youtube_dl/extractor/visir.py +++ b/youtube_dl/extractor/visir.py @@ -86,7 +86,7 @@ class VisirMediaIE(VisirBaseIE): 'info_dict': { 'id': 'CLP51729', 'ext': 'mp4', - 'title': u'Guðjón: Mjög jákvæður á framhaldið', + 'title': 'Guðjón: Mjög jákvæður á framhaldið', 'description': None, 'thumbnail': 'http://www.visir.is/apps/pbcsi.dll/urlget?url=/ExternalData/IsBolti_clips/51729_3.jpg' }, @@ -95,8 +95,8 @@ class VisirMediaIE(VisirBaseIE): 'info_dict': { 'id': 'CLP49923', 'ext': 'mp3', - 'title': u'Ósk Gunnars - Sigga Soffía og dansverkið FUBAR', - 'description': u'Ósk Gunnars alla virka daga á FM957 frá 13-17', + 'title': 'Ósk Gunnars - Sigga Soffía og dansverkið FUBAR', + 'description': 'Ósk Gunnars alla virka daga á FM957 frá 13-17', }, 'params': { 'skip_download': True, @@ -119,8 +119,8 @@ class VisirArticleIE(VisirBaseIE): 'url': 'http://www.visir.is/landsmenn-minntust-birnu-brjansdottur/article/2017170128825', 'info_dict': { 'id': '2017170128825', - 'title': u'Landsmenn minntust Birnu Brjánsdóttur', - 'description': u'Hundruð kerta voru tendruð á Arnarhóli í ljósaskiptunum í dag.' + 'title': 'Landsmenn minntust Birnu Brjánsdóttur', + 'description': 'Hundruð kerta voru tendruð á Arnarhóli í ljósaskiptunum í dag.' }, 'playlist_count': 2, } @@ -129,7 +129,7 @@ class VisirArticleIE(VisirBaseIE): article_id = self._match_id(url) webpage = self._download_webpage(url, article_id) - title = remove_start(self._og_search_title(webpage), u'Vísir -').strip() + title = remove_start(self._og_search_title(webpage), 'Vísir -').strip() description = self._og_search_description(webpage, default=None) entries = []