From 9dc48d44b50db6e505c5ce0d2a9fff89dd997e51 Mon Sep 17 00:00:00 2001 From: bato3 Date: Thu, 12 Jul 2018 17:42:47 +0200 Subject: [PATCH 1/6] Add support to `Clappr` player https://github.com/clappr/clappr New extractor Vidlox --- youtube_dl/extractor/common.py | 127 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 17 ++++ youtube_dl/extractor/vidlox.py | 75 +++++++++++++++++ 4 files changed, 220 insertions(+) create mode 100644 youtube_dl/extractor/vidlox.py diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5d4db54d5..a7d151af3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2480,6 +2480,85 @@ class InfoExtractor(object): m3u8_id='hls', fatal=False)) return formats + def _find_clappr_data(self, webpage, video_id = None, transform_source=js_to_json): + """ + Find Clappr.Player data + http://clappr.github.io/classes/Player.html#method_constructor + """ + mobj = re.search( + r'new Clappr.Player\((?P{.+?})\);', + webpage.replace("\n","").replace("\t","")) + if mobj: + try: + clappr_data = self._parse_json(mobj.group('json'), + video_id=video_id, + transform_source=transform_source) + except ExtractorError: + pass + else: + if isinstance(clappr_data, dict): + return clappr_data + + + def _parse_clappr_data(self, clappr_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + """ + Parse Clappr player data + http://clappr.github.io/classes/Player.html#method_constructor + """ + + info_dict = { + 'id': video_id, + 'subtitles':{}, + } + info_dict['formats'] = self._extract_url_list_formats( + clappr_data.get("sources", [clappr_data.get("source")]), + video_id=video_id,m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) + + thumbnail = clappr_data.get("poster") + if thumbnail: + info_dict['thumbnail'] = thumbnail + + # Title from `chromecast` plugin https://github.com/deaathh/sdasdas + title = clappr_data.get('chromecast',{}).get('title') + if title: + info_dict['title'] = title + #Subtitles: + #https://github.com/clappr/clappr/blob/master/doc/BUILTIN_PLUGINS.md#playback-configuration + subtitles = clappr_data.get('externalTracks') or clappr_data.get('playback',{}).get('externalTracks') + if subtitles: + for sub in subtitles: + if sub.get('kind',"subtitles") != "subtitles": + continue + lang = sub.get('lang') or sub.get('language') or sub.get('label','undefined') + src = sub.get('src') + if not src: + continue + info_dict['subtitles'].setdefault(lang, []).append({ + 'url': compat_urlparse.urljoin(base_url,src), + 'ext': determine_ext(src), + }) + #https://github.com/JMVTechnology/Clappr-Subtitle + subtitle = clappr_data.get('subtitle') + if subtitle: + if isinstance(subtitle, dict): + src = subtitle.get("src") + lang = subtitle.get("lang") or subtitle.get('label') + else: + src = subtitle + if src: + src = compat_urlparse.urljoin(base_url,src) + ext = determine_ext(src) + if not lang: + lang = src.split('/')[-1] + if video_id in lang: + lang = lang.replace("%s_" % video_id,'').replace(video_id,'').replace(".%s" % ext, '') + info_dict['subtitles'].setdefault(lang, []).append({ + 'url': src, + 'ext': ext, + }) + return info_dict + def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): query = compat_urlparse.urlparse(url).query url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) @@ -2533,6 +2612,54 @@ class InfoExtractor(object): }) return formats + def _extract_url_list_formats(self, sources, video_id=None, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + """ + Transform ["url1", "url2", {source: <>, mimeType: <>}] to formats. + Knows + """ + formats = [] + format_id = -1 + for source in sources: + #The media source URL, or {source: <>, mimeType: <>} + if isinstance(source, dict): + source_url = source.get('source') + mime = source.get('mimeType') + else: + source_url = source + mime = None + + format_id = format_id + 1 + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + ext = mimetype2ext(mime) or determine_ext(source_url, 'mp4') + if ext == "m3u8": + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=m3u8_id, fatal=False, preference=1)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, video_id, mpd_id=mpd_id, fatal=False)) + elif ext == 'smil': + formats.extend(self._extract_smil_formats( + source_url, video_id, fatal=False)) + elif ext == "f4m": + formats.extend(self._extract_f4m_formats( + source_url, video_id, m3u8_id=m3u8_id, fatal=False)) + else: + urlh = self._request_webpage(source_url, video_id, note="Checking format %d information"%format_id, fatal=False) + size = int(urlh.headers.get('Content-Length')) + formats.append({ + 'url': source_url, + 'ext': ext, + 'format_id': "%d" % format_id, + 'filesize': size, + 'preference': int(size / 1024 / 1024 / 10 ), + }) + if len(formats) == 0: + raise ExtractorError('Source not found', expected=True, video_id=video_id) + return formats + def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): mobj = re.search( r'(?s)jwplayer\((?P[\'"])[^\'" ]+(?P=quote)\)(?!).*?\.setup\s*\((?P[^)]+)\)', diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c6f8a785a..f3a7a47e7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1266,6 +1266,7 @@ from .viewlift import ( ViewLiftEmbedIE, ) from .viewster import ViewsterIE +from .vidlox import VidloxIE from .viidea import ViideaIE from .vimeo import ( VimeoIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index aa04905ed..e462e2828 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2060,6 +2060,16 @@ class GenericIE(InfoExtractor): 'skip': 'TODO: fix nested playlists processing in tests', }, # { + # # Clappr.Player({}) + # 'url': 'http://demo.teleosmedia.com/mosaic/', + # 'md5': "TODO", + # 'info_dict': { + # 'id': 'mosaic', + # 'title': 'video', + # 'ext': 'mp4' + # }, + # }, + # { # # TODO: find another test # # http://schema.org/VideoObject # 'url': 'https://flipagram.com/f/nyvTSJMKId', @@ -3118,6 +3128,13 @@ class GenericIE(InfoExtractor): jwplayer_data, video_id, require_title=False, base_url=url) return merge_dicts(info, info_dict) + # Clappr.player() + clappr_dict = self._find_clappr_data(webpage, video_id) + if clappr_dict: + info = self._parse_clappr_data(clappr_dict, + video_id=video_id, base_url=url) + return merge_dicts(info, info_dict) + # Video.js embed mobj = re.search( r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;', diff --git a/youtube_dl/extractor/vidlox.py b/youtube_dl/extractor/vidlox.py new file mode 100644 index 000000000..c2203a08c --- /dev/null +++ b/youtube_dl/extractor/vidlox.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from ..utils import ExtractorError +from .common import InfoExtractor +from .openload import PhantomJSwrapper + + +class VidloxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidlox\.(?:me|tv)/(?:embed-)?(?P[0-9a-z]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'https://vidlox.me/5tq733o3wj1d', + 'md5': 'f780592146ad0458679064de891f3e3f', + 'info_dict': { + 'id': '5tq733o3wj1d', + 'ext': 'mp4', + 'title': r're:big buck bunny 1080p surround', + 'thumbnail': r're:^https?://.*\.jpg$', + 'subtitles': { + 'Spanish': [{ + 'ext': 'srt', + }], + } + } + }, { + 'url': 'https://vidlox.me/embed-bs2nk6dgqio1.html', + 'only_matching': True, + }] + + + + def _real_extract(self, url): + + video_id = self._match_id(url) + page_url = "https://vidlox.me/%s" % video_id + phantom = PhantomJSwrapper(self, required_version='2.0') + + # download page for couple simple test + webpage = self._download_webpage(page_url, video_id).replace("\n","").replace("\t","") + if 'File not found' in webpage: + raise ExtractorError('File not found', expected=True, video_id=video_id) + + title = None + if 'This video can be watched as embed only.' in webpage: + # extract tilte and download embed + title = self._html_search_regex( + r']*?>(?P.+?)\s*', webpage, 'title').replace('Watch ','',1) + webpage = None + page_url = "https://vidlox.me/embed-%s.html" % video_id + + # execute JS + webpage, _ = phantom.get(page_url, webpage, video_id=video_id) + + + + # extract player data + clappr_dict = self._find_clappr_data(webpage, video_id) + if not clappr_dict: + raise ExtractorError('Player data not found', + expected=False, video_id=video_id) + + # and parse it + info_dict = self._parse_clappr_data(clappr_dict, + video_id=video_id, base_url=page_url) + + info_dict['title'] = title or self._html_search_regex( + r']*?>(?P.+?)\s*</h1>', webpage, 'title') + + + + + return info_dict From 99d88a0b5c9f6c1d3a2af3c9f8ad26d17071c9fc Mon Sep 17 00:00:00 2001 From: bato3 <bato3@bandyci.org> Date: Thu, 12 Jul 2018 17:55:19 +0200 Subject: [PATCH 2/6] Add support for Clapper library and add extractor Vidlox --- youtube_dl/extractor/generic.py | 3 +-- youtube_dl/extractor/vidlox.py | 27 +++++++-------------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e462e2828..3b09ed954 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3131,8 +3131,7 @@ class GenericIE(InfoExtractor): # Clappr.player() clappr_dict = self._find_clappr_data(webpage, video_id) if clappr_dict: - info = self._parse_clappr_data(clappr_dict, - video_id=video_id, base_url=url) + info = self._parse_clappr_data(clappr_dict, video_id=video_id, base_url=url) return merge_dicts(info, info_dict) # Video.js embed diff --git a/youtube_dl/extractor/vidlox.py b/youtube_dl/extractor/vidlox.py index c2203a08c..ad3196036 100644 --- a/youtube_dl/extractor/vidlox.py +++ b/youtube_dl/extractor/vidlox.py @@ -1,10 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -import re - -from ..utils import ExtractorError -from .common import InfoExtractor +from ..utils import ExtractorError +from .common import InfoExtractor from .openload import PhantomJSwrapper @@ -29,8 +27,6 @@ class VidloxIE(InfoExtractor): 'url': 'https://vidlox.me/embed-bs2nk6dgqio1.html', 'only_matching': True, }] - - def _real_extract(self, url): @@ -39,37 +35,28 @@ class VidloxIE(InfoExtractor): phantom = PhantomJSwrapper(self, required_version='2.0') # download page for couple simple test - webpage = self._download_webpage(page_url, video_id).replace("\n","").replace("\t","") + webpage = self._download_webpage(page_url, video_id).replace("\n", "").replace("\t", "") if 'File not found' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) title = None if 'This video can be watched as embed only.' in webpage: # extract tilte and download embed - title = self._html_search_regex( - r'<title[^>]*?>(?P<title>.+?)\s*', webpage, 'title').replace('Watch ','',1) + title = self._html_search_regex(r']*?>(?P.+?)\s*', webpage, 'title').replace('Watch ', '', 1) webpage = None page_url = "https://vidlox.me/embed-%s.html" % video_id # execute JS webpage, _ = phantom.get(page_url, webpage, video_id=video_id) - - # extract player data clappr_dict = self._find_clappr_data(webpage, video_id) if not clappr_dict: - raise ExtractorError('Player data not found', - expected=False, video_id=video_id) + raise ExtractorError('Clappr data not found', expected=False, video_id=video_id) # and parse it - info_dict = self._parse_clappr_data(clappr_dict, - video_id=video_id, base_url=page_url) - - info_dict['title'] = title or self._html_search_regex( - r']*?>(?P.+?)\s*</h1>', webpage, 'title') - - + info_dict = self._parse_clappr_data(clappr_dict, video_id=video_id, base_url=page_url) + info_dict['title'] = title or self._html_search_regex(r'<h1[^>]*?>(?P<title>.+?)\s*</h1>', webpage, 'title') return info_dict From 6d4dd47f73c81adf41def144cf600077f304dcb6 Mon Sep 17 00:00:00 2001 From: bato3 <bato3@bandyci.org> Date: Thu, 12 Jul 2018 18:10:26 +0200 Subject: [PATCH 3/6] flake 8 --- youtube_dl/extractor/common.py | 69 +++++++++++++++------------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a7d151af3..e1c76d052 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2480,82 +2480,77 @@ class InfoExtractor(object): m3u8_id='hls', fatal=False)) return formats - def _find_clappr_data(self, webpage, video_id = None, transform_source=js_to_json): + def _find_clappr_data(self, webpage, video_id=None, transform_source=js_to_json): """ Find Clappr.Player data - http://clappr.github.io/classes/Player.html#method_constructor + https://github.com/clappr/clappr """ mobj = re.search( r'new Clappr.Player\((?P<json>{.+?})\);', - webpage.replace("\n","").replace("\t","")) + webpage.replace("\n", "").replace("\t", "")) if mobj: try: - clappr_data = self._parse_json(mobj.group('json'), - video_id=video_id, - transform_source=transform_source) + clappr_data = self._parse_json(mobj.group('json'), video_id=video_id, transform_source=transform_source) except ExtractorError: pass else: if isinstance(clappr_data, dict): return clappr_data - - def _parse_clappr_data(self, clappr_data, video_id=None, require_title=True, - m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + def _parse_clappr_data(self, clappr_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): """ - Parse Clappr player data - http://clappr.github.io/classes/Player.html#method_constructor + Parse Clappr player data + http://clappr.github.io/classes/Player.html#method_constructor """ info_dict = { 'id': video_id, - 'subtitles':{}, + 'subtitles': {}, } - info_dict['formats'] = self._extract_url_list_formats( - clappr_data.get("sources", [clappr_data.get("source")]), - video_id=video_id,m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) + info_dict['formats'] = self._extract_url_list_formats( + clappr_data.get("sources", [clappr_data.get("source")]), video_id=video_id, m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) thumbnail = clappr_data.get("poster") - if thumbnail: + if thumbnail: info_dict['thumbnail'] = thumbnail # Title from `chromecast` plugin https://github.com/deaathh/sdasdas - title = clappr_data.get('chromecast',{}).get('title') + title = clappr_data.get('chromecast', {}).get('title') if title: info_dict['title'] = title - #Subtitles: - #https://github.com/clappr/clappr/blob/master/doc/BUILTIN_PLUGINS.md#playback-configuration - subtitles = clappr_data.get('externalTracks') or clappr_data.get('playback',{}).get('externalTracks') + # Subtitles: + # https://github.com/clappr/clappr/blob/master/doc/BUILTIN_PLUGINS.md#playback-configuration + subtitles = clappr_data.get('externalTracks') or clappr_data.get('playback', {}).get('externalTracks') if subtitles: for sub in subtitles: - if sub.get('kind',"subtitles") != "subtitles": + if sub.get('kind', "subtitles") != "subtitles": continue - lang = sub.get('lang') or sub.get('language') or sub.get('label','undefined') + lang = sub.get('lang') or sub.get('language') or sub.get('label', 'undefined') src = sub.get('src') if not src: continue info_dict['subtitles'].setdefault(lang, []).append({ - 'url': compat_urlparse.urljoin(base_url,src), - 'ext': determine_ext(src), + 'url': compat_urlparse.urljoin(base_url, src), + 'ext': determine_ext(src), }) - #https://github.com/JMVTechnology/Clappr-Subtitle + # https://github.com/JMVTechnology/Clappr-Subtitle subtitle = clappr_data.get('subtitle') if subtitle: if isinstance(subtitle, dict): - src = subtitle.get("src") + src = subtitle.get("src") lang = subtitle.get("lang") or subtitle.get('label') else: src = subtitle if src: - src = compat_urlparse.urljoin(base_url,src) + src = compat_urlparse.urljoin(base_url, src) ext = determine_ext(src) if not lang: lang = src.split('/')[-1] if video_id in lang: - lang = lang.replace("%s_" % video_id,'').replace(video_id,'').replace(".%s" % ext, '') + lang = lang.replace("%s_" % video_id, '').replace(video_id, '').replace(".%s" % ext, '') info_dict['subtitles'].setdefault(lang, []).append({ - 'url': src, - 'ext': ext, + 'url': src, + 'ext': ext, }) return info_dict @@ -2612,16 +2607,14 @@ class InfoExtractor(object): }) return formats - def _extract_url_list_formats(self, sources, video_id=None, - m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + def _extract_url_list_formats(self, sources, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): """ Transform ["url1", "url2", {source: <>, mimeType: <>}] to formats. - Knows """ - formats = [] + formats = [] format_id = -1 for source in sources: - #The media source URL, or {source: <>, mimeType: <>} + # The media source URL, or {source: <>, mimeType: <>} if isinstance(source, dict): source_url = source.get('source') mime = source.get('mimeType') @@ -2643,18 +2636,18 @@ class InfoExtractor(object): elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) - elif ext == "f4m": + elif ext == "f4m": formats.extend(self._extract_f4m_formats( source_url, video_id, m3u8_id=m3u8_id, fatal=False)) else: - urlh = self._request_webpage(source_url, video_id, note="Checking format %d information"%format_id, fatal=False) + urlh = self._request_webpage(source_url, video_id, note="Checking format %d information" % format_id, fatal=False) size = int(urlh.headers.get('Content-Length')) formats.append({ 'url': source_url, 'ext': ext, 'format_id': "%d" % format_id, 'filesize': size, - 'preference': int(size / 1024 / 1024 / 10 ), + 'preference': int(size / 1024 / 1024 / 10), }) if len(formats) == 0: raise ExtractorError('Source not found', expected=True, video_id=video_id) From 56c6e3ec6c92efdba40f3be22d3cd93ce2d2cf8e Mon Sep 17 00:00:00 2001 From: bato3 <bato3@bandyci.org> Date: Fri, 13 Jul 2018 01:06:49 +0200 Subject: [PATCH 4/6] single quotes --- youtube_dl/extractor/common.py | 22 +++++++++++----------- youtube_dl/extractor/vidlox.py | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e1c76d052..2110198be 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2487,7 +2487,7 @@ class InfoExtractor(object): """ mobj = re.search( r'new Clappr.Player\((?P<json>{.+?})\);', - webpage.replace("\n", "").replace("\t", "")) + webpage.replace('\n', '').replace('\t', '')) if mobj: try: clappr_data = self._parse_json(mobj.group('json'), video_id=video_id, transform_source=transform_source) @@ -2508,9 +2508,9 @@ class InfoExtractor(object): 'subtitles': {}, } info_dict['formats'] = self._extract_url_list_formats( - clappr_data.get("sources", [clappr_data.get("source")]), video_id=video_id, m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) + clappr_data.get('sources', [clappr_data.get('source')]), video_id=video_id, m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) - thumbnail = clappr_data.get("poster") + thumbnail = clappr_data.get('poster') if thumbnail: info_dict['thumbnail'] = thumbnail @@ -2523,7 +2523,7 @@ class InfoExtractor(object): subtitles = clappr_data.get('externalTracks') or clappr_data.get('playback', {}).get('externalTracks') if subtitles: for sub in subtitles: - if sub.get('kind', "subtitles") != "subtitles": + if sub.get('kind', 'subtitles') != 'subtitles': continue lang = sub.get('lang') or sub.get('language') or sub.get('label', 'undefined') src = sub.get('src') @@ -2537,8 +2537,8 @@ class InfoExtractor(object): subtitle = clappr_data.get('subtitle') if subtitle: if isinstance(subtitle, dict): - src = subtitle.get("src") - lang = subtitle.get("lang") or subtitle.get('label') + src = subtitle.get('src') + lang = subtitle.get('lang') or subtitle.get('label') else: src = subtitle if src: @@ -2547,7 +2547,7 @@ class InfoExtractor(object): if not lang: lang = src.split('/')[-1] if video_id in lang: - lang = lang.replace("%s_" % video_id, '').replace(video_id, '').replace(".%s" % ext, '') + lang = lang.replace('%s_' % video_id, '').replace(video_id, '').replace('.%s' % ext, '') info_dict['subtitles'].setdefault(lang, []).append({ 'url': src, 'ext': ext, @@ -2626,7 +2626,7 @@ class InfoExtractor(object): if base_url: source_url = compat_urlparse.urljoin(base_url, source_url) ext = mimetype2ext(mime) or determine_ext(source_url, 'mp4') - if ext == "m3u8": + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False, preference=1)) @@ -2636,16 +2636,16 @@ class InfoExtractor(object): elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) - elif ext == "f4m": + elif ext == 'f4m': formats.extend(self._extract_f4m_formats( source_url, video_id, m3u8_id=m3u8_id, fatal=False)) else: - urlh = self._request_webpage(source_url, video_id, note="Checking format %d information" % format_id, fatal=False) + urlh = self._request_webpage(source_url, video_id, note='Checking format %d information' % format_id, fatal=False) size = int(urlh.headers.get('Content-Length')) formats.append({ 'url': source_url, 'ext': ext, - 'format_id': "%d" % format_id, + 'format_id': '%d' % format_id, 'filesize': size, 'preference': int(size / 1024 / 1024 / 10), }) diff --git a/youtube_dl/extractor/vidlox.py b/youtube_dl/extractor/vidlox.py index ad3196036..14b3a2d59 100644 --- a/youtube_dl/extractor/vidlox.py +++ b/youtube_dl/extractor/vidlox.py @@ -31,11 +31,11 @@ class VidloxIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - page_url = "https://vidlox.me/%s" % video_id + page_url = 'https://vidlox.me/%s' % video_id phantom = PhantomJSwrapper(self, required_version='2.0') # download page for couple simple test - webpage = self._download_webpage(page_url, video_id).replace("\n", "").replace("\t", "") + webpage = self._download_webpage(page_url, video_id).replace('\n', '').replace('\t', '') if 'File not found' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) @@ -44,7 +44,7 @@ class VidloxIE(InfoExtractor): # extract tilte and download embed title = self._html_search_regex(r'<title[^>]*?>(?P<title>.+?)\s*', webpage, 'title').replace('Watch ', '', 1) webpage = None - page_url = "https://vidlox.me/embed-%s.html" % video_id + page_url = 'https://vidlox.me/embed-%s.html' % video_id # execute JS webpage, _ = phantom.get(page_url, webpage, video_id=video_id) From 32064a0b729c1f7bb3db9acc874c33492fdd5e2b Mon Sep 17 00:00:00 2001 From: bato3 Date: Sat, 14 Jul 2018 13:10:41 +0200 Subject: [PATCH 5/6] Sort formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2110198be..e9c4bd037 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2508,7 +2508,10 @@ class InfoExtractor(object): 'subtitles': {}, } info_dict['formats'] = self._extract_url_list_formats( - clappr_data.get('sources', [clappr_data.get('source')]), video_id=video_id, m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) + clappr_data.get('sources', [clappr_data.get('source')]), + video_id=video_id, m3u8_id=m3u8_id, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url + ) + self._sort_formats(info_dict['formats']) thumbnail = clappr_data.get('poster') if thumbnail: From bebcd6691a3a67fbfbfc38417a4487c57fb12b13 Mon Sep 17 00:00:00 2001 From: bato3 Date: Fri, 27 Jul 2018 17:42:48 +0200 Subject: [PATCH 6/6] checking headers for filesize was bad idea --- youtube_dl/extractor/common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e9c4bd037..7dd189b11 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2643,14 +2643,10 @@ class InfoExtractor(object): formats.extend(self._extract_f4m_formats( source_url, video_id, m3u8_id=m3u8_id, fatal=False)) else: - urlh = self._request_webpage(source_url, video_id, note='Checking format %d information' % format_id, fatal=False) - size = int(urlh.headers.get('Content-Length')) formats.append({ 'url': source_url, 'ext': ext, 'format_id': '%d' % format_id, - 'filesize': size, - 'preference': int(size / 1024 / 1024 / 10), }) if len(formats) == 0: raise ExtractorError('Source not found', expected=True, video_id=video_id)