From d6535a44f045529f362762adced31e1e44d4c17a Mon Sep 17 00:00:00 2001 From: alrii Date: Wed, 14 Mar 2018 09:43:11 -0400 Subject: [PATCH 1/4] [vidtodo] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidtodo.py | 77 ++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 youtube_dl/extractor/vidtodo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bef3b82ee..3cd616ecc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1235,6 +1235,7 @@ from .vidme import ( VidmeUserIE, VidmeUserLikesIE, ) +from .vidtodo import VidtodoIE from .vidzi import VidziIE from .vier import VierIE, VierVideosIE from .viewlift import ( diff --git a/youtube_dl/extractor/vidtodo.py b/youtube_dl/extractor/vidtodo.py new file mode 100644 index 000000000..9e9c2546c --- /dev/null +++ b/youtube_dl/extractor/vidtodo.py @@ -0,0 +1,77 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + determine_ext, + decode_packed_codes, + std_headers, + ExtractorError, +) + + +def xpro(encoded_url): + decoded_url = '' + alphabet = 'abcdefghijklmnopqrstuvwxyz' + betalpha = 'nopqrstuvwxyzabcdefghijklm' + for char in encoded_url: + if char.isalpha(): + decoded_url += alphabet[betalpha.find(char)] + else: + decoded_url += char + return decoded_url + + +class VidtodoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(vidtod|vidtodo).me/(?:embed-)?(?P\w+)' + _TESTS = [{ + 'url': 'https://vidtodo.me/4c8rx0tt8ek4', + 'md5': '10a0195a5855df8050bd2a0d6692f7c5', + 'info_dict': { + 'id': '4c8rx0tt8ek4', + 'ext': 'mp4', + 'title': 'Watch 343291981 mp4', + }, + }, { + 'url': 'https://vidtodo.com/embed-4c8rx0tt8ek4.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + std_headers['referer'] = 'https://rg3.github.io/youtube-dl/' + webpage = self._download_webpage('http://vidtod.me/%s' % video_id, video_id, headers=std_headers) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + + data = re.search(r"}\('.+.split\('\|'\)", webpage).group(0) + + if data: + codes = decode_packed_codes(data) + else: + raise ExtractorError('File not found', expected=True, video_id=video_id) + + source = re.search(r'\.setup\((.+?\],image:.+?\")', codes).group(1) + '}' + source_keys = ['sources', 'file', 'label', 'image'] + for key_ in source_keys: + source = source.replace(key_, '"' + key_ + '"') + source = eval(source) + + formats = [] + for format_ in source['sources']: + ext = determine_ext(format_['file'], default_ext=None) + formats.append({ + 'url': format_['file'], + 'ext': ext or 'mp4', + 'resolution': format_['label'] + }) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': source['image'], + 'formats': formats, + } From 711ecd2d0b8f36423cdfbc7eee83c41462aebcd4 Mon Sep 17 00:00:00 2001 From: arnold Date: Wed, 14 Mar 2018 10:28:47 -0400 Subject: [PATCH 2/4] Update vidtodo.py --- youtube_dl/extractor/vidtodo.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vidtodo.py b/youtube_dl/extractor/vidtodo.py index 9e9c2546c..e55edff65 100644 --- a/youtube_dl/extractor/vidtodo.py +++ b/youtube_dl/extractor/vidtodo.py @@ -34,10 +34,7 @@ class VidtodoIE(InfoExtractor): 'ext': 'mp4', 'title': 'Watch 343291981 mp4', }, - }, { - 'url': 'https://vidtodo.com/embed-4c8rx0tt8ek4.html', - 'only_matching': True, - }] + },] def _real_extract(self, url): video_id = self._match_id(url) From 89d1b9095b989932717b716fb9d72421ec2ce1ea Mon Sep 17 00:00:00 2001 From: arnold Date: Thu, 15 Mar 2018 02:06:12 -0400 Subject: [PATCH 3/4] [Vidtodo] Fix requested changes --- youtube_dl/extractor/vidtodo.py | 72 +++++++++++++++------------------ 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/vidtodo.py b/youtube_dl/extractor/vidtodo.py index e55edff65..8f6400d33 100644 --- a/youtube_dl/extractor/vidtodo.py +++ b/youtube_dl/extractor/vidtodo.py @@ -5,70 +5,64 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, decode_packed_codes, - std_headers, ExtractorError, + js_to_json, ) -def xpro(encoded_url): - decoded_url = '' - alphabet = 'abcdefghijklmnopqrstuvwxyz' - betalpha = 'nopqrstuvwxyzabcdefghijklm' - for char in encoded_url: - if char.isalpha(): - decoded_url += alphabet[betalpha.find(char)] - else: - decoded_url += char - return decoded_url - - class VidtodoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(vidtod|vidtodo).me/(?:embed-)?(?P\w+)' _TESTS = [{ 'url': 'https://vidtodo.me/4c8rx0tt8ek4', - 'md5': '10a0195a5855df8050bd2a0d6692f7c5', + 'md5': 'cfd8415e586d59a4de942757eeb7145f', 'info_dict': { 'id': '4c8rx0tt8ek4', 'ext': 'mp4', 'title': 'Watch 343291981 mp4', }, - },] + }, ] + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' + + @staticmethod + def xpro(encoded_url): + decoded_url = '' + alphabet = 'abcdefghijklmnopqrstuvwxyz' + betalpha = 'nopqrstuvwxyzabcdefghijklm' + for char in encoded_url: + if char.isalpha(): + decoded_url += alphabet[betalpha.find(char)] + else: + decoded_url += char + return decoded_url def _real_extract(self, url): video_id = self._match_id(url) - std_headers['referer'] = 'https://rg3.github.io/youtube-dl/' - webpage = self._download_webpage('http://vidtod.me/%s' % video_id, video_id, headers=std_headers) + headers = { + 'User-Agent': self._USER_AGENT, + 'Connection': 'keep-alive', + 'referer': 'https://vidtodo.com', + } + + webpage = self._download_webpage('http://vidtod.me/%s' % video_id, video_id, headers=headers) title = self._html_search_regex(r'(.+?)', webpage, 'title') - data = re.search(r"}\('.+.split\('\|'\)", webpage).group(0) - + data = re.search(r"}\('.+\.split\('\|'\)", webpage).group(0) if data: codes = decode_packed_codes(data) else: raise ExtractorError('File not found', expected=True, video_id=video_id) - source = re.search(r'\.setup\((.+?\],image:.+?\")', codes).group(1) + '}' - source_keys = ['sources', 'file', 'label', 'image'] - for key_ in source_keys: - source = source.replace(key_, '"' + key_ + '"') - source = eval(source) + source = self._search_regex(r'setup\(([^)].+\.jpg\")', codes, 'jwplayer data', fatal=False) + '}' + encoded_url = self._search_regex(r'xpro\((.+?)\)', source, 'encoded url', fatal=False) + if encoded_url: + source = source.replace('xpro(' + encoded_url + ')', self.xpro(encoded_url)) - formats = [] - for format_ in source['sources']: - ext = determine_ext(format_['file'], default_ext=None) - formats.append({ - 'url': format_['file'], - 'ext': ext or 'mp4', - 'resolution': format_['label'] - }) + jwplayer_data = self._parse_json(source, video_id, transform_source=js_to_json) - return { - 'id': video_id, - 'title': title, - 'thumbnail': source['image'], - 'formats': formats, - } + info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) + info_dict['title'] = title + + return info_dict From 130d38c18b2dd4b5b6865734a60d7594183ca370 Mon Sep 17 00:00:00 2001 From: alrii Date: Mon, 19 Mar 2018 02:05:39 -0400 Subject: [PATCH 4/4] [Vidtodo] update _VALID_URL regex --- youtube_dl/extractor/vidtodo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidtodo.py b/youtube_dl/extractor/vidtodo.py index 8f6400d33..3f39256a7 100644 --- a/youtube_dl/extractor/vidtodo.py +++ b/youtube_dl/extractor/vidtodo.py @@ -12,7 +12,7 @@ from ..utils import ( class VidtodoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(vidtod|vidtodo).me/(?:embed-)?(?P\w+)' + _VALID_URL = r'https?://(?:www\.)?vidt.d?.+\.(?:me|com)/(?:embed-)?(?P\w+)' _TESTS = [{ 'url': 'https://vidtodo.me/4c8rx0tt8ek4', 'md5': 'cfd8415e586d59a4de942757eeb7145f',