From 9efe794fb97f0da6b98eb355c350c03805c0c2e4 Mon Sep 17 00:00:00 2001 From: waahhhh <40632052+waahhhh@users.noreply.github.com> Date: Sun, 1 Sep 2019 00:33:27 +0200 Subject: [PATCH] [vivo] added ROT47 to utils; updated stream url decoding; updated test video --- youtube_dl/extractor/shared.py | 33 +++++++++++++++++---------------- youtube_dl/utils.py | 12 ++++++++++++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index ff575f592..ed9c069d7 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_b64decode +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote_plus +) from ..utils import ( determine_ext, ExtractorError, @@ -10,6 +13,7 @@ from ..utils import ( parse_filesize, url_or_none, urlencode_postdata, + rot47 ) @@ -85,13 +89,13 @@ class VivoIE(SharedBaseIE): _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' _TEST = { - 'url': 'http://vivo.sx/d7ddda0e78', - 'md5': '15b3af41be0b4fe01f4df075c2678b2c', + 'url': 'https://vivo.sx/5ec5b4cd00', + 'md5': 'b95282602513086f5b71aae4cd043a0c', 'info_dict': { - 'id': 'd7ddda0e78', + 'id': '5ec5b4cd00', 'ext': 'mp4', - 'title': 'Chicken', - 'filesize': 515659, + 'title': 'big_buck_bunny_480p_surround-fix', + 'filesize': 68270000, }, } @@ -113,15 +117,12 @@ class VivoIE(SharedBaseIE): def _extract_video_url(self, webpage, video_id, url): def decode_url(encoded_url): - return compat_b64decode(encoded_url).decode('utf-8') + return rot47(compat_urllib_parse_unquote_plus(encoded_url)) + + stream_url = self._search_regex( + r'InitializeStream\s*\(\{[\s\S]*(source\:[\s]\')(?P[\s\S\:]+?)(\',\s*)', webpage, + 'stream url', default=None, group='url') - stream_url = url_or_none(decode_url(self._search_regex( - r'data-stream\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'stream url', default=None, group='url'))) if stream_url: - return stream_url - return self._parse_json( - self._search_regex( - r'InitializeStream\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'stream', group='url'), - video_id, transform_source=decode_url)[0] + return url_or_none(decode_url(stream_url)) + return None diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index aed988b88..32ddbaae7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -5594,3 +5594,15 @@ def random_birthday(year_field, month_field, day_field): month_field: str(random_date.month), day_field: str(random_date.day), } + + +# source: https://rot47.net/_py/rot47.txt +def rot47(s): + x = [] + for i in range(len(s)): + j = ord(s[i]) + if j >= 33 and j <= 126: + x.append(chr(33 + ((j + 14) % 94))) + else: + x.append(s[i]) + return ''.join(x)