# Helper methods added to ``VidziIE`` (youtube_dl/extractor/vidzi.py) by the
# patch "[vidzi] Fix extraction as described in #6819".  They are written as
# methods (first parameter ``self``); ``_real_extract`` calls
# ``webpage = self.unpack(webpage)`` right after downloading the page and
# before searching it for the video URL.
import re
import string


def int2base(self, x, base):
    """Return the integer ``x`` written in ``base``.

    Digits are taken from ``0-9``, then ``a-z``, then ``A-Z``, so bases
    from 2 up to 62 are supported.  Negative numbers get a leading ``-``.

    Raises:
        ValueError: if ``base`` is outside ``2..62`` (a smaller base would
            never terminate, a larger one has no digit symbols).
    """
    digs = string.digits + string.ascii_letters
    if not 2 <= base <= len(digs):
        raise ValueError(
            'base must be between 2 and %d, got %r' % (len(digs), base))
    if x == 0:
        return digs[0]
    negative = x < 0
    x = abs(x)
    digits = []
    while x:
        x, remainder = divmod(x, base)
        digits.append(digs[remainder])
    if negative:
        digits.append('-')
    # Digits were produced least-significant first.
    return ''.join(reversed(digits))


def unpack_packer(self, p, a, c, k, s):
    """Expand a ``function(p,a,c,k,e,d)`` packed payload.

    Args:
        p: the packed source text, in which keywords were replaced by
            their index written in base ``a``.
        a: the numeric base of the indices (int or digit string).
        c: the number of keywords (int or digit string).
        k: the keyword list joined by the separator ``s``.
        s: the separator used in ``k``.

    Returns:
        ``p`` with every whole-word index token replaced by its keyword.
    """
    base = int(a)
    words = k.split(s)
    # Never index past the keyword list, even if ``c`` claims more entries.
    count = min(int(c), len(words))

    # Build the token -> keyword table for *all* indices, including 0 and 1.
    # Empty keywords are skipped so their token is left as written (this
    # mirrors the ``if(k[c])`` guard of the usual JavaScript decoder).
    table = {}
    for index in range(count):
        word = words[index]
        if word:
            table[self.int2base(index, base)] = word

    # A single pass with a callback: keywords are inserted literally (no
    # backslash/group-reference processing) and already-substituted text is
    # never substituted again.
    return re.sub(
        r'\b\w+\b',
        lambda m: table.get(m.group(0), m.group(0)),
        p)


def unpack(self, content):
    """Replace every packed-script call found in ``content`` by its expansion.

    The matched span runs from ``function(p,a,c,k,e,d){`` up to and
    including the ``'...'.split('x')`` keyword argument; text around it
    (for example a leading ``eval(``) is left untouched.  Content without
    any packed script is returned unchanged.
    """
    packer_re = (
        r'function\(p,a,c,k,e,d\)\{.+\}'
        r'\(\'(.*)\',(\d+),(\d+),\'([^\']+)\'\.split\(\'(.)\'\)')
    # Groups are (p, a, c, k, s), exactly the arguments of unpack_packer.
    return re.sub(
        packer_re,
        lambda m: self.unpack_packer(*m.groups()),
        content)