From a4ee9b5e583ab22ef4ff0078964f4a8ee30aec27 Mon Sep 17 00:00:00 2001
From: Kagami Hiiragi
Date: Wed, 20 Jul 2016 02:29:36 +0300
Subject: [PATCH] [aqstream] New extractor

Some initial support for aqstream.com live streams.
It doesn't work with all streams yet but works for some.
---
 youtube_dl/extractor/aqstream.py   | 126 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/utils.py                |  18 +++++
 3 files changed, 145 insertions(+)
 create mode 100644 youtube_dl/extractor/aqstream.py

diff --git a/youtube_dl/extractor/aqstream.py b/youtube_dl/extractor/aqstream.py
new file mode 100644
index 000000000..6399681dc
--- /dev/null
+++ b/youtube_dl/extractor/aqstream.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+import random
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..utils import (
+    ExtractorError,
+    bytes_to_intlist,
+    intlist_to_bytes,
+    decode_pkcs7,
+    urlencode_postdata,
+    remove_start,
+)
+
+
+class AqstreamIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?aqstream\.com/[-\w]+/(?P<id>[-\w]+)'
+    _TESTS = [{
+        'url': 'http://aqstream.com/jtbc/JTBC',
+        'only_matching': True,
+    }]
+    _STREAMS_CAT = 'kr'
+    _STREAMS_URL = 'http://aqstream.com/list.php?cat=%s&all' % _STREAMS_CAT
+    # Currently calculated in JS code as
+    # `document.getElementById("theme").lastElementChild.className`
+    # Not worth messing with the DOM because the id of the theme element might
+    # easily be changed too.
+    _STREAMS_SECRET = b'switch-handle'
+
+    def _decode_streams(self, data, video_id):
+        """
+        Decrypt the streams list: base64 of a 16-byte IV followed by the
+        AES-CBC ciphertext of PKCS#7-padded JSON. The key is the SHA-256
+        digest of _STREAMS_SECRET. Returns the parsed JSON.
+        """
+        data = bytes_to_intlist(base64.b64decode(data))
+        iv, ciphertext = data[:16], data[16:]
+        key = bytes_to_intlist(hashlib.sha256(self._STREAMS_SECRET).digest())
+        # TODO(Kagami): aes_cbc_decrypt is really slow, it takes about 1 second
+        # to decode just 24kb...
+        plaintext = decode_pkcs7(aes_cbc_decrypt(ciphertext, key, iv))
+        text = intlist_to_bytes(plaintext).decode('utf-8')
+        return self._parse_json(text, video_id)
+
+    def _find_stream(self, streams, video_id):
+        """
+        Return the stream named video_id as a dict, or None if there is no
+        such stream. Only the first item of every group dict is inspected;
+        its value lists streams as [name, src, type, link] sequences.
+        """
+        for stream_group in streams:
+            group_name, group = next(iter(stream_group.items()))
+            for stream in group:
+                stream_name = stream[0]
+                if stream_name == video_id:
+                    return {
+                        'name': stream_name,
+                        'group_name': group_name,
+                        'src': stream[1],
+                        'type': stream[2],
+                        'link': stream[3],
+                    }
+
+    def _get_dmp_link(self, stream, dmp_servers, video_id):
+        """
+        Request the stream path from a randomly chosen DMP server and return
+        it prefixed with that server's base URL.
+        """
+        server = 'http://' + random.choice(dmp_servers)  # Stick to JS behavior
+        data = {
+            'type': 'dmp',
+            'id': remove_start(stream['link'], '[hls]|')
+        }
+        link = self._download_webpage(
+            server + '/pull.php', video_id, 'Getting DMP link',
+            data=urlencode_postdata(data),
+            headers={
+                'Referer': server,
+                'X-Requested-With': 'XMLHttpRequest',
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+        return server + link
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url).replace('-', ' ')
+
+        streams_data = self._download_webpage(
+            self._STREAMS_URL, video_id, 'Downloading streams data',
+            headers={
+                'X-Requested-With': 'XMLHttpRequest',
+            })
+        streams = self._decode_streams(streams_data, video_id)
+        stream = self._find_stream(
+            streams['links'][self._STREAMS_CAT], video_id)
+        if not stream:
+            raise ExtractorError('Cannot find stream for %s channel' % video_id,
+                                 expected=True)
+
+        if stream['src'] == 'direct' or stream['src'] == 'directstream':
+            link = stream['link']
+        elif stream['src'] == 'dmp':
+            dmp_servers = streams['servers']['data']
+            link = self._get_dmp_link(stream, dmp_servers, video_id)
+        else:
+            raise ExtractorError('%s links are not supported' % stream['src'])
+
+        if stream['type'] == 'hls':
+            formats = self._extract_m3u8_formats(link, video_id, 'mp4',
+                                                 live=True)
+            self._sort_formats(formats)
+        else:
+            formats = [{'url': link}]
+
+        descriptions = streams['info'][self._STREAMS_CAT].get(
+            'descriptions', {})
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'description': descriptions.get(stream['group_name']),
+            'formats': formats,
+            'is_live': True,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 7314be747..5a95679fc 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -43,6 +43,7 @@ from .appletrailers import (
     AppleTrailersIE,
     AppleTrailersSectionIE,
 )
+from .aqstream import AqstreamIE
 from .archiveorg import ArchiveOrgIE
 from .ard import (
     ARDIE,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e6e0155b4..0d9b722a4 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2968,3 +2968,21 @@ def parse_m3u8_attributes(attrib):
 
 def urshift(val, n):
     return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+
+def decode_pkcs7(data, block=16):
+    """
+    Remove PKCS#7 padding, see:
+    <https://tools.ietf.org/html/rfc5652#section-6.3>.
+
+    @param {int[]} data input data
+    @param {int} block cipher block size (1-255)
+    @returns {int[]} data without padding
+    @throws {ValueError} if data is empty or its padding length is invalid
+    """
+    assert 0 < block < 256, 'Bad padding block'
+    # A pad length of 0 is never valid and `data[:-0]` would drop everything.
+    num_padded = data[-1] if data else 0
+    if not 0 < num_padded <= min(block, len(data)):
+        raise ValueError('Input is not padded or padding is corrupt')
+    return data[:-num_padded]