From c87bac18143de515f63d4adb493e5f51ac6c08b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A7=85=E5=89=8D=20=E5=A6=96=E6=80=AA?= <34918963+ekimae-youkai@users.noreply.github.com>
Date: Thu, 28 Dec 2017 17:36:41 +0000
Subject: [PATCH] [hclips] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/hclips.py     | 87 ++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 youtube_dl/extractor/hclips.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e7b93a699..1e62faf22 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -420,6 +420,7 @@ from .hbo import (
     HBOIE,
     HBOEpisodeIE,
 )
+from .hclips import HclipsIE
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
diff --git a/youtube_dl/extractor/hclips.py b/youtube_dl/extractor/hclips.py
new file mode 100644
index 000000000..c1adf795e
--- /dev/null
+++ b/youtube_dl/extractor/hclips.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor, ExtractorError
+
+
+class HclipsIE(InfoExtractor):
+    """Information extractor for hclips.com video pages."""
+
+    # The named group "id" captures the URL slug that _real_extract() uses as display_id.
+    _VALID_URL = r'https?://(?:www\.)?hclips\.com/videos/(?P<id>[a-zA-Z0-9-_]+)/?'
+    _TEST = {
+        'url': 'https://www.hclips.com/videos/hottest-homemade-movie-with-milf-brunette-scenes28529/',
+        'md5': '9f4b205e68340cb8eed5a52d96301fd3',
+        'info_dict': {
+            'id': '1214901',
+            'display_id': 'hottest-homemade-movie-with-milf-brunette-scenes28529',
+            'ext': 'mp4',
+            'title': 'Hottest Homemade movie with MILF, Brunette scenes',
+            'age_limit': 18,
+        }
+    }
+
+    def decode_hclips_video_url(self, encoded_url):
+        """Decode the obfuscated video URL embedded in the page.
+
+        The input is read four symbols at a time, base64-style: each symbol's
+        index in decode_table is combined by shifting into up to three
+        characters, and the last table entry ('~') marks padding.
+
+        Raises ExtractorError if a group is incomplete, contains a symbol
+        that is not in the table, or the result does not start with "http".
+        """
+        # Warning: Contains cyrillic unicode
+        decode_table = "АВСDЕFGHIJKLМNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,~"
+        last_char = len(decode_table) - 1
+        decoded_chars = []
+        for i in range(0, len(encoded_url), 4):
+            try:
+                a, b, c, d = [decode_table.index(ch) for ch in encoded_url[i:i + 4]]
+            except ValueError:
+                # Unknown symbol, or fewer than four symbols left to unpack.
+                raise ExtractorError('Unable to decode video URL: malformed input')
+
+            decoded_chars.append(chr((a << 2) | (b >> 4)))
+
+            if c != last_char:
+                decoded_chars.append(chr((b & 0xf) << 4 | c >> 2))
+
+            if d != last_char:
+                decoded_chars.append(chr((c & 0x3) << 6 | d))
+
+        decoded_url = ''.join(decoded_chars)
+        if not decoded_url.startswith('http'):
+            # %-formatting: auto-numbered '{}' fields need Python 2.7 or later.
+            raise ExtractorError('Expected URL after decode. Got %s' % decoded_url)
+
+        return decoded_url
+
+    def _real_extract(self, url):
+        """Download the video page and build the info dict for it."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The numeric id is taken from the embed URL; '+' (not '*') so an empty
+        # match cannot become the id, falling back to the slug instead.
+        video_id = self._html_search_regex(
+            r'https://www\.hclips\.com/embed/([0-9]+)',
+            webpage, 'id_number', default=display_id)
+
+        # NOTE(review): the markup around the capture group was lost in the
+        # copy of this patch under review; <h1> is an assumption - confirm
+        # against the live page. A miss falls back to display_id.
+        title = self._html_search_regex(
+            r'<h1[^>]*>(.+?)</h1>', webpage, 'title', default=display_id)
+
+        encoded_video_url = self._search_regex(
+            r'var video_url="(.*?)"', webpage, 'video_url')
+        video_url = self.decode_hclips_video_url(encoded_video_url)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'url': video_url,
+            'age_limit': 18,
+        }