From 2d2751d4dbd384a00b5829170af64de440808acc Mon Sep 17 00:00:00 2001
From: Chirica Gheorghe
Date: Tue, 2 Jul 2019 21:43:26 +0300
Subject: [PATCH] [porntrex] Add extractor - basic

---
Review notes (not part of the commit message):
- The blob hash on the porntrex.py "index" line predates the edits below;
  regenerate the patch with git format-patch before a 3-way apply.

 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/porntrex.py   | 138 +++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100644 youtube_dl/extractor/porntrex.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 530474f3f..abe8cb2a5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1480,6 +1480,7 @@ from .younow import (
     YouNowMomentIE,
 )
 from .youporn import YouPornIE
+from .porntrex import PornTrexIE
 from .yourporn import YourPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py
new file mode 100644
index 000000000..075d4ea1d
--- /dev/null
+++ b/youtube_dl/extractor/porntrex.py
@@ -0,0 +1,138 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_attribute,
+    get_element_by_class,
+    get_elements_by_class,
+    js_to_json,
+    sanitized_Request,
+)
+
+
+class PornTrexIE(InfoExtractor):
+    """Information extractor for single video pages on porntrex.com."""
+
+    # 'id' and 'display_id' are the group names _real_extract() reads back.
+    _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.porntrex.com/video/781815/black-angelika-cayenne-klein-teens-vs-milfs-2-2015',
+        'md5': 'aaa4b8890bf0ea9bb76a8588da79b65a',
+        'info_dict': {
+            'id': '781815',
+            'display_id': 'black-angelika-cayenne-klein-teens-vs-milfs-2-2015',
+            'ext': 'mp4',
+            'title': 'Black Angelika & Cayenne Klein - Teens vs MILFs 2 (2015',
+            'description': 'Black Angelika & Cayenne Klein - Teens vs MILFs 2 (2015)',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'RedB',
+            'average_rating': float,
+            'view_count': int,
+            'comment_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+    }]
+
+    @staticmethod
+    def _parse_count(text):
+        """Return text as an int, ignoring whitespace; 0 if no digits remain."""
+        digits = re.sub(r'\s+', '', text or '')
+        return int(digits) if digits else 0
+
+    def _real_extract(self, url):
+        """Download the video page at url and return its info dict.
+
+        Formats come from the page's inline ``flashvars`` object; the
+        remaining metadata is scraped from the HTML.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        # Request the page with the age_verified cookie and a Referer set.
+        request = sanitized_Request(url)
+        request.add_header('Cookie', 'age_verified=1')
+        request.add_header('Referer', url)
+        webpage = self._download_webpage(request, display_id)
+
+        # NOTE(review): the <p>...</p> wrapper is assumed markup - confirm
+        # against a live page. The og/meta lookups remain as fallbacks.
+        title = self._html_search_regex(
+            r'(?s)<p[^>]+class=["\']title-video[^>]+>(.+?)</p>',
+            webpage, 'title', default=None) or self._og_search_title(
+            webpage, default=None) or self._html_search_meta(
+            'title', webpage, fatal=True)
+
+        # flashvars is treated as a JavaScript object literal (bare keys,
+        # single quotes), so convert it with js_to_json before parsing. Fall
+        # back to an empty dict so that a missing or unparsable blob cannot
+        # crash the loop below.
+        flashvars = self._parse_json(
+            self._search_regex(
+                r'flashvars\s*=\s*(\{.+?\});', webpage,
+                'media definitions', default='{}', flags=re.DOTALL),
+            video_id, transform_source=js_to_json, fatal=False) or {}
+
+        formats = []
+        for key, value in flashvars.items():
+            # '<key>_text' entries hold the label of '<key>', not a URL.
+            if key.endswith('_text') or not value:
+                continue
+            if key.startswith('video_url') or re.match(r'^video_alt_url\d+$', key):
+                formats.append({
+                    'url': value,
+                    'format_id': flashvars.get('%s_text' % key),
+                })
+        self._sort_formats(formats)
+
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = self._og_search_thumbnail(webpage)
+        # Protocol-relative thumbnail URLs are given an explicit https scheme.
+        if thumbnail and thumbnail.startswith('//'):
+            thumbnail = 'https:%s' % thumbnail
+
+        categories = get_elements_by_class('js-cat', webpage)
+
+        # The element lookups below may find nothing; '' keeps the regex
+        # helpers from receiving None, and each count then defaults to 0.
+        # NOTE(review): the data-rating attribute is assumed markup - confirm.
+        average_rating = float(self._html_search_regex(
+            r'data-rating=["\'](\d+(?:\.\d+)?)',
+            get_element_by_class('scale', webpage) or '',
+            'average rating', default='0'))
+
+        # NOTE(review): any tag carrying class="badge" is accepted here.
+        view_count = self._parse_count(self._html_search_regex(
+            r'<[^>]+class=["\']badge["\']>([\d\s]+)',
+            webpage, 'view count', default='0'))
+
+        comment_count = self._parse_count(self._html_search_regex(
+            r'Comments\s+\(([\d\s]+)\)',
+            get_element_by_attribute('href', '.block-new-comment', webpage) or '',
+            'comment count', default='0'))
+
+        # NOTE(review): assumes the uploader name is the first link inside
+        # the 'username' element, else the element's whole text - confirm.
+        uploader = self._html_search_regex(
+            (r'(?s)<a[^>]*>(.+?)</a>', r'(?s)(.+)'),
+            get_element_by_class('username', webpage) or '',
+            'uploader', default=None) or None
+
+        # TODO: extract upload_date and tags.
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'average_rating': average_rating,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
+            'formats': formats,
+            'age_limit': 18,
+        }