1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-07 16:17:18 +08:00

[porntrex] Add extractor - basic

This commit is contained in:
Chirica Gheorghe 2019-07-02 21:43:26 +03:00
parent 1335bf10f6
commit 2d2751d4db
2 changed files with 129 additions and 0 deletions

View File

@ -1480,6 +1480,7 @@ from .younow import (
YouNowMomentIE,
)
from .youporn import YouPornIE
from .porntrex import PornTrexIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .youtube import (

View File

@ -0,0 +1,128 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
sanitized_Request,
get_elements_by_class,
get_element_by_class,
get_element_by_attribute
)
class PornTrexIE(InfoExtractor):
    """Extractor for single video pages on porntrex.com.

    The video page is fetched with an ``age_verified`` cookie. Format URLs
    are read from the JavaScript ``flashvars`` object embedded in the page;
    the remaining metadata (title, description, thumbnail, uploader, rating,
    view/comment counts, categories) is scraped from the HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.porntrex.com/video/781815/black-angelika-cayenne-klein-teens-vs-milfs-2-2015',
        'md5': 'aaa4b8890bf0ea9bb76a8588da79b65a',
        'info_dict': {
            'id': '781815',
            'display_id': 'black-angelika-cayenne-klein-teens-vs-milfs-2-2015',
            'ext': 'mp4',
            # NOTE(review): unlike the description below, the title has no
            # closing parenthesis - confirm against the live page.
            'title': 'Black Angelika & Cayenne Klein - Teens vs MILFs 2 (2015',
            'description': 'Black Angelika & Cayenne Klein - Teens vs MILFs 2 (2015)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'RedB',
            'average_rating': float,
            'view_count': int,
            'comment_count': int,
            'categories': list,
            'age_limit': 18,
        },
    }]

    @staticmethod
    def _to_int(text):
        """Return ``text`` as an int after dropping all whitespace.

        Returns None when nothing but digits would be expected yet the
        cleaned text is empty or contains other characters.
        """
        digits = re.sub(r'\s+', '', text or '')
        return int(digits) if digits.isdigit() else None

    @staticmethod
    def _to_float(text):
        """Return ``text`` as a float, or None when it is not numeric."""
        try:
            return float(text)
        except (TypeError, ValueError):
            return None

    def _extract_flashvars(self, webpage, video_id):
        """Return the page's ``flashvars`` object as a dict.

        The object is a JavaScript literal, not JSON, so it is normalised
        first: tabs/newlines are dropped, single quotes become double quotes
        and bare keys are quoted. An empty dict is returned when the object
        is missing, cannot be parsed, or is not a mapping.
        """
        raw = self._search_regex(
            r'flashvars\s*=\s*(\{.+?\});', webpage,
            'media definitions', default='{}', flags=re.MULTILINE | re.DOTALL)
        raw = raw.replace('\t', '').replace('\n', '').replace("'", '"')
        # Quote bare keys; the class includes 0 so that keys ending in a
        # zero digit (e.g. video_alt_url10) are quoted as well.
        raw = re.sub(r'([a-z0-9_]+):\s+', '"\\1": ', raw)
        flashvars = self._parse_json(raw, video_id, fatal=False)
        # _parse_json(fatal=False) yields None on malformed input.
        return flashvars if isinstance(flashvars, dict) else {}

    @staticmethod
    def _build_formats(flashvars):
        """Build the formats list from ``video_url*``/``video_alt_url*`` keys.

        Each URL key may have a sibling ``<key>_text`` entry holding its
        label; when that entry is absent the key itself is used as
        ``format_id``.
        """
        formats = []
        for key, value in flashvars.items():
            if key.endswith('_text') or not value:
                continue
            if not (key.startswith('video_url')
                    or re.match(r'video_alt_url\d*$', key)):
                continue
            formats.append({
                'url': value,
                'format_id': flashvars.get('%s_text' % key) or key,
            })
        return formats

    def _real_extract(self, url):
        """Extract the info dict for the video page at ``url``.

        Raises whatever ``_download_webpage``, the fatal title lookup or
        ``_sort_formats`` raise when the page, its title or its formats
        cannot be obtained.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        request = sanitized_Request(url)
        request.add_header('Cookie', 'age_verified=1')
        request.add_header('Referer', url)
        webpage = self._download_webpage(request, display_id)

        title = self._html_search_regex(
            r'(?s)<p[^>]+class=["\']title-video[^>]+>(.+?)</p>',
            webpage, 'title', default=None) or self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'title', webpage, fatal=True)

        formats = self._build_formats(
            self._extract_flashvars(webpage, video_id))
        self._sort_formats(formats)

        description = self._og_search_description(webpage, default=None)

        thumbnail = self._og_search_thumbnail(webpage)
        # The thumbnail is optional; only fix up protocol-relative URLs.
        if thumbnail and thumbnail.startswith('//'):
            thumbnail = 'https:%s' % thumbnail

        categories = get_elements_by_class('js-cat', webpage)

        # The element lookups below return None when the element is absent,
        # so fall back to an empty string to let the regex default apply.
        average_rating = self._to_float(self._html_search_regex(
            r'<span.+?data-rating=["\'](.+?)["\'](.+?)>',
            get_element_by_class('scale', webpage) or '',
            'average rating', default='0'))

        view_count = self._to_int(self._html_search_regex(
            r'<em[^>]+class=["\']badge["\']>([\d\s]+)</em>',
            webpage, 'view count', default='0'))

        comment_count = self._to_int(self._html_search_regex(
            r'.+?Comments\s+\(([\d\s]+)\)',
            get_element_by_attribute(
                'href', '.block-new-comment', webpage) or '',
            'comment count', default='0'))

        uploader = self._html_search_regex(
            r'<a.+?>(.+?)</a>.+?',
            get_element_by_class('username', webpage) or '',
            'uploader', flags=re.M | re.DOTALL, default=None)

        # TODO: extract upload_date and tags.
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'average_rating': average_rating,
            'view_count': view_count,
            'comment_count': comment_count,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }