From e7e90b094a9c65c0336e40cd64878517722f9e21 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 12:54:22 -0400 Subject: [PATCH 1/3] Fixed extractor #16276 fixed parsing description fixed parsing uploader fixed parsing view count fixed parsing duration, fixed parsing video_urls fixed parsing thumbnail --- youtube_dl/extractor/vporn.py | 48 ++++++++++++++--------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 858ac9e71..9a11ebfc0 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -6,8 +6,8 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, parse_duration, + parse_resolution, str_to_int, - urljoin, ) @@ -64,47 +64,37 @@ class VpornIE(InfoExtractor): title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() - description = self._html_search_regex( - r'class="(?:descr|description_txt)">(.*?)', - webpage, 'description', fatal=False) - thumbnail = urljoin('http://www.vporn.com', self._html_search_regex( - r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', - default=None)) - uploader = self._html_search_regex( - r'(?s)Uploaded by:.*?]*>(.+?)', - webpage, 'uploader', fatal=False) + description = self._search_regex(r'[^>]*class="(?:sidebar-box)"[^>]*>[\n]

(.*?)

', + webpage, 'description', fatal=False) - categories = re.findall(r']*>([^<]+)', webpage) + thumbnail = self._search_regex(r']+poster="([^"])"', webpage, 'thumbnail', default=None) or self._search_regex(r'posterurl\s=\s\'([^\']+)', webpage, 'thumbnail', fatal=False) + + uploader = self._search_regex(r'class="avatarname">(.*?)', + webpage, 'uploader', fatal=False) + + categories = re.findall(r']*class="tags links"[^>]*>([^<]+)', webpage) duration = parse_duration(self._search_regex( - r'Runtime:\s*\s*(\d+ min \d+ sec)', + r'class="durat-img"[^>]*>\s*(\d+ min \d+ sec)', webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( - r'class="views">([\d,\.]+) [Vv]iews<', + r'class="view-count">[\n]([\d,\.]+) [Vv]iews[\n]<', webpage, 'view count', fatal=False)) + comment_count = str_to_int(self._html_search_regex( r"'Comments \(([\d,\.]+)\)'", webpage, 'comment count', default=None)) formats = [] - - for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage): - video_url = video[1] - fmt = { - 'url': video_url, - 'format_id': video[0], - } - m = re.search(r'_(?P\d+)x(?P\d+)_(?P\d+)k\.mp4$', video_url) - if m: - fmt.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - 'vbr': int(m.group('vbr')), - }) - formats.append(fmt) - + for mobj in re.finditer(r']+src="([^"]+)"[^>]+label="([^"]+)[^>]*>', webpage): + f = parse_resolution(mobj.group(2)) + f.update({ + 'url': mobj.group(1), + 'format_id': mobj.group(2), + }) + formats.append(f) self._sort_formats(formats) return { From 0ace17d199bf9c23f685e9c265432ca4826ff65e Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 12:55:07 -0400 Subject: [PATCH 2/3] Fixed parsing tags #13720 --- youtube_dl/extractor/pornhub.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index be93d5d48..1804ba15d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -307,10 +307,13 @@ class PornHubIE(PornHubBaseIE): r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', webpage, 'page parameters', group='data', default='{}'), video_id, transform_source=js_to_json, fatal=False) - tags = categories = None + tags = None if page_params: tags = page_params.get('tags', '').split(',') - categories = page_params.get('categories', '').split(',') + + categories = [] + for mobj in re.finditer(r']+Category[^>]*>([^<]+)', webpage): + categories.append(mobj.group(1)) return { 'id': video_id, From dfa1574377727809bea9fa81e8a9e1983d59ce09 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 12:56:54 -0400 Subject: [PATCH 3/3] added like count to info dict --- youtube_dl/extractor/pornhd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index b52879c7a..534f06277 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -85,6 +85,9 @@ class PornHdIE(InfoExtractor): r"poster'?\s*:\s*([\"'])(?P(?:(?!\1).)+)\1", webpage, 'thumbnail', fatal=False, group='url') + like_count = int_or_none(self._search_regex( + r'class="save-count">(\d+)<', webpage, 'like_count', fatal=False)) + return { 'id': video_id, 'display_id': display_id, @@ -94,4 +97,5 @@ class PornHdIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, + 'like_count': like_count, }