From 8ee42812f1e1e5ef0928f4f126b19dc57eee4ebe Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Wed, 8 Feb 2017 14:55:18 +0100 Subject: [PATCH 1/4] Fixed Pornhub Extractor --- youtube_dl/extractor/pornhub.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 5042544f5..6bee1f553 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -159,21 +159,23 @@ class PornHubIE(InfoExtractor): video_variables = [] video_variablenames = [] for video_variablename, quote, video_variable in re.findall( - r'(player_quality_[0-9]{3,4}p.*?)=\s*(["\'])(.+?)\1;', webpage): - video_variables.append(compat_urllib_parse_unquote(re.sub( - r'{0}\s*\+\s*{0}'.format(quote), '', video_variable))) + r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage): + video_variables.append(video_variable) video_variablenames.append(video_variablename) + exestring = str(video_variablename) + ' = "' + str(video_variable) + '"' + exec(exestring) in locals() - video_urls = [] - for video_url in re.findall( + encoded_video_urls = [] + for encoded_video_url in re.findall( r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage): - video_urls.append(video_url) - - print('varnames: ' + str(video_variablenames)) - print('vars: ' + str(video_variables)) - print('urls: ' + str(video_urls)) + encoded_video_urls.append(encoded_video_url) # Decode the URLs + video_urls = [] + for url in encoded_video_urls: + exestring = 'video_urls.append(' + str(url) + ')' + #print('exe2: ' + exestring) + exec(exestring) in locals() if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse_unquote_plus( From 17ec4ac3719a4a14f6baaf49a8d5a68f6fca0d20 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Wed, 8 Feb 2017 14:57:02 +0100 Subject: [PATCH 2/4] Cleared a debug leftover --- youtube_dl/extractor/pornhub.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 6bee1f553..76895b730 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -174,7 +174,6 @@ class PornHubIE(InfoExtractor): video_urls = [] for url in encoded_video_urls: exestring = 'video_urls.append(' + str(url) + ')' - #print('exe2: ' + exestring) exec(exestring) in locals() if webpage.find('"encrypted":true') != -1: From 250c284a55b38d620ae3af1833b3aa8a42cf6094 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Wed, 8 Feb 2017 15:37:16 +0100 Subject: [PATCH 3/4] removed exec()-Function --- youtube_dl/extractor/pornhub.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 76895b730..0aae4a74c 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -6,6 +6,7 @@ import os import re from .common import InfoExtractor +from ..jsinterp import JSInterpreter from ..compat import ( compat_HTTPError, compat_urllib_parse_unquote, @@ -156,16 +157,12 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - video_variables = [] - video_variablenames = [] + video_variables = {} for video_variablename, quote, video_variable in re.findall( r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage): - video_variables.append(video_variable) - video_variablenames.append(video_variablename) - exestring = str(video_variablename) + ' = "' + str(video_variable) + '"' - exec(exestring) in locals() + video_variables[video_variablename] = video_variable - encoded_video_urls = [] + encoded_video_urls = [] for encoded_video_url in re.findall( r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage): encoded_video_urls.append(encoded_video_url) @@ -173,8 +170,11 @@ class PornHubIE(InfoExtractor): # Decode the URLs video_urls = [] for url in encoded_video_urls: - exestring = 'video_urls.append(' + str(url) + ')' - exec(exestring) in locals() + for varname, varval in video_variables.items(): + url = url.replace(varname, varval) + url = url.replace('+', '') + url = url.replace(' ', '') + video_urls.append(url) if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse_unquote_plus( From 20810e69ef11f0ca02686085549b6e2aecd5d6dd Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Wed, 8 Feb 2017 15:39:03 +0100 Subject: [PATCH 4/4] removed reference to JSInterpreter --- youtube_dl/extractor/pornhub.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0aae4a74c..5e930f45e 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -6,7 +6,6 @@ import os import re from .common import InfoExtractor -from ..jsinterp import JSInterpreter from ..compat import ( compat_HTTPError, compat_urllib_parse_unquote,