Added support for PornHub Premium.

This commit completely refactors the PornHub extractor and adds a new extractor for PornHub Premium. Several minor issues in the info extractor have been fixed, and support for international versions of PornHub should be improved. Additionally, registered PornHub users can now authenticate and use youtube-dl to archive videos they have purchased. Support for --netrc has been added for both pornhub and pornhubpremium.
2025-01-21 15:32:58 +08:00 · 2020-02-16 01:51:58 -08:00 · 2020-02-16 01:51:58 -08:00 · c918f8606c
commit c918f8606c
parent 66bfdcea6d
4 changed files with 721 additions and 483 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -856,9 +856,15 @@ from .porncom import PornComIE
 from .pornhd import PornHdIE
 from .pornhub import (
    PornHubIE,
-    PornHubUserIE,
-    PornHubPagedVideoListIE,
-    PornHubUserVideosUploadIE,
+    PornHubProfileIE,
+    PornHubProfileVideosIE,
+    PornHubPlaylistIE,
+)
+from .pornhubpremium import (
+    PornHubPremiumIE,
+    PornHubPremiumProfileIE,
+    PornHubPremiumProfileVideosIE,
+    PornHubPremiumPlaylistIE,
 )
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
@ -1129,6 +1135,7 @@ from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
 from .thisoldhouse import ThisOldHouseIE
 from .threeqsdn import ThreeQSDNIE
+from .thumbzilla import ThumbzillaIE
 from .tiktok import (
    TikTokIE,
    TikTokUserIE,
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
--- a/youtube_dl/extractor/pornhubpremium.py
+++ b/youtube_dl/extractor/pornhubpremium.py
@ -0,0 +1,134 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .pornhub import PornHubBaseIE
+from ..utils import ExtractorError
+from ..utils import urlencode_postdata
+
+
+class PornHubPremiumIE(PornHubBaseIE):
+    """
+    PornHubPremiumIE handles videos exclusively from pornhubpremium.com.
+    """
+    IE_NAME = 'pornhubpremium'
+    IE_DESC = 'PornHub Premium'
+
+    _NETRC_MACHINE = 'pornhubpremium'
+
+    _HOST = 'pornhubpremium.com'
+    _BASE_URL = 'https://%s' % _HOST
+    _LOGIN_FORM_URL = 'https://%s/premium/login' % _HOST
+    _LOGIN_POST_URL = 'https://www.%s/front/authenticate' % _HOST
+
+    _VALID_URL = r'https?://(?P<host>(?:\w+?.)?pornhubpremium\.com)/(?:(?:view_video\.php\?viewkey=)|embed/)(?P<id>[\da-z]+)'
+
+    @staticmethod
+    def _is_authenticated(webpage):
+        if '/user/logout' in webpage:
+            return True
+        if 'js_premiumLogOut' in webpage:
+            return True
+        return False
+
+    def _login(self):
+        username, password = self._get_login_info()
+
+        if not username or not password:
+            self.raise_login_required(
+                'A \'%s\' account is required' % self._NETRC_MACHINE)
+
+        # Set cookies
+        self._set_cookie(self._HOST, 'age_verified', '1')
+        self._set_cookie(self._HOST, 'platform', 'pc')
+
+        # Verify our auth status
+        main_page = self._download_webpage(
+            self._BASE_URL, video_id=None, note='Verifying login', tries=1, fatal=False)
+
+        # Already logged in
+        if self._is_authenticated(main_page):
+            return self.to_screen("Already authenticated")
+
+        # Fetch login page
+        login_page = self._download_webpage(
+            self._LOGIN_FORM_URL, video_id=None, note='Logging in', tries=3, fatal=True)
+
+        # Fetch login form
+        login_form = self._hidden_inputs(login_page)
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        # Submit sign-in request
+        response = self._download_json(
+            self._LOGIN_POST_URL, video_id=None, note='Sending credentials', fatal=True,
+            data=urlencode_postdata(login_form), headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': self._LOGIN_POST_URL,
+            })
+
+        # Success
+        if response.get('success') == '1':
+            return self.to_screen("Successfully authenticated")
+
+        # Error
+        login_error = response.get('message')
+        if login_error:
+            raise ExtractorError('Unable to login: %s' % login_error, expected=True)
+        self.report_warning('Login has probably failed')
+
+
+class PornHubPremiumProfileIE(PornHubPremiumIE):
+    """Extract videos from a model, pornstar, user, or channel profile."""
+
+    IE_NAME = 'pornhubpremium:profile'
+
+    _VALID_URL_PARTS = [
+        r'https?://(?P<host>(?:\w+?.)?pornhubpremium\.com)/',
+        r'(?:model|pornstar|users|channels)/(?P<username>[\w-]+)$'
+    ]
+    _VALID_URL = re.compile(''.join(_VALID_URL_PARTS))
+
+    def _real_extract(self, url):
+        self._set_cookies()
+        return self.url_result('%s/videos' % url)
+
+
+class PornHubPremiumProfileVideosIE(PornHubPremiumIE):
+    """Extract videos from a model, pornstar, user, or channel profile."""
+
+    IE_NAME = 'pornhubpremium:profile:videos'
+
+    _VALID_URL_PARTS = [
+        r'https?://(?P<host>(?:\w+?.)?pornhubpremium\.com)/',
+        r'(?:model|pornstar|users|channels)/(?P<username>[\w-]+)/videos(?:/(?P<category>[\w-]+))?'
+    ]
+    _VALID_URL = re.compile(''.join(_VALID_URL_PARTS))
+
+    def _real_extract(self, url):
+        self._set_cookies()
+        host, username, category = re.match(self._VALID_URL, url).groups()
+
+        playlist_id = '%s-%s' % (username, category if category else 'videos')
+        entries = self._extract_paged_entries(url, host, playlist_id)
+
+        return self.playlist_result(entries, playlist_id)
+
+
+class PornHubPremiumPlaylistIE(PornHubPremiumIE):
+    """Extract videos from a playlist."""
+
+    IE_NAME = 'pornhubpremium:playlist'
+    _VALID_URL = r'https?://(?P<host>(?:\w+?.)?pornhubpremium\.com)/playlist/(?P<playlist_id>[\d]+)'
+
+    def _real_extract(self, url):
+        self._set_cookies()
+        host, playlist_id = re.match(self._VALID_URL, url).groups()
+
+        entries = self._extract_paged_entries(url, host, playlist_id)
+
+        return self.playlist_result(
+            entries, playlist_id, self._extract_playlist_title(url, playlist_id))
--- a/youtube_dl/extractor/thumbzilla.py
+++ b/youtube_dl/extractor/thumbzilla.py
@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from .openload import PhantomJSwrapper
+from .pornhub import PornHubIE
+from ..utils import ExtractorError
+
+
+class ThumbzillaIE(InfoExtractor):
+    """
+    ThumbzillaIE is a frontend for other 'Tube' sites (mostly PornHub). ThumbzillaIE will
+    parse the video and delegate to the appropriate extractor via a url_result.
+    """
+    IE_DESC = 'Thumbzilla'
+    _VALID_URL = r'https?://(?P<host>(?:www\.)?thumbzilla\.com)/video/(?P<id>[\da-z]+)'
+
+    _TEST = {
+        'url': 'https://www.thumbzilla.com/video/ph5c8e8f15b40ff/hot-skinny-girl-gives-you',
+        'info_dict': {
+            'id': 'ph5c8e8f15b40ff',
+            'ext': 'mp4',
+            'upload_date': '20190317',
+            'age_limit': 18,
+            'uploader': 'lizashultz',
+            'title': 'Hot skinny girl gives you.',
+        }
+    }
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        def dl(*args, **kwargs):
+            return super(ThumbzillaIE, self)._download_webpage_handle(*args, **kwargs)
+
+        webpage, urlh = dl(*args, **kwargs)
+
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.get_full_url()
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            phantom.get(url, html=webpage)
+            webpage, urlh = dl(*args, **kwargs)
+
+        return webpage, urlh
+
+    def _real_extract(self, url):
+        host, video_id = re.match(self._VALID_URL, url).groups()
+
+        if video_id.startswith('ph'):
+            return self.url_result('https://pornhub.com/view_video.php?viewkey=%s' % video_id,
+                                   video_id=video_id, ie=PornHubIE.ie_key())
+        else:
+            raise ExtractorError('Unsupported video type')