From 8d6c3a0f15f4895a36153b6620a31fc1264a6f64 Mon Sep 17 00:00:00 2001 From: projx <1296359+projx@users.noreply.github.com> Date: Mon, 2 Sep 2019 23:09:14 +0100 Subject: [PATCH] Update pornhub.py The title was not being populated when downloading a playlist, so this adds title extraction for user playlists to the PornHubPagedPlaylistBaseIE class. --- youtube_dl/extractor/pornhub.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 11b8cfcf7..15c2178ee 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -403,10 +403,16 @@ class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') item_id = mobj.group('id') + + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + playlist_id = mobj.group('id') + title = "" page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) @@ -423,14 +429,25 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: break raise + page_entries = self._extract_entries(webpage, host) + + if title == "": + playlist = self._parse_json( + self._search_regex( + r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage, + 'playlist', default='{}'), + playlist_id, fatal=False) + title = playlist.get('title') or self._search_regex( + r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False) + if not page_entries: break entries.extend(page_entries) - if not self._has_more(webpage): + if not self._has_more(webpage): break - return self.playlist_result(orderedSet(entries), item_id) + return self.playlist_result(orderedSet(entries), item_id, title) class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):