From 768810dd2d5c40c862a15851a9499174fd9c3c6e Mon Sep 17 00:00:00 2001 From: DirkGiggler Date: Wed, 12 Jul 2017 00:45:29 +0000 Subject: [PATCH 1/6] [aznude] Add new extractor --- youtube_dl/extractor/aznude.py | 117 +++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 youtube_dl/extractor/aznude.py diff --git a/youtube_dl/extractor/aznude.py b/youtube_dl/extractor/aznude.py new file mode 100644 index 000000000..b6b8b8e3a --- /dev/null +++ b/youtube_dl/extractor/aznude.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse + +import re + + +class AZNudeIE(InfoExtractor): + IE_NAME = "aznude" + _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:mrskin|azncdn)/[^/?]+/[^/?]+/(?P.*)\.html' + _TEST = { + 'url': 'https://www.aznude.com/mrskin/marisatomei/loiteringwithintent/loiteringwithintent-mcnallytomei-hd-01-hd.html', + 'md5': '28973bf7b818edfe55677b67bc073e40', + 'info_dict': { + 'id': 'loiteringwithintent-mcnallytomei-hd-01-hd', + 'ext': 'mp4', + 'title': 'Marisa Tomei in Loitering With Intent - 01', + 'thumbnail': 'https://cdn1.aznude.com/marisatomei/loiteringwithintent/LoiteringWithIntent-McNallyTomei-HD-01-gigantic-4.jpg', + 'description': 'Watch Marisa Tomei\'s Breasts scene on AZNude for free (22 seconds).', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + numeric_id = "-".join(re.findall(r'(?P(?:s\d+e\d+)|(?:\d+[xX]\d+)|(?:\d+))', video_id)) + webpage = self._download_webpage(url, video_id) + + artist = self._search_regex(r'(?P[^<]+)', + webpage, + url, + default=None) + work = self._search_regex(r'in (?P[^<]+)', + webpage, + url, + default=None) + + if (artist is not None) and (work is not None): + title = artist + " in " + work + else: + title = self._og_search_title(webpage) + + return { + 'id': video_id, + 'title': title + " - " + numeric_id, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'url': self._search_regex(r'(?:
Download
)', + webpage, + 'url', + fatal=True) + } + + +class AZNudeMultiPageBaseIE(InfoExtractor): + def _extract_entries(self, webpage, regex, prefix): + for url in re.findall(regex, webpage): + yield self.url_result(prefix + url, AZNudeIE.ie_key()) + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + parse_result = urlparse(url) + url_prefix = parse_result.scheme + "://" + parse_result.netloc + + entries = self._extract_entries(webpage, self._get_entry_regex(page_id), url_prefix) + return self.playlist_result(entries, page_id, self._get_webpage_title(webpage)) + + def _get_webpage_title(self, webpage): + return self._search_regex(r'(?:)(?P<title>.+)(?:)', webpage, 'title', default=None, fatal=False).title() + + def _get_entry_regex(self, page_id): + return "" + + +class AZNudeCelebIE(AZNudeMultiPageBaseIE): + IE_NAME = "aznude:celeb" + _VALID_URL = r'https?://(?:www\.)?aznude\.com/view/celeb/[^/?]/(?P.+)\.html' + _TEST = { + 'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html', + 'info_dict': { + 'title': 'Marisa Tomei', + 'id': 'marisatomei', + }, + 'playlist_mincount': 33, + } + + def _get_webpage_title(self, webpage): + return self._search_regex(r'(?:)(?P<title>.+)(?: Nude - AZNude )', webpage, 'title', default=None).title() + + def _get_entry_regex(self, page_id): + return r'(?:href=")(?P/(?:mrskin|azncdn)/' + page_id + '/[^"]*)' + + +class AZNudeMovieIE(AZNudeMultiPageBaseIE): + IE_NAME = "aznude:movie" + _VALID_URL = r'https?://(?:www\.)?aznude\.com/view/movie/[^/?]/(?P.+)\.html' + _TEST = { + 'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html', + 'info_dict': { + 'title': 'Loitering With Intent', + 'id': 'loiteringwithintent', + }, + 'playlist_mincount': 2, + } + + def _get_webpage_title(self, webpage): + return self._search_regex(r'(?:)(?P<title>.+)(?: NUDE SCENES - AZNude)', webpage, 'title', default=None).title() + + def _get_entry_regex(self, page_id): + return r'(?:href=")(?P/(?:mrskin|azncdn)/[^/?]+/' + page_id + '/[^"]*)' From 5870df6f57157d6093dfee852fa42eb1813986d9 Mon Sep 17 00:00:00 2001 From: DirkGiggler Date: Wed, 12 Jul 2017 06:17:15 +0530 Subject: [PATCH 2/6] [aznude] updated extractors.py --- youtube_dl/extractor/extractors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06de556b7..596de0be5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -79,6 +79,11 @@ from .awaan import ( AWAANSeasonIE, ) from .azmedien import AZMedienIE +from .aznude import ( + AZNudeIE, + AZNudeCelebIE, + AZNudeMovieIE, +) from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE From 1eec868cf2d355d352e67a1e3f19669ef4ed23b2 Mon Sep 17 00:00:00 2001 From: DirkGiggler Date: Tue, 18 Jul 2017 13:12:05 +0530 Subject: [PATCH 3/6] Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats --- youtube_dl/extractor/aznude.py | 122 +++++++++++++++++---------------- 1 file changed, 62 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/aznude.py b/youtube_dl/extractor/aznude.py index b6b8b8e3a..a570ee011 100644 --- a/youtube_dl/extractor/aznude.py +++ b/youtube_dl/extractor/aznude.py @@ -12,7 +12,7 @@ import re class AZNudeIE(InfoExtractor): - IE_NAME = "aznude" + IE_NAME = 'aznude' _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:mrskin|azncdn)/[^/?]+/[^/?]+/(?P.*)\.html' _TEST = { 'url': 'https://www.aznude.com/mrskin/marisatomei/loiteringwithintent/loiteringwithintent-mcnallytomei-hd-01-hd.html', @@ -28,90 +28,92 @@ class AZNudeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - numeric_id = "-".join(re.findall(r'(?P(?:s\d+e\d+)|(?:\d+[xX]\d+)|(?:\d+))', video_id)) + numeric_id = '-'.join(re.findall(r'(?P(?:s\d+e\d+)|(?:\d+[xX]\d+)|(?:\d+))', video_id)) webpage = self._download_webpage(url, video_id) - artist = self._search_regex(r'(?P[^<]+)', + jwplayer_data = self._find_jwplayer_data(webpage) + parsed_formats = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)['formats'] + + for format in parsed_formats: + url = format['url'] + + if url.endswith('-lo.' + format['ext']): + format['format'] = 'Low Quality' + format['format_id'] = 'LQ' + format['quality'] = 1 + format['resolution'] = '640x360' + format['format_note'] = '360p video with 64 kbps audio' + + elif url.endswith('-hi.' + format['ext']): + format['format'] = 'High Quality' + format['format_id'] = 'HQ' + format['quality'] = 2 + format['resolution'] = '640x360' + format['format_note'] = '360p video with 128 kbps audio' + + elif url.endswith('-hd.' + format['ext']): + format['format'] = 'High Definition' + format['format_id'] = 'HD' + format['quality'] = 2 + format['resolution'] = '1280x720' + format['format_note'] = '720p video with 128 kbps audio' + else: + # Unknown format! + parsed_formats.remove(format) + + + artist = self._html_search_regex(r'(?P[^<]+)', webpage, url, default=None) - work = self._search_regex(r'in (?P[^<]+)', + work = self._html_search_regex(r'in (?P[^<]+)', webpage, url, default=None) if (artist is not None) and (work is not None): - title = artist + " in " + work + title = artist + ' in ' + work else: title = self._og_search_title(webpage) return { 'id': video_id, - 'title': title + " - " + numeric_id, + 'title': title + ' - ' + numeric_id, 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), - 'url': self._search_regex(r'(?:
Download
)', - webpage, - 'url', - fatal=True) + 'formats': parsed_formats } -class AZNudeMultiPageBaseIE(InfoExtractor): - def _extract_entries(self, webpage, regex, prefix): - for url in re.findall(regex, webpage): - yield self.url_result(prefix + url, AZNudeIE.ie_key()) +class AZNudeCollectionIE(InfoExtractor): + IE_NAME = 'aznude:collection' + _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?P(?:view|browse|tags)/.+\.html)' + _TESTS = [ { + 'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html', + 'info_dict': { + 'title': 'Marisa Tomei Nude - Aznude ', + 'id': 'view/celeb/m/marisatomei.html', + }, + 'playlist_mincount': 33, + }, { + 'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html', + 'info_dict': { + 'title': 'Loitering With Intent Nude Scenes - Aznude', + 'id': 'view/movie/l/loiteringwithintent.html', + }, + 'playlist_mincount': 2, + } ] def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) + title = self._search_regex(r'(?:)(?P<thetitle>.+)(?:)', webpage, 'title', default=None).title() parse_result = urlparse(url) - url_prefix = parse_result.scheme + "://" + parse_result.netloc + url_prefix = parse_result.scheme + '://' + parse_result.netloc - entries = self._extract_entries(webpage, self._get_entry_regex(page_id), url_prefix) - return self.playlist_result(entries, page_id, self._get_webpage_title(webpage)) + entries = [] + for path in re.findall(r'(?:]+href=")(?P[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"| [^"]+")[^>]*>)', webpage): + entries.append( self.url_result(url_prefix + path, AZNudeIE.ie_key()) ) - def _get_webpage_title(self, webpage): - return self._search_regex(r'(?:)(?P<title>.+)(?:)', webpage, 'title', default=None, fatal=False).title() - - def _get_entry_regex(self, page_id): - return "" - - -class AZNudeCelebIE(AZNudeMultiPageBaseIE): - IE_NAME = "aznude:celeb" - _VALID_URL = r'https?://(?:www\.)?aznude\.com/view/celeb/[^/?]/(?P.+)\.html' - _TEST = { - 'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html', - 'info_dict': { - 'title': 'Marisa Tomei', - 'id': 'marisatomei', - }, - 'playlist_mincount': 33, - } - - def _get_webpage_title(self, webpage): - return self._search_regex(r'(?:)(?P<title>.+)(?: Nude - AZNude )', webpage, 'title', default=None).title() - - def _get_entry_regex(self, page_id): - return r'(?:href=")(?P/(?:mrskin|azncdn)/' + page_id + '/[^"]*)' - - -class AZNudeMovieIE(AZNudeMultiPageBaseIE): - IE_NAME = "aznude:movie" - _VALID_URL = r'https?://(?:www\.)?aznude\.com/view/movie/[^/?]/(?P.+)\.html' - _TEST = { - 'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html', - 'info_dict': { - 'title': 'Loitering With Intent', - 'id': 'loiteringwithintent', - }, - 'playlist_mincount': 2, - } - - def _get_webpage_title(self, webpage): - return self._search_regex(r'(?:)(?P<title>.+)(?: NUDE SCENES - AZNude)', webpage, 'title', default=None).title() - - def _get_entry_regex(self, page_id): - return r'(?:href=")(?P/(?:mrskin|azncdn)/[^/?]+/' + page_id + '/[^"]*)' + return self.playlist_result(entries, page_id, title) From 2eb0eedf24ca52b34cc56c06812fd058c5fc09f3 Mon Sep 17 00:00:00 2001 From: DirkGiggler Date: Fri, 28 Jul 2017 06:18:22 +0530 Subject: [PATCH 4/6] Updated aznude.py --- youtube_dl/extractor/aznude.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/aznude.py b/youtube_dl/extractor/aznude.py index a570ee011..f5f11e111 100644 --- a/youtube_dl/extractor/aznude.py +++ b/youtube_dl/extractor/aznude.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import urljoin + try: from urlparse import urlparse except ImportError: @@ -41,22 +43,25 @@ class AZNudeIE(InfoExtractor): format['format'] = 'Low Quality' format['format_id'] = 'LQ' format['quality'] = 1 - format['resolution'] = '640x360' - format['format_note'] = '360p video with 64 kbps audio' + format['width'] = 640 + format['height'] = 360 + format['format_note'] = '360p video with mono audio' elif url.endswith('-hi.' + format['ext']): format['format'] = 'High Quality' format['format_id'] = 'HQ' format['quality'] = 2 - format['resolution'] = '640x360' - format['format_note'] = '360p video with 128 kbps audio' + format['width'] = 640 + format['height'] = 360 + format['format_note'] = '360p video with stereo audio' elif url.endswith('-hd.' + format['ext']): format['format'] = 'High Definition' format['format_id'] = 'HD' - format['quality'] = 2 - format['resolution'] = '1280x720' - format['format_note'] = '720p video with 128 kbps audio' + format['quality'] = 3 + format['width'] = 1280 + format['height'] = 720 + format['format_note'] = '720p video with stereo audio' else: # Unknown format! parsed_formats.remove(format) @@ -76,9 +81,12 @@ class AZNudeIE(InfoExtractor): else: title = self._og_search_title(webpage) + if numeric_id != "": + title = title + ' - ' + numeric_id + return { 'id': video_id, - 'title': title + ' - ' + numeric_id, + 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'formats': parsed_formats @@ -87,7 +95,7 @@ class AZNudeIE(InfoExtractor): class AZNudeCollectionIE(InfoExtractor): IE_NAME = 'aznude:collection' - _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?P(?:view|browse|tags)/.+\.html)' + _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:view/[^/]+/[^/]+|browse/(?:videos|tags/vids))/(?P.+)\.html' _TESTS = [ { 'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html', 'info_dict': { @@ -114,6 +122,7 @@ class AZNudeCollectionIE(InfoExtractor): entries = [] for path in re.findall(r'(?:]+href=")(?P[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"| [^"]+")[^>]*>)', webpage): - entries.append( self.url_result(url_prefix + path, AZNudeIE.ie_key()) ) + if not path.startswith("//"): + entries.append( self.url_result(urljoin(url_prefix, path), AZNudeIE.ie_key()) ) return self.playlist_result(entries, page_id, title) From d83d3cca51843dab8bdafb57dc8421012e49cf59 Mon Sep 17 00:00:00 2001 From: DirkGiggler Date: Thu, 3 Aug 2017 15:41:41 +0530 Subject: [PATCH 5/6] Remove thumbnail url matching from aznude.py --- youtube_dl/extractor/aznude.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/aznude.py b/youtube_dl/extractor/aznude.py index f5f11e111..4d3a9636a 100644 --- a/youtube_dl/extractor/aznude.py +++ b/youtube_dl/extractor/aznude.py @@ -23,7 +23,6 @@ class AZNudeIE(InfoExtractor): 'id': 'loiteringwithintent-mcnallytomei-hd-01-hd', 'ext': 'mp4', 'title': 'Marisa Tomei in Loitering With Intent - 01', - 'thumbnail': 'https://cdn1.aznude.com/marisatomei/loiteringwithintent/LoiteringWithIntent-McNallyTomei-HD-01-gigantic-4.jpg', 'description': 'Watch Marisa Tomei\'s Breasts scene on AZNude for free (22 seconds).', }, } From 4596e462314ad4111db66a2443caf6c4eb727f2d Mon Sep 17 00:00:00 2001 From: DirkGiggler Date: Thu, 29 Aug 2019 00:46:59 +0530 Subject: [PATCH 6/6] Updated the AZNude extractor --- youtube_dl/extractor/aznude.py | 12 ++++++------ youtube_dl/extractor/extractors.py | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/aznude.py b/youtube_dl/extractor/aznude.py index 4d3a9636a..c69f719c4 100644 --- a/youtube_dl/extractor/aznude.py +++ b/youtube_dl/extractor/aznude.py @@ -99,14 +99,14 @@ class AZNudeCollectionIE(InfoExtractor): 'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html', 'info_dict': { 'title': 'Marisa Tomei Nude - Aznude ', - 'id': 'view/celeb/m/marisatomei.html', + 'id': 'marisatomei', }, 'playlist_mincount': 33, }, { 'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html', 'info_dict': { 'title': 'Loitering With Intent Nude Scenes - Aznude', - 'id': 'view/movie/l/loiteringwithintent.html', + 'id': 'loiteringwithintent', }, 'playlist_mincount': 2, } ] @@ -117,11 +117,11 @@ class AZNudeCollectionIE(InfoExtractor): title = self._search_regex(r'(?:)(?P<thetitle>.+)(?:)', webpage, 'title', default=None).title() parse_result = urlparse(url) - url_prefix = parse_result.scheme + '://' + parse_result.netloc + base_url = '%s://%s' % (parse_result.scheme, parse_result.netloc) entries = [] - for path in re.findall(r'(?:]+href=")(?P[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"| [^"]+")[^>]*>)', webpage): - if not path.startswith("//"): - entries.append( self.url_result(urljoin(url_prefix, path), AZNudeIE.ie_key()) ) + for path in re.findall(r'(?:]+href=")(?P[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"| [^"]+")[^"]+lightbox=[^>]*>)', webpage): + entries.append( self.url_result(urljoin(base_url, path), AZNudeIE.ie_key()) ) return self.playlist_result(entries, page_id, title) + diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 596de0be5..07964b9d4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -81,8 +81,7 @@ from .awaan import ( from .azmedien import AZMedienIE from .aznude import ( AZNudeIE, - AZNudeCelebIE, - AZNudeMovieIE, + AZNudeCollectionIE, ) from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE