From 894d065aea97045e08120f055b32a3d51b408426 Mon Sep 17 00:00:00 2001 From: hojel Date: Sat, 10 May 2014 01:31:11 -0700 Subject: [PATCH] add empflix, hardsextube, hentaistigma, nuvid, slutload extractor --- youtube_dl/extractor/__init__.py | 5 +++ youtube_dl/extractor/empflix.py | 46 ++++++++++++++++++++++++++ youtube_dl/extractor/hardsextube.py | 48 ++++++++++++++++++++++++++++ youtube_dl/extractor/hentaistigma.py | 43 +++++++++++++++++++++++++ youtube_dl/extractor/nuvid.py | 39 ++++++++++++++++++++++ youtube_dl/extractor/slutload.py | 46 ++++++++++++++++++++++++++ 6 files changed, 227 insertions(+) create mode 100644 youtube_dl/extractor/empflix.py create mode 100644 youtube_dl/extractor/hardsextube.py create mode 100644 youtube_dl/extractor/hentaistigma.py create mode 100644 youtube_dl/extractor/nuvid.py create mode 100644 youtube_dl/extractor/slutload.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e89b5cf9d..2cd024c07 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -54,6 +54,7 @@ from .ebaumsworld import EbaumsWorldIE from .ehow import EHowIE from .eighttracks import EightTracksIE from .eitb import EitbIE +from .empflix import EmpflixIE from .escapist import EscapistIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE @@ -82,7 +83,9 @@ from .gametrailers import GametrailersIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .hardsextube import HardSexTubeIE from .hark import HarkIE +from .hentaistigma import HentaiStigmaIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .huffpost import HuffPostIE @@ -134,6 +137,7 @@ from .myspace import MySpaceIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .naver import NaverIE +from .nuvid import NuvidIE from .nba import NBAIE from .nbc import NBCNewsIE from .ndtv import NDTVIE @@ -171,6 +175,7 @@ from .servingsys import ServingSysIE from .sina import SinaIE from .slashdot import SlashdotIE from .slideshare import SlideshareIE +from .slutload import SlutloadIE from .smotri import ( SmotriIE, SmotriCommunityIE, diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py new file mode 100644 index 000000000..97f083159 --- /dev/null +++ b/youtube_dl/extractor/empflix.py @@ -0,0 +1,46 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + +class EmpflixIE(InfoExtractor): + _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P[^\.]+)\.html' + _TEST = { + u'url': u'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', + u'file': u'Amateur-Finger-Fuck-33051.flv', + u'md5': u'5e5cc160f38ca9857f318eb97146e13e', + u'info_dict': { + u"title": u"Amateur Finger Fuck", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + + # Get webpage content + webpage = self._download_webpage(url, video_id) + + age_limit = self._rta_search(webpage) + + # Get the video title + video_title = self._html_search_regex(r'

(?P.*?)</h2>', + webpage, u'title').strip() + + cfg_url = self._html_search_regex(r'flashvars\.config = escape\("([^"]+)"', + webpage, u'flashvars.config').strip() + + cfg_xml = self._download_xml(cfg_url, video_id, note=u'Downloading metadata') + video_url = cfg_xml.find('videoLink').text + + info = {'id': video_id, + 'url': video_url, + 'title': video_title, + 'ext': 'flv', + 'age_limit': age_limit} + + return [info] diff --git a/youtube_dl/extractor/hardsextube.py b/youtube_dl/extractor/hardsextube.py new file mode 100644 index 000000000..86ea3b2ad --- /dev/null +++ b/youtube_dl/extractor/hardsextube.py @@ -0,0 +1,48 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + +class HardSexTubeIE(InfoExtractor): + _VALID_URL = r'^(?:http://)?(?:\w+\.)?hardsextube\.com/video/(?P<videoid>\d+)' + _TEST = { + u'url': u'http://www.hardsextube.com/video/939998/', + u'file': u'939998.mp4', + u'md5': u'9ffeca92da23e4b74e4116322496f44a', + u'info_dict': { + u"title": u"FUCKING MY REALDOLL AGAIN - ANAL AND VAGINAL", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + + # Get webpage content + murl = url.replace('www.', 'm.') + webpage = self._download_webpage(murl, video_id) + + # Get the video title + result = re.search(r'<img class="videoThumbs" src="([^"]+)"[^>]*title="([^"]+)"', webpage) + if result is None: + raise ExtractorError(u'ERROR: unable to extract title') + + video_thumb = result.group(1) + video_title = result.group(2) + + # Get the video url + video_url = self._html_search_regex( + r'<div id="videoThumbs"[^>]*>\s+<a href="([^"]+)"', webpage, u'video url') + + info = {'id': video_id, + 'url': video_url, + 'title': video_title, + 'thumbnail': video_thumb, + 'ext': 'mp4', + 'age_limit': 18} + + return [info] diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py new file mode 100644 index 000000000..4f2d30599 --- /dev/null +++ b/youtube_dl/extractor/hentaistigma.py @@ -0,0 +1,43 @@ +import re + +from .common import InfoExtractor + +class HentaiStigmaIE(InfoExtractor): + _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<videoid>[^/]+)' + _TEST = { + u'url': u'http://hentai.animestigma.com/inyouchuu-etsu-bonus/', + u'file': u'inyouchuu-etsu-bonus.mp4', + u'md5': u'4e3d07422a68a4cc363d8f57c8bf0d23', + u'info_dict': { + u"title": u"Inyouchuu Etsu Bonus", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + + # Get webpage content + webpage = self._download_webpage(url, video_id) + + # Get the video title + video_title = self._html_search_regex(r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>', + webpage, u'title').strip() + + # Get the wrapper url + wrap_url = self._html_search_regex(r'<iframe src="([^"]+mp4)"', webpage, u'wrapper url') + + # Get wrapper content + wrap_webpage = self._download_webpage(wrap_url, video_id) + + video_url = self._html_search_regex(r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, u'video url') + + info = {'id': video_id, + 'url': video_url, + 'title': video_title, + 'format': 'mp4', + 'age_limit': 18} + + return [info] diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py new file mode 100644 index 000000000..051c9314d --- /dev/null +++ b/youtube_dl/extractor/nuvid.py @@ -0,0 +1,39 @@ +import re + +from .common import InfoExtractor + +class NuvidIE(InfoExtractor): + _VALID_URL = r'^https?://(?:\w+\.)?nuvid\.com/video/(?P<videoid>\d+)' + _TEST = { + u'url': u'http://m.nuvid.com/video/1310741/', + u'file': u'1310741.mp4', + u'md5': u'eab207b7ac4fccfb4e23c86201f11277', + u'info_dict': { + u"title": u"Horny babes show their awesome bodeis and", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + + # Get webpage content + murl = url.replace('www.', 'm.') + webpage = self._download_webpage(murl, video_id) + + video_title = self._html_search_regex(r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'video_title').strip() + + video_url = 'http://m.nuvid.com'+self._html_search_regex(r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"', webpage, 'video_url') + + video_thumb = self._html_search_regex(r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"', webpage, 'video_thumb') + + info = {'id': video_id, + 'url': video_url, + 'title': video_title, + 'thumbnail': video_thumb, + 'ext': 'mp4', + 'age_limit': 18} + + return [info] diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py new file mode 100644 index 000000000..095adfc15 --- /dev/null +++ b/youtube_dl/extractor/slutload.py @@ -0,0 +1,46 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + +class SlutloadIE(InfoExtractor): + _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<videoid>[^/]+)/?$' + _TEST = { + u'url': u'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', + u'file': u'TD73btpBqSxc.mp4', + u'md5': u'0cf531ae8006b530bd9df947a6a0df77', + u'info_dict': { + u"title": u"virginie baisee en cam", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + + # Get webpage content + webpage = self._download_webpage(url, video_id) + + # Get the video title + video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', + webpage, u'title').strip() + + # Get the video url + result = re.compile(r'<div id="vidPlayer"\s+data-url="([^"]+)"\s+previewer-file="([^"]+)"', re.S).search(webpage) + if result is None: + raise ExtractorError(u'ERROR: unable to extract video_url') + + video_url, video_thumb = result.group(1,2) + + info = {'id': video_id, + 'url': video_url, + 'title': video_title, + 'thumbnail': video_thumb, + 'ext': 'mp4', + 'age_limit': 18} + + return [info]