From b31309ec855ec28e68dcc4f9971880581ea13a63 Mon Sep 17 00:00:00 2001
From: Julien Brun
Date: Wed, 15 Oct 2014 22:19:41 +0200
Subject: [PATCH] [WatchIndianPorn] Add new extractor

---
Reviewer note: the Python below was revised during review, so the commit id
in the "From" line and the blob id in the second "index" line predate this
revision.

 youtube_dl/extractor/__init__.py        |  1 +
 youtube_dl/extractor/watchindianporn.py | 73 +++++++++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 youtube_dl/extractor/watchindianporn.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 5e38d2663..91438089f 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -445,6 +445,7 @@ from .vulture import VultureIE
 from .walla import WallaIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
+from .watchindianporn import WatchIndianPornIE
 from .wayofthemaster import WayOfTheMasterIE
 from .wdr import (
     WDRIE,
diff --git a/youtube_dl/extractor/watchindianporn.py b/youtube_dl/extractor/watchindianporn.py
new file mode 100644
index 000000000..970600057
--- /dev/null
+++ b/youtube_dl/extractor/watchindianporn.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+import re
+import datetime
+
+from .common import InfoExtractor
+
+
+class WatchIndianPornIE(InfoExtractor):
+    """Information extractor for video pages on watchindianporn.net."""
+
+    # The 'url' group holds the address without scheme or "www." so that
+    # _real_extract can rebuild a complete URL; the 'id' group is the token
+    # between the last '-' and '.html'. The id class is spelled out instead
+    # of the range A-z, which also spans the punctuation [ \ ] ^ and `.
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>watchindianporn\.net/video/.+?-(?P<id>[A-Za-z0-9_]+)\.html)$'
+    _TEST = {
+        'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
+        'md5': '9afb80675550406ed9a63ac2819ef69d',
+        'info_dict': {
+            'id': 'dW2mtctxJfs',
+            'ext': 'mp4',
+            'upload_date': '20140213',
+            'uploader': 'Don',
+            'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Fetch the video page for ``url`` and return its info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        # The scheme and "www." are optional in _VALID_URL, so rebuild a
+        # complete address before downloading.
+        url = 'http://www.' + mobj.group('url')
+
+        webpage = self._download_webpage(url, video_id)
+
+        # NOTE(review): the HTML tags in the page patterns below were
+        # restored by hand; confirm them against the live page markup.
+        date = self._html_search_regex(
+            r'class="aup">Added: <strong>(.*?)</strong>', webpage, 'date')
+        try:
+            # The format matches dates such as "February 13, 2014".
+            upload_date = datetime.datetime.strptime(
+                date, '%B %d, %Y').strftime('%Y%m%d')
+        except ValueError:
+            # %B depends on the process locale; an unparsable date should
+            # not abort extraction of the video itself.
+            upload_date = None
+
+        title = self._html_search_regex(
+            r'<h2 class="he2"><span>(.*?)</span>', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'var playlist = \[ \{ url: escape\(\'(.*?)\'\) \} \]',
+            webpage, 'video_url')
+        thumbnail = self._html_search_regex(
+            r'<span id="container"><img src="(.*?)"', webpage, 'thumbnail')
+        uploader = self._html_search_regex(
+            r'class="aupa">(.*?)</a>', webpage, 'uploader')
+        categories = re.findall(
+            r'http://www.watchindianporn.net/search/video/(?:.+?)"><span>(.*?)</span>',
+            webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'categories': categories,
+        }