From 2004ccc0b0429c1fd8960146da10e8d5ef0aaf37 Mon Sep 17 00:00:00 2001 From: Lord-Simon Date: Sun, 17 Aug 2014 04:30:11 +0200 Subject: [PATCH 1/2] Add ToukouCity to extractors --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/toukoucity.py | 59 ++++++++++++++++++++++++++++++ youtube_dl/utils.py | 1 + 3 files changed, 61 insertions(+) create mode 100644 youtube_dl/extractor/toukoucity.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 27602e0c0..1c1bb42eb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -321,6 +321,7 @@ from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE from .toutv import TouTvIE +from .toukoucity import ToukouCityIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE diff --git a/youtube_dl/extractor/toukoucity.py b/youtube_dl/extractor/toukoucity.py new file mode 100644 index 000000000..96d8890b9 --- /dev/null +++ b/youtube_dl/extractor/toukoucity.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import urllib2 + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + +class ToukouCityIE(InfoExtractor): + IE_NAME = 'ToukouCity' + IE_DESC = '無料アダルトビデオが豊富なカテゴリーで楽しめる' + _VALID_URL = r'https?://(?:www\.)?toukoucity\.to/video/(?P<id>[\w\d]+)/?' 
+ _TEST = { + 'url': 'http://toukoucity.to/video/igy3nBwTEb/', + 'md5': 'd36db92d7a2034312ab692ba97f216ab', + 'info_dict': { + 'id': 'igy3nBwTEb', + 'filesize': 366450899, + 'ext': 'mp4', + 'title': u'ドラえもん', + 'description': u'のび太のブリキの迷宮(ラビリンス)', + 'thumbnail': 'http://img.toukoucity.to/igy3nBwTEb/thumbnail_12.jpg', + 'upload_date': '20140805', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + self.report_extraction(video_id) + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'

(.+?)

', webpage, u'title') + video_url = self._search_regex(r'so\.addVariable\(\'file\',\'(.+?\.mp4)\'\)', webpage, u'video_url') + thumbnail = self._search_regex(r'so\.addVariable\(\'image\',\'(.+?)\'\)', webpage, u'thumbnail') + upload_date = unified_strdate(self._search_regex(r'(.+?)', webpage, u'upload_date')) + extension = video_url.split(".")[-1] + view_count = self._html_search_regex(r'(.+?)', webpage, u'view_count') + player_url = self._search_regex(r'SWFObject\(\'(.+?)\'.+?\)', webpage, u'player_url') + description = self._search_regex(r'

(.+?)

', webpage, u'description') + filesize = int(urllib2.urlopen(video_url).headers["Content-Length"]) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'ext': extension, + 'player_url': player_url, + 'description': description, + 'view_count': view_count, + 'webpage_url': url, + 'filesize': filesize, + } \ No newline at end of file diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 65b492fb3..aa81b8bc4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -829,6 +829,7 @@ def unified_strdate(date_str): '%Y-%m-%d', '%d.%m.%Y', '%d/%m/%Y', + '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%d.%m.%Y %H:%M', From c6b2afd3a33d2a65d55ef6f22fd8826c6e6b12d5 Mon Sep 17 00:00:00 2001 From: Lord-Simon Date: Sun, 17 Aug 2014 04:48:16 +0200 Subject: [PATCH 2/2] Fix urlopen in Python3 --- youtube_dl/extractor/toukoucity.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/toukoucity.py b/youtube_dl/extractor/toukoucity.py index 96d8890b9..b591f20a0 100644 --- a/youtube_dl/extractor/toukoucity.py +++ b/youtube_dl/extractor/toukoucity.py @@ -2,7 +2,12 @@ from __future__ import unicode_literals import re -import urllib2 +try: + # Python 3 urllib + from urllib.request import urlopen +except ImportError: + # Fall back to Python 2's urllib2 + from urllib2 import urlopen from .common import InfoExtractor from ..utils import ( @@ -42,7 +47,7 @@ class ToukouCityIE(InfoExtractor): view_count = self._html_search_regex(r'(.+?)', webpage, u'view_count') player_url = self._search_regex(r'SWFObject\(\'(.+?)\'.+?\)', webpage, u'player_url') description = self._search_regex(r'

(.+?)

', webpage, u'description') - filesize = int(urllib2.urlopen(video_url).headers["Content-Length"]) + filesize = int(urlopen(video_url).headers["Content-Length"]) return { 'id': video_id,