1
0
mirror of https://github.com/l1ving/youtube-dl synced 2024-12-31 00:22:53 +08:00

[youjizz] Modernize (#4131)

This commit is contained in:
Philipp Hagemeister 2014-11-12 15:19:23 +01:00
parent ef89dba58f
commit 28465df1ff

View File

@@ -9,40 +9,30 @@ from ..utils import (
class YouJizzIE(InfoExtractor): class YouJizzIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$' _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
_TEST = { _TEST = {
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
'file': '2189178.flv',
'md5': '07e15fa469ba384c7693fd246905547c', 'md5': '07e15fa469ba384c7693fd246905547c',
'info_dict': { 'info_dict': {
'id': '2189178',
'ext': 'flv',
"title": "Zeichentrick 1", "title": "Zeichentrick 1",
"age_limit": 18, "age_limit": 18,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('videoid')
# Get webpage content
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
video_title = self._html_search_regex(
r'<title>\s*(.*)\s*</title>', webpage, 'title')
# Get the video title embed_page_url = self._search_regex(
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>', r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
webpage, 'title').strip() webpage, 'embed page')
webpage = self._download_webpage(
# Get the embed page embed_page_url, video_id, note='downloading embed page')
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
if result is None:
raise ExtractorError('ERROR: unable to extract embed page')
embed_page_url = result.group(0).strip()
video_id = result.group('videoid')
webpage = self._download_webpage(embed_page_url, video_id)
# Get the video URL # Get the video URL
m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage) m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)