1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-07 06:32:55 +08:00

[spankwire] Fix extraction

This commit is contained in:
Sergey M․ 2015-06-16 21:19:18 +06:00
parent f3aecb27a4
commit 447053668f

View File

@@ -27,7 +27,7 @@ class SpankwireIE(InfoExtractor):
'description': 'Crazy Bitch X rated music video.', 'description': 'Crazy Bitch X rated music video.',
'uploader': 'oreusz', 'uploader': 'oreusz',
'uploader_id': '124697', 'uploader_id': '124697',
'upload_date': '20070508', 'upload_date': '20070507',
'age_limit': 18, 'age_limit': 18,
} }
} }
@@ -44,7 +44,7 @@ class SpankwireIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r'<h1>([^<]+)', webpage, 'title') r'<h1>([^<]+)', webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'<div\s+id="descriptionContent">([^<]+)<', r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
webpage, 'description', fatal=False) webpage, 'description', fatal=False)
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']', r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
@@ -64,12 +64,12 @@ class SpankwireIE(InfoExtractor):
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>', r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex( comment_count = str_to_int(self._html_search_regex(
r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>', r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
webpage, 'comment count', fatal=False)) webpage, 'comment count', fatal=False))
video_urls = list(map( video_urls = list(map(
compat_urllib_parse.unquote, compat_urllib_parse.unquote,
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage))) re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1: if webpage.find('flashvars\.encrypted = "true"') != -1:
password = self._search_regex( password = self._search_regex(
r'flashvars\.video_title = "([^"]+)', r'flashvars\.video_title = "([^"]+)',