From 1323ab2a8f22f05967678dedc85e656ef6339be7 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Mon, 8 Sep 2014 00:54:14 +0200 Subject: [PATCH] find video parameters in iframe --- youtube_dl/extractor/tumblr.py | 52 ++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 2882c1809..d93773b0e 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -11,7 +11,8 @@ from ..utils import ( class TumblrIE(InfoExtractor): _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)' - _TESTS = [{ + _TESTS = [ + { 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', 'md5': '479bb068e5b16462f5176a6828829767', 'info_dict': { @@ -31,29 +32,61 @@ class TumblrIE(InfoExtractor): 'description': 'md5:dba62ac8639482759c8eb10ce474586a', 'thumbnail': 're:http://.*\.jpg', } - }] + }, { + 'url': 'http://anotherkindofhorse.tumblr.com/post/96805380497', + 'md5': '84a5c3c1cb2325a9a9e900d1726e956a', + 'info_dict': { + 'id': '96805380497', + 'ext': 'mp4', + 'title': 'Tumblr', + 'description': 'md5:06e250cb873c721abee97e543f9997d3', + 'thumbnail': 're:http://.*\.jpg', + } + } + ] def _real_extract(self, url): m_url = re.match(self._VALID_URL, url) video_id = m_url.group('id') blog = m_url.group('blog_name') - url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) - webpage = self._download_webpage(url, video_id) + video_thumbnails = [] + + # try "old" way first + purl = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) + webpage = self._download_webpage(purl, video_id) re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id) video = re.search(re_video, webpage) - if video is None: - raise ExtractorError('Unable to extract video') - video_url = video.group('video_url') - ext = video.group('ext') video_thumbnail = 
self._search_regex( r'posters.*?\[\\x22(.*?)\\x22', - webpage, 'thumbnail', fatal=False) # We pick the first poster + webpage, 'thumbnail', default="", fatal=False) # We pick the first poster + if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\\\/', '/') + if video is None: + # This did not work - search for iframe + iframe_m = re.search(r'
.+?