From 9f8b88ac15aed5f4aea10b203f01a5bd951f4516 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Tue, 28 Jan 2014 17:10:56 +0100 Subject: [PATCH 1/4] Retrieve the URLs of all available thumbnails --- youtube_dl/extractor/tumblr.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 544369068..6ddbbe05d 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -34,11 +34,19 @@ class TumblrIE(InfoExtractor): video_url = video.group('video_url') ext = video.group('ext') - video_thumbnail = self._search_regex( - r'posters.*?\[\\x22(.*?)\\x22', - webpage, 'thumbnail', fatal=False) # We pick the first poster - if video_thumbnail: - video_thumbnail = video_thumbnail.replace('\\\\/', '/') + # retrieve all available thumbnails + thumb_list = [] + ma = re.search(r'posters.*?\[(?P<thumb>\\x22.*?\\x22)]', webpage) + if not ma is None: + for t in ma.group('thumb').replace('\\\\/', '/').split(','): + t = t.replace('\\x22','"') + if (t[0]=='"') and (t[-1]=='"'): + thumb_list.append(t[1:-1]) + + # take the first, if user only wants one + single_thumb = None + if len(thumb_list)>0: + single_thumb = thumb_list[0] # The only place where you can get a title, it's not complete, # but searching in other places doesn't work for all videos @@ -48,6 +56,7 @@ class TumblrIE(InfoExtractor): return [{'id': video_id, 'url': video_url, 'title': video_title, - 'thumbnail': video_thumbnail, + 'thumbnails': thumb_list, + 'thumbnail': single_thumb, 'ext': ext }] From c748ee85af126dde0431cbecc138b10d3e56b147 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Tue, 28 Jan 2014 22:50:41 +0100 Subject: [PATCH 2/4] Removed storage to "thumbnail" key and some cosmetic changes --- youtube_dl/extractor/tumblr.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 6ddbbe05d..7d0d419e9 100644 --- 
a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -38,16 +38,11 @@ class TumblrIE(InfoExtractor): thumb_list = [] ma = re.search(r'posters.*?\[(?P<thumb>\\x22.*?\\x22)]', webpage) if not ma is None: - for t in ma.group('thumb').replace('\\\\/', '/').split(','): - t = t.replace('\\x22','"') - if (t[0]=='"') and (t[-1]=='"'): + for t in ma.group('thumb').replace(r'\\/', '/').split(','): + t = t.replace(r'\x22','"') + if (t[0] == '"') and (t[-1] == '"'): thumb_list.append(t[1:-1]) - # take the first, if user only wants one - single_thumb = None - if len(thumb_list)>0: - single_thumb = thumb_list[0] - # The only place where you can get a title, it's not complete, # but searching in other places doesn't work for all videos video_title = self._html_search_regex(r'(?P<title>.*?)(?: \| Tumblr)?', @@ -57,6 +52,5 @@ class TumblrIE(InfoExtractor): 'url': video_url, 'title': video_title, 'thumbnails': thumb_list, - 'thumbnail': single_thumb, 'ext': ext }] From 89ad100660b7789e0dd7c66218664a563ec1dd68 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Wed, 29 Jan 2014 00:12:54 +0100 Subject: [PATCH 3/4] URLs of multiple thumbnails must be encoded as a list of dicts --- youtube_dl/extractor/tumblr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 7d0d419e9..10f26d631 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -41,7 +41,7 @@ class TumblrIE(InfoExtractor): for t in ma.group('thumb').replace(r'\\/', '/').split(','): t = t.replace(r'\x22','"') if (t[0] == '"') and (t[-1] == '"'): - thumb_list.append(t[1:-1]) + thumb_list.append( {"url": t[1:-1]} ) # The only place where you can get a title, it's not complete, # but searching in other places doesn't work for all videos From 7bbaf4d413ff2d556d1a162616dfba2a3d5299a8 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Wed, 29 Jan 2014 16:12:38 +0100 Subject: [PATCH 4/4] using "string-escape" decoding --- 
youtube_dl/extractor/tumblr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 10f26d631..666879d50 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -38,8 +38,7 @@ class TumblrIE(InfoExtractor): thumb_list = [] ma = re.search(r'posters.*?\[(?P<thumb>\\x22.*?\\x22)]', webpage) if not ma is None: - for t in ma.group('thumb').replace(r'\\/', '/').split(','): - t = t.replace(r'\x22','"') + for t in ma.group('thumb').decode('string-escape').replace(r'\/',r'/').split(','): if (t[0] == '"') and (t[-1] == '"'): thumb_list.append( {"url": t[1:-1]} )