From 07e6c7bef6966c4b80dfb2b15845818fa10f4c67 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 01:41:34 +0100 Subject: [PATCH 1/7] Fix issue [#7918](https://github.com/rg3/youtube-dl/issues/7918) trying to download single images from imgur produced error. E.g. https://imgur.com/gallery/YcAQlkx The issue was caused because 'http://imgur.com/gallery/<album_id>/album_images/hit.json?all=true' returns a JSON with data field containing id of all images contained in the album. But for a single image, the data field is empty. Added `if` condition to check if it is empty. --- youtube_dl/extractor/imgur.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 70c8ca64e..128a61497 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -113,12 +113,15 @@ class ImgurAlbumIE(InfoExtractor): def _real_extract(self, url): album_id = self._match_id(url) - album_images = self._download_json( - 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, - album_id)['data']['images'] + album_img_data = self._download_json( + 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,album_id)['data'] - entries = [ - self.url_result('http://imgur.com/%s' % image['hash']) - for image in album_images if image.get('hash')] + if(len(album_img_data) == 0): + entries = [self.url_result('http://imgur.com/%s' % album_id)] + else: + album_images = album_img_data['images'] + entries = [ + self.url_result('http://imgur.com/%s' % image['hash']) + for image in album_images if image.get('hash')] return self.playlist_result(entries, album_id) From e11069985fa8a0e6448a81a527e9dae398ede807 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 01:50:07 +0100 Subject: [PATCH 2/7] Add test to verify fix of issue #7918 --- youtube_dl/extractor/imgur.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 128a61497..5ee6c450d 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -102,13 +102,19 @@ class ImgurIE(InfoExtractor): class ImgurAlbumIE(InfoExtractor): _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P[a-zA-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://imgur.com/gallery/Q95ko', 'info_dict': { 'id': 'Q95ko', }, 'playlist_count': 25, - } + },{ + 'url': 'https://imgur.com/gallery/YcAQlkx', + 'info_dict': { + 'id': 'YcAQlkx', + }, + 'playlist_count': 1, + }] def _real_extract(self, url): album_id = self._match_id(url) From a1a646be831e66bb925b105725cc3e800d4e9641 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 16:11:10 +0100 Subject: [PATCH 3/7] remove outer parentheses in if --- youtube_dl/extractor/imgur.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 5ee6c450d..25f3e3529 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -122,7 +122,7 @@ class ImgurAlbumIE(InfoExtractor): album_img_data = self._download_json( 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,album_id)['data'] - if(len(album_img_data) == 0): + if len(album_img_data) == 0 : entries = [self.url_result('http://imgur.com/%s' % album_id)] else: album_images = album_img_data['images'] From 6a5bcb2cabd30a086bc3a9d0bc8d92c003fc9c7d Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 16:40:31 +0100 Subject: [PATCH 4/7] checked code with flake8 --- YcAQlkx.info.json | 1 + youtube_dl/extractor/imgur.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 YcAQlkx.info.json diff --git a/YcAQlkx.info.json b/YcAQlkx.info.json new file mode 100644 index 000000000..aa73baf56 --- /dev/null +++ b/YcAQlkx.info.json @@ -0,0 +1 @@ +{"display_id": "YcAQlkx", "extractor": "ImgurAlbum", "playlist": null, 
"format": "0 - unknown", "url": "http://imgur.com/YcAQlkx", "extractor_key": "ImgurAlbum", "title": "Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....", "playlist_index": null, "ext": "unknown_video", "fulltitle": "Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....", "webpage_url": "https://imgur.com/gallery/YcAQlkx", "_filename": "YcAQlkx.unknown_video", "format_id": "0", "http_headers": {"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", "Accept-Language": "en-us,en;q=0.5", "Accept-Encoding": "gzip, deflate", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)"}, "id": "YcAQlkx", "webpage_url_basename": "YcAQlkx"} \ No newline at end of file diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 25f3e3529..a8d5e8c6c 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -108,7 +108,7 @@ class ImgurAlbumIE(InfoExtractor): 'id': 'Q95ko', }, 'playlist_count': 25, - },{ + }, { 'url': 'https://imgur.com/gallery/YcAQlkx', 'info_dict': { 'id': 'YcAQlkx', @@ -120,9 +120,9 @@ class ImgurAlbumIE(InfoExtractor): album_id = self._match_id(url) album_img_data = self._download_json( - 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,album_id)['data'] + 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, album_id)['data'] - if len(album_img_data) == 0 : + if len(album_img_data) == 0: entries = [self.url_result('http://imgur.com/%s' % album_id)] else: album_images = album_img_data['images'] From 133643c36c0c767457868eef4fd156c199aee81c Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 17:17:57 +0100 Subject: [PATCH 5/7] not returning list in case of single images. 
--- YcAQlkx.info.json | 1 - youtube_dl/extractor/imgur.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) delete mode 100644 YcAQlkx.info.json diff --git a/YcAQlkx.info.json b/YcAQlkx.info.json deleted file mode 100644 index aa73baf56..000000000 --- a/YcAQlkx.info.json +++ /dev/null @@ -1 +0,0 @@ -{"display_id": "YcAQlkx", "extractor": "ImgurAlbum", "playlist": null, "format": "0 - unknown", "url": "http://imgur.com/YcAQlkx", "extractor_key": "ImgurAlbum", "title": "Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....", "playlist_index": null, "ext": "unknown_video", "fulltitle": "Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....", "webpage_url": "https://imgur.com/gallery/YcAQlkx", "_filename": "YcAQlkx.unknown_video", "format_id": "0", "http_headers": {"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", "Accept-Language": "en-us,en;q=0.5", "Accept-Encoding": "gzip, deflate", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)"}, "id": "YcAQlkx", "webpage_url_basename": "YcAQlkx"} \ No newline at end of file diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index a8d5e8c6c..bd967c4a8 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -112,8 +112,11 @@ class ImgurAlbumIE(InfoExtractor): 'url': 'https://imgur.com/gallery/YcAQlkx', 'info_dict': { 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + 'description': 'Imgur: The most awesome images on the Internet.' 
+ }, - 'playlist_count': 1, }] def _real_extract(self, url): @@ -123,7 +126,7 @@ class ImgurAlbumIE(InfoExtractor): 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, album_id)['data'] if len(album_img_data) == 0: - entries = [self.url_result('http://imgur.com/%s' % album_id)] + return self.url_result('http://imgur.com/%s' % album_id) else: album_images = album_img_data['images'] entries = [ From 76716e4e5989a691371c1647c12ad9ba664adf08 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 19:02:34 +0100 Subject: [PATCH 6/7] using the fact that id with length 5 are albums and more are single videos. Also for single videos, i.e. ImgurIE, both urls - http://imgur.com/gallery/oWeAMW2 and http://imgur.com/oWeAMW2 are equally fine. Change regex to allow this. For album urls - http://imgur.com/gallery/Q95ko and http://imgur.com/Q95ko are ok. Change regex to allow this also. --- youtube_dl/extractor/imgur.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index bd967c4a8..b63047af5 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -13,7 +13,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{6,})' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -100,7 +100,7 @@ class ImgurIE(InfoExtractor): class ImgurAlbumIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{5})(?![a-zA-Z0-9])' _TESTS = [{ 'url': 'http://imgur.com/gallery/Q95ko', From f4aa6de6463d7eb3c4363f97339a114c1352ad50 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 19:20:52 +0100 Subject: [PATCH 7/7] update description in ImgurIE Tests. 
Also move single video test 'https://imgur.com/gallery/YcAQlkx' from ImgurAlbumIE to ImgurIE. --- youtube_dl/extractor/imgur.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index b63047af5..88423f179 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor): 'id': 'A61SaA1', 'ext': 'mp4', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', - 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', + 'description': 'Imgur: The most awesome images on the Internet.', }, }, { 'url': 'https://imgur.com/A61SaA1', @@ -29,8 +29,17 @@ class ImgurIE(InfoExtractor): 'id': 'A61SaA1', 'ext': 'mp4', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', - 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', + 'description': 'Imgur: The most awesome images on the Internet.', }, + }, { + 'url': 'https://imgur.com/gallery/YcAQlkx', + 'info_dict': { + 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + 'description': 'Imgur: The most awesome images on the Internet.' 
+ + } }] def _real_extract(self, url): @@ -102,22 +111,13 @@ class ImgurIE(InfoExtractor): class ImgurAlbumIE(InfoExtractor): _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{5})(?![a-zA-Z0-9])' - _TESTS = [{ + _TEST = { 'url': 'http://imgur.com/gallery/Q95ko', 'info_dict': { 'id': 'Q95ko', }, 'playlist_count': 25, - }, { - 'url': 'https://imgur.com/gallery/YcAQlkx', - 'info_dict': { - 'id': 'YcAQlkx', - 'ext': 'mp4', - 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', - 'description': 'Imgur: The most awesome images on the Internet.' - - }, - }] + } def _real_extract(self, url): album_id = self._match_id(url)