From 85c99fbc7046a87fa9e8a04d0d7db06ab19e09b8 Mon Sep 17 00:00:00 2001 From: Enes Date: Fri, 1 Jun 2018 01:05:41 +0300 Subject: [PATCH 1/3] [openload] improvement for determinating file extension --- youtube_dl/extractor/openload.py | 14 ++++++++++++-- youtube_dl/utils.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 650f95656..21d3af9b7 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -301,6 +301,10 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.xyz/f/WwRBpzW8Wtk', 'only_matching': True, + }, { + # Its title has not got its extension but url has it + 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' @@ -354,13 +358,19 @@ class OpenloadIE(InfoExtractor): entry = entries[0] if entries else {} subtitles = entry.get('subtitles') + # Some videos have special name and some of these + # have not got their extension on their title + # If url has their file name, it has always its extension + video_ext = determine_ext(title, None) + if video_ext is None: + video_ext = determine_ext(url, 'mp4') + info_dict = { 'id': video_id, 'title': title, 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'url': video_url, - # Seems all videos have extensions in their titles - 'ext': determine_ext(title, 'mp4'), + 'ext': video_ext, 'subtitles': subtitles, 'http_headers': headers, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 027d12785..df449b53d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True): def determine_ext(url, default_ext='unknown_video'): - if url is None: + if url is None or '.' not in url: return default_ext guess = url.partition('?')[0].rpartition('.')[2] if re.match(r'^[A-Za-z0-9]+$', guess): From c6e9442782439c4e392978a8a66a6264e9041973 Mon Sep 17 00:00:00 2001 From: Enes Date: Fri, 1 Jun 2018 02:13:47 +0300 Subject: [PATCH 2/3] [openload] added test for determine_ext and determining file extension was simplified --- test/test_utils.py | 1 + youtube_dl/extractor/openload.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a1fe6fdb2..b4b9e8d7c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -360,6 +360,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') + self.assertEqual(determine_ext('foobar', None), None) def test_find_xpath_attr(self): testxml = ''' diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 21d3af9b7..7ede4038a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -361,9 +361,7 @@ class OpenloadIE(InfoExtractor): # Some videos have special name and some of these # have not got their extension on their title # If url has their file name, it has always its extension - video_ext = determine_ext(title, None) - if video_ext is None: - video_ext = determine_ext(url, 'mp4') + video_ext = determine_ext(title, None) or determine_ext(url, 'mp4') info_dict = { 'id': video_id, From 79565865660fd3d57c5cd9e03c102a0050b962fc Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sat, 2 Jun 2018 00:14:20 +0700 Subject: [PATCH 3/3] Update openload.py --- youtube_dl/extractor/openload.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 438692a57..d264fe206 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -367,17 +367,12 @@ class OpenloadIE(InfoExtractor): entry = entries[0] if entries else {} subtitles = entry.get('subtitles') - # Some videos have special name and some of these - # have not got their extension on their title - # If url has their file name, it has always its extension - video_ext = determine_ext(title, None) or determine_ext(url, 'mp4') - info_dict = { 'id': video_id, 'title': title, 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'url': video_url, - 'ext': video_ext, + 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), 'subtitles': subtitles, 'http_headers': headers, }