From 60e555b6429f9b3aa68a328b3ecec3d8e7e00310 Mon Sep 17 00:00:00 2001 From: mxamin Date: Sun, 18 Sep 2016 23:36:05 +0430 Subject: [PATCH 1/2] [Aparat] Add Multiple Formats Support --- youtube_dl/extractor/aparat.py | 76 ++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 025e29aa4..de95f3765 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -2,10 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - ExtractorError, - HEADRequest, -) +from ..utils import ExtractorError class AparatIE(InfoExtractor): @@ -32,27 +29,70 @@ class AparatIE(InfoExtractor): embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id webpage = self._download_webpage(embed_url, video_id) - file_list = self._parse_json(self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) - for i, item in enumerate(file_list[0]): - video_url = item['file'] - req = HEADRequest(video_url) - res = self._request_webpage( - req, video_id, note='Testing video URL %d' % i, errnote=False) - if res: - break - else: + file_list = self._parse_json( + self._search_regex( + r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', + webpage, + 'file list', + default='[]' + ), + video_id + ) + file_list_pseudo = self._parse_json( + self._search_regex( + r'fileListPseudo\s*=\s*JSON\.parse\(\'([^\']+)\'\)', + webpage, + 'file list pseudo', + default='[]' + ), + video_id + ) + + total_file_list = [] + if file_list: + total_file_list.extend(file_list[0]) + + if file_list_pseudo: + total_file_list.extend(file_list_pseudo[0]) + + if not total_file_list: raise ExtractorError('No working video URLs found') - title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') + labels = { + 'unknown': 0, + '270p': 1, + '360p': 2, + '720p': 3, + '1080p': 4 
+ } + formats = [] + for item in total_file_list: + video = {} + video['url'] = item['file'] + video['format'] = item['type'] + video['ext'] = 'mp4' + video_label = item.get('label', 'unknown') + video['label'] = labels.get(video_label, 0) + + formats.append(video) + + formats = sorted(formats, key=lambda x: x['label']) + title = self._search_regex( + r'\s+title:\s*"([^"]+)"', + webpage, + 'title' + ) thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + r'image:\s*"([^"]+)"', + webpage, + 'thumbnail', + fatal=False + ) return { 'id': video_id, 'title': title, - 'url': video_url, - 'ext': 'mp4', + 'formats': formats, 'thumbnail': thumbnail, 'age_limit': self._family_friendly_search(webpage), } From 964c30406283e445c0c8f4a7826588cbfdea755f Mon Sep 17 00:00:00 2001 From: mxamin Date: Sun, 25 Sep 2016 12:00:25 +0330 Subject: [PATCH 2/2] [Aparat] Apply Requested Notes - Used `_sort_formats` instead of `sorted` - Removed code duplication of extracting video URLs - Put back video URL checking --- youtube_dl/extractor/aparat.py | 67 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index de95f3765..df4e81f66 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ExtractorError, HEADRequest class AparatIE(InfoExtractor): @@ -29,36 +29,20 @@ class AparatIE(InfoExtractor): embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id webpage = self._download_webpage(embed_url, video_id) - file_list = self._parse_json( - self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', - webpage, - 'file list', - default='[]' - ), - video_id - ) - file_list_pseudo = self._parse_json( - self._search_regex( 
r'fileListPseudo\s*=\s*JSON\.parse\(\'([^\']+)\'\)', - webpage, - 'file list pseudo', - default='[]' - ), - video_id - ) + patterns = [ + r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', + r'fileListPseudo\s*=\s*JSON\.parse\(\'([^\']+)\'\)' + ] + file_list = [] + for p in patterns: + res = self._parse_json( + self._search_regex(p, webpage, 'file list', default='[]'), + video_id + ) + if res: + file_list.extend(res[0]) - total_file_list = [] - if file_list: - total_file_list.extend(file_list[0]) - - if file_list_pseudo: - total_file_list.extend(file_list_pseudo[0]) - - if not total_file_list: - raise ExtractorError('No working video URLs found') - - labels = { + prefs = { 'unknown': 0, '270p': 1, '360p': 2, @@ -66,17 +50,32 @@ class AparatIE(InfoExtractor): '1080p': 4 } formats = [] - for item in total_file_list: + for i, item in enumerate(file_list): + # check for video availability + video_url = item['file'] + req = HEADRequest(video_url) + res = self._request_webpage( + req, + video_id, + note='Testing video URL %d' % (i + 1), + errnote=False + ) + if not res: + continue + video = {} video['url'] = item['file'] video['format'] = item['type'] video['ext'] = 'mp4' - video_label = item.get('label', 'unknown') - video['label'] = labels.get(video_label, 0) + video_pref = item.get('label', 'unknown') + video['preference'] = prefs.get(video_pref, -1) formats.append(video) - formats = sorted(formats, key=lambda x: x['label']) + if not formats: + raise ExtractorError('No working video URLs found') + + self._sort_formats(formats) title = self._search_regex( r'\s+title:\s*"([^"]+)"', webpage,