From c3f44d7db0783a402a7a72511c4665fa2b363a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 03:10:08 +0600 Subject: [PATCH 1/9] [extractor/common] Improve _sort_formats behavior customization * Introduce the notion of criterion * Improve absolute criteria preference * Add ability to tweak relative criteria preference * Add ability to exclude criteria from consideration --- youtube_dl/extractor/common.py | 124 +++++++++++++++++++++++++++------ 1 file changed, 103 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2823b1d18..8a4a3de14 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -764,7 +764,57 @@ class InfoExtractor(object): html, '%s form' % form_id, group='form') return self._hidden_inputs(form) - def _sort_formats(self, formats, field_preference=None): + def _sort_formats(self, formats, criteria_preference=None, + criteria_relative_preference=None, criteria_exclusion=None): + """ + Sort formats. + + Sorting behavior can be customized by specifying absolute or relative criteria + order and criteria exclusion. Criteria is either a field from formats dictionary + or a synthetic expression calculated based on some data from formats dictionary. + Currently following criteria are supported for sorting customization: + + Field criteria (see description of formats dict for each field explanation): + format_id + width + height + tbr + abr + vbr + fps + filesize + filesize_approx + language_preference + quality + source_preference + + Synthetic criteria: + preference Calculated based on preference field with some corrections + proto_preference Calculated based on protocol field and prioritizes + direct HTTP(s) URLs + ext_preference Video extension preference, calculated based on ext field + and value of prefer_free_formats setting + audio_ext_preference Audio extension preference, calculated based on ext field + and value of prefer_free_formats setting + + Default criteria preference (from most priority to least): + preference + language_preference + quality + tbr + filesize + vbr + height + width + proto_preference + ext_preference + abr + audio_ext_preference + fps + filesize_approx + source_preference + format_id + """ if not formats: raise ExtractorError('No video formats found') @@ -774,9 +824,6 @@ class InfoExtractor(object): if not f.get('ext') and 'url' in f: f['ext'] = determine_ext(f['url']) - if isinstance(field_preference, (list, tuple)): - return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) - preference = f.get('preference') if preference is None: preference = 0 @@ -806,24 +853,59 @@ class InfoExtractor(object): ext_preference = -1 audio_ext_preference = 0 - return ( - preference, - f.get('language_preference') if f.get('language_preference') is not None else -1, - f.get('quality') if f.get('quality') is not None else -1, - f.get('tbr') if f.get('tbr') is not None else -1, - f.get('filesize') if f.get('filesize') is not None else -1, - f.get('vbr') if f.get('vbr') is not None else -1, - f.get('height') if f.get('height') is not None else -1, - f.get('width') if f.get('width') is not None else -1, - proto_preference, - ext_preference, - f.get('abr') if f.get('abr') is not None else -1, - audio_ext_preference, - f.get('fps') if f.get('fps') is not None else -1, - f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, - f.get('source_preference') if f.get('source_preference') is not None else -1, - f.get('format_id') if f.get('format_id') is not None else '', + def synthetic_criterion(name, value): + return name, value + + def field_criterion(name, default=-1): + return name, f.get(name) if f.get(name) is not None else default + + default_key = ( + synthetic_criterion('preference', preference), + field_criterion('language_preference'), + field_criterion('quality'), + field_criterion('tbr'), + field_criterion('filesize'), + field_criterion('vbr'), + field_criterion('height'), + field_criterion('width'), + synthetic_criterion('proto_preference', proto_preference), + synthetic_criterion('ext_preference', ext_preference), + field_criterion('abr'), + synthetic_criterion('audio_ext_preference', audio_ext_preference), + field_criterion('fps'), + field_criterion('filesize_approx'), + field_criterion('source_preference'), + field_criterion('format_id', ''), ) + + if (not criteria_preference and not criteria_relative_preference and + not criteria_exclusion): + return default_key + + default_criteria_order = list(list(zip(*default_key))[0]) + criteria_order = [] + if criteria_preference: + for field in criteria_preference: + if field in default_criteria_order: + criteria_order.append(field) + else: + relative_order = (list(criteria_relative_preference) + if criteria_relative_preference else []) + while default_criteria_order: + field = default_criteria_order[0] + if field in relative_order: + for rel_field in relative_order: + if rel_field in default_criteria_order: + criteria_order.append(rel_field) + default_criteria_order.remove(rel_field) + else: + if not criteria_exclusion or field not in criteria_exclusion: + criteria_order.append(field) + default_criteria_order.remove(field) + + default_key_dict = dict(default_key) + return [default_key_dict[field] for field in criteria_order] + formats.sort(key=_formats_key) def _check_formats(self, formats, video_id): From 86d5fb968e96d87a9d6a734bef6c3d388c9798be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 03:10:51 +0600 Subject: [PATCH 2/9] [test_YoutubeDL] Add tests for _sort_formats --- test/test_YoutubeDL.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 0388c0bf3..b5b7edcbd 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -102,6 +102,68 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'flv') + def test_sort_formats(self): + # Format with higher tbr has lower resolution + formats = [ + {'format_id': '1', 'tbr': 1000, 'height': 640, 'url': TEST_URL}, + {'format_id': '2', 'tbr': 1050, 'height': 460, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # Format with better tbr is selected by default + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '2') + + # Force considered fields to height, width and tbr exclusively in this order, + # better height is selected + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats'], criteria_preference=('height', 'width', 'tbr')) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '1') + + # Force considered fields to height and width exclusively in this order, + # tbr is excluded completely, better height is selected + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats'], criteria_preference=('height', 'width')) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '1') + + # Tweak relative field preference (prefer height and width over tbr) preserving + # all remaining default fields, better height is selected + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats'], criteria_relative_preference=('height', 'width', 'tbr')) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '1') + + # Tweak relative field preference (prefer width over height) preserving + # all remaining default fields, better tbr is selected since height and width + # preference not changed relatively to tbr + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats'], criteria_relative_preference=('width', 'height')) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '2') + + # Exclude tbr from consideration along with default sorting, + # better height is selected + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats'], criteria_exclusion=('tbr',)) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '1') + def test_format_selection(self): formats = [ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, From ad20c8d0fcd75241579fe726e56f4cd3d2edcaad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 03:11:23 +0600 Subject: [PATCH 3/9] [youtube] Prefer height over width over ext preference over tbr when sorting formats (Closes #6018, closes #8001) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4aac2cc03..16d395318 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1551,7 +1551,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if f.get('vcodec') != 'none': f['stretched_ratio'] = ratio - self._sort_formats(formats) + self._sort_formats( + formats, + criteria_relative_preference=('height', 'width', 'ext_preference', 'tbr')) return { 'id': video_id, From 7fcbe8a101e3f41a2a1f551bd9d44c064eac2cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 03:12:04 +0600 Subject: [PATCH 4/9] [vimeo] Adapt to terminology change --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 7af699982..5cb9c23e7 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -441,7 +441,7 @@ class VimeoIE(VimeoBaseInfoExtractor): m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) + self._sort_formats(formats, criteria_preference=('preference', 'height', 'width', 'fps', 'format_id')) subtitles = {} text_tracks = config['request'].get('text_tracks') From cedd8d286a6e606a68f9395878f5b88394f99f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 03:14:57 +0600 Subject: [PATCH 5/9] [extractor/common] Fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8a4a3de14..28fdf4a95 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -770,7 +770,7 @@ class InfoExtractor(object): Sort formats. Sorting behavior can be customized by specifying absolute or relative criteria - order and criteria exclusion. Criteria is either a field from formats dictionary + order and criteria exclusion. Criterion is either a field from formats dictionary or a synthetic expression calculated based on some data from formats dictionary. Currently following criteria are supported for sorting customization: From 60e4c6d6d498e0bbabd78f0a1a34aeb93d123cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 03:51:07 +0600 Subject: [PATCH 6/9] [extractor/common] Fix typo and calm down flake8 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 28fdf4a95..24680b866 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -904,7 +904,7 @@ class InfoExtractor(object): default_criteria_order.remove(field) default_key_dict = dict(default_key) - return [default_key_dict[field] for field in criteria_order] + return [default_key_dict[criterion] for criterion in criteria_order] formats.sort(key=_formats_key) From 69c5f149fa5478d5436c845b58a89f35383f32c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 04:53:07 +0600 Subject: [PATCH 7/9] [extractor/common] Fix key return value when no criteria specified --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 24680b866..c632b5182 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -880,7 +880,7 @@ class InfoExtractor(object): if (not criteria_preference and not criteria_relative_preference and not criteria_exclusion): - return default_key + return [value for _, value in default_key] default_criteria_order = list(list(zip(*default_key))[0]) criteria_order = [] From 8afe782893c99bfce2ce7c482f142fa6bdea8f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 04:53:41 +0600 Subject: [PATCH 8/9] [extractor/common] Simplify default criteria order construction --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c632b5182..45fa0ec5d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -882,7 +882,7 @@ class InfoExtractor(object): not criteria_exclusion): return [value for _, value in default_key] - default_criteria_order = list(list(zip(*default_key))[0]) + default_criteria_order = [name for name, _ in default_key] criteria_order = [] if criteria_preference: for field in criteria_preference: From 1a27909314869671a4653bb9a833e40428e05d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 3 Jan 2016 04:55:33 +0600 Subject: [PATCH 9/9] [extractor/common] Use more meaningful name for default criteria value set --- youtube_dl/extractor/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 45fa0ec5d..e73644092 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -859,7 +859,7 @@ class InfoExtractor(object): def field_criterion(name, default=-1): return name, f.get(name) if f.get(name) is not None else default - default_key = ( + default_criteria_values = ( synthetic_criterion('preference', preference), field_criterion('language_preference'), field_criterion('quality'), @@ -880,9 +880,9 @@ class InfoExtractor(object): if (not criteria_preference and not criteria_relative_preference and not criteria_exclusion): - return [value for _, value in default_key] + return [value for _, value in default_criteria_values] - default_criteria_order = [name for name, _ in default_key] + default_criteria_order = [name for name, _ in default_criteria_values] criteria_order = [] if criteria_preference: for field in criteria_preference: @@ -903,8 +903,8 @@ class InfoExtractor(object): criteria_order.append(field) default_criteria_order.remove(field) - default_key_dict = dict(default_key) - return [default_key_dict[criterion] for criterion in criteria_order] + default_criteria_dict = dict(default_criteria_values) + return [default_criteria_dict[criterion] for criterion in criteria_order] formats.sort(key=_formats_key)