From d439060bdafeb00edd312be3428488ab3b996e13 Mon Sep 17 00:00:00 2001
From: Felipe Barriga Richards
Date: Mon, 7 Nov 2016 00:14:26 -0300
Subject: [PATCH 1/2] [xattr] Added more fields.

---
 youtube_dl/postprocessor/xattrpp.py | 44 +++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
index b0aed9ca7..fbd148634 100644
--- a/youtube_dl/postprocessor/xattrpp.py
+++ b/youtube_dl/postprocessor/xattrpp.py
@@ -23,6 +23,33 @@ class XAttrMetadataPP(PostProcessor):
     # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
     #
 
+    @staticmethod
+    def write_xattr(path, key, value):
+        """ Write one xattr; kept as a method so that unit tests can replace it. """
+        write_xattr(path, key, value)
+
+    @staticmethod
+    def get_tags(info):
+        """
+        Merge info['tags'] and info['categories'] into one keyword list.
+
+        Keywords are lower-cased, de-duplicated and sorted. Returns them as a
+        comma-separated UTF-8 byte string, or None if there are none.
+        """
+        keywords = set()
+        for field in ('tags', 'categories'):
+            for keyword in info.get(field) or ():
+                # Only byte strings need decoding; calling decode() on text
+                # makes Python 2 ASCII-encode it first, which fails for
+                # non-ASCII keywords.
+                if isinstance(keyword, bytes):
+                    keyword = keyword.decode('utf-8')
+                keywords.add(keyword.lower())
+
+        if not keywords:
+            return None
+        return ','.join(sorted(keywords)).encode('utf-8')
+
     def run(self, info):
         """ Set extended attributes on downloaded file (if xattr support is found). """
 
@@ -33,6 +60,7 @@ class XAttrMetadataPP(PostProcessor):
 
         try:
             xattr_mapping = {
+                'user.xdg.origin.url': 'webpage_url',
                 'user.xdg.referrer.url': 'webpage_url',
                 # 'user.xdg.comment': 'description',
                 'user.dublincore.title': 'title',
@@ -52,9 +80,23 @@ class XAttrMetadataPP(PostProcessor):
                         value = hyphenate_date(value)
 
                     byte_value = value.encode('utf-8')
-                    write_xattr(filename, xattrname, byte_value)
+                    self.write_xattr(filename, xattrname, byte_value)
                     num_written += 1
 
+            tags = self.get_tags(info)
+            if tags:
+                self.write_xattr(filename, 'user.xdg.tags', tags)
+                self.write_xattr(filename, 'user.dublincore.subject', tags)
+                num_written += 2
+
+            # A missing age limit is treated like an unrestricted video.
+            age_limit = info.get('age_limit')
+            audience = 'adults' if age_limit is not None and age_limit >= 18 else 'everybody'
+            self.write_xattr(filename, 'user.dublincore.audience', audience.encode('utf-8'))
+            self.write_xattr(filename, 'user.dublincore.type', 'MovingImage'.encode('utf-8'))
+            self.write_xattr(filename, 'user.creator', 'youtube-dl'.encode('utf-8'))
+            num_written += 3
+
             return [], info
 
         except XAttrUnavailableError as e:
From 33ad3004ddc9c8e87639c0cfb7417a2c1545bd98 Mon Sep 17 00:00:00 2001
From: Felipe Barriga Richards
Date: Fri, 18 Nov 2016 14:07:50 -0300
Subject: [PATCH 2/2] [xattr] test/test_postprocessors.py: added tests.

---
 test/test_postprocessors.py | 133 +++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index addb69d6f..5c2479722 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 from __future__ import unicode_literals
 
@@ -9,9 +10,141 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.postprocessor import MetadataFromTitlePP
+from youtube_dl.postprocessor import XAttrMetadataPP
+from test.helper import FakeYDL
 
 
 class TestMetadataFromTitle(unittest.TestCase):
     def test_format_to_regex(self):
         pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
         self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
+
+
+class TestXAttrMetadataPP(unittest.TestCase):
+    def test_run(self):
+        written_xattrs = []
+
+        def record_xattr(path, key, value):
+            written_xattrs.append({'path': path, 'key': key, 'value': value})
+
+        def run_and_collect(info):
+            """ Run pp on info and return the recorded xattrs sorted by key. """
+            del written_xattrs[:]
+            files_to_delete, new_info = pp.run(info)
+            self.assertEqual(files_to_delete, [])
+            self.assertEqual(new_info, info)
+            return sorted(written_xattrs, key=lambda xattr: xattr['key'])
+
+        pp = XAttrMetadataPP(None)
+        pp._downloader = FakeYDL()
+        # record the writes instead of setting real xattrs
+        pp.write_xattr = record_xattr
+
+        # 'filepath' is mandatory; nothing may be written without it
+        self.assertRaises(KeyError, pp.run, {})
+        self.assertEqual(written_xattrs, [])
+
+        # minimal test case
+        self.assertEqual(run_and_collect({'filepath': 'foo'}), [
+            {'key': 'user.creator', 'path': 'foo', 'value': b'youtube-dl'},
+            {'key': 'user.dublincore.audience', 'path': 'foo', 'value': b'everybody'},
+            {'key': 'user.dublincore.type', 'path': 'foo', 'value': b'MovingImage'},
+        ])
+
+        # minimal test case with age limit >= 18
+        self.assertEqual(run_and_collect({'filepath': 'foo', 'age_limit': 18}), [
+            {'key': 'user.creator', 'path': 'foo', 'value': b'youtube-dl'},
+            {'key': 'user.dublincore.audience', 'path': 'foo', 'value': b'adults'},
+            {'key': 'user.dublincore.type', 'path': 'foo', 'value': b'MovingImage'},
+        ])
+
+        # complete test-case
+        info = {
+            'filepath': 'foo_filepath',
+            'age_limit': 19,
+            'webpage_url': 'foo_webpage',
+            'title': 'foo_title',
+            'upload_date': 'foo_upload_date',
+            'description': 'foo_description',
+            'uploader': 'foo_uploader',
+            'format': 'foo_format',
+            'tags': ['foo', 'bar'],
+            'categories': ['FOO', 'baz', 'bar'],
+            'foo': 'this_should_be_ignored',
+        }
+        self.assertEqual(run_and_collect(info), [
+            {'key': 'user.creator', 'path': 'foo_filepath', 'value': b'youtube-dl'},
+            {'key': 'user.dublincore.audience', 'path': 'foo_filepath', 'value': b'adults'},
+            {'key': 'user.dublincore.contributor', 'path': 'foo_filepath', 'value': b'foo_uploader'},
+            {'key': 'user.dublincore.date', 'path': 'foo_filepath', 'value': b'foo_upload_date'},
+            {'key': 'user.dublincore.description', 'path': 'foo_filepath', 'value': b'foo_description'},
+            {'key': 'user.dublincore.format', 'path': 'foo_filepath', 'value': b'foo_format'},
+            {'key': 'user.dublincore.subject', 'path': 'foo_filepath', 'value': b'bar,baz,foo'},
+            {'key': 'user.dublincore.title', 'path': 'foo_filepath', 'value': b'foo_title'},
+            {'key': 'user.dublincore.type', 'path': 'foo_filepath', 'value': b'MovingImage'},
+            {'key': 'user.xdg.origin.url', 'path': 'foo_filepath', 'value': b'foo_webpage'},
+            {'key': 'user.xdg.referrer.url', 'path': 'foo_filepath', 'value': b'foo_webpage'},
+            {'key': 'user.xdg.tags', 'path': 'foo_filepath', 'value': b'bar,baz,foo'},
+        ])
+
+        # test-case with empty tags and categories
+        info = dict(info, tags=[], categories=None)
+        self.assertEqual(run_and_collect(info), [
+            {'key': 'user.creator', 'path': 'foo_filepath', 'value': b'youtube-dl'},
+            {'key': 'user.dublincore.audience', 'path': 'foo_filepath', 'value': b'adults'},
+            {'key': 'user.dublincore.contributor', 'path': 'foo_filepath', 'value': b'foo_uploader'},
+            {'key': 'user.dublincore.date', 'path': 'foo_filepath', 'value': b'foo_upload_date'},
+            {'key': 'user.dublincore.description', 'path': 'foo_filepath', 'value': b'foo_description'},
+            {'key': 'user.dublincore.format', 'path': 'foo_filepath', 'value': b'foo_format'},
+            {'key': 'user.dublincore.title', 'path': 'foo_filepath', 'value': b'foo_title'},
+            {'key': 'user.dublincore.type', 'path': 'foo_filepath', 'value': b'MovingImage'},
+            {'key': 'user.xdg.origin.url', 'path': 'foo_filepath', 'value': b'foo_webpage'},
+            {'key': 'user.xdg.referrer.url', 'path': 'foo_filepath', 'value': b'foo_webpage'},
+        ])
+
+    def test_get_tags(self):
+        get_tags = XAttrMetadataPP.get_tags
+
+        # test empty values
+        self.assertEqual(get_tags({}), None)
+        self.assertEqual(get_tags({'tags': None}), None)
+        self.assertEqual(get_tags({'categories': None}), None)
+        self.assertEqual(get_tags({'tags': None, 'categories': None}), None)
+
+        # lower-case tags
+        self.assertEqual(get_tags({
+            'tags': ['foo', 'FOO'],
+            'categories': ['Foo', 'BAR'],
+        }), b'bar,foo')
+
+        # test tags alone
+        self.assertEqual(get_tags({'tags': ['foo']}), b'foo')
+        self.assertEqual(get_tags({'tags': ['foo', 'foo']}), b'foo')
+        self.assertEqual(get_tags({'tags': ['foo', 'bar']}), b'bar,foo')  # tags are sorted
+
+        # test categories alone
+        self.assertEqual(get_tags({'categories': ['foo']}), b'foo')
+        self.assertEqual(get_tags({'categories': ['foo', 'foo']}), b'foo')
+        self.assertEqual(get_tags({'categories': ['foo', 'bar']}), b'bar,foo')  # tags are sorted
+
+        # test tags + categories
+        self.assertEqual(get_tags({'tags': ['foo'], 'categories': None}), b'foo')
+        self.assertEqual(get_tags({'tags': None, 'categories': ['foo']}), b'foo')
+        self.assertEqual(get_tags({'tags': ['foo'], 'categories': ['bar']}), b'bar,foo')
+        self.assertEqual(get_tags({'tags': ['bar'], 'categories': ['foo']}), b'bar,foo')
+        self.assertEqual(get_tags({'tags': ['foo', 'bar'], 'categories': ['foo']}), b'bar,foo')
+        self.assertEqual(get_tags({'tags': ['foo', 'bar'], 'categories': ['bar', 'foo']}), b'bar,foo')
+        self.assertEqual(get_tags({'tags': ['bar', 'baz'], 'categories': ['bar', 'foo']}), b'bar,baz,foo')
+
+        # test unicode
+        categories = ['H₂O', 'РУ́ССКИЙ', '€ÃĂÀÂÁÅÄ']
+        if sys.version_info < (3, 0):
+            # exercise the byte string input path on Python 2
+            categories = [category.encode('utf-8') for category in categories]
+        self.assertEqual(
+            get_tags({'categories': categories}),
+            'h₂o,ру́сский,€ãăàâáåä'.encode('utf-8'))
+
+
+if __name__ == '__main__':
+    unittest.main()