From b15128a953db9cf1c922aa63bc626fe17628293c Mon Sep 17 00:00:00 2001 From: phiresky Date: Wed, 4 Mar 2015 22:33:56 +0100 Subject: [PATCH 1/2] Add metadata from title parser --- youtube_dl/__init__.py | 5 ++ youtube_dl/options.py | 9 ++++ youtube_dl/postprocessor/__init__.py | 2 + youtube_dl/postprocessor/ffmpeg.py | 10 +++- youtube_dl/postprocessor/metadatafromtitle.py | 50 +++++++++++++++++++ 5 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 youtube_dl/postprocessor/metadatafromtitle.py diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a08ddd670..852b2fc3d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -213,6 +213,11 @@ def _real_main(argv=None): # PostProcessors postprocessors = [] # Add the metadata pp first, the other pps will copy it + if opts.metafromtitle: + postprocessors.append({ + 'key': 'MetadataFromTitle', + 'titleformat': opts.metafromtitle + }) if opts.addmetadata: postprocessors.append({'key': 'FFmpegMetadata'}) if opts.extractaudio: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index a4ca8adc4..7b52c762e 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -735,6 +735,15 @@ def parseOpts(overrideArguments=None): '--add-metadata', action='store_true', dest='addmetadata', default=False, help='write metadata to the video file') + postproc.add_option( + '--metadata-from-title', + metavar='FORMAT', dest='metafromtitle', + help='parse additional metadata like song title / artist from the video title. \n' + 'The format syntax is the same as --output, ' + 'the parsed parameters replace existing values.\n' + 'Additional templates: %(songtitle), %(album), %(artist). 
\n' + 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like ' + '"Coldplay - Paradise"') postproc.add_option( '--xattrs', action='store_true', dest='xattrs', default=False, diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index 708df3dd4..f39acadce 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -15,6 +15,7 @@ from .ffmpeg import ( ) from .xattrpp import XAttrMetadataPP from .execafterdownload import ExecAfterDownloadPP +from .metadatafromtitle import MetadataFromTitlePP def get_postprocessor(key): @@ -34,5 +35,6 @@ __all__ = [ 'FFmpegPostProcessor', 'FFmpegSubtitlesConvertorPP', 'FFmpegVideoConvertorPP', + 'MetadataFromTitlePP', 'XAttrMetadataPP', ] diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 30094c2f3..a17113cbf 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -541,11 +541,15 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): def run(self, info): metadata = {} - if info.get('title') is not None: + if info.get('songtitle') is not None: + metadata['title'] = info['songtitle'] + elif info.get('title') is not None: metadata['title'] = info['title'] if info.get('upload_date') is not None: metadata['date'] = info['upload_date'] - if info.get('uploader') is not None: + if info.get('artist') is not None: + metadata['artist'] = info['artist'] + elif info.get('uploader') is not None: metadata['artist'] = info['uploader'] elif info.get('uploader_id') is not None: metadata['artist'] = info['uploader_id'] @@ -554,6 +558,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor): metadata['comment'] = info['description'] if info.get('webpage_url') is not None: metadata['purl'] = info['webpage_url'] + if info.get('album') is not None: + metadata['album'] = info['album'] if not metadata: self._downloader.to_screen('[ffmpeg] There isn\'t any 
metadata to add') diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py new file mode 100644 index 000000000..e5921cdb5 --- /dev/null +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +import re + +from .common import PostProcessor +from ..utils import PostProcessingError + + +class MetadataFromTitlePPError(PostProcessingError): + pass + + +class MetadataFromTitlePP(PostProcessor): + def __init__(self, downloader, titleformat): + self._titleformat = titleformat + self._titleregex, self._attributes = self.fmtToRegex(titleformat) + + def fmtToRegex(self, fmt): + """ + Converts a string like + '%(title)s - %(artist)s' + to a regex like + '(?P<title>.+)\ \-\ (?P<artist>.+)' + and a list of the named groups [title, artist] + """ + lastpos = 0 + regex = "" + groups = [] + # replace %(..)s with regex group and escape other string parts + for match in re.finditer(r'%\((\w+)\)s', fmt): + regex += re.escape(fmt[lastpos:match.start()]) + groupname = match.group(1) + groups.append(groupname) + regex += r'(?P<' + groupname + '>.+)' + lastpos = match.end() + if lastpos < len(fmt): + regex += re.escape(fmt[lastpos:len(fmt)]) + return regex, groups + + def run(self, info): + title = info['title'] + match = re.match(self._titleregex, title) + if match is None: + raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat) + for attribute in self._attributes: + value = match.group(attribute) + info[attribute] = value + self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) + + return True, info From 6a0a6bbb6eaefb5623900955ecfef10eda267019 Mon Sep 17 00:00:00 2001 From: phiresky <phiresky@users.noreply.github.com> Date: Tue, 10 Mar 2015 19:35:36 +0100 Subject: [PATCH 2/2] regex group names from dict --- youtube_dl/postprocessor/metadatafromtitle.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git 
a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index e5921cdb5..4c9d3aafe 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -13,7 +13,7 @@ class MetadataFromTitlePPError(PostProcessingError): class MetadataFromTitlePP(PostProcessor): def __init__(self, downloader, titleformat): self._titleformat = titleformat - self._titleregex, self._attributes = self.fmtToRegex(titleformat) + self._titleregex = self.fmtToRegex(titleformat) def fmtToRegex(self, fmt): """ @@ -29,20 +29,18 @@ class MetadataFromTitlePP(PostProcessor): # replace %(..)s with regex group and escape other string parts for match in re.finditer(r'%\((\w+)\)s', fmt): regex += re.escape(fmt[lastpos:match.start()]) - groupname = match.group(1) - groups.append(groupname) - regex += r'(?P<' + groupname + '>.+)' + regex += r'(?P<' + match.group(1) + '>.+)' lastpos = match.end() if lastpos < len(fmt): regex += re.escape(fmt[lastpos:len(fmt)]) - return regex, groups + return regex def run(self, info): title = info['title'] match = re.match(self._titleregex, title) if match is None: raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat) - for attribute in self._attributes: + for attribute, value in match.groupdict().items(): value = match.group(attribute) info[attribute] = value self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)