From c9983bfc125f8a849cbf03fa90f0192610ccd027 Mon Sep 17 00:00:00 2001 From: Carl Harris Date: Sun, 29 Nov 2015 02:26:16 -0500 Subject: [PATCH] allowed multiple --match and --reject title regexes and added test --- test/test_optParse.py | 55 ++ youtube_dl/YoutubeDL.py | 12 +- youtube_dl/__init__.py | 6 +- youtube_dl/options/__init__.py | 6 + youtube_dl/options/options_argparse.py | 690 +++++++++++++++++++++++++ 5 files changed, 762 insertions(+), 7 deletions(-) create mode 100644 test/test_optParse.py create mode 100644 youtube_dl/options/__init__.py create mode 100644 youtube_dl/options/options_argparse.py diff --git a/test/test_optParse.py b/test/test_optParse.py new file mode 100644 index 000000000..d6218ae9f --- /dev/null +++ b/test/test_optParse.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +from youtube_dl.options.options_argparse import parseOpts + +from itertools import repeat + +# Jimquesition +URL = 'https://www.youtube.com/watch?v=DpG4t54g2fk' + +class TestOptionParser(unittest.TestCase): + + def test(self): + backup = sys.stdout + sys.stdout = StringIO() + try: + self.assertRaises(SystemExit, parseOpts, ['--help']) + finally: + sys.stdout = backup + + def test_multiple_regex_patterns(self): + # no patterns given + parser, args, args = parseOpts([URL]) + self.assertIs(args.rejecttitle, None) + self.assertIs(args.matchtitle, None) + + #sys.stdout = sys.stderr + + regexes = [ 'REGEX' + str(i) for i in range(10) ] + def getargs(option): + "return [ option, regex[0], option, regex[1], option, regex[N] ]" + return [x for t in zip(repeat(option), regexes) for x in t] + [URL] + + args = getargs('--reject-title') + parser, args, args = parseOpts(args) + 
self.assertListEqual(args.rejecttitle, regexes) + + args = getargs('--match-title') + parser, args, args = parseOpts(args) + self.assertListEqual(args.matchtitle, regexes) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9a8c7da05..8d9ddcea3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -596,12 +596,16 @@ class YoutubeDL(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) if matchtitle: - if not re.search(matchtitle, title, re.IGNORECASE): - return '"' + title + '" title did not match pattern "' + matchtitle + '"' + for pattern in matchtitle: + if re.search(pattern, title, re.IGNORECASE): + break + else: + return '"' + title + '" title did not match pattern "' + pattern + '"' rejecttitle = self.params.get('rejecttitle', False) if rejecttitle: - if re.search(rejecttitle, title, re.IGNORECASE): - return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' + for pattern in rejecttitle: + if re.search(pattern, title, re.IGNORECASE): + return '"' + title + '" title matched reject pattern "' + pattern + '"' date = info_dict.get('upload_date', None) if date is not None: dateRange = self.params.get('daterange', DateRange()) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9f131f5db..9df33f1a2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -92,7 +92,7 @@ def _real_main(argv=None): write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + args + all_urls = batch_urls + args.URL all_urls = [url.strip() for url in all_urls] _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] @@ -324,8 +324,8 @@ def _real_main(argv=None): 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, 'subtitleslangs': 
opts.subtitleslangs, - 'matchtitle': decodeOption(opts.matchtitle), - 'rejecttitle': decodeOption(opts.rejecttitle), + 'matchtitle': [decodeOption(p) for p in opts.matchtitle or []], + 'rejecttitle': [decodeOption(p) for p in opts.rejecttitle or []], 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'verbose': opts.verbose, diff --git a/youtube_dl/options/__init__.py b/youtube_dl/options/__init__.py new file mode 100644 index 000000000..c3a0f36b1 --- /dev/null +++ b/youtube_dl/options/__init__.py @@ -0,0 +1,6 @@ +import sys + +if sys.version_info >= (2,7,0): + from .options_argparse import parseOpts +else: + from .options import parseOpts diff --git a/youtube_dl/options/options_argparse.py b/youtube_dl/options/options_argparse.py new file mode 100644 index 000000000..af2e4aaff --- /dev/null +++ b/youtube_dl/options/options_argparse.py @@ -0,0 +1,690 @@ +from __future__ import unicode_literals + +import argparse +import sys + +from youtube_dl.downloader.external import list_external_downloaders +from youtube_dl.compat import ( + compat_expanduser, + compat_get_terminal_size, + compat_getenv, + compat_kwargs, + compat_shlex_split, +) +from youtube_dl.utils import ( + preferredencoding, + write_string, +) +from youtube_dl.version import __version__ + +from .common import * + +def parseOpts(overrideArguments=None): + parser = argparse.ArgumentParser(usage='%(prog)s [OPTIONS] URL [URL...]') + + general = parser.add_argument_group('General Options') + general.add_argument('URL', nargs='+') + + general.add_argument( + '--version', + action='version', version=__version__, + help='Print program version and exit') + general.add_argument( + '-U', '--update', + action='store_true', dest='update_self', + help='Update this program to latest version. 
Make sure that you have sufficient permissions (run with sudo if needed)') + general.add_argument( + '-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', default=False, + help='Continue on download errors, for example to skip unavailable videos in a playlist') + general.add_argument( + '--abort-on-error', + action='store_false', dest='ignoreerrors', + help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') + general.add_argument( + '--dump-user-agent', + action='store_true', dest='dump_user_agent', default=False, + help='Display the current browser identification') + general.add_argument( + '--list-extractors', + action='store_true', dest='list_extractors', default=False, + help='List all supported extractors') + general.add_argument( + '--extractor-descriptions', + action='store_true', dest='list_extractor_descriptions', default=False, + help='Output descriptions of all supported extractors') + general.add_argument( + '--force-generic-extractor', + action='store_true', dest='force_generic_extractor', default=False, + help='Force extraction to use the generic extractor') + general.add_argument( + '--default-search', + dest='default_search', metavar='PREFIX', + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') + general.add_argument( + '--ignore-config', + action='store_true', + help='Do not read configuration files. 
' + 'When given in the global configuration file /etc/youtube-dl.conf: ' + 'Do not read the user configuration in ~/.config/youtube-dl/config ' + '(%%APPDATA%%/youtube-dl/config.txt on Windows)') + general.add_argument( + '--flat-playlist', + action='store_const', dest='extract_flat', const='in_playlist', + default=False, + help='Do not extract the videos of a playlist, only list them.') + general.add_argument( + '--no-color', '--no-colors', + action='store_true', dest='no_color', + default=False, + help='Do not emit color codes in output') + + network = parser.add_argument_group('Network Options') + network.add_argument( + '--proxy', dest='proxy', metavar='URL', + help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') + network.add_argument( + '--socket-timeout', + dest='socket_timeout', type=float, default=None, metavar='SECONDS', + help='Time to wait before giving up, in seconds') + network.add_argument( + '--source-address', + metavar='IP', dest='source_address', default=None, + help='Client-side IP address to bind to (experimental)', + ) + network.add_argument( + '-4', '--force-ipv4', + action='store_const', const='0.0.0.0', dest='source_address', + help='Make all connections via IPv4 (experimental)', + ) + network.add_argument( + '-6', '--force-ipv6', + action='store_const', const='::', dest='source_address', + help='Make all connections via IPv6 (experimental)', + ) + network.add_argument( + '--cn-verification-proxy', + dest='cn_verification_proxy', default=None, metavar='URL', + help='Use this proxy to verify the IP address for some Chinese sites. ' + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. 
(experimental)' + ) + + selection = parser.add_argument_group('Video Selection') + selection.add_argument( + '--playlist-start', + dest='playliststart', metavar='NUMBER', default=1, type=int, + help='Playlist video to start at (default is %(default)s)') + selection.add_argument( + '--playlist-end', + dest='playlistend', metavar='NUMBER', default=None, type=int, + help='Playlist video to end at (default is last)') + selection.add_argument( + '--playlist-items', + dest='playlist_items', metavar='ITEM_SPEC', default=None, + help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') + selection.add_argument( + '--match-title', action='append', + dest='matchtitle', metavar='REGEX', + help='Download only matching titles (regex or caseless sub-string)') + selection.add_argument( + '--reject-title', action='append', + dest='rejecttitle', metavar='REGEX', + help='Skip download for matching titles (regex or caseless sub-string)') + selection.add_argument( + '--max-downloads', + dest='max_downloads', metavar='NUMBER', type=int, default=None, + help='Abort after downloading NUMBER files') + selection.add_argument( + '--min-filesize', + metavar='SIZE', dest='min_filesize', default=None, + help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)') + selection.add_argument( + '--max-filesize', + metavar='SIZE', dest='max_filesize', default=None, + help='Do not download any videos larger than SIZE (e.g. 
50k or 44.6m)') + selection.add_argument( + '--date', + metavar='DATE', dest='date', default=None, + help='Download only videos uploaded in this date') + selection.add_argument( + '--datebefore', + metavar='DATE', dest='datebefore', default=None, + help='Download only videos uploaded on or before this date (i.e. inclusive)') + selection.add_argument( + '--dateafter', + metavar='DATE', dest='dateafter', default=None, + help='Download only videos uploaded on or after this date (i.e. inclusive)') + selection.add_argument( + '--min-views', + metavar='COUNT', dest='min_views', default=None, type=int, + help='Do not download any videos with less than COUNT views') + selection.add_argument( + '--max-views', + metavar='COUNT', dest='max_views', default=None, type=int, + help='Do not download any videos with more than COUNT views') + selection.add_argument( + '--match-filter', + metavar='FILTER', dest='match_filter', default=None, + help=( + 'Generic video filter (experimental). ' + 'Specify any key (see help for -o for a list of available keys) to' + ' match if the key is present, ' + '!key to check if the key is not present,' + 'key > NUMBER (like "comment_count > 12", also works with ' + '>=, <, <=, !=, =) to compare against a number, and ' + '& to require multiple matches. ' + 'Values which are not known are excluded unless you' + ' put a question mark (?) after the operator.' + 'For example, to only match videos that have been liked more than ' + '100 times and disliked less than 50 times (or the dislike ' + 'functionality is not available at the given service), but who ' + 'also have a description, use --match-filter ' + '"like_count > 100 & dislike_count