From bb6f776271c318cbb9e5b30a123b469f9d060b45 Mon Sep 17 00:00:00 2001
From: Daniel Miranda <danielkza2@gmail.com>
Date: Sat, 17 Sep 2016 23:29:55 -0300
Subject: [PATCH] Add CLI options to filter usable extractors

Add `--enable-extractors` and `--disable-extractors` options, which make
it possible to restrict the set of extractors to be considered when
downloading. This is useful for handling URLs that match multiple
extractors (although this should be rare), or for using only particular
modes of some extractors (for example, only live videos for Twitch, by
enabling only `twitch:stream`).

Both options can be specified multiple times, and each argument is
interpreted as a comma-separated list of fnmatch patterns, to allow the
use of wildcards. Comparisons to extractor names are case-insensitive.
The order of the arguments is not relevant - matching always proceeds as
follows:

- Initialize the set of considered extractors to all available extractors
- If --enable-extractors is specified, remove all extractors that
  *don't* match those patterns from consideration
- If --disable-extractors is specified, remove all extractors that *do*
  match those patterns from consideration
- If --age-limit is specified, remove all extractors that are not
  suitable from consideration

Therefore, disables and the age limit take precedence over enables.
---
 README.md              |  8 +++++
 test/test_execution.py | 66 ++++++++++++++++++++++++++++++++++++++++
 youtube_dl/__init__.py | 69 +++++++++++++++++++++++++++++++++++++++---
 youtube_dl/options.py  |  8 +++++
 4 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 4debe15fe..c7a9f7995 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,14 @@ which means you can modify it, redistribute it or use it however you like.
                                      extractors
     --force-generic-extractor        Force extraction to use the generic
                                      extractor
+    --enable-extractors EXTRACTORS   Enable only the chosen extractors. Comma-
+                                     separated list of patterns, wildcards
+                                     allowed. Example:
+                                     "twitch:*,youtube:*,vimeo"
+    --disable-extractors EXTRACTORS  Disable the chosen extractors. Comma-
+                                     separated list of patterns, wildcards
+                                     allowed. Example:
+                                     "twitch:*,youtube:*,vimeo"
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for youtube-dl "large
diff --git a/test/test_execution.py b/test/test_execution.py
index 620db080e..df13c8cc3 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -11,6 +11,7 @@ import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.utils import encodeArgument
+from youtube_dl.extractor import gen_extractors, get_info_extractor
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -39,5 +40,70 @@ class TestExecution(unittest.TestCase):
         _, stderr = p.communicate()
         self.assertFalse(stderr)
 
+ALL_EXTRACTORS = [ie.IE_NAME for ie in gen_extractors() if ie._WORKING]
+EXTRACTOR_CASES = {
+    'unrestricted': {
+        'result': ALL_EXTRACTORS
+    },
+    'enable_all': {
+        'enable': '*',
+        'result': ALL_EXTRACTORS
+    },
+    'disable_all': {
+        'disable': '*',
+        'result': []
+    },
+    'enable_disable_all': {
+        'enable': '*',
+        'disable': '*',
+        'result': []
+    },
+    'enable_some': {
+        'enable': 'youtube,youporn',
+        'result': ['youtube', 'YouPorn']
+    },
+    'enable_and_filter': {
+        'enable': 'twitch:*',
+        'disable': 'twitch:stream',
+        'result': [ie for ie in ALL_EXTRACTORS if ie.startswith('twitch:') and ie != 'twitch:stream']
+    },
+    'enable_age_restricted': {
+        'enable': 'youporn',
+        'age_limit': 16,
+        'result': []
+    }
+}
+
+def gen_extractor_case(case):
+    """Build a test method checking the --list-extractors output for one case."""
+    enable = case.get('enable')
+    disable = case.get('disable')
+    age_limit = case.get('age_limit')
+    result = case['result']
+
+    def template(self):
+        args = [sys.executable, 'youtube_dl/__main__.py', '--list-extractors']
+        if enable:
+            args.extend(['--enable-extractors', enable])
+        if disable:
+            args.extend(['--disable-extractors', disable])
+        if age_limit:
+            args.extend(['--age-limit', str(age_limit)])
+        out = subprocess.check_output(args, cwd=rootDir, stderr=_DEV_NULL).decode('utf-8')
+        extractors = [e for e in out.splitlines() if e and 'BROKEN' not in e]
+        # assertItemsEqual only exists on Python 2.7; compare sorted lists instead
+        self.assertEqual(sorted(extractors), sorted(result))
+    return template
+
+class TestExtractorSelection(unittest.TestCase):
+    pass
+
+for name, case in EXTRACTOR_CASES.items():
+    test_method = gen_extractor_case(case)
+    test_name = str('test_' + name)
+    test_method.__name__ = test_name
+    setattr(TestExtractorSelection, test_name, test_method)
+    del test_method
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1cf3140a0..3696751d3 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -10,7 +10,8 @@ import io
 import os
 import random
 import sys
-
+import fnmatch
+from collections import OrderedDict
 
 from .options import (
     parseOpts,
@@ -40,7 +41,7 @@ from .update import update_self
 from .downloader import (
     FileDownloader,
 )
-from .extractor import gen_extractors, list_extractors
+from .extractor import gen_extractors, gen_extractor_classes
 from .extractor.adobepass import MSO_INFO
 from .YoutubeDL import YoutubeDL
 
@@ -100,15 +101,67 @@ def _real_main(argv=None):
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 
+    def get_usable_extractors(enable_patterns, disable_patterns, age_limit):
+        """Return the list of extractor instances that survive filtering.
+
+        Patterns are lower-cased fnmatch patterns matched against lower-cased
+        IE_NAMEs. Enables are applied first, then disables, then age_limit,
+        so disables and the age limit take precedence over enables.
+        """
+        # Unfortunately it's necessary to create instances of all extractors
+        # instead of just looking at the classes, because some of them don't
+        # override the ie_key() classmethod to the correct value.
+        all_extractors = OrderedDict((ie.IE_NAME.lower(), ie) for ie in gen_extractors())
+        extractors = OrderedDict() if enable_patterns else all_extractors
+
+        def debug(message):
+            if opts.verbose:
+                write_string('[debug] %s\n' % message)
+
+        for pattern in enable_patterns or ():
+            for name in fnmatch.filter(all_extractors, pattern):
+                if name not in extractors:
+                    debug('Enabling extractor %s' % name)
+                    extractors[name] = all_extractors[name]
+
+        for pattern in disable_patterns or ():
+            # fnmatch.filter() returns a new list, so deleting here is safe.
+            for name in fnmatch.filter(extractors, pattern):
+                debug('Disabling extractor %s' % name)
+                del extractors[name]
+
+        if age_limit:
+            # Iterate over a copy: a dict must not change size during iteration.
+            for name, extractor in list(extractors.items()):
+                if not extractor.is_suitable(age_limit):
+                    debug('Extractor %s selected by filter, but ignored due to age limit' % name)
+                    del extractors[name]
+        # Return a real list: callers sort() it, which a Python 3 view lacks.
+        return list(extractors.values())
+
+    def patterns_from_args(args):
+        """Yield lower-cased, whitespace-stripped patterns from comma-separated args."""
+        # args may be None or empty when the option was not given: yield nothing
+        for arg in args or ():
+            for pattern in arg.split(','):
+                # Tolerate spaces around commas, e.g. "twitch:*, youtube"
+                yield pattern.strip().lower()
+
+    enable_extractors = list(patterns_from_args(opts.enable_extractors))
+    disable_extractors = list(patterns_from_args(opts.disable_extractors))
+    extractors = get_usable_extractors(enable_extractors, disable_extractors, opts.age_limit)
+
     if opts.list_extractors:
-        for ie in list_extractors(opts.age_limit):
+        extractors.sort(key=lambda ie: ie.IE_NAME.lower())
+        for ie in extractors:
             write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
                 write_string('  ' + mu + '\n', out=sys.stdout)
         sys.exit(0)
     if opts.list_extractor_descriptions:
-        for ie in list_extractors(opts.age_limit):
+        extractors.sort(key=lambda ie: ie.IE_NAME.lower())
+        for ie in extractors:
             if not ie._WORKING:
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
@@ -413,7 +466,10 @@ def _real_main(argv=None):
 
     }
 
-    with YoutubeDL(ydl_opts) as ydl:
+    if not extractors:
+        parser.error('No usable extractors selected')
+
+    with YoutubeDL(ydl_opts, auto_init=False) as ydl:
         # Update version
         if opts.update_self:
             update_self(ydl.to_screen, opts.verbose, ydl._opener)
@@ -422,6 +478,9 @@ def _real_main(argv=None):
         if opts.rm_cachedir:
             ydl.cache.remove()
 
+        for extractor in extractors:
+            ydl.add_info_extractor(extractor)
+
         # Maybe do nothing
         if (len(all_urls) < 1) and (opts.load_info_filename is None):
             if opts.update_self or opts.rm_cachedir:
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 53497fbc6..6ca7db66d 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -167,6 +167,14 @@ def parseOpts(overrideArguments=None):
         '--force-generic-extractor',
         action='store_true', dest='force_generic_extractor', default=False,
         help='Force extraction to use the generic extractor')
+    general.add_option(
+        '--enable-extractors', metavar='EXTRACTORS',
+        action='append', dest='enable_extractors',
+        help='Enable only the chosen extractors. Comma-separated list of patterns, wildcards allowed. Example: "twitch:*,youtube:*,vimeo"')
+    general.add_option(
+        '--disable-extractors', metavar='EXTRACTORS',
+        action='append', dest='disable_extractors',
+        help='Disable the chosen extractors. Comma-separated list of patterns, wildcards allowed. Example: "twitch:*,youtube:*,vimeo"')
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',