diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index e4b4f58254..6d9b277429 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -1,30 +1,28 @@
+import importlib
+import random
import re
-from ..utils import bug_reports_message, write_string
+from ..utils import bug_reports_message, classproperty, write_string
class LazyLoadMetaClass(type):
def __getattr__(cls, name):
- if '_real_class' not in cls.__dict__:
+ # "is_suitable" requires "_TESTS". However, they bloat the lazy_extractors
+ if '_real_class' not in cls.__dict__ and name not in ('is_suitable', 'get_testcases'):
write_string(
'WARNING: Falling back to normal extractor since lazy extractor '
- f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
- return getattr(cls._get_real_class(), name)
+ f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
+ return getattr(cls.real_class, name)
class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
- _module = None
- _WORKING = True
-
- @classmethod
- def _get_real_class(cls):
+ @classproperty
+ def real_class(cls):
if '_real_class' not in cls.__dict__:
- mod = __import__(cls._module, fromlist=(cls.__name__,))
- cls._real_class = getattr(mod, cls.__name__)
+ cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
return cls._real_class
def __new__(cls, *args, **kwargs):
- real_cls = cls._get_real_class()
- instance = real_cls.__new__(real_cls)
+ instance = cls.real_class.__new__(cls.real_class)
instance.__init__(*args, **kwargs)
return instance
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 6dc8fed90a..8ddc54b9b7 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,101 +1,125 @@
#!/usr/bin/env python3
import os
+import optparse
import sys
from inspect import getsource
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
-if os.path.exists(lazy_extractors_filename):
- os.remove(lazy_extractors_filename)
-# Block plugins from loading
-plugins_dirname = 'ytdlp_plugins'
-plugins_blocked_dirname = 'ytdlp_plugins_blocked'
-if os.path.exists(plugins_dirname):
- os.rename(plugins_dirname, plugins_blocked_dirname)
-
-from yt_dlp.extractor import _ALL_CLASSES
-from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
-
-if os.path.exists(plugins_blocked_dirname):
- os.rename(plugins_blocked_dirname, plugins_dirname)
-
-with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
- module_template = f.read()
-
-CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
-module_contents = [
- module_template,
- *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
- '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
-
-ie_template = '''
+NO_ATTR = object()
+STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_WORKING', '_NETRC_MACHINE']
+CLASS_METHODS = [
+ 'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id',
+]
+IE_TEMPLATE = '''
class {name}({bases}):
- _module = '{module}'
+ _module = {module!r}
'''
+with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
+ MODULE_TEMPLATE = f.read()
-def get_base_name(base):
- if base is InfoExtractor:
- return 'LazyLoadExtractor'
- elif base is SearchInfoExtractor:
- return 'LazyLoadSearchExtractor'
- else:
- return base.__name__
+def main():
+ parser = optparse.OptionParser(usage='%prog [OUTFILE.py]')
+ args = parser.parse_args()[1] or ['yt_dlp/extractor/lazy_extractors.py']
+ if len(args) != 1:
+ parser.error('Expected only an output filename')
+
+ lazy_extractors_filename = args[0]
+ if os.path.exists(lazy_extractors_filename):
+ os.remove(lazy_extractors_filename)
+
+ _ALL_CLASSES = get_all_ies() # Must be before import
+
+ from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
+
+ DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
+ module_src = '\n'.join((
+ MODULE_TEMPLATE,
+ ' _module = None',
+ *extra_ie_code(DummyInfoExtractor),
+ '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
+ *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
+ ))
+
+ with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
+ f.write(f'{module_src}\n')
-def build_lazy_ie(ie, name):
- s = ie_template.format(
- name=name,
- bases=', '.join(map(get_base_name, ie.__bases__)),
- module=ie.__module__)
+def get_all_ies():
+ PLUGINS_DIRNAME = 'ytdlp_plugins'
+ BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
+ if os.path.exists(PLUGINS_DIRNAME):
+ os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
+ try:
+ from yt_dlp.extractor import _ALL_CLASSES
+ finally:
+ if os.path.exists(BLOCKED_DIRNAME):
+ os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
+ return _ALL_CLASSES
+
+
+def extra_ie_code(ie, base=None):
+ for var in STATIC_CLASS_PROPERTIES:
+ val = getattr(ie, var)
+ if val != (getattr(base, var) if base else NO_ATTR):
+ yield f' {var} = {val!r}'
+ yield ''
+
+ for name in CLASS_METHODS:
+ f = getattr(ie, name)
+ if not base or f.__func__ != getattr(base, name).__func__:
+ yield getsource(f)
+
+
+def build_ies(ies, bases, attr_base):
+ names = []
+ for ie in sort_ies(ies, bases):
+ yield build_lazy_ie(ie, ie.__name__, attr_base)
+ if ie in ies:
+ names.append(ie.__name__)
+
+ yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
+
+
+def sort_ies(ies, ignored_bases):
+ """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
+ classes, returned_classes = ies[:-1], set()
+ assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
+ while classes:
+ for c in classes[:]:
+ bases = set(c.__bases__) - {object, *ignored_bases}
+ restart = False
+ for b in bases:
+ if b not in classes and b not in returned_classes:
+ assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
+ classes.insert(0, b)
+ restart = True
+ if restart:
+ break
+ if bases <= returned_classes:
+ yield c
+ returned_classes.add(c)
+ classes.remove(c)
+ break
+ yield ies[-1]
+
+
+def build_lazy_ie(ie, name, attr_base):
+ bases = ', '.join({
+ 'InfoExtractor': 'LazyLoadExtractor',
+ 'SearchInfoExtractor': 'LazyLoadSearchExtractor',
+ }.get(base.__name__, base.__name__) for base in ie.__bases__)
+
+ s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
valid_url = getattr(ie, '_VALID_URL', None)
if not valid_url and hasattr(ie, '_make_valid_url'):
valid_url = ie._make_valid_url()
if valid_url:
s += f' _VALID_URL = {valid_url!r}\n'
- if not ie._WORKING:
- s += ' _WORKING = False\n'
- if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
- s += f'\n{getsource(ie.suitable)}'
- return s
+ return s + '\n'.join(extra_ie_code(ie, attr_base))
-# find the correct sorting and add the required base classes so that subclasses
-# can be correctly created
-classes = _ALL_CLASSES[:-1]
-ordered_cls = []
-while classes:
- for c in classes[:]:
- bases = set(c.__bases__) - {object, InfoExtractor, SearchInfoExtractor}
- stop = False
- for b in bases:
- if b not in classes and b not in ordered_cls:
- if b.__name__ == 'GenericIE':
- exit()
- classes.insert(0, b)
- stop = True
- if stop:
- break
- if all(b in ordered_cls for b in bases):
- ordered_cls.append(c)
- classes.remove(c)
- break
-ordered_cls.append(_ALL_CLASSES[-1])
-
-names = []
-for ie in ordered_cls:
- name = ie.__name__
- src = build_lazy_ie(ie, name)
- module_contents.append(src)
- if ie in _ALL_CLASSES:
- names.append(name)
-
-module_contents.append(
- '\n_ALL_CLASSES = [{}]'.format(', '.join(names)))
-
-module_src = '\n'.join(module_contents) + '\n'
-
-with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
- f.write(module_src)
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 5531fec4d8..d8c53c5e13 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -5,7 +5,7 @@
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from yt_dlp.extractor import list_extractors
+from yt_dlp.extractor import list_extractor_classes
def main():
@@ -14,7 +14,7 @@ def main():
if len(args) != 1:
parser.error('Expected an output filename')
- out = '\n'.join(ie.description() for ie in list_extractors(None) if ie.IE_DESC is not False)
+ out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
with open(args[0], 'w', encoding='utf-8') as outf:
outf.write(f'# Supported sites\n{out}\n')
diff --git a/supportedsites.md b/supportedsites.md
index 31bd27768a..7663c09d40 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -431,7 +431,6 @@ # Supported sites
- **gem.cbc.ca**: [cbcgem]
- **gem.cbc.ca:live**
- **gem.cbc.ca:playlist**
- - **generic**: Generic downloader that works on some sites
- **Gettr**
- **GettrStreaming**
- **Gfycat**
@@ -1553,3 +1552,4 @@ # Supported sites
- **zingmp3:album**
- **zoom**
- **Zype**
+ - **generic**: Generic downloader that works on some sites
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 924604631a..0a8bf37b65 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -11,7 +11,7 @@
from .compat import compat_getpass, compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader import FileDownloader
-from .extractor import list_extractors
+from .extractor import GenericIE, list_extractor_classes
from .extractor.adobepass import MSO_INFO
from .extractor.common import InfoExtractor
from .options import parseOpts
@@ -76,14 +76,20 @@ def get_urls(urls, batchfile, verbose):
def print_extractor_information(opts, urls):
out = ''
if opts.list_extractors:
- for ie in list_extractors(opts.age_limit):
+ urls = dict.fromkeys(urls, False)
+ for ie in list_extractor_classes(opts.age_limit):
out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n'
- out += ''.join(f' {url}\n' for url in filter(ie.suitable, urls))
+ if ie == GenericIE:
+ matched_urls = [url for url, matched in urls.items() if not matched]
+ else:
+ matched_urls = tuple(filter(ie.suitable, urls.keys()))
+ urls.update(dict.fromkeys(matched_urls, True))
+ out += ''.join(f' {url}\n' for url in matched_urls)
elif opts.list_extractor_descriptions:
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
out = '\n'.join(
ie.description(markdown=False, search_examples=_SEARCHES)
- for ie in list_extractors(opts.age_limit) if ie.working() and ie.IE_DESC is not False) + '\n'
+ for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
elif opts.ap_list_mso:
out = 'Supported TV Providers:\n%s\n' % render_table(
['mso', 'mso name'],
@@ -862,7 +868,7 @@ def main(argv=None):
sys.exit(f'\nERROR: {e}')
-from .extractor import gen_extractors
+from .extractor import gen_extractors, list_extractors
__all__ = [
'main',
'YoutubeDL',
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index 506ffe87ce..afd3d05ac8 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -37,11 +37,17 @@ def gen_extractors():
return [klass() for klass in gen_extractor_classes()]
-def list_extractors(age_limit):
+def list_extractor_classes(age_limit=None):
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
- return sorted(filter(
- lambda ie: ie.is_suitable(age_limit),
- gen_extractors()), key=lambda ie: ie.IE_NAME.lower())
+ yield from sorted(filter(
+ lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405
+ gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
+ yield GenericIE # noqa: F405
+
+
+def list_extractors(age_limit=None):
+ """Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
+ return [ie() for ie in list_extractor_classes(age_limit)]
def get_info_extractor(ie_name):
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 23d57ddaf5..e2460b36ac 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -40,6 +40,7 @@
age_restricted,
base_url,
bug_reports_message,
+ classproperty,
clean_html,
determine_ext,
determine_protocol,
@@ -710,9 +711,9 @@ def ie_key(cls):
"""A string for getting the InfoExtractor with get_info_extractor"""
return cls.__name__[:-2]
- @property
- def IE_NAME(self):
- return type(self).__name__[:-2]
+ @classproperty
+ def IE_NAME(cls):
+ return cls.__name__[:-2]
@staticmethod
def __can_accept_status_code(err, expected_status):
@@ -3624,56 +3625,57 @@ def _apply_first_set_cookie_header(self, url_handle, cookie):
self._set_cookie(domain, cookie, value)
break
- def get_testcases(self, include_onlymatching=False):
- t = getattr(self, '_TEST', None)
+ @classmethod
+ def get_testcases(cls, include_onlymatching=False):
+ t = getattr(cls, '_TEST', None)
if t:
- assert not hasattr(self, '_TESTS'), \
- '%s has _TEST and _TESTS' % type(self).__name__
+ assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
tests = [t]
else:
- tests = getattr(self, '_TESTS', [])
+ tests = getattr(cls, '_TESTS', [])
for t in tests:
if not include_onlymatching and t.get('only_matching', False):
continue
- t['name'] = type(self).__name__[:-len('IE')]
+ t['name'] = cls.ie_key()
yield t
- def is_suitable(self, age_limit):
+ @classmethod
+ def is_suitable(cls, age_limit):
""" Test whether the extractor is generally suitable for the given
age limit (i.e. pornographic sites are not, all others usually are) """
any_restricted = False
- for tc in self.get_testcases(include_onlymatching=False):
+ for tc in cls.get_testcases(include_onlymatching=False):
if tc.get('playlist', []):
tc = tc['playlist'][0]
- is_restricted = age_restricted(
- tc.get('info_dict', {}).get('age_limit'), age_limit)
+ is_restricted = age_restricted(tc.get('info_dict', {}).get('age_limit'), age_limit)
if not is_restricted:
return True
any_restricted = any_restricted or is_restricted
return not any_restricted
- def description(self, *, markdown=True, search_examples=None):
+ @classmethod
+ def description(cls, *, markdown=True, search_examples=None):
"""Description of the extractor"""
desc = ''
- if self._NETRC_MACHINE:
+ if cls._NETRC_MACHINE:
if markdown:
- desc += f' [<abbr title="netrc machine"><em>{self._NETRC_MACHINE}</em></abbr>]'
+ desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
else:
- desc += f' [{self._NETRC_MACHINE}]'
- if self.IE_DESC is False:
+ desc += f' [{cls._NETRC_MACHINE}]'
+ if cls.IE_DESC is False:
desc += ' [HIDDEN]'
- elif self.IE_DESC:
- desc += f' {self.IE_DESC}'
- if self.SEARCH_KEY:
- desc += f'; "{self.SEARCH_KEY}:" prefix'
+ elif cls.IE_DESC:
+ desc += f' {cls.IE_DESC}'
+ if cls.SEARCH_KEY:
+ desc += f'; "{cls.SEARCH_KEY}:" prefix'
if search_examples:
_COUNTS = ('', '5', '10', 'all')
- desc += f' (Example: "{self.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
- if not self.working():
+ desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
+ if not cls.working():
desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
- name = f' - **{self.IE_NAME}**' if markdown else self.IE_NAME
+ name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME
return f'{name}:{desc}' if desc else name
def extract_subtitles(self, *args, **kwargs):
@@ -3849,6 +3851,6 @@ def _search_results(self, query):
"""Returns an iterator of search results"""
raise NotImplementedError('This method must be implemented by subclasses')
- @property
- def SEARCH_KEY(self):
- return self._SEARCH_KEY
+ @classproperty
+ def SEARCH_KEY(cls):
+ return cls._SEARCH_KEY
diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py
index 74c40efd93..708b72fae3 100644
--- a/yt_dlp/extractor/drtv.py
+++ b/yt_dlp/extractor/drtv.py
@@ -18,6 +18,7 @@
url_or_none,
)
+
class DRTVIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py
index 140fa4a963..32cae429ee 100644
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -8,55 +8,36 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
- _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
+ _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
def _real_extract(self, url):
- from ..extractor import gen_extractors
+ from ..extractor import gen_extractor_classes
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- extractor_id = mobj.group('extractor')
- all_extractors = gen_extractors()
+ extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
rex = re.compile(extractor_id, flags=re.IGNORECASE)
- matching_extractors = [
- e for e in all_extractors if rex.search(e.IE_NAME)]
+ matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
if len(matching_extractors) == 0:
- raise ExtractorError(
- 'No extractors matching %r found' % extractor_id,
- expected=True)
+ raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
elif len(matching_extractors) > 1:
- # Is it obvious which one to pick?
- try:
+ try: # Check for exact match
extractor = next(
ie for ie in matching_extractors
if ie.IE_NAME.lower() == extractor_id.lower())
except StopIteration:
raise ExtractorError(
- ('Found multiple matching extractors: %s' %
- ' '.join(ie.IE_NAME for ie in matching_extractors)),
+ 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
expected=True)
else:
extractor = matching_extractors[0]
- num_str = mobj.group('num')
- num = int(num_str) if num_str else 0
-
- testcases = []
- t = getattr(extractor, '_TEST', None)
- if t:
- testcases.append(t)
- testcases.extend(getattr(extractor, '_TESTS', []))
-
+ testcases = tuple(extractor.get_testcases(True))
try:
- tc = testcases[num]
+ tc = testcases[int(num or 0)]
except IndexError:
raise ExtractorError(
- ('Test case %d not found, got only %d tests' %
- (num, len(testcases))),
- expected=True)
+ f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
- self.to_screen('Test URL: %s' % tc['url'])
-
- return self.url_result(tc['url'], video_id=video_id)
+ self.to_screen(f'Test URL: {tc["url"]}')
+ return self.url_result(tc['url'])
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 907b079ec4..97c0a2f15a 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -31,6 +31,7 @@
NO_DEFAULT,
ExtractorError,
bug_reports_message,
+ classproperty,
clean_html,
datetime_from_str,
dict_get,
@@ -5781,16 +5782,17 @@ def _real_extract(self, url):
class YoutubeFeedsInfoExtractor(InfoExtractor):
"""
Base class for feed extractors
- Subclasses must define the _FEED_NAME property.
+ Subclasses must re-define the _FEED_NAME property.
"""
_LOGIN_REQUIRED = True
+ _FEED_NAME = 'feeds'
def _real_initialize(self):
YoutubeBaseInfoExtractor._check_login_required(self)
- @property
+ @classproperty
def IE_NAME(self):
- return 'youtube:%s' % self._FEED_NAME
+ return f'youtube:{self._FEED_NAME}'
def _real_extract(self, url):
return self.url_result(
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index ba73c2191d..82eb30af6d 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5321,6 +5321,7 @@ def merge_headers(*dicts):
class classproperty:
def __init__(self, f):
+ functools.update_wrapper(self, f)
self.f = f
def __get__(self, _, cls):