mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-28 09:31:01 +01:00
[extractor] Use classmethod/property where possible
and refactor lazy extractors accordingly. This reduces the need to create extractor instances.
This commit is contained in:
parent
7ddbf09c25
commit
82d020804d
@ -1,30 +1,28 @@
|
|||||||
|
import importlib
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ..utils import bug_reports_message, write_string
|
from ..utils import bug_reports_message, classproperty, write_string
|
||||||
|
|
||||||
|
|
||||||
class LazyLoadMetaClass(type):
|
class LazyLoadMetaClass(type):
|
||||||
def __getattr__(cls, name):
|
def __getattr__(cls, name):
|
||||||
if '_real_class' not in cls.__dict__:
|
# "is_suitable" requires "_TESTS". However, they bloat the lazy_extractors
|
||||||
|
if '_real_class' not in cls.__dict__ and name not in ('is_suitable', 'get_testcases'):
|
||||||
write_string(
|
write_string(
|
||||||
'WARNING: Falling back to normal extractor since lazy extractor '
|
'WARNING: Falling back to normal extractor since lazy extractor '
|
||||||
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
|
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
|
||||||
return getattr(cls._get_real_class(), name)
|
return getattr(cls.real_class, name)
|
||||||
|
|
||||||
|
|
||||||
class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
|
class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
|
||||||
_module = None
|
@classproperty
|
||||||
_WORKING = True
|
def real_class(cls):
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _get_real_class(cls):
|
|
||||||
if '_real_class' not in cls.__dict__:
|
if '_real_class' not in cls.__dict__:
|
||||||
mod = __import__(cls._module, fromlist=(cls.__name__,))
|
cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
|
||||||
cls._real_class = getattr(mod, cls.__name__)
|
|
||||||
return cls._real_class
|
return cls._real_class
|
||||||
|
|
||||||
def __new__(cls, *args, **kwargs):
|
def __new__(cls, *args, **kwargs):
|
||||||
real_cls = cls._get_real_class()
|
instance = cls.real_class.__new__(cls.real_class)
|
||||||
instance = real_cls.__new__(real_cls)
|
|
||||||
instance.__init__(*args, **kwargs)
|
instance.__init__(*args, **kwargs)
|
||||||
return instance
|
return instance
|
||||||
|
@ -1,101 +1,125 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import os
|
import os
|
||||||
|
import optparse
|
||||||
import sys
|
import sys
|
||||||
from inspect import getsource
|
from inspect import getsource
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
|
|
||||||
if os.path.exists(lazy_extractors_filename):
|
NO_ATTR = object()
|
||||||
|
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_WORKING', '_NETRC_MACHINE']
|
||||||
|
CLASS_METHODS = [
|
||||||
|
'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id',
|
||||||
|
]
|
||||||
|
IE_TEMPLATE = '''
|
||||||
|
class {name}({bases}):
|
||||||
|
_module = {module!r}
|
||||||
|
'''
|
||||||
|
with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
|
||||||
|
MODULE_TEMPLATE = f.read()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = optparse.OptionParser(usage='%prog [OUTFILE.py]')
|
||||||
|
args = parser.parse_args()[1] or ['yt_dlp/extractor/lazy_extractors.py']
|
||||||
|
if len(args) != 1:
|
||||||
|
parser.error('Expected only an output filename')
|
||||||
|
|
||||||
|
lazy_extractors_filename = args[0]
|
||||||
|
if os.path.exists(lazy_extractors_filename):
|
||||||
os.remove(lazy_extractors_filename)
|
os.remove(lazy_extractors_filename)
|
||||||
|
|
||||||
# Block plugins from loading
|
_ALL_CLASSES = get_all_ies() # Must be before import
|
||||||
plugins_dirname = 'ytdlp_plugins'
|
|
||||||
plugins_blocked_dirname = 'ytdlp_plugins_blocked'
|
|
||||||
if os.path.exists(plugins_dirname):
|
|
||||||
os.rename(plugins_dirname, plugins_blocked_dirname)
|
|
||||||
|
|
||||||
from yt_dlp.extractor import _ALL_CLASSES
|
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
|
||||||
|
|
||||||
if os.path.exists(plugins_blocked_dirname):
|
DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
|
||||||
os.rename(plugins_blocked_dirname, plugins_dirname)
|
module_src = '\n'.join((
|
||||||
|
MODULE_TEMPLATE,
|
||||||
|
' _module = None',
|
||||||
|
*extra_ie_code(DummyInfoExtractor),
|
||||||
|
'\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||||
|
*build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
|
||||||
|
))
|
||||||
|
|
||||||
with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
|
with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
||||||
module_template = f.read()
|
f.write(f'{module_src}\n')
|
||||||
|
|
||||||
CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
|
|
||||||
module_contents = [
|
|
||||||
module_template,
|
|
||||||
*[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
|
|
||||||
'\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
|
||||||
|
|
||||||
ie_template = '''
|
|
||||||
class {name}({bases}):
|
|
||||||
_module = '{module}'
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
def get_base_name(base):
|
def get_all_ies():
|
||||||
if base is InfoExtractor:
|
PLUGINS_DIRNAME = 'ytdlp_plugins'
|
||||||
return 'LazyLoadExtractor'
|
BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
|
||||||
elif base is SearchInfoExtractor:
|
if os.path.exists(PLUGINS_DIRNAME):
|
||||||
return 'LazyLoadSearchExtractor'
|
os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
|
||||||
else:
|
try:
|
||||||
return base.__name__
|
from yt_dlp.extractor import _ALL_CLASSES
|
||||||
|
finally:
|
||||||
|
if os.path.exists(BLOCKED_DIRNAME):
|
||||||
|
os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
|
||||||
|
return _ALL_CLASSES
|
||||||
|
|
||||||
|
|
||||||
def build_lazy_ie(ie, name):
|
def extra_ie_code(ie, base=None):
|
||||||
s = ie_template.format(
|
for var in STATIC_CLASS_PROPERTIES:
|
||||||
name=name,
|
val = getattr(ie, var)
|
||||||
bases=', '.join(map(get_base_name, ie.__bases__)),
|
if val != (getattr(base, var) if base else NO_ATTR):
|
||||||
module=ie.__module__)
|
yield f' {var} = {val!r}'
|
||||||
|
yield ''
|
||||||
|
|
||||||
|
for name in CLASS_METHODS:
|
||||||
|
f = getattr(ie, name)
|
||||||
|
if not base or f.__func__ != getattr(base, name).__func__:
|
||||||
|
yield getsource(f)
|
||||||
|
|
||||||
|
|
||||||
|
def build_ies(ies, bases, attr_base):
|
||||||
|
names = []
|
||||||
|
for ie in sort_ies(ies, bases):
|
||||||
|
yield build_lazy_ie(ie, ie.__name__, attr_base)
|
||||||
|
if ie in ies:
|
||||||
|
names.append(ie.__name__)
|
||||||
|
|
||||||
|
yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
|
||||||
|
|
||||||
|
|
||||||
|
def sort_ies(ies, ignored_bases):
|
||||||
|
"""find the correct sorting and add the required base classes so that subclasses can be correctly created"""
|
||||||
|
classes, returned_classes = ies[:-1], set()
|
||||||
|
assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
|
||||||
|
while classes:
|
||||||
|
for c in classes[:]:
|
||||||
|
bases = set(c.__bases__) - {object, *ignored_bases}
|
||||||
|
restart = False
|
||||||
|
for b in bases:
|
||||||
|
if b not in classes and b not in returned_classes:
|
||||||
|
assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
|
||||||
|
classes.insert(0, b)
|
||||||
|
restart = True
|
||||||
|
if restart:
|
||||||
|
break
|
||||||
|
if bases <= returned_classes:
|
||||||
|
yield c
|
||||||
|
returned_classes.add(c)
|
||||||
|
classes.remove(c)
|
||||||
|
break
|
||||||
|
yield ies[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def build_lazy_ie(ie, name, attr_base):
|
||||||
|
bases = ', '.join({
|
||||||
|
'InfoExtractor': 'LazyLoadExtractor',
|
||||||
|
'SearchInfoExtractor': 'LazyLoadSearchExtractor',
|
||||||
|
}.get(base.__name__, base.__name__) for base in ie.__bases__)
|
||||||
|
|
||||||
|
s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
|
||||||
valid_url = getattr(ie, '_VALID_URL', None)
|
valid_url = getattr(ie, '_VALID_URL', None)
|
||||||
if not valid_url and hasattr(ie, '_make_valid_url'):
|
if not valid_url and hasattr(ie, '_make_valid_url'):
|
||||||
valid_url = ie._make_valid_url()
|
valid_url = ie._make_valid_url()
|
||||||
if valid_url:
|
if valid_url:
|
||||||
s += f' _VALID_URL = {valid_url!r}\n'
|
s += f' _VALID_URL = {valid_url!r}\n'
|
||||||
if not ie._WORKING:
|
return s + '\n'.join(extra_ie_code(ie, attr_base))
|
||||||
s += ' _WORKING = False\n'
|
|
||||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
|
||||||
s += f'\n{getsource(ie.suitable)}'
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
# find the correct sorting and add the required base classes so that subclasses
|
if __name__ == '__main__':
|
||||||
# can be correctly created
|
main()
|
||||||
classes = _ALL_CLASSES[:-1]
|
|
||||||
ordered_cls = []
|
|
||||||
while classes:
|
|
||||||
for c in classes[:]:
|
|
||||||
bases = set(c.__bases__) - {object, InfoExtractor, SearchInfoExtractor}
|
|
||||||
stop = False
|
|
||||||
for b in bases:
|
|
||||||
if b not in classes and b not in ordered_cls:
|
|
||||||
if b.__name__ == 'GenericIE':
|
|
||||||
exit()
|
|
||||||
classes.insert(0, b)
|
|
||||||
stop = True
|
|
||||||
if stop:
|
|
||||||
break
|
|
||||||
if all(b in ordered_cls for b in bases):
|
|
||||||
ordered_cls.append(c)
|
|
||||||
classes.remove(c)
|
|
||||||
break
|
|
||||||
ordered_cls.append(_ALL_CLASSES[-1])
|
|
||||||
|
|
||||||
names = []
|
|
||||||
for ie in ordered_cls:
|
|
||||||
name = ie.__name__
|
|
||||||
src = build_lazy_ie(ie, name)
|
|
||||||
module_contents.append(src)
|
|
||||||
if ie in _ALL_CLASSES:
|
|
||||||
names.append(name)
|
|
||||||
|
|
||||||
module_contents.append(
|
|
||||||
'\n_ALL_CLASSES = [{}]'.format(', '.join(names)))
|
|
||||||
|
|
||||||
module_src = '\n'.join(module_contents) + '\n'
|
|
||||||
|
|
||||||
with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
|
||||||
f.write(module_src)
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from yt_dlp.extractor import list_extractors
|
from yt_dlp.extractor import list_extractor_classes
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -14,7 +14,7 @@ def main():
|
|||||||
if len(args) != 1:
|
if len(args) != 1:
|
||||||
parser.error('Expected an output filename')
|
parser.error('Expected an output filename')
|
||||||
|
|
||||||
out = '\n'.join(ie.description() for ie in list_extractors(None) if ie.IE_DESC is not False)
|
out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
|
||||||
|
|
||||||
with open(args[0], 'w', encoding='utf-8') as outf:
|
with open(args[0], 'w', encoding='utf-8') as outf:
|
||||||
outf.write(f'# Supported sites\n{out}\n')
|
outf.write(f'# Supported sites\n{out}\n')
|
||||||
|
@ -431,7 +431,6 @@ # Supported sites
|
|||||||
- **gem.cbc.ca**: [<abbr title="netrc machine"><em>cbcgem</em></abbr>]
|
- **gem.cbc.ca**: [<abbr title="netrc machine"><em>cbcgem</em></abbr>]
|
||||||
- **gem.cbc.ca:live**
|
- **gem.cbc.ca:live**
|
||||||
- **gem.cbc.ca:playlist**
|
- **gem.cbc.ca:playlist**
|
||||||
- **generic**: Generic downloader that works on some sites
|
|
||||||
- **Gettr**
|
- **Gettr**
|
||||||
- **GettrStreaming**
|
- **GettrStreaming**
|
||||||
- **Gfycat**
|
- **Gfycat**
|
||||||
@ -1553,3 +1552,4 @@ # Supported sites
|
|||||||
- **zingmp3:album**
|
- **zingmp3:album**
|
||||||
- **zoom**
|
- **zoom**
|
||||||
- **Zype**
|
- **Zype**
|
||||||
|
- **generic**: Generic downloader that works on some sites
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
from .compat import compat_getpass, compat_os_name, compat_shlex_quote
|
from .compat import compat_getpass, compat_os_name, compat_shlex_quote
|
||||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
||||||
from .downloader import FileDownloader
|
from .downloader import FileDownloader
|
||||||
from .extractor import list_extractors
|
from .extractor import GenericIE, list_extractor_classes
|
||||||
from .extractor.adobepass import MSO_INFO
|
from .extractor.adobepass import MSO_INFO
|
||||||
from .extractor.common import InfoExtractor
|
from .extractor.common import InfoExtractor
|
||||||
from .options import parseOpts
|
from .options import parseOpts
|
||||||
@ -76,14 +76,20 @@ def get_urls(urls, batchfile, verbose):
|
|||||||
def print_extractor_information(opts, urls):
|
def print_extractor_information(opts, urls):
|
||||||
out = ''
|
out = ''
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
for ie in list_extractors(opts.age_limit):
|
urls = dict.fromkeys(urls, False)
|
||||||
|
for ie in list_extractor_classes(opts.age_limit):
|
||||||
out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n'
|
out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n'
|
||||||
out += ''.join(f' {url}\n' for url in filter(ie.suitable, urls))
|
if ie == GenericIE:
|
||||||
|
matched_urls = [url for url, matched in urls.items() if not matched]
|
||||||
|
else:
|
||||||
|
matched_urls = tuple(filter(ie.suitable, urls.keys()))
|
||||||
|
urls.update(dict.fromkeys(matched_urls, True))
|
||||||
|
out += ''.join(f' {url}\n' for url in matched_urls)
|
||||||
elif opts.list_extractor_descriptions:
|
elif opts.list_extractor_descriptions:
|
||||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||||
out = '\n'.join(
|
out = '\n'.join(
|
||||||
ie.description(markdown=False, search_examples=_SEARCHES)
|
ie.description(markdown=False, search_examples=_SEARCHES)
|
||||||
for ie in list_extractors(opts.age_limit) if ie.working() and ie.IE_DESC is not False) + '\n'
|
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
|
||||||
elif opts.ap_list_mso:
|
elif opts.ap_list_mso:
|
||||||
out = 'Supported TV Providers:\n%s\n' % render_table(
|
out = 'Supported TV Providers:\n%s\n' % render_table(
|
||||||
['mso', 'mso name'],
|
['mso', 'mso name'],
|
||||||
@ -862,7 +868,7 @@ def main(argv=None):
|
|||||||
sys.exit(f'\nERROR: {e}')
|
sys.exit(f'\nERROR: {e}')
|
||||||
|
|
||||||
|
|
||||||
from .extractor import gen_extractors
|
from .extractor import gen_extractors, list_extractors
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'main',
|
'main',
|
||||||
'YoutubeDL',
|
'YoutubeDL',
|
||||||
|
@ -37,11 +37,17 @@ def gen_extractors():
|
|||||||
return [klass() for klass in gen_extractor_classes()]
|
return [klass() for klass in gen_extractor_classes()]
|
||||||
|
|
||||||
|
|
||||||
def list_extractors(age_limit):
|
def list_extractor_classes(age_limit=None):
|
||||||
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
|
"""Yield the extractor classes that are suitable for the given age limit, sorted by extractor name (GenericIE is always yielded last)"""
|
||||||
return sorted(filter(
|
yield from sorted(filter(
|
||||||
lambda ie: ie.is_suitable(age_limit),
|
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405
|
||||||
gen_extractors()), key=lambda ie: ie.IE_NAME.lower())
|
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
|
||||||
|
yield GenericIE # noqa: F405
|
||||||
|
|
||||||
|
|
||||||
|
def list_extractors(age_limit=None):
|
||||||
|
"""Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
|
||||||
|
return [ie() for ie in list_extractor_classes(age_limit)]
|
||||||
|
|
||||||
|
|
||||||
def get_info_extractor(ie_name):
|
def get_info_extractor(ie_name):
|
||||||
|
@ -40,6 +40,7 @@
|
|||||||
age_restricted,
|
age_restricted,
|
||||||
base_url,
|
base_url,
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
|
classproperty,
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
@ -710,9 +711,9 @@ def ie_key(cls):
|
|||||||
"""A string for getting the InfoExtractor with get_info_extractor"""
|
"""A string for getting the InfoExtractor with get_info_extractor"""
|
||||||
return cls.__name__[:-2]
|
return cls.__name__[:-2]
|
||||||
|
|
||||||
@property
|
@classproperty
|
||||||
def IE_NAME(self):
|
def IE_NAME(cls):
|
||||||
return type(self).__name__[:-2]
|
return cls.__name__[:-2]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __can_accept_status_code(err, expected_status):
|
def __can_accept_status_code(err, expected_status):
|
||||||
@ -3624,56 +3625,57 @@ def _apply_first_set_cookie_header(self, url_handle, cookie):
|
|||||||
self._set_cookie(domain, cookie, value)
|
self._set_cookie(domain, cookie, value)
|
||||||
break
|
break
|
||||||
|
|
||||||
def get_testcases(self, include_onlymatching=False):
|
@classmethod
|
||||||
t = getattr(self, '_TEST', None)
|
def get_testcases(cls, include_onlymatching=False):
|
||||||
|
t = getattr(cls, '_TEST', None)
|
||||||
if t:
|
if t:
|
||||||
assert not hasattr(self, '_TESTS'), \
|
assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
|
||||||
'%s has _TEST and _TESTS' % type(self).__name__
|
|
||||||
tests = [t]
|
tests = [t]
|
||||||
else:
|
else:
|
||||||
tests = getattr(self, '_TESTS', [])
|
tests = getattr(cls, '_TESTS', [])
|
||||||
for t in tests:
|
for t in tests:
|
||||||
if not include_onlymatching and t.get('only_matching', False):
|
if not include_onlymatching and t.get('only_matching', False):
|
||||||
continue
|
continue
|
||||||
t['name'] = type(self).__name__[:-len('IE')]
|
t['name'] = cls.ie_key()
|
||||||
yield t
|
yield t
|
||||||
|
|
||||||
def is_suitable(self, age_limit):
|
@classmethod
|
||||||
|
def is_suitable(cls, age_limit):
|
||||||
""" Test whether the extractor is generally suitable for the given
|
""" Test whether the extractor is generally suitable for the given
|
||||||
age limit (i.e. pornographic sites are not, all others usually are) """
|
age limit (i.e. pornographic sites are not, all others usually are) """
|
||||||
|
|
||||||
any_restricted = False
|
any_restricted = False
|
||||||
for tc in self.get_testcases(include_onlymatching=False):
|
for tc in cls.get_testcases(include_onlymatching=False):
|
||||||
if tc.get('playlist', []):
|
if tc.get('playlist', []):
|
||||||
tc = tc['playlist'][0]
|
tc = tc['playlist'][0]
|
||||||
is_restricted = age_restricted(
|
is_restricted = age_restricted(tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
|
||||||
if not is_restricted:
|
if not is_restricted:
|
||||||
return True
|
return True
|
||||||
any_restricted = any_restricted or is_restricted
|
any_restricted = any_restricted or is_restricted
|
||||||
return not any_restricted
|
return not any_restricted
|
||||||
|
|
||||||
def description(self, *, markdown=True, search_examples=None):
|
@classmethod
|
||||||
|
def description(cls, *, markdown=True, search_examples=None):
|
||||||
"""Description of the extractor"""
|
"""Description of the extractor"""
|
||||||
desc = ''
|
desc = ''
|
||||||
if self._NETRC_MACHINE:
|
if cls._NETRC_MACHINE:
|
||||||
if markdown:
|
if markdown:
|
||||||
desc += f' [<abbr title="netrc machine"><em>{self._NETRC_MACHINE}</em></abbr>]'
|
desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
|
||||||
else:
|
else:
|
||||||
desc += f' [{self._NETRC_MACHINE}]'
|
desc += f' [{cls._NETRC_MACHINE}]'
|
||||||
if self.IE_DESC is False:
|
if cls.IE_DESC is False:
|
||||||
desc += ' [HIDDEN]'
|
desc += ' [HIDDEN]'
|
||||||
elif self.IE_DESC:
|
elif cls.IE_DESC:
|
||||||
desc += f' {self.IE_DESC}'
|
desc += f' {cls.IE_DESC}'
|
||||||
if self.SEARCH_KEY:
|
if cls.SEARCH_KEY:
|
||||||
desc += f'; "{self.SEARCH_KEY}:" prefix'
|
desc += f'; "{cls.SEARCH_KEY}:" prefix'
|
||||||
if search_examples:
|
if search_examples:
|
||||||
_COUNTS = ('', '5', '10', 'all')
|
_COUNTS = ('', '5', '10', 'all')
|
||||||
desc += f' (Example: "{self.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
|
desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
|
||||||
if not self.working():
|
if not cls.working():
|
||||||
desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
|
desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
|
||||||
|
|
||||||
name = f' - **{self.IE_NAME}**' if markdown else self.IE_NAME
|
name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME
|
||||||
return f'{name}:{desc}' if desc else name
|
return f'{name}:{desc}' if desc else name
|
||||||
|
|
||||||
def extract_subtitles(self, *args, **kwargs):
|
def extract_subtitles(self, *args, **kwargs):
|
||||||
@ -3849,6 +3851,6 @@ def _search_results(self, query):
|
|||||||
"""Returns an iterator of search results"""
|
"""Returns an iterator of search results"""
|
||||||
raise NotImplementedError('This method must be implemented by subclasses')
|
raise NotImplementedError('This method must be implemented by subclasses')
|
||||||
|
|
||||||
@property
|
@classproperty
|
||||||
def SEARCH_KEY(self):
|
def SEARCH_KEY(cls):
|
||||||
return self._SEARCH_KEY
|
return cls._SEARCH_KEY
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DRTVIE(InfoExtractor):
|
class DRTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
|
@ -8,55 +8,36 @@ class TestURLIE(InfoExtractor):
|
|||||||
""" Allows addressing of the test cases as test:yout.*be_1 """
|
""" Allows addressing of the test cases as test:yout.*be_1 """
|
||||||
|
|
||||||
IE_DESC = False # Do not list
|
IE_DESC = False # Do not list
|
||||||
_VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
|
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
from ..extractor import gen_extractors
|
from ..extractor import gen_extractor_classes
|
||||||
|
|
||||||
mobj = self._match_valid_url(url)
|
extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
|
||||||
video_id = mobj.group('id')
|
|
||||||
extractor_id = mobj.group('extractor')
|
|
||||||
all_extractors = gen_extractors()
|
|
||||||
|
|
||||||
rex = re.compile(extractor_id, flags=re.IGNORECASE)
|
rex = re.compile(extractor_id, flags=re.IGNORECASE)
|
||||||
matching_extractors = [
|
matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
|
||||||
e for e in all_extractors if rex.search(e.IE_NAME)]
|
|
||||||
|
|
||||||
if len(matching_extractors) == 0:
|
if len(matching_extractors) == 0:
|
||||||
raise ExtractorError(
|
raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
|
||||||
'No extractors matching %r found' % extractor_id,
|
|
||||||
expected=True)
|
|
||||||
elif len(matching_extractors) > 1:
|
elif len(matching_extractors) > 1:
|
||||||
# Is it obvious which one to pick?
|
try: # Check for exact match
|
||||||
try:
|
|
||||||
extractor = next(
|
extractor = next(
|
||||||
ie for ie in matching_extractors
|
ie for ie in matching_extractors
|
||||||
if ie.IE_NAME.lower() == extractor_id.lower())
|
if ie.IE_NAME.lower() == extractor_id.lower())
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
('Found multiple matching extractors: %s' %
|
'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
|
||||||
' '.join(ie.IE_NAME for ie in matching_extractors)),
|
|
||||||
expected=True)
|
expected=True)
|
||||||
else:
|
else:
|
||||||
extractor = matching_extractors[0]
|
extractor = matching_extractors[0]
|
||||||
|
|
||||||
num_str = mobj.group('num')
|
testcases = tuple(extractor.get_testcases(True))
|
||||||
num = int(num_str) if num_str else 0
|
|
||||||
|
|
||||||
testcases = []
|
|
||||||
t = getattr(extractor, '_TEST', None)
|
|
||||||
if t:
|
|
||||||
testcases.append(t)
|
|
||||||
testcases.extend(getattr(extractor, '_TESTS', []))
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tc = testcases[num]
|
tc = testcases[int(num or 0)]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
('Test case %d not found, got only %d tests' %
|
f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
|
||||||
(num, len(testcases))),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
self.to_screen('Test URL: %s' % tc['url'])
|
self.to_screen(f'Test URL: {tc["url"]}')
|
||||||
|
return self.url_result(tc['url'])
|
||||||
return self.url_result(tc['url'], video_id=video_id)
|
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
|
classproperty,
|
||||||
clean_html,
|
clean_html,
|
||||||
datetime_from_str,
|
datetime_from_str,
|
||||||
dict_get,
|
dict_get,
|
||||||
@ -5781,16 +5782,17 @@ def _real_extract(self, url):
|
|||||||
class YoutubeFeedsInfoExtractor(InfoExtractor):
|
class YoutubeFeedsInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
Base class for feed extractors
|
Base class for feed extractors
|
||||||
Subclasses must define the _FEED_NAME property.
|
Subclasses must re-define the _FEED_NAME property.
|
||||||
"""
|
"""
|
||||||
_LOGIN_REQUIRED = True
|
_LOGIN_REQUIRED = True
|
||||||
|
_FEED_NAME = 'feeds'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
YoutubeBaseInfoExtractor._check_login_required(self)
|
YoutubeBaseInfoExtractor._check_login_required(self)
|
||||||
|
|
||||||
@property
|
@classproperty
|
||||||
def IE_NAME(self):
|
def IE_NAME(self):
|
||||||
return 'youtube:%s' % self._FEED_NAME
|
return f'youtube:{self._FEED_NAME}'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
|
@ -5321,6 +5321,7 @@ def merge_headers(*dicts):
|
|||||||
|
|
||||||
class classproperty:
|
class classproperty:
|
||||||
def __init__(self, f):
|
def __init__(self, f):
|
||||||
|
functools.update_wrapper(self, f)
|
||||||
self.f = f
|
self.f = f
|
||||||
|
|
||||||
def __get__(self, _, cls):
|
def __get__(self, _, cls):
|
||||||
|
Loading…
Reference in New Issue
Block a user