diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index cdf19c4d60..94e113d442 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -131,6 +131,8 @@ def parse_qs(url): }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, }, + # ios has HLS live streams + # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680 'ios': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { @@ -162,6 +164,8 @@ def parse_qs(url): }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26 }, + # mweb has 'ultralow' formats + # See: https://github.com/yt-dlp/yt-dlp/pull/557 'mweb': { 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8', 'INNERTUBE_CONTEXT': { @@ -205,23 +209,28 @@ def build_innertube_clients(): class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" + + _RESERVED_NAMES = ( + r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|' + r'shorts|movies|results|shared|hashtag|trending|feed|feeds|' + r'browse|oembed|get_video_info|iframe_api|s/player|' + r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout') + + _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' + + _NETRC_MACHINE = 'youtube' + + # If True it will raise an error if no login info is provided + _LOGIN_REQUIRED = False + + r''' # Unused since login is broken _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' - - _RESERVED_NAMES = ( - r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|' - r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|' - r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout') - - _NETRC_MACHINE = 'youtube' - # If True it will raise an error if no login info is provided - _LOGIN_REQUIRED = False - - _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' + ''' def _login(self): """ @@ -460,10 +469,9 @@ def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web return _func(ytcfg) or _func(self._get_default_ytcfg(default_client)) def _extract_client_name(self, ytcfg, default_client='web'): - return ( - try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) - or self._ytcfg_get_safe( - ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'], compat_str, default_client)) + return self._ytcfg_get_safe( + ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'], + lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client) @staticmethod def _extract_session_index(*data): @@ -473,10 +481,9 @@ def _extract_session_index(*data): return session_index def _extract_client_version(self, ytcfg, default_client='web'): - return ( - try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) - or self._ytcfg_get_safe( - ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'], compat_str, default_client)) + return self._ytcfg_get_safe( + ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], + lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client) def _extract_api_key(self, ytcfg=None, default_client='web'): return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client) @@ -1181,7 +1188,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'Tq92D6wQ1mg', 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', - 'ext': 'mp4','upload_date': '20191227', + 'ext': 'mp4', + 'upload_date': '20191227', 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', 'uploader': 'Projekt Melody', 'description': 'md5:17eccca93a786d51bc67646756894066', @@ -2741,8 +2749,8 @@ def feed_entry(name): for f in formats: if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled f['source_preference'] = -10 - note = f.get('format_note') - f['format_note'] = f'{note} (throttled)' if note else '(throttled)' + # TODO: this method is not reliable + f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)' # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this