import base64
import calendar
import collections
import copy
import datetime as dt
import enum
import functools
import hashlib
import itertools
import json
import math
import os.path
import random
import re
import shlex
import sys
import threading
import time
import traceback
import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper
from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, TransportError, network_exceptions
from ..utils import (
    NO_DEFAULT,
    ExtractorError,
    LazyList,
    UserNotLive,
    bug_reports_message,
    classproperty,
    clean_html,
    datetime_from_str,
    dict_get,
    filesize_from_tbr,
    filter_dict,
    float_or_none,
    format_field,
    get_first,
    int_or_none,
    is_html,
    join_nonempty,
    js_to_json,
    mimetype2ext,
    orderedSet,
    parse_codecs,
    parse_count,
    parse_duration,
    parse_iso8601,
    parse_qs,
    qualities,
    remove_start,
    smuggle_url,
    str_or_none,
    str_to_int,
    strftime_or_none,
    time_seconds,
    traverse_obj,
    try_call,
    try_get,
    unescapeHTML,
    unified_strdate,
    unified_timestamp,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urljoin,
    variadic,
)

STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
        'REQUIRE_PO_TOKEN': True,
    },
    # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
    'web_safari': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
        'REQUIRE_PO_TOKEN': True,
    },
    'web_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20240723.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20240724.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    # This client now requires sign-in for every video
    'web_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20240723.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
        'REQUIRE_AUTH': True,
    },
    'android': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.44.38',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.44.38 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_PO_TOKEN': True,
    },
    # This client now requires sign-in for every video
    'android_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '7.27.52',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/7.27.52 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_PO_TOKEN': True,
        'REQUIRE_AUTH': True,
    },
    # This client now requires sign-in for every video
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '24.45.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/24.45.100 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_PO_TOKEN': True,
        'REQUIRE_AUTH': True,
    },
    # YouTube Kids videos aren't returned on this client for some reason
    'android_vr': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_VR',
                'clientVersion': '1.60.19',
                'deviceMake': 'Oculus',
                'deviceModel': 'Quest 3',
                'androidSdkVersion': 32,
                'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
                'osName': 'Android',
                'osVersion': '12L',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.45.4',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.1.0.22B83',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    # This client now requires sign-in for every video
    'ios_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '7.27.0',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtubemusic/7.27.0 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.1.0.22B83',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_AUTH': True,
    },
    # This client now requires sign-in for every video
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '24.45.100',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.ytcreator/24.45.100 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.1.0.22B83',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_AUTH': True,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20240726.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    'tv': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '7.20240724.13.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
    },
    # This client now requires sign-in for every video
    # It was previously an age-gate workaround for videos that were `playable_in_embed`
    # It may still be useful if signed into an EU account that is not age-verified
    'tv_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
        'REQUIRE_AUTH': True,
    },
    # This client now requires sign-in for every video
    # It may be able to receive pre-merged video+audio 720p/1080p streams
    'mediaconnect': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MEDIA_CONNECT_FRONTEND',
                'clientVersion': '0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_AUTH': True,
    },
}


def _split_innertube_client(client_name):
    """Split a client name into ``(lookup_name, base, variant)``.

    Two spellings are recognised:

    * ``'<variant>.<base>'`` (split at the last dot) yields
      ``(variant, base, variant)``.
    * ``'<base>_<variant>'`` (split at the first underscore) yields
      ``(client_name, base, variant)``; ``variant`` is ``None`` when the name
      contains no underscore.
    """
    dotted_variant, dot, dotted_base = client_name.rpartition('.')
    if dot:
        return dotted_variant, dotted_base, dotted_variant

    base, underscore, variant = client_name.partition('_')
    return client_name, base, (variant if underscore else None)


def short_client_name(client_name):
    """Return an upper-cased abbreviation of ``client_name``.

    The abbreviation is built from the first four characters of the leading
    ``_``-separated segment and the first character of every following
    segment, combined through ``join_nonempty``.
    """
    segments = _split_innertube_client(client_name)[0].split('_')
    initials = ''.join(segment[0] for segment in segments[1:])
    return join_nonempty(segments[0][:4], initials).upper()


def build_innertube_clients():
    """Fill in defaults and a ``priority`` for every entry of INNERTUBE_CLIENTS.

    The entries are modified in place: missing top-level keys receive their
    default, the client context gets ``hl='en'`` if unset, and ``priority`` is
    computed from the base client's rank with a deduction for variants
    (2 for ``embedded``, 3 for any other variant).
    """
    # The same dict object is attached to every embedded client
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'web', 'tv', 'mweb', 'android')
    rank_of = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
        ('REQUIRE_PO_TOKEN', False),
        ('REQUIRE_AUTH', False),
        ('PLAYER_PARAMS', None),
    )

    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            config.setdefault(key, value)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        score = 10 * rank_of(base_client)
        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            score -= 2
        elif variant:
            score -= 3
        config['priority'] = score
build_innertube_clients() class BadgeType(enum.Enum): AVAILABILITY_UNLISTED = enum.auto() AVAILABILITY_PRIVATE = enum.auto() AVAILABILITY_PUBLIC = enum.auto() AVAILABILITY_PREMIUM = enum.auto() AVAILABILITY_SUBSCRIPTION = enum.auto() LIVE_NOW = enum.auto() VERIFIED = enum.auto() class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _RESERVED_NAMES = ( r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|' r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|' r'browse|oembed|get_video_info|iframe_api|s/player|source|' r'storefront|oops|index|account|t/terms|about|upload|signin|logout') _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' # _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False _INVIDIOUS_SITES = ( # invidious-redirect websites r'(?:www\.)?redirect\.invidious\.io', r'(?:(?:www|dev)\.)?invidio\.us', # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md r'(?:www\.)?invidious\.pussthecat\.org', r'(?:www\.)?invidious\.zee\.li', r'(?:www\.)?invidious\.ethibox\.fr', r'(?:www\.)?iv\.ggtyler\.dev', r'(?:www\.)?inv\.vern\.i2p', r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion', r'(?:www\.)?inv\.riverside\.rocks', r'(?:www\.)?invidious\.silur\.me', r'(?:www\.)?inv\.bp\.projectsegfau\.lt', r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion', r'(?:www\.)?invidious\.slipfox\.xyz', r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion', r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion', r'(?:www\.)?invidious\.tiekoetter\.com', r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion', r'(?:www\.)?invidious\.nerdvpn\.de', r'(?:www\.)?invidious\.weblibre\.org', 
r'(?:www\.)?inv\.odyssey346\.dev', r'(?:www\.)?invidious\.dhusch\.de', r'(?:www\.)?iv\.melmac\.space', r'(?:www\.)?watch\.thekitty\.zone', r'(?:www\.)?invidious\.privacydev\.net', r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion', r'(?:www\.)?invidious\.drivet\.xyz', r'(?:www\.)?vid\.priv\.au', r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion', r'(?:www\.)?inv\.vern\.cc', r'(?:www\.)?invidious\.esmailelbob\.xyz', r'(?:www\.)?invidious\.sethforprivacy\.com', r'(?:www\.)?yt\.oelrichsgarcia\.de', r'(?:www\.)?yt\.artemislena\.eu', r'(?:www\.)?invidious\.flokinet\.to', r'(?:www\.)?invidious\.baczek\.me', r'(?:www\.)?y\.com\.sb', r'(?:www\.)?invidious\.epicsite\.xyz', r'(?:www\.)?invidious\.lidarshield\.cloud', r'(?:www\.)?yt\.funami\.tech', r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion', r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion', r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion', # youtube-dl invidious instances list r'(?:(?:www|no)\.)?invidiou\.sh', r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', r'(?:www\.)?invidious\.kabi\.tk', r'(?:www\.)?invidious\.mastodon\.host', r'(?:www\.)?invidious\.zapashcanon\.fr', r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks', r'(?:www\.)?invidious\.tinfoil-hat\.net', r'(?:www\.)?invidious\.himiko\.cloud', r'(?:www\.)?invidious\.reallyancient\.tech', r'(?:www\.)?invidious\.tube', r'(?:www\.)?invidiou\.site', r'(?:www\.)?invidious\.site', r'(?:www\.)?invidious\.xyz', r'(?:www\.)?invidious\.nixnet\.xyz', r'(?:www\.)?invidious\.048596\.xyz', r'(?:www\.)?invidious\.drycat\.fr', r'(?:www\.)?inv\.skyn3t\.in', r'(?:www\.)?tube\.poal\.co', r'(?:www\.)?tube\.connect\.cafe', r'(?:www\.)?vid\.wxzm\.sx', r'(?:www\.)?vid\.mint\.lgbt', r'(?:www\.)?vid\.puffyan\.us', r'(?:www\.)?yewtu\.be', r'(?:www\.)?yt\.elukerio\.org', r'(?:www\.)?yt\.lelux\.fi', r'(?:www\.)?invidious\.ggc-project\.de', 
r'(?:www\.)?yt\.maisputain\.ovh', r'(?:www\.)?ytprivate\.com', r'(?:www\.)?invidious\.13ad\.de', r'(?:www\.)?invidious\.toot\.koeln', r'(?:www\.)?invidious\.fdn\.fr', r'(?:www\.)?watch\.nettohikari\.com', r'(?:www\.)?invidious\.namazso\.eu', r'(?:www\.)?invidious\.silkky\.cloud', r'(?:www\.)?invidious\.exonip\.de', r'(?:www\.)?invidious\.riverside\.rocks', r'(?:www\.)?invidious\.blamefran\.net', r'(?:www\.)?invidious\.moomoo\.de', r'(?:www\.)?ytb\.trom\.tf', r'(?:www\.)?yt\.cyberhost\.uk', r'(?:www\.)?kgg2m7yk5aybusll\.onion', r'(?:www\.)?qklhadlycap4cnod\.onion', r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion', r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion', r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances r'(?:www\.)?piped\.kavin\.rocks', r'(?:www\.)?piped\.tokhmi\.xyz', r'(?:www\.)?piped\.syncpundit\.io', r'(?:www\.)?piped\.mha\.fi', r'(?:www\.)?watch\.whatever\.social', r'(?:www\.)?piped\.garudalinux\.org', r'(?:www\.)?piped\.rivo\.lol', r'(?:www\.)?piped-libre\.kavin\.rocks', r'(?:www\.)?yt\.jae\.fi', r'(?:www\.)?piped\.mint\.lgbt', r'(?:www\.)?il\.ax', r'(?:www\.)?piped\.esmailelbob\.xyz', r'(?:www\.)?piped\.projectsegfau\.lt', r'(?:www\.)?piped\.privacydev\.net', r'(?:www\.)?piped\.palveluntarjoaja\.eu', r'(?:www\.)?piped\.smnz\.de', 
r'(?:www\.)?piped\.adminforge\.de', r'(?:www\.)?watch\.whatevertinfoil\.de', r'(?:www\.)?piped\.qdi\.fi', r'(?:(?:www|cf)\.)?piped\.video', r'(?:www\.)?piped\.aeong\.one', r'(?:www\.)?piped\.moomoo\.me', r'(?:www\.)?piped\.chauvet\.pro', r'(?:www\.)?watch\.leptons\.xyz', r'(?:www\.)?pd\.vern\.cc', r'(?:www\.)?piped\.hostux\.net', r'(?:www\.)?piped\.lunar\.icu', # Hyperpipe instances from https://hyperpipe.codeberg.page/ r'(?:www\.)?hyperpipe\.surge\.sh', r'(?:www\.)?hyperpipe\.esmailelbob\.xyz', r'(?:www\.)?listen\.whatever\.social', r'(?:www\.)?music\.adminforge\.de', ) # extracted from account/account_menu ep # XXX: These are the supported YouTube UI and API languages, # which is slightly different from languages supported for translation in YouTube studio _SUPPORTED_LANG_CODES = [ 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es', 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv', 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi', 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw', 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml', 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', ] _IGNORED_WARNINGS = { 'Unavailable videos will be hidden during playback', 'Unavailable videos are hidden', } _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' _NETRC_MACHINE = 'youtube' def ucid_or_none(self, ucid): return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) def handle_or_none(self, handle): return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None) def handle_from_url(self, url): return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})', url, 'channel handle', 
default=None) def ucid_from_url(self, url): return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})', url, 'channel id', default=None) @functools.cached_property def _preferred_lang(self): """ Returns a language code supported by YouTube for the user preferred language. Returns None if no preferred language set. """ preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0] if not preferred_lang: return if preferred_lang not in self._SUPPORTED_LANG_CODES: raise ExtractorError( f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.', expected=True) elif preferred_lang != 'en': self.report_warning( f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.') return preferred_lang def _initialize_consent(self): cookies = self._get_cookies('https://www.youtube.com/') if cookies.get('__Secure-3PSID'): return socs = cookies.get('SOCS') if socs and not socs.value.startswith('CAA'): # not consented return self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes) def _initialize_pref(self): cookies = self._get_cookies('https://www.youtube.com/') pref_cookie = cookies.get('PREF') pref = {} if pref_cookie: try: pref = dict(urllib.parse.parse_qsl(pref_cookie.value)) except ValueError: self.report_warning('Failed to parse user PREF cookie' + bug_reports_message()) pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'}) self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref)) def _real_initialize(self): self._initialize_pref() self._initialize_consent() self._check_login_required() def _perform_login(self, username, password): auth_type, _, user = (username or '').partition('+') if auth_type != 'oauth': raise ExtractorError(self._youtube_login_hint, expected=True) 
self._initialize_oauth(user, password) ''' OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv). For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see: - https://developers.google.com/identity/protocols/oauth2/limited-input-device - https://accounts.google.com/.well-known/openid-configuration - https://www.rfc-editor.org/rfc/rfc8628 - https://www.rfc-editor.org/rfc/rfc6749 Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2, which applies some modifications to the response (such as returning errors as 200 OK). Since the client works with the standard API, we will use that as it is well-documented. ''' _OAUTH_PROFILE = None _OAUTH_ACCESS_TOKEN_CACHE = {} _OAUTH_DISPLAY_ID = 'oauth' # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv _OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com' _OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT' _OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content' # From https://accounts.google.com/.well-known/openid-configuration # Technically, these should be fetched dynamically and not hard-coded. # However, as these endpoints rarely change, we can risk saving an extra request for every invocation. 
_OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code' _OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token' @property def _oauth_cache_key(self): return f'oauth_refresh_token_{self._OAUTH_PROFILE}' def _read_oauth_error_response(self, response): return traverse_obj( self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False), ({json.loads}, 'error', {str})) def _set_oauth_info(self, token_response): YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({ 'access_token': token_response['access_token'], 'token_type': token_response['token_type'], 'expiry': time_seconds( seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10), }) refresh_token = traverse_obj(token_response, ('refresh_token', {str})) if refresh_token: self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token def _initialize_oauth(self, user, refresh_token): self._OAUTH_PROFILE = user or 'default' if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE: self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"') return YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {} if refresh_token: msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token' if self.get_param('cachedir') is not False: msg += ' and caching token to disk; you should supply an empty password next time' self.to_screen(msg) self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) else: refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key) if refresh_token: YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token try: token_response = self._refresh_token(refresh_token) 
except ExtractorError as e: error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '') self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}') token_response = self._oauth_authorize else: token_response = self._oauth_authorize self._set_oauth_info(token_response) self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"') def _refresh_token(self, refresh_token): try: token_response = self._download_json( self._OAUTH_TOKEN_ENDPOINT, video_id=self._OAUTH_DISPLAY_ID, note='Refreshing access token', data=json.dumps({ 'client_id': self._OAUTH_CLIENT_ID, 'client_secret': self._OAUTH_CLIENT_SECRET, 'refresh_token': refresh_token, 'grant_type': 'refresh_token', }).encode(), headers={'Content-Type': 'application/json'}) except ExtractorError as e: if isinstance(e.cause, HTTPError): error = self._read_oauth_error_response(e.cause.response) if error == 'invalid_grant': # RFC6749 § 5.2 raise ExtractorError( 'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)', expected=True, video_id=self._OAUTH_DISPLAY_ID) raise ExtractorError( f'Failed to refresh access token: Authorization server returned error {error}', video_id=self._OAUTH_DISPLAY_ID) raise return token_response @property def _oauth_authorize(self): code_response = self._download_json( self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT, video_id=self._OAUTH_DISPLAY_ID, note='Initializing authorization flow', data=json.dumps({ 'client_id': self._OAUTH_CLIENT_ID, 'scope': self._OAUTH_SCOPE, }).encode(), headers={'Content-Type': 'application/json'}) verification_url = traverse_obj(code_response, ('verification_url', {str})) user_code = traverse_obj(code_response, ('user_code', {str})) if not verification_url or not user_code: raise ExtractorError( 'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID) # note: The whitespace is intentional self.to_screen( 
f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, ' f'go to {verification_url} and enter code {user_code}') # RFC8628 § 3.5: default poll interval is 5 seconds if not provided poll_interval = traverse_obj(code_response, ('interval', {int}), default=5) for retry in self.RetryManager(): while True: try: token_response = self._download_json( self._OAUTH_TOKEN_ENDPOINT, video_id=self._OAUTH_DISPLAY_ID, note=False, errnote='Failed to request access token', data=json.dumps({ 'client_id': self._OAUTH_CLIENT_ID, 'client_secret': self._OAUTH_CLIENT_SECRET, 'device_code': code_response['device_code'], 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code', }).encode(), headers={'Content-Type': 'application/json'}) except ExtractorError as e: if isinstance(e.cause, TransportError): retry.error = e break elif isinstance(e.cause, HTTPError): error = self._read_oauth_error_response(e.cause.response) if not error: retry.error = e break if error == 'authorization_pending': time.sleep(poll_interval) continue elif error == 'expired_token': raise ExtractorError( 'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID) elif error == 'access_denied': raise ExtractorError( 'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID) elif error == 'slow_down': # RFC8628 § 3.5: add 5 seconds to the poll interval poll_interval += 5 time.sleep(poll_interval) continue else: raise ExtractorError( f'Authorization server returned an error when fetching access token: {error}', video_id=self._OAUTH_DISPLAY_ID) raise return token_response def _update_oauth(self): token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) if token is None or token['expiry'] > time.time(): return self._set_oauth_info(self._refresh_token(token['refresh_token'])) @property def _youtube_login_hint(self): return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, ' f'or else 
u{self._login_hint(method="cookies")[1:]}. ' 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ' 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies') def _check_login_required(self): if self._LOGIN_REQUIRED and not self.is_authenticated: self.raise_login_required( f'Login details are needed to download this content. {self._youtube_login_hint}', method=None) _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' def _get_default_ytcfg(self, client='web'): return copy.deepcopy(INNERTUBE_CLIENTS[client]) def _get_innertube_host(self, client='web'): return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST'] def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'): # try_get but with fallback to default ytcfg client values when present _func = lambda y: try_get(y, getter, expected_type) return _func(ytcfg) or _func(self._get_default_ytcfg(default_client)) def _extract_client_name(self, ytcfg, default_client='web'): return self._ytcfg_get_safe( ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'], lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client) def _extract_client_version(self, ytcfg, default_client='web'): return self._ytcfg_get_safe( ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client) def _select_api_hostname(self, req_api_hostname, default_client=None): return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0] or req_api_hostname or self._get_innertube_host(default_client or 'web')) def _extract_context(self, ytcfg=None, default_client='web'): context = get_first( (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) # Enforce language and tz for extraction client_context = 
traverse_obj(context, 'client', expected_type=dict, default={}) client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}) return context _SAPISID = None def _generate_sapisidhash_header(self, origin='https://www.youtube.com'): time_now = round(time.time()) if self._SAPISID is None: yt_cookies = self._get_cookies('https://www.youtube.com') # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is. # See: https://github.com/yt-dlp/yt-dlp/issues/393 sapisid_cookie = dict_get( yt_cookies, ('__Secure-3PAPISID', 'SAPISID')) if sapisid_cookie and sapisid_cookie.value: self._SAPISID = sapisid_cookie.value self.write_debug('Extracted SAPISID cookie') # SAPISID cookie is required if not already present if not yt_cookies.get('SAPISID'): self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie') self._set_cookie( '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600) else: self._SAPISID = False if not self._SAPISID: return None # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323 sapisidhash = hashlib.sha1( f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest() return f'SAPISIDHASH {time_now}_{sapisidhash}' def _call_api(self, ep, query, video_id, fatal=True, headers=None, note='Downloading API JSON', errnote='Unable to download API page', context=None, api_key=None, api_hostname=None, default_client='web'): data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)} data.update(query) real_headers = self.generate_api_headers(default_client=default_client) real_headers.update({'content-type': 'application/json'}) if headers: real_headers.update(headers) return self._download_json( f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, query=filter_dict({ 'key': 
self._configuration_arg( 'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0], 'prettyPrint': 'false', }, cndn=lambda _, v: v)) def extract_yt_initial_data(self, item_id, webpage, fatal=True): return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) @staticmethod def _extract_session_index(*data): """ Index of current account in account list. See: https://github.com/yt-dlp/yt-dlp/pull/519 """ for ytcfg in data: session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) if session_index is not None: return session_index def _data_sync_id_to_delegated_session_id(self, data_sync_id): if not data_sync_id: return # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel # and just "user_syncid||" for primary channel. We only want the channel_syncid channel_syncid, _, user_syncid = data_sync_id.partition('||') if user_syncid: return channel_syncid def _extract_account_syncid(self, *args): """ Extract current session ID required to download private playlists of secondary channels @params response and/or ytcfg """ # ytcfg includes channel_syncid if on secondary channel if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)): return delegated_sid data_sync_id = self._extract_data_sync_id(*args) return self._data_sync_id_to_delegated_session_id(data_sync_id) def _extract_data_sync_id(self, *args): """ Extract current account dataSyncId. 
In the format DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID|| @params response and/or ytcfg """ if data_sync_id := self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]: return data_sync_id return traverse_obj( args, (..., ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId')), {str}, any)) def _extract_visitor_data(self, *args): """ Extracts visitorData from an API response or ytcfg Appears to be used to track session state """ if visitor_data := self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]: return visitor_data return get_first( args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))], expected_type=str) @functools.cached_property def is_authenticated(self): return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) def extract_ytcfg(self, video_id, webpage): if not webpage: return {} return self._parse_json( self._search_regex( r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}'), video_id, fatal=False) or {} def _generate_oauth_headers(self): self._update_oauth() oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) if not oauth_token: return {} return { 'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}', } def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): headers = {} account_syncid = account_syncid or self._extract_account_syncid(ytcfg) if account_syncid: headers['X-Goog-PageId'] = account_syncid if session_index is None: session_index = self._extract_session_index(ytcfg) if account_syncid or session_index is not None: headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0 auth = self._generate_sapisidhash_header(origin) if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin return headers def 
generate_api_headers( self, *, ytcfg=None, account_syncid=None, session_index=None, visitor_data=None, api_hostname=None, default_client='web', **kwargs): origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) headers = { 'X-YouTube-Client-Name': str( self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), 'Origin': origin, 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), **self._generate_oauth_headers(), **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), } return filter_dict(headers) def _generate_webpage_headers(self): return self._generate_oauth_headers() def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', 'web_music': 'https://music.youtube.com', 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', }.get(client) if not url: return {} webpage = self._download_webpage( url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', headers=self._generate_webpage_headers()) return self.extract_ytcfg(video_id, webpage) or {} @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { 'continuation': continuation, } # TODO: Inconsistency with clickTrackingParams. # Currently we have a fixed ctp contained within context (from ytcfg) # and a ctp in root query for continuation. 
if ctp: query['clickTracking'] = {'clickTrackingParams': ctp} return query @classmethod def _extract_next_continuation_data(cls, renderer): next_continuation = try_get( renderer, (lambda x: x['continuations'][0]['nextContinuationData'], lambda x: x['continuation']['reloadContinuationData']), dict) if not next_continuation: return continuation = next_continuation.get('continuation') if not continuation: return ctp = next_continuation.get('clickTrackingParams') return cls._build_api_continuation_query(continuation, ctp) @classmethod def _extract_continuation_ep_data(cls, continuation_ep: dict): if isinstance(continuation_ep, dict): continuation = try_get( continuation_ep, lambda x: x['continuationCommand']['token'], str) if not continuation: return ctp = continuation_ep.get('clickTrackingParams') return cls._build_api_continuation_query(continuation, ctp) @classmethod def _extract_continuation(cls, renderer): next_continuation = cls._extract_next_continuation_data(renderer) if next_continuation: return next_continuation return traverse_obj(renderer, ( ('contents', 'items', 'rows'), ..., 'continuationItemRenderer', ('continuationEndpoint', ('button', 'buttonRenderer', 'command')), ), get_all=False, expected_type=cls._extract_continuation_ep_data) @classmethod def _extract_alerts(cls, data): for alert_dict in try_get(data, lambda x: x['alerts'], list) or []: if not isinstance(alert_dict, dict): continue for alert in alert_dict.values(): alert_type = alert.get('type') if not alert_type: continue message = cls._get_text(alert, 'text') if message: yield alert_type, message def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False): errors, warnings = [], [] for alert_type, alert_message in alerts: if alert_type.lower() == 'error' and fatal: errors.append([alert_type, alert_message]) elif alert_message not in self._IGNORED_WARNINGS: warnings.append([alert_type, alert_message]) for alert_type, alert_message in (warnings + errors[:-1]): 
self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected) def _extract_and_report_alerts(self, data, *args, **kwargs): return self._report_alerts(self._extract_alerts(data), *args, **kwargs) def _extract_badges(self, badge_list: list): """ Extract known BadgeType's from a list of badge renderers. @returns [{'type': BadgeType}] """ icon_type_map = { 'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED, 'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE, 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC, 'CHECK_CIRCLE_THICK': BadgeType.VERIFIED, 'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED, 'CHECK': BadgeType.VERIFIED, } badge_style_map = { 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION, 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM, 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW, 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED, 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED, } label_map = { 'unlisted': BadgeType.AVAILABILITY_UNLISTED, 'private': BadgeType.AVAILABILITY_PRIVATE, 'members only': BadgeType.AVAILABILITY_SUBSCRIPTION, 'live': BadgeType.LIVE_NOW, 'premium': BadgeType.AVAILABILITY_PREMIUM, 'verified': BadgeType.VERIFIED, 'official artist channel': BadgeType.VERIFIED, } badges = [] for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))): badge_type = ( icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str)) or badge_style_map.get(traverse_obj(badge, 'style')) ) if badge_type: badges.append({'type': badge_type}) continue # fallback, won't work in some languages label = traverse_obj( badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='') for match, label_badge_type in label_map.items(): if match in label.lower(): badges.append({'type': label_badge_type}) break return badges 
@staticmethod def _has_badge(badges, badge_type): return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type)) @staticmethod def _get_text(data, *path_list, max_runs=None): for path in path_list or [None]: if path is None: obj = [data] else: obj = traverse_obj(data, path, default=[]) if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)): obj = [obj] for item in obj: text = try_get(item, lambda x: x['simpleText'], str) if text: return text runs = try_get(item, lambda x: x['runs'], list) or [] if not runs and isinstance(item, list): runs = item runs = runs[:min(len(runs), max_runs or len(runs))] text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str)) if text: return text def _get_count(self, data, *path_list): count_text = self._get_text(data, *path_list) or '' count = parse_count(count_text) if count is None: count = str_to_int( self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)) return count @staticmethod def _extract_thumbnails(data, *path_list, final_key='thumbnails'): """ Extract thumbnails from thumbnails dict @param path_list: path list to level that contains 'thumbnails' key """ thumbnails = [] for path in path_list or [()]: for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue # Sometimes youtube gives a wrong thumbnail URL. See: # https://github.com/yt-dlp/yt-dlp/issues/233 # https://github.com/ytdl-org/youtube-dl/issues/28023 if 'maxresdefault' in thumbnail_url: thumbnail_url = thumbnail_url.split('?')[0] thumbnails.append({ 'url': thumbnail_url, 'height': int_or_none(thumbnail.get('height')), 'width': int_or_none(thumbnail.get('width')), }) return thumbnails @staticmethod def extract_relative_time(relative_time_text): """ Extracts a relative time from string and converts to dt object e.g. 
'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' """ # XXX: this could be moved to a general function in utils/_utils.py # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat mobj = re.search( r'(?Ptoday|yesterday|now)|(?P