diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4e812af992..ee0277fd75 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3410,11 +3410,16 @@ def process_language(container, base_url, lang_code, sub_name, query): if caption_track.get('kind') != 'asr': trans_code += f'-{lang_code}' trans_name += format_field(lang_name, template=' from %s') - process_language( - automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code}) + # Add an "-orig" label to the original language so that it can be distinguished. + # The subs are returned without "-orig" as well for compatibility if lang_code == f'a-{trans_code}': process_language( - automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {'tlang': trans_code}) + automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) + # Setting tlang=lang returns damaged subtitles. + # Not using lang_code == f'a-{trans_code}' here for future-proofing + orig_lang = parse_qs(base_url).get('lang', [None])[-1] + process_language(automatic_captions, base_url, trans_code, trans_name, + {} if orig_lang == trans_code else {'tlang': trans_code}) info['automatic_captions'] = automatic_captions info['subtitles'] = subtitles