From b901e4fb8a612783cfa72bf54788eec0208bc46a Mon Sep 17 00:00:00 2001 From: Lucas Rademaker <44430780+lr4d@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:39:47 +0545 Subject: [PATCH] [Zoom] add interpreter audio formats Use the options `-f "best+mergeall[vcodec=none]" --audio-multistreams` to merge all interpreter tracks alongside the main video --- yt_dlp/extractor/zoom.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index fe2db846ad..a7cc8dfefc 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -86,7 +86,6 @@ def _get_real_webpage(self, url, base_url, video_id, url_type): def _real_extract(self, url): base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id') - query = {} if url_type == 'share': webpage = self._get_real_webpage(url, base_url, video_id, 'share') @@ -95,7 +94,6 @@ def _real_extract(self, url): f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}', video_id, note='Downloading share info JSON')['result']['redirectUrl'] url = urljoin(base_url, redirect_path) - query['continueMode'] = 'true' webpage = self._get_real_webpage(url, base_url, video_id, 'play') file_id = self._get_page_data(webpage, video_id)['fileId'] @@ -104,10 +102,13 @@ def _real_extract(self, url): raise ExtractorError('Unable to extract file ID') data = self._download_json( - f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query, + f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query={ + 'continueMode': 'true', # Makes this return value include interpreter audio information + }, note='Downloading play info JSON')['result'] subtitles = {} + # XXX: Would be more appropriate to parse chapters separate from subtitles for _type in ('transcript', 'cc', 'chapter'): if data.get(f'{_type}Url'): subtitles[_type] = [{ @@ -117,6 +118,19 @@ def _real_extract(self, url): formats = [] + if 
data.get('interpreterAudioList'): + for audio in data.get('interpreterAudioList'): + formats.append({ + 'format_note': f'Intepreter: {audio["languageText"]}', + 'url': audio['audioUrl'], + 'format_id': f'interpreter-{ audio["icon"].lower()}', + 'ext': 'm4a', + # There doesn't seem to be an explicit field for a standardized language code, + # sometimes the `language` field may be more accurate than `icon` + 'language': audio['icon'].lower(), + 'vcodec': 'none', + }) + if data.get('viewMp4Url'): formats.append({ 'format_note': 'Camera stream',