From 0fe51254cb878cf5f65801e2b62424a185665639 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 29 May 2022 01:00:41 +0530
Subject: [PATCH] [extractor/youtube] Bring back `_extract_chapters_from_description`

Closes #3886
---
Review notes (everything between `---` and the diff is ignored by `git am`):
- This patch was re-wrapped from a copy that had lost its line breaks and
  indentation. The indentation of the context lines is reconstructed and
  should be checked against the tree before applying.
- The `index` line is kept from the original patch and does not reflect the
  reviewed changes below.
- A chapter at 0:00 is no longer dropped (the old `if start and ...` check
  rejected a start time of 0).
- An unknown (None) duration now yields no chapters instead of raising
  TypeError on `start < duration`.

 yt_dlp/extractor/youtube.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index e41e746489..245778dff2 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2715,6 +2715,45 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                 for contents in content_list
             ))), [])
 
+    @staticmethod
+    def _extract_chapters_from_description(description, duration):
+        """Extract chapters from timestamped lines of the video description.
+
+        A chapter line starts with a `[H:]MM:SS` timestamp which is followed
+        by whitespace and the chapter title, e.g. `1:23 - Intro`.
+        Timestamps that are not strictly increasing or that do not lie
+        within the video are skipped.
+
+        @param description  The description text (None is treated as empty)
+        @param duration     The video duration in seconds, if known
+        @returns            A list of {'start_time', 'title', 'end_time'}
+                            dicts; empty when no chapter is found or when
+                            the duration is unknown
+        """
+        if not duration:
+            # The timestamps cannot be validated without knowing the duration
+            return []
+
+        chapters = []
+        for timestamp, title in re.findall(
+                r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''):
+            start = parse_duration(timestamp)
+            if start is None or not title:
+                continue
+            # Using -1 before the first chapter allows it to start at 0:00
+            previous_start = chapters[-1]['start_time'] if chapters else -1
+            if not previous_start < start < duration:
+                continue
+            if chapters:
+                chapters[-1]['end_time'] = start
+            chapters.append({
+                'start_time': start,
+                'title': title,
+            })
+        if chapters:
+            chapters[-1]['end_time'] = duration
+        return chapters
+
     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
         chapters = []
         last_chapter = {'start_time': 0}
@@ -3668,6 +3707,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
         info['chapters'] = (
             self._extract_chapters_from_json(initial_data, duration)
             or self._extract_chapters_from_engagement_panel(initial_data, duration)
+            or self._extract_chapters_from_description(video_description, duration)
             or None)
 
         contents = traverse_obj(