From 85553414ae3007fe866b307b3befd3b9d2423679 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 2 Feb 2022 07:28:01 +0530
Subject: [PATCH] [generic] Allow further processing of json_ld URL

Closes #2578
---
 yt_dlp/extractor/common.py  | 2 +-
 yt_dlp/extractor/generic.py | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index a2f160a82..31b1bab3b 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1447,7 +1447,7 @@ def extract_chapter_information(e):
                 'title': part.get('name'),
                 'start_time': part.get('startOffset'),
                 'end_time': part.get('endOffset'),
-            } for part in e.get('hasPart', []) if part.get('@type') == 'Clip']
+            } for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
             for idx, (last_c, current_c, next_c) in enumerate(zip(
                     [{'end_time': 0}] + chapters, chapters, chapters[1:])):
                 current_c['end_time'] = current_c['end_time'] or next_c['start_time']
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 131319d25..2b59d076f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -3815,13 +3815,16 @@ def _real_extract(self, url):
 
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(webpage, video_id, default={})
-        if json_ld.get('url'):
+        if json_ld.get('url') not in (url, None):
             self.report_detected('JSON LD')
-            if determine_ext(json_ld.get('url')) == 'm3u8':
+            if determine_ext(json_ld['url']) == 'm3u8':
                 json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
                     json_ld['url'], video_id, 'mp4')
                 json_ld.pop('url')
                 self._sort_formats(json_ld['formats'])
+            else:
+                json_ld['_type'] = 'url_transparent'
+                json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
             return merge_dicts(json_ld, info_dict)
 
         def check_video(vurl):