From 8a3da4c68c1bf50ba69af10ea7855e2f7a2b38b4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 18 Aug 2022 22:15:49 +0000 Subject: [PATCH] [extractor/instagram] Fix bugs in 7d3b98be4c4567b985ba7d7b17057e930457edc9 (#4701) Authored by: bashonly --- yt_dlp/extractor/instagram.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 1d8e79495..e997a3fbb 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -378,12 +378,12 @@ def _real_extract(self, url): self.report_warning('Instagram API is not granting access', video_id) else: if self._get_cookies(url).get('sessionid'): - media = traverse_obj(self._download_json( + media.update(traverse_obj(self._download_json( f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id, fatal=False, note='Downloading video info', headers={ **self._API_HEADERS, 'X-CSRFToken': csrf_token.value, - }), ('items', 0)) + }), ('items', 0)) or {}) if media: return self._extract_product(media) @@ -405,15 +405,15 @@ def _real_extract(self, url): 'query_hash': '9f8827793ef34641b2fb195d4d41151c', 'variables': json.dumps(variables, separators=(',', ':')), }) - media = traverse_obj(general_info, ('data', 'shortcode_media')) + media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {}) if not media: self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id) webpage, urlh = self._download_webpage_handle(url, video_id) shared_data = self._search_json( - r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) + r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {} - if self._LOGIN_URL not in urlh.geturl(): + if shared_data and self._LOGIN_URL not in urlh.geturl(): media.update(traverse_obj( shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'), ('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {}) @@ -424,7 +424,7 @@ def _real_extract(self, url): additional_data = self._search_json( r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*', webpage, 'additional data', video_id, fatal=False) if not additional_data: - self.raise_login_required('Requested content was not found, the content might be private') + self.raise_login_required('Requested content is not available, rate-limit reached or login required') product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict) if product_item: