From c86e433c35fe5da6cb29f3539eef97497f84ed38 Mon Sep 17 00:00:00 2001 From: sqrtNOT <77981959+sqrtNOT@users.noreply.github.com> Date: Tue, 25 Apr 2023 10:21:06 +0000 Subject: [PATCH] [extractor/NiconicoSeries] Fix extraction (#6898) Authored by: sqrtNOT --- yt_dlp/extractor/niconico.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index cacefeb429..30b4d7216f 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -660,10 +660,10 @@ def _real_extract(self, url): class NiconicoSeriesIE(InfoExtractor): IE_NAME = 'niconico:series' - _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P\d+)' + _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P\d+)' _TESTS = [{ - 'url': 'https://www.nicovideo.jp/series/110226', + 'url': 'https://www.nicovideo.jp/user/44113208/series/110226', 'info_dict': { 'id': '110226', 'title': 'ご立派ァ!のシリーズ', @@ -683,7 +683,7 @@ class NiconicoSeriesIE(InfoExtractor): def _real_extract(self, url): list_id = self._match_id(url) - webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id) + webpage = self._download_webpage(url, list_id) title = self._search_regex( (r'「(.+)(全', @@ -691,10 +691,9 @@ def _real_extract(self, url): webpage, 'title', fatal=False) if title: title = unescapeHTML(title) - playlist = [ - self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id) - for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)] - return self.playlist_result(playlist, list_id, title) + json_data = next(self._yield_json_ld(webpage, None, fatal=False)) + return self.playlist_from_matches( + traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE) class NiconicoHistoryIE(NiconicoPlaylistBaseIE):