update pattern

This commit is contained in:
kclauhk 2024-08-09 22:39:55 +08:00
parent 49a3edd276
commit 5ce0b127b2
2 changed files with 12 additions and 3 deletions

View File

@@ -738,6 +738,13 @@
GettrStreamingIE, GettrStreamingIE,
) )
from .giantbomb import GiantBombIE from .giantbomb import GiantBombIE
from .giphy import (
GiphyChannelIE,
GiphyIE,
GiphySearchIE,
GiphySearchURLIE,
GiphyStoriesIE,
)
from .glide import GlideIE from .glide import GlideIE
from .globalplayer import ( from .globalplayer import (
GlobalPlayerAudioEpisodeIE, GlobalPlayerAudioEpisodeIE,

View File

@@ -1,4 +1,5 @@
import itertools import itertools
import re
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..utils import ( from ..utils import (
@@ -215,7 +216,7 @@ def _real_extract(self, url):
# search for: \"gif\":{\"type\":\"...}, # search for: \"gif\":{\"type\":\"...},
if json_str := self._html_search_regex(r'\\"\w+\\":({\\"type\\":\\"(?!emoji).*?is_dynamic\\":\w+}),', if json_str := self._html_search_regex(r'\\"\w+\\":({\\"type\\":\\"(?!emoji).*?is_dynamic\\":\w+}),',
webpage, 'video_data', default=None): webpage, 'video_data', default=None):
gif_data = self._parse_json(json_str.encode('utf-8').decode('unicode_escape'), video_id) gif_data = self._parse_json(json_str.replace(r'\"', '"'), video_id)
# search for: gif: {"...}, # search for: gif: {"...},
elif json_str := self._html_search_regex(r'\s+\w+:\s*({".*?}),\n\s+', webpage, 'video_data', default='{}'): elif json_str := self._html_search_regex(r'\s+\w+:\s*({".*?}),\n\s+', webpage, 'video_data', default='{}'):
gif_data = self._parse_json(json_str, video_id) gif_data = self._parse_json(json_str, video_id)
@@ -253,9 +254,10 @@ def _real_extract(self, url):
if data := gif_data.get('user'): if data := gif_data.get('user'):
if isinstance(data, str): if isinstance(data, str):
idx = data.replace('$', '') idx = data.replace('$', '')
if json_str := self._html_search_regex(rf'"{idx}:({{.*?}})\\n"]\)</script>', if json_str := self._html_search_regex(rf'\\n{idx}:({{.*?}})\\n\w+:',
webpage, 'uploader_data', default=None): webpage, 'uploader_data', default=None):
data = self._parse_json(json_str.encode('utf-8').decode('unicode_escape'), video_id, fatal=False) json_str = re.sub(r'"\]\)self\.__next_f\.push\(\[\d+,"', '', json_str).replace(r'\"', '"')
data = self._parse_json(json_str, video_id, fatal=False)
if isinstance(data, dict): if isinstance(data, dict):
uploader = traverse_obj(data, { uploader = traverse_obj(data, {
'uploader': (('display_name', 'name', 'attribution_display_name', 'username'), 'uploader': (('display_name', 'name', 'attribution_display_name', 'username'),