From 19afd9ea513fc2cd29b7242544cfe0dec1db892e Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Sun, 30 Jan 2022 15:35:39 +0200 Subject: [PATCH] [GlomexEmbed] Avoid large match objects Closes #2512 Authored by: zmousm --- yt_dlp/extractor/glomex.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 1d387bdfd9..a6477faabf 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -198,8 +198,13 @@ def _extract_urls(cls, webpage, origin_url): )+ )''' % {'quot_re': r'["\']', 'url_re': VALID_SRC} - for mobj in re.finditer(EMBED_RE, webpage): - mdict = mobj.groupdict() + for mtup in re.findall(EMBED_RE, webpage): + # re.finditer causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512 + mdict = dict(zip(( + 'url', '_', + 'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player', + 'script_tag', '_', '_', 'integration_js', '_', 'id_js', + ), mtup)) if mdict.get('url'): url = unescapeHTML(mdict['url']) if not cls.suitable(url):