mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-02-23 23:03:05 +01:00
[parsing] search for case-insensitive tag names
This commit is contained in:
parent
6169b3eca8
commit
65f91148fc
@ -222,6 +222,10 @@ class TestParsing(unittest.TestCase):
|
||||
get_element_text_and_html_by_tag('orphan', f'<orphan>{html}'), ('', '<orphan>'))
|
||||
self.assertIsNone(get_element_text_and_html_by_tag('orphan', f'{html}</orphan>'))
|
||||
|
||||
# ignore case on tags
|
||||
ci_html = f'<SpAn>{html}</sPaN>'
|
||||
self.assertEqual(get_element_text_and_html_by_tag('span', ci_html), (html, ci_html))
|
||||
|
||||
def test_strict_html_parsing(self):
|
||||
class StrictTagParser(HTMLTagParser):
|
||||
STRICT = True
|
||||
|
@ -245,7 +245,7 @@ class MatchingElementParser(HTMLTagParser):
|
||||
value_regex = re.escape(value_regex)
|
||||
|
||||
return rf'''(?x)
|
||||
<(?:{tag})
|
||||
<(?i:{tag})
|
||||
(?:\s(?:[^>"'\\]|"[^"\\]*"|'[^'\\]*')*)?
|
||||
\s{re.escape(attribute)}\s*=\s*(?P<_q>['"])(?-x:{value_regex})(?P=_q)
|
||||
'''
|
||||
@ -263,7 +263,7 @@ class MatchingElementParser(HTMLTagParser):
|
||||
def matchfunc(tag_str, _attrs):
|
||||
return tag_str == tag
|
||||
|
||||
tag_regex = rf'''<\s*{re.escape(tag)}(?:\s(?:[^>"'\\]|"[^"\\]*"|'[^'\\]*')*)?>'''
|
||||
tag_regex = rf'''<\s*(?i:{re.escape(tag)})(?:\s(?:[^>"'\\]|"[^"\\]*"|'[^'\\]*')*)?>'''
|
||||
yield from cls.iter_tags(tag_regex, html, matchfunc=matchfunc)
|
||||
|
||||
@classmethod
|
||||
|
Loading…
x
Reference in New Issue
Block a user