docs/xml_lexer.py

93 lines
2.7 KiB
Python
Raw Normal View History

2021-12-28 23:29:33 +01:00
from pygments.lexer import RegexLexer
from pygments.token import Text, Comment, Operator, Name, String
from pygments.lexers import _lexer_cache
from pygments.lexers._mapping import LEXERS
def italic_attr(lexer, m):
    """Tokenise a quoted attribute value that starts with a ``??`` marker.

    The match is expected to have the form ``"??text"``. Both quotes are
    emitted as ``String`` tokens and ``text`` as a ``Comment`` token; the
    ``??`` marker itself is left out of the emitted values.

    Args:
        lexer: The lexer invoking this callback (unused).
        m: Regex match object spanning the whole quoted value.

    Yields:
        ``(index, token_type, value)`` tuples in source order.
    """
    start = m.start()
    yield start, String, '"'
    # The payload starts after the opening quote plus the two-character
    # "??" marker, i.e. three characters into the match (matching the
    # [3:-1] slice below).
    yield start + 3, Comment, m.group()[3:-1]
    yield m.end() - 1, String, '"'
2022-04-11 20:27:15 +02:00
2021-12-28 23:29:33 +01:00
def italic_generic(lexer, m):
    """Emit the entire match as a single ``Comment`` token.

    Args:
        lexer: The lexer invoking this callback (unused).
        m: Regex match object for the text to emit.

    Yields:
        One ``(index, Comment, text)`` tuple covering the whole match.
    """
    position, text = m.start(), m.group()
    yield position, Comment, text
2022-04-11 20:27:15 +02:00
2021-12-28 23:29:33 +01:00
def italic_tag(lexer, m):
    """Tokenise the start of an opening tag whose name has a ``??`` marker.

    The match is expected to look like ``<??name`` (whitespace may separate
    ``<`` from the marker). ``<`` is emitted as ``Name.Tag`` and the name as
    ``Comment``; the ``??`` marker and any whitespace before it are left out
    of the emitted values.

    Args:
        lexer: The lexer invoking this callback (unused).
        m: Regex match object spanning ``<``, the marker and the tag name.

    Yields:
        ``(index, token_type, value)`` tuples in source order.
    """
    # Locate the name via the marker instead of a fixed slice so that
    # optional whitespace after "<" cannot leak a "?" into the name.
    before, _, name = m.group().partition("??")
    name_start = m.start() + len(before) + 2

    yield m.start(), Name.Tag, "<"
    if name.endswith(">"):
        # Defensive: tolerate a pattern that also captured the closing ">".
        yield name_start, Comment, name[:-1]
        yield m.end() - 1, Name.Tag, ">"
    else:
        yield name_start, Comment, name
2022-04-11 20:27:15 +02:00
2021-12-28 23:29:33 +01:00
def italic_tag_close(lexer, m):
    """Tokenise a closing tag whose name has a ``??`` marker.

    The match is expected to look like ``</??name>`` (whitespace may appear
    around ``/`` and before ``>``). ``</`` and ``>`` are emitted as
    ``Name.Tag`` and the text between the marker and ``>`` as ``Comment``;
    the ``??`` marker is left out of the emitted values.

    Args:
        lexer: The lexer invoking this callback (unused).
        m: Regex match object spanning the whole closing tag.

    Yields:
        ``(index, token_type, value)`` tuples in source order.
    """
    # Find the name through the marker rather than a fixed [4:-1] slice so
    # that optional whitespace in "< / ??name>" does not corrupt the name.
    before, _, rest = m.group().partition("??")

    yield m.start(), Name.Tag, "</"
    # "rest" still ends with the closing ">", which is emitted separately.
    yield m.start() + len(before) + 2, Comment, rest[:-1]
    yield m.end() - 1, Name.Tag, ">"
2022-04-11 20:27:15 +02:00
2021-12-28 23:29:33 +01:00
def repeat_tag_close(lexer, m):
    """Tokenise a closing tag that carries a ``[]`` suffix on its name.

    The text before and after the first ``[]`` is emitted as ``Name.Tag``
    and the ``[]`` itself as ``Operator``.

    Args:
        lexer: The lexer invoking this callback (unused).
        m: Regex match object spanning the whole closing tag.

    Yields:
        ``(index, token_type, value)`` tuples in source order.
    """
    head, _, tail = m.group().partition("[]")
    pieces = (
        (Name.Tag, head),
        (Operator, "[]"),
        (Name.Tag, tail),
    )
    # Walk the pieces left to right, advancing the index by each length.
    position = m.start()
    for token_type, value in pieces:
        yield position, token_type, value
        position += len(value)
2022-04-11 20:27:15 +02:00
2021-12-28 23:29:33 +01:00
def italic_attr_name(lexer, m):
    """Tokenise an attribute name that carries a ``*`` suffix.

    The text before the first ``*`` and the text after it (including the
    ``=``) are emitted as ``Name.Attribute``; the ``*`` itself is emitted as
    ``Operator``.

    Args:
        lexer: The lexer invoking this callback (unused).
        m: Regex match object spanning the name, the ``*`` and the ``=``.

    Yields:
        ``(index, token_type, value)`` tuples in source order.
    """
    matched = m.group()
    offset = m.start()
    attr, _, remainder = matched.partition("*")
    star_at = offset + len(attr)

    yield offset, Name.Attribute, attr
    yield star_at, Operator, "*"
    yield star_at + 1, Name.Attribute, remainder
class CustomXMLLexer(RegexLexer):
    """Pygments lexer for an XML-like notation with extra markers.

    On top of ordinary XML constructs (entities, CDATA, comments,
    processing instructions, tags and attributes) the rules recognise:

    * ``...word`` placeholders, emitted as ``Comment``;
    * tag names and quoted attribute values prefixed with ``??``, handled
      by the ``italic_*`` callbacks;
    * a ``[]`` suffix on tag names and a ``*`` suffix on attribute names,
      emitted as ``Operator``.
    """

    name = "customxml"
    aliases = ["cxml"]

    tokens = {
        'root': [
            # Placeholder text such as "...rest" outside of any tag.
            (r'\s*\.\.\.\w*', Comment),
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            (r'<!--(.|\n)*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # Plain opening and closing tags.
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
            # Opening and closing tags whose name is prefixed with "??".
            (r'<\s*\?\?[\w:.-]+', italic_tag, 'tag'),
            (r'<\s*/\s*\?\?[\w:.-]+\s*>', italic_tag_close),
            # Closing tag whose name is followed by "[]".
            (r'<\s*/\s*[\w:.-]+\[\]\s*>', repeat_tag_close),
        ],
        'tag': [
            (r'\*', Operator),
            (r'\[\]', Operator),
            (r'\s+', Text),
            (r'\.\.\.\w*', italic_generic),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            # Attribute name followed by "*" before the "=".
            (r'[\w.:-]+\*\s*=', italic_attr_name, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Text),
            # Must precede the generic double-quoted rule, which would
            # otherwise match "??..." values first.
            (r'"\?\?[^"]*?"', italic_attr, "#pop"),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """Return the guess score for *text*.

        This lexer makes no attempt to recognise its input, so it always
        reports ``0.0``. A leftover debug ``print`` was removed so that
        calling this method no longer writes to stdout.
        """
        return 0.0
2022-04-11 20:27:15 +02:00
2021-12-28 23:29:33 +01:00
# Register the lexer by writing directly into the lookup tables imported
# from pygments.lexers and pygments.lexers._mapping.
# NOTE(review): both names are private Pygments internals — confirm they
# still exist with this shape when upgrading Pygments.

# Cache entry keyed by the class name ("CustomXMLLexer"), which is the same
# string used as the second element of the LEXERS tuple below.
_lexer_cache[CustomXMLLexer.__name__] = CustomXMLLexer
# Presumably (module name, lexer name, aliases, filenames, mimetypes) —
# verify against the entries in pygments.lexers._mapping.
LEXERS["CustomXMLLexer"] = ("xml_lexer", "CustomXMLLexer", ("cxml", ), (), ())
# Only the lexer class is exported on star-import.
__all__ = ("CustomXMLLexer", )