Fixed search plugin crashing on nested headlines

2025-02-18 19:14:10 +01:00 · 2023-01-08 09:39:05 +01:00 · 2023-01-08 09:39:05 +01:00 · 81e7b8c7fc
commit 81e7b8c7fc
parent c4d61cdc41
2 changed files with 76 additions and 28 deletions
--- a/material/plugins/search/plugin.py
+++ b/material/plugins/search/plugin.py
@@ -266,6 +266,10 @@ class Element:
        self.tag   = tag
        self.attrs = attrs
    # String representation
    def __repr__(self):
        return self.tag
    # Support comparison (compare by tag only)
    def __eq__(self, other):
        if other is Element:
@@ -291,12 +295,22 @@ class Section:
    """
    # Initialize HTML section
-    def __init__(self, el):
+    def __init__(self, el, depth = 0):
        self.el = el
        self.depth = depth
        # Initialize section data
        self.text  = []
        self.title = []
        self.id = None
    # String representation
    def __repr__(self):
        if self.id:
            return "#".join([self.el.tag, self.id])
        else:
            return self.el.tag
    # Check whether the section should be excluded
    def is_excluded(self):
        return self.el.is_excluded()
@@ -350,15 +364,16 @@ class Parser(HTMLParser):
        # Handle headings
        if tag in ([f"h{x}" for x in range(1, 7)]):
            depth = len(self.context)
            if "id" in attrs:
                # Ensure top-level section
                if tag != "h1" and not self.data:
-                    self.section = Section(Element("hx"))
+                    self.section = Section(Element("hx"), depth)
                    self.data.append(self.section)
                # Set identifier, if not first section
-                self.section = Section(el)
+                self.section = Section(el, depth)
                if self.data:
                    self.section.id = attrs["id"]
@@ -398,6 +413,20 @@ class Parser(HTMLParser):
        if not self.context or self.context[-1] != tag:
            return
        # Check whether we're exiting the current context, which happens when
        # a headline is nested in another element. In that case, we close the
        # current section, continuing to append data to the previous section,
        # which could also be a nested section – see https://bit.ly/3IxxIJZ
        if self.section.depth > len(self.context):
            for section in reversed(self.data):
                if section.depth and section.depth <= len(self.context):
                    # Set depth to 0 in order to denote that the current section
                    # is exited and must not be considered again.
                    self.section.depth = 0
                    self.section = section
                    break
        # Remove element from skip list
        el = self.context.pop()
        if el in self.skip:
@@ -407,18 +436,13 @@ class Parser(HTMLParser):
        # Render closing tag if kept
        if not self.skip.intersection(self.context):
            if tag in self.keep:
                # Check whether we're inside the section title
                data = self.section.text
-                if self.section.el in reversed(self.context):
+                if self.section.el in self.context:
                    data = self.section.title
                # Remove element if empty (or only whitespace)
                if data[-1] == f"<{tag}>":
                    del data[-1:]
                elif data[-1].isspace() and data[-2] == f"<{tag}>":
                    del data[-2:]
                # Append to section title or text
                else:
                data.append(f"</{tag}>")
    # Called for the text contents of each tag
@@ -439,7 +463,7 @@ class Parser(HTMLParser):
            self.data.append(self.section)
        # Handle section headline
-        if self.section.el in reversed(self.context):
+        if self.section.el in self.context:
            permalink = False
            for el in self.context:
                if el.tag == "a" and el.attrs.get("class") == "headerlink":
--- a/src/plugins/search/plugin.py
+++ b/src/plugins/search/plugin.py
@@ -266,6 +266,10 @@ class Element:
        self.tag   = tag
        self.attrs = attrs
    # String representation
    def __repr__(self):
        return self.tag
    # Support comparison (compare by tag only)
    def __eq__(self, other):
        if other is Element:
@@ -291,12 +295,22 @@ class Section:
    """
    # Initialize HTML section
-    def __init__(self, el):
+    def __init__(self, el, depth = 0):
        self.el = el
        self.depth = depth
        # Initialize section data
        self.text  = []
        self.title = []
        self.id = None
    # String representation
    def __repr__(self):
        if self.id:
            return "#".join([self.el.tag, self.id])
        else:
            return self.el.tag
    # Check whether the section should be excluded
    def is_excluded(self):
        return self.el.is_excluded()
@@ -350,15 +364,16 @@ class Parser(HTMLParser):
        # Handle headings
        if tag in ([f"h{x}" for x in range(1, 7)]):
            depth = len(self.context)
            if "id" in attrs:
                # Ensure top-level section
                if tag != "h1" and not self.data:
-                    self.section = Section(Element("hx"))
+                    self.section = Section(Element("hx"), depth)
                    self.data.append(self.section)
                # Set identifier, if not first section
-                self.section = Section(el)
+                self.section = Section(el, depth)
                if self.data:
                    self.section.id = attrs["id"]
@@ -398,6 +413,20 @@ class Parser(HTMLParser):
        if not self.context or self.context[-1] != tag:
            return
        # Check whether we're exiting the current context, which happens when
        # a headline is nested in another element. In that case, we close the
        # current section, continuing to append data to the previous section,
        # which could also be a nested section – see https://bit.ly/3IxxIJZ
        if self.section.depth > len(self.context):
            for section in reversed(self.data):
                if section.depth and section.depth <= len(self.context):
                    # Set depth to 0 in order to denote that the current section
                    # is exited and must not be considered again.
                    self.section.depth = 0
                    self.section = section
                    break
        # Remove element from skip list
        el = self.context.pop()
        if el in self.skip:
@@ -407,18 +436,13 @@ class Parser(HTMLParser):
        # Render closing tag if kept
        if not self.skip.intersection(self.context):
            if tag in self.keep:
                # Check whether we're inside the section title
                data = self.section.text
-                if self.section.el in reversed(self.context):
+                if self.section.el in self.context:
                    data = self.section.title
                # Remove element if empty (or only whitespace)
                if data[-1] == f"<{tag}>":
                    del data[-1:]
                elif data[-1].isspace() and data[-2] == f"<{tag}>":
                    del data[-2:]
                # Append to section title or text
                else:
                data.append(f"</{tag}>")
    # Called for the text contents of each tag
@@ -439,7 +463,7 @@ class Parser(HTMLParser):
            self.data.append(self.section)
        # Handle section headline
-        if self.section.el in reversed(self.context):
+        if self.section.el in self.context:
            permalink = False
            for el in self.context:
                if el.tag == "a" and el.attrs.get("class") == "headerlink":