build: Switch to better interval tree implementation

2025-01-30 03:27:25 +01:00 · 2023-05-07 23:27:43 +02:00 · 2023-05-07 23:27:43 +02:00 · 5a6e5d2255
commit 5a6e5d2255
parent 82111617a4
13 changed files with 308 additions and 430 deletions
--- a/cmake/build_helpers.cmake
+++ b/cmake/build_helpers.cmake
@ -410,7 +410,7 @@ endfunction()

 macro(setupCompilerWarnings target)
    set(IMHEX_COMMON_FLAGS "-Wall -Wextra -Wpedantic -Werror")
-    set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds")
+    set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds -Wno-dangling-reference")

    set(CMAKE_C_FLAGS    "${CMAKE_C_FLAGS}    ${IMHEX_C_FLAGS}")
    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  ${IMHEX_C_FLAGS}")
--- a/lib/external/intervaltree/LICENSE
+++ b/lib/external/intervaltree/LICENSE
@ -1,19 +1,23 @@
-Copyright (c) 2011 Erik Garrison
+The MIT License

-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
+Copyright (c) 2019     Dana-Farber Cancer Institute

-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/lib/external/intervaltree/README.md
+++ b/lib/external/intervaltree/README.md
@ -1,37 +1,133 @@
-# intervaltree
+## Introduction

-## Overview
-
-An interval tree can be used to efficiently find a set of numeric intervals overlapping or containing another interval.
-
-This library provides a basic implementation of an interval tree using C++ templates, allowing the insertion of arbitrary types into the tree.
+cgranges is a small C library for genomic interval overlap queries: given a
+genomic region *r* and a set of regions *R*, it finds all regions in *R* that
+overlap *r*. Although this library is based on the [interval tree][itree], a
+well-known data structure, the core algorithm of cgranges is distinct from all
+existing implementations to the best of our knowledge.  Specifically, the
+interval tree in cgranges is implicitly encoded as a plain sorted array
+(similar to [binary heap][bheap] but packed differently). Tree
+traversal is achieved by jumping between array indices. This treatment makes
+cgranges very efficient and compact in memory. The core algorithm can be
+implemented in ~50 lines of C++ code, much shorter than others as well. Please
+see the code comments in [cpp/IITree.h](cpp/IITree.h) for details.

 ## Usage

-Add `#include "IntervalTree.h"` to the source files in which you will use the interval tree.
+### Test with BED coverage

-To make an IntervalTree to contain objects of class T, use:
+For testing purposes, this repo implements the [bedtools coverage][bedcov] tool
+with cgranges. The source code is located in the [test/](test) directory. You
+can compile and run the test with:
+```sh
+cd test && make
+./bedcov-cr test1.bed test2.bed
+```
+The first BED file is loaded into RAM and indexed. The depth and the breadth of
+coverage of each region in the second file is computed by query against the
+index of the first file.

-```c++
-vector<Interval<T> > intervals;
-T a, b, c;
-intervals.push_back(Interval<T>(2, 10, a));
-intervals.push_back(Interval<T>(3, 4, b));
-intervals.push_back(Interval<T>(20, 100, c));
-IntervalTree<T> tree;
-tree = IntervalTree<T>(intervals);
+The [test/](test) directory also contains a few other implementations based on
+[IntervalTree.h][ekg-itree] in C++, [quicksect][quicksect] in Cython and
+[ncls][ncls] in Cython. The table below shows timing and peak memory on two
+test BEDs available in the release page. The first BED contains GenCode
+annotations with ~1.2 million lines, mixing all types of features. The second
+contains ~10 million direct-RNA mappings. Time1a/Mem1a indexes the GenCode BED
+into memory. Time1b adds whole chromosome intervals to the GenCode BED when
+indexing. Time2/Mem2 indexes the RNA-mapping BED into memory. Numbers are
+averaged over 5 runs.
+
+|Algo.   |Lang. |Cov|Program         |Time1a|Time1b|Mem1a   |Time2 |Mem2    |
+|:-------|:-----|:-:|:---------------|-----:|-----:|-------:|-----:|-------:|
+|IAITree |C     |Y  |cgranges        |9.0s  |13.9s |19.1MB  |4.6s  |138.4MB |
+|IAITree |C++   |Y  |cpp/iitree.h    |11.1s |24.5s |22.4MB  |5.8s  |160.4MB |
+|CITree  |C++   |Y  |IntervalTree.h  |17.4s |17.4s |27.2MB  |10.5s |179.5MB |
+|IAITree |C     |N  |cgranges        |7.6s  |13.0s |19.1MB  |4.1s  |138.4MB |
+|AIList  |C     |N  |3rd-party/AIList|7.9s  |8.1s  |14.4MB  |6.5s  |104.8MB |
+|NCList  |C     |N  |3rd-party/NCList|13.0s |13.4s |21.4MB  |10.6s |183.0MB |
+|AITree  |C     |N  |3rd-party/AITree|16.8s |18.4s |73.4MB  |27.3s |546.4MB |
+|IAITree |Cython|N  |cgranges        |56.6s |63.9s |23.4MB  |43.9s |143.1MB |
+|binning |C++   |Y  |bedtools        |201.9s|280.4s|478.5MB |149.1s|3438.1MB|
+
+Here, IAITree = implicit augmented interval tree, used by cgranges;
+CITree = [centered interval tree][citree], used by [Erik Garrison's
+IntervalTree][ekg-itree]; AIList = augmented interval list, by [Feng et
+al][ailist]; NCList = nested containment list, taken from [ncls][ncls] by Feng
+et al; AITree = augmented interval tree, from [kerneltree][kerneltree].
+"Cov" indicates whether the program calculates breadth of coverage.
+Comments:
+
+* AIList keeps start and end only. IAITree and CITree additionally store a
+  4-byte "ID" field per interval to reference the source of interval. This is
+  partly why AIList uses the least memory.
+
+* IAITree is more sensitive to the worst case: the presence of an interval
+  spanning the whole chromosome.
+
+* IAITree uses an efficient radix sort. CITree uses std::sort from STL, which
+  is ok. AIList and NCList use qsort from libc, which is slow. Faster sorting
+  leads to faster indexing.
+
+* IAITree in C++ uses identical core algorithm to the C version, but limited by
+  its APIs, it wastes time on memory locality and management. CITree has a
+  similar issue.
+
+* Computing coverage is better done when the returned list of intervals is
+  sorted by start. IAITree returns a sorted list. CITree doesn't. Not sure about
+  others. Computing coverage takes a couple of seconds. Sorting will be slower.
+
+* Printing intervals also takes a noticeable fraction of time. Custom printf
+  equivalent would be faster.
+
+* IAITree+Cython is a wrapper around the C version of cgranges. Cython adds
+  significant overhead.
+
+* Bedtools is designed for a variety of applications in addition to computing
+  coverage. It may keep other information in its internal data structure. This
+  micro-benchmark may be unfair to bedtools.
+
+* In general, the performance is affected a lot by subtle implementation
+  details. CITree, IAITree, NCList and AIList are all broadly comparable in
+  performance. AITree is not recommended when indexed intervals are immutable.
+
+### Use cgranges as a C library
+
+```c
+cgranges_t *cr = cr_init(); // initialize a cgranges_t object
+cr_add(cr, "chr1", 20, 30, 0); // add a genomic interval
+cr_add(cr, "chr2", 10, 30, 1);
+cr_add(cr, "chr1", 10, 25, 2);
+cr_index(cr); // index
+
+int64_t i, n, *b = 0, max_b = 0;
+n = cr_overlap(cr, "chr1", 15, 22, &b, &max_b); // overlap query; output array b[] can be reused
+for (i = 0; i < n; ++i) // traverse overlapping intervals
+	printf("%d\t%d\t%d\n", cr_start(cr, b[i]), cr_end(cr, b[i]), cr_label(cr, b[i]));
+free(b); // b[] is allocated by malloc() inside cr_overlap(), so needs to be freed with free()
+
+cr_destroy(cr);
 ```

-Now, it's possible to query the tree and obtain a set of intervals which are contained within the start and stop coordinates.
+### Use IITree as a C++ library

-```c++
-vector<Interval<T> > results;
-tree.findContained(start, stop, results);
-cout << "found " << results.size() << " overlapping intervals" << endl;
+```cpp
+IITree<int, int> tree;
+tree.add(12, 34, 0); // add an interval
+tree.add(0, 23, 1);
+tree.add(34, 56, 2);
+tree.index(); // index
+std::vector<size_t> a;
+tree.overlap(22, 25, a); // retrieve overlaps
+for (size_t i = 0; i < a.size(); ++i)
+	printf("%d\t%d\t%d\n", tree.start(a[i]), tree.end(a[i]), tree.data(a[i]));
 ```

-The function IntervalTree::findOverlapping provides a method to find all those intervals which are contained or partially overlap the interval (start, stop).
-
-### Author: Erik Garrison <erik.garrison@gmail.com>
-
-### License: MIT
+[bedcov]: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html
+[ekg-itree]: https://github.com/ekg/intervaltree
+[quicksect]: https://github.com/brentp/quicksect
+[ncls]: https://github.com/hunt-genes/ncls
+[citree]: https://en.wikipedia.org/wiki/Interval_tree#Centered_interval_tree
+[itree]: https://en.wikipedia.org/wiki/Interval_tree
+[bheap]: https://en.wikipedia.org/wiki/Binary_heap
+[ailist]: https://www.biorxiv.org/content/10.1101/593657v1
+[kerneltree]: https://github.com/biocore-ntnu/kerneltree
--- a/lib/external/intervaltree/include/IITree.h
+++ b/lib/external/intervaltree/include/IITree.h
@ -0,0 +1,88 @@
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <utility>
+#include <vector>
+
+/**
+ * Implicit interval tree: all intervals live in one flat array that is laid
+ * out as a complete binary tree, so no child pointers are needed.
+ *
+ * Typical use: add() every interval, call index() once, then query with
+ * overlap(). index() must be called again after further add() calls, and the
+ * positions handed out by overlap() may change whenever index() runs.
+ *
+ * @tparam S scalar type of the interval bounds (compared with <, <= and >)
+ * @tparam T type of the data associated with each interval
+ */
+template<typename S, typename T>
+class IITree {
+    /// Entry of the explicit traversal stack used instead of recursion.
+    struct StackCell {
+        size_t x; // position of the node in the array
+        int w;    // progress marker: how many children of x have been handled so far
+        StackCell() {}
+        StackCell(size_t x_, int w_) : x(x_), w(w_) {}
+    };
+    struct Interval {
+        S st, en, max; // start, end, and the largest end within the subtree rooted at this node
+        T data;
+        Interval() = default;
+        Interval(const S &s, const S &e, const T &d) : st(s), en(e), max(e), data(d) { }
+    };
+    /// Orders intervals by their start only.
+    struct IntervalLess {
+        bool operator()(const Interval &intervalA, const Interval &intervalB) const { return intervalA.st < intervalB.st; }
+    };
+    /// Capacity of the traversal stacks; one slot is needed per tree level.
+    static constexpr size_t MaxDepth = 64;
+    std::vector<Interval> a;
+    /// Moves the start-sorted contents of `a` into `b` in Eytzinger order
+    /// (children of slot k live in slots 2k+1 and 2k+2), see https://algorithmica.org/en/eytzinger.
+    /// `i` is the next unread element of `a`, `k` the slot of `b` being filled; returns the updated `i`.
+    size_t layout_recur(Interval *b, size_t i = 0, size_t k = 0) {
+        if (k < a.size()) {
+            i = layout_recur(b, i, (k<<1) + 1);
+            b[k] = std::move(a[i++]); // every element of `a` is read exactly once, so it can be moved from
+            i = layout_recur(b, i, (k<<1) + 2);
+        }
+        return i;
+    }
+    /// Sets Interval::max for all `n` nodes, visiting both children before their parent.
+    void index_BFS(Interval *interval, size_t n) {
+        if (n == 0) return; // no root to visit; touching interval[0] would be out of bounds
+        size_t t = 0;
+        std::array<StackCell, MaxDepth> stack;
+        stack[t++] = StackCell(0, 0);
+        while (t) {
+            const StackCell z = stack[--t];
+            const size_t k = z.x, l = k<<1|1, r = l + 1;
+            if (z.w == 2) { // Interval::max of both children has been computed
+                interval[k].max = interval[k].en;
+                if (l < n && interval[k].max < interval[l].max) interval[k].max = interval[l].max;
+                if (r < n && interval[k].max < interval[r].max) interval[k].max = interval[r].max;
+            } else { // revisit k later, then descend into child number z.w (0 = left, 1 = right)
+                stack[t++] = StackCell(k, z.w + 1);
+                if (l + z.w < n)
+                    stack[t++] = StackCell(l + z.w, 0);
+            }
+        }
+    }
+public:
+    /// Appends the interval (s, e) with payload `d`. The tree has to be (re-)indexed before it is queried.
+    void add(const S &s, const S &e, const T &d) { a.push_back(Interval(s, e, d)); }
+    /// Sorts the intervals by start, rearranges them into the implicit tree layout and
+    /// computes the per-subtree maximum ends. Does nothing when no interval was added.
+    void index() {
+        if (a.empty()) return;
+        std::sort(a.begin(), a.end(), IntervalLess());
+        std::vector<Interval> b(a.size());
+        layout_recur(b.data());
+        a = std::move(b);
+        index_BFS(a.data(), a.size());
+    }
+    /**
+     * Collects the positions of all stored intervals i for which
+     * `st < end(i) && start(i) <= en` holds.
+     *
+     * NOTE(review): the test against the stored end is strict, so a query that starts
+     * exactly at an interval's end does not match it - confirm this is what callers
+     * that store inclusive end addresses expect.
+     *
+     * @param st  start of the query range
+     * @param en  end of the query range
+     * @param out cleared first, then filled with the matching positions in ascending start order
+     * @return true if at least one interval matched
+     */
+    bool overlap(const S &st, const S &en, std::vector<size_t> &out) const {
+        out.clear();
+        if (a.empty()) return false;
+        size_t t = 0;
+        std::array<StackCell, MaxDepth> stack;
+        stack[t++] = StackCell(0, 0); // push the root; this is a top down, in-order traversal
+        while (t) {
+            const StackCell z = stack[--t];
+            const size_t l = (z.x<<1) + 1, r = l + 1;
+            if (l >= a.size()) { // a leaf node
+                if (st < a[z.x].en && a[z.x].st <= en) out.push_back(z.x);
+            } else if (z.w == 0) { // left child not processed yet
+                stack[t++] = StackCell(z.x, 1); // re-add node z.x, marked as having its left child processed
+                if (a[l].max > st) // skip the left subtree if every interval in it ends at or before st
+                    stack[t++] = StackCell(l, 0);
+            } else if (a[z.x].st <= en) { // otherwise this node and its whole right subtree start after en
+                if (st < a[z.x].en) out.push_back(z.x); // z.x itself overlaps the query
+                if (r < a.size()) stack[t++] = StackCell(r, 0);
+            }
+        }
+        return !out.empty();
+    }
+    /// Number of stored intervals.
+    size_t size(void) const { return a.size(); }
+    /// Start of the interval at position `i` (as returned by overlap()).
+    const S &start(size_t i) const { return a[i].st; }
+    /// End of the interval at position `i`.
+    const S &end(size_t i) const { return a[i].en; }
+    /// Payload of the interval at position `i`.
+    const T &data(size_t i) const { return a[i].data; }
+};
--- a/lib/external/intervaltree/include/IntervalTree.h
+++ b/lib/external/intervaltree/include/IntervalTree.h
@ -1,325 +0,0 @@
-#ifndef __INTERVAL_TREE_H
-#define __INTERVAL_TREE_H
-
-#include <vector>
-#include <algorithm>
-#include <iostream>
-#include <memory>
-#include <cassert>
-#include <limits>
-
-#ifdef USE_INTERVAL_TREE_NAMESPACE
-namespace interval_tree {
-#endif
-template <class Scalar, typename Value>
-class Interval {
-public:
-    Scalar start;
-    Scalar stop;
-    Value value;
-    Interval(const Scalar& s, const Scalar& e, const Value& v)
-    : start(std::min(s, e))
-    , stop(std::max(s, e))
-    , value(v) 
-    {}
-};
-
-template <class Scalar, typename Value>
-Value intervalStart(const Interval<Scalar,Value>& i) {
-    return i.start;
-}
-
-template <class Scalar, typename Value>
-Value intervalStop(const Interval<Scalar, Value>& i) {
-    return i.stop;
-}
-
-template <class Scalar, typename Value>
-std::ostream& operator<<(std::ostream& out, const Interval<Scalar, Value>& i) {
-    out << "Interval(" << i.start << ", " << i.stop << "): " << i.value;
-    return out;
-}
-
-template <class Scalar, class Value>
-class IntervalTree {
-public:
-    typedef Interval<Scalar, Value> interval;
-    typedef std::vector<interval> interval_vector;
-
-
-    struct IntervalStartCmp {
-        bool operator()(const interval& a, const interval& b) {
-            return a.start < b.start;
-        }
-    };
-
-    struct IntervalStopCmp {
-        bool operator()(const interval& a, const interval& b) {
-            return a.stop < b.stop;
-        }
-    };
-
-    IntervalTree()
-        : left(nullptr)
-        , right(nullptr)
-        , center(0)
-    {}
-
-    ~IntervalTree() = default;
-
-    std::unique_ptr<IntervalTree> clone() const {
-        return std::unique_ptr<IntervalTree>(new IntervalTree(*this));
-    }
-
-    IntervalTree(const IntervalTree& other)
-    :   intervals(other.intervals),
-        left(other.left ? other.left->clone() : nullptr),
-        right(other.right ? other.right->clone() : nullptr),
-        center(other.center)
-    {}
-
-    IntervalTree& operator=(IntervalTree&&) = default;
-    IntervalTree(IntervalTree&&) = default;
-
-    IntervalTree& operator=(const IntervalTree& other) {
-        center = other.center;
-        intervals = other.intervals;
-        left = other.left ? other.left->clone() : nullptr;
-        right = other.right ? other.right->clone() : nullptr;
-        return *this;
-    }
-
-    IntervalTree(
-            interval_vector&& ivals,
-            std::size_t depth = 16,
-            std::size_t minbucket = 64,
-            std::size_t maxbucket = 512, 
-            Scalar leftextent = 0,
-            Scalar rightextent = 0)
-      : left(nullptr)
-      , right(nullptr)
-    {
-        --depth;
-        const auto minmaxStop = std::minmax_element(ivals.begin(), ivals.end(), 
-                                                    IntervalStopCmp());
-        const auto minmaxStart = std::minmax_element(ivals.begin(), ivals.end(), 
-                                                     IntervalStartCmp());
-        if (!ivals.empty()) {
-            center = (minmaxStart.first->start + minmaxStop.second->stop) / 2;
-        }
-        if (leftextent == 0 && rightextent == 0) {
-            // sort intervals by start
-            std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
-        } else {
-            assert(std::is_sorted(ivals.begin(), ivals.end(), IntervalStartCmp()));
-        }
-        if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) {
-            std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
-            intervals = std::move(ivals);
-            assert(is_valid().first);
-            return;
-        } else {
-            Scalar leftp = 0;
-            Scalar rightp = 0;
-
-            if (leftextent || rightextent) {
-                leftp = leftextent;
-                rightp = rightextent;
-            } else {
-                leftp = ivals.front().start;
-                rightp = std::max_element(ivals.begin(), ivals.end(),
-                                          IntervalStopCmp())->stop;
-            }
-
-            interval_vector lefts;
-            interval_vector rights;
-
-            for (typename interval_vector::const_iterator i = ivals.begin(); 
-                 i != ivals.end(); ++i) {
-                const interval& interval = *i;
-                if (interval.stop < center) {
-                    lefts.push_back(interval);
-                } else if (interval.start > center) {
-                    rights.push_back(interval);
-                } else {
-                    assert(interval.start <= center);
-                    assert(center <= interval.stop);
-                    intervals.push_back(interval);
-                }
-            }
-
-            if (!lefts.empty()) {
-                left.reset(new IntervalTree(std::move(lefts), 
-                                            depth, minbucket, maxbucket,
-                                            leftp, center));
-            }
-            if (!rights.empty()) {
-                right.reset(new IntervalTree(std::move(rights), 
-                                             depth, minbucket, maxbucket, 
-                                             center, rightp));
-            }
-        }
-        assert(is_valid().first);
-    }
-
-    // Call f on all intervals near the range [start, stop]:
-    template <class UnaryFunction>
-    void visit_near(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
-        if (!intervals.empty() && ! (stop < intervals.front().start)) {
-            for (auto & i : intervals) {
-              f(i);
-            }
-        }
-        if (left && start <= center) {
-            left->visit_near(start, stop, f);
-        }
-        if (right && stop >= center) {
-            right->visit_near(start, stop, f);
-        }
-    }
-
-    // Call f on all intervals crossing pos
-    template <class UnaryFunction>
-    void visit_overlapping(const Scalar& pos, UnaryFunction f) const {
-        visit_overlapping(pos, pos, f);
-    }
-
-    // Call f on all intervals overlapping [start, stop]
-    template <class UnaryFunction>
-    void visit_overlapping(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
-        auto filterF = [&](const interval& interval) {
-            if (interval.stop >= start && interval.start <= stop) {
-                // Only apply f if overlapping
-                f(interval);
-            }
-        };
-        visit_near(start, stop, filterF);
-    }
-
-    // Call f on all intervals contained within [start, stop]
-    template <class UnaryFunction>
-    void visit_contained(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
-        auto filterF = [&](const interval& interval) {
-            if (start <= interval.start && interval.stop <= stop) {
-                f(interval);
-            }
-        };
-        visit_near(start, stop, filterF);
-    }
-
-    interval_vector findOverlapping(const Scalar& start, const Scalar& stop) const {
-        interval_vector result;
-        visit_overlapping(start, stop,
-                          [&](const interval& interval) { 
-                            result.emplace_back(interval); 
-                          });
-        return result;
-    }
-
-    interval_vector findContained(const Scalar& start, const Scalar& stop) const {
-        interval_vector result;
-        visit_contained(start, stop,
-                        [&](const interval& interval) { 
-                          result.push_back(interval); 
-                        });
-        return result;
-    }
-    bool empty() const {
-        if (left && !left->empty()) {
-            return false;
-        }
-        if (!intervals.empty()) { 
-            return false;
-        }
-        if (right && !right->empty()) {
-            return false;
-        }
-        return true;
-    }
-
-    template <class UnaryFunction>
-    void visit_all(UnaryFunction f) const {
-        if (left) {
-            left->visit_all(f);
-        }
-        std::for_each(intervals.begin(), intervals.end(), f);
-        if (right) {
-            right->visit_all(f);
-        }
-    }
-
-    std::pair<Scalar, Scalar> extentBruitForce() const {
-        struct Extent {
-            std::pair<Scalar, Scalar> x = {std::numeric_limits<Scalar>::max(),
-                                                       std::numeric_limits<Scalar>::min() };
-            void operator()(const interval & interval) {
-                x.first  = std::min(x.first,  interval.start);
-                x.second = std::max(x.second, interval.stop);
-            }
-                                                                };
-                                            Extent extent;
-
-        visit_all([&](const interval & interval) { extent(interval); });
-        return extent.x;
-                                            }
-
-    // Check all constraints.
-    // If first is false, second is invalid.
-    std::pair<bool, std::pair<Scalar, Scalar>> is_valid() const {
-        const auto minmaxStop = std::minmax_element(intervals.begin(), intervals.end(), 
-                                                    IntervalStopCmp());
-        const auto minmaxStart = std::minmax_element(intervals.begin(), intervals.end(), 
-                                                     IntervalStartCmp());
-        
-        std::pair<bool, std::pair<Scalar, Scalar>> result = {true, { std::numeric_limits<Scalar>::max(),
-                                                                     std::numeric_limits<Scalar>::min() }};
-        if (!intervals.empty()) {
-            result.second.first   = std::min(result.second.first,  minmaxStart.first->start);
-            result.second.second  = std::min(result.second.second, minmaxStop.second->stop);
-        }
-        if (left) {
-            auto valid = left->is_valid();
-            result.first &= valid.first;
-            result.second.first   = std::min(result.second.first,  valid.second.first);
-            result.second.second  = std::min(result.second.second, valid.second.second);
-            if (!result.first) { return result; }
-            if (valid.second.second >= center) {
-                result.first = false;
-                return result;
-            }
-        }
-        if (right) {
-            auto valid = right->is_valid();
-            result.first &= valid.first;
-            result.second.first   = std::min(result.second.first,  valid.second.first);
-            result.second.second  = std::min(result.second.second, valid.second.second);
-            if (!result.first) { return result; }
-            if (valid.second.first <= center) { 
-                result.first = false;
-                return result;
-            }
-        }
-        if (!std::is_sorted(intervals.begin(), intervals.end(), IntervalStartCmp())) {
-            result.first = false;
-        }
-        return result;        
-    }
-
-    void clear() {
-        left.reset();
-        right.reset();
-        intervals.clear();
-        center = 0;
-    }
-
-private:
-    interval_vector intervals;
-    std::unique_ptr<IntervalTree> left;
-    std::unique_ptr<IntervalTree> right;
-    Scalar center;
-};
-#ifdef USE_INTERVAL_TREE_NAMESPACE
-}
-#endif
-
-#endif
--- a/lib/external/pattern_language
+++ b/lib/external/pattern_language
@ -1 +1 @@
-Subproject commit 9a687a5364ea27aa838f499afedb8e231f238a40
+Subproject commit 20a21a7de0db4be0b63553ebac90950dbf2a58fe
--- a/plugins/builtin/include/content/providers/intel_hex_provider.hpp
+++ b/plugins/builtin/include/content/providers/intel_hex_provider.hpp
@ -2,7 +2,7 @@

 #include <hex/providers/provider.hpp>

-#include <IntervalTree.h>
+#include <IITree.h>

 namespace hex::plugin::builtin {

@ -44,7 +44,7 @@ namespace hex::plugin::builtin {
    protected:
        bool m_dataValid = false;
        size_t m_dataSize = 0x00;
-        interval_tree::IntervalTree<u64, std::vector<u8>> m_data;
+        IITree<u64, std::vector<u8>> m_data;

        std::fs::path m_sourceFilePath;
    };
--- a/plugins/builtin/include/content/views/view_diff.hpp
+++ b/plugins/builtin/include/content/views/view_diff.hpp
@ -12,7 +12,7 @@

 #include "ui/hex_editor.hpp"

-#include <IntervalTree.h>
+#include <IITree.h>

 namespace hex::plugin::builtin {

--- a/plugins/builtin/include/content/views/view_find.hpp
+++ b/plugins/builtin/include/content/views/view_find.hpp
@ -9,7 +9,7 @@
 #include <atomic>
 #include <vector>

-#include <IntervalTree.h>
+#include <IITree.h>

 namespace hex::plugin::builtin {

@ -94,11 +94,11 @@ namespace hex::plugin::builtin {

        } m_searchSettings, m_decodeSettings;

-        using OccurrenceTree = interval_tree::IntervalTree<u64, Occurrence>;
+        using OccurrenceTree = IITree<u64, Occurrence>;

-        std::map<prv::Provider*, std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
-        std::map<prv::Provider*, OccurrenceTree> m_occurrenceTree;
-        std::map<prv::Provider*, std::string> m_currFilter;
+        PerProvider<std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
+        PerProvider<OccurrenceTree> m_occurrenceTree;
+        PerProvider<std::string> m_currFilter;

        TaskHolder m_searchTask, m_filterTask;
        bool m_settingsValid = false;
--- a/plugins/builtin/source/content/providers/intel_hex_provider.cpp
+++ b/plugins/builtin/source/content/providers/intel_hex_provider.cpp
@ -161,26 +161,37 @@ namespace hex::plugin::builtin {
    void IntelHexProvider::setBaseAddress(u64 address) {
        auto oldBase = this->getBaseAddress();

-        auto intervals = this->m_data.findOverlapping(oldBase, oldBase + this->getActualSize());
+        std::vector<size_t> indices; // positions of the stored intervals matched for the range starting at the old base
+        this->m_data.overlap(oldBase, oldBase + this->getActualSize(), indices);

-        for (auto &interval : intervals) {
-            interval.start = (interval.start - oldBase) + address;
-            interval.stop  = (interval.stop  - oldBase) + address;
+        IITree<u64, std::vector<u8>> intervals; // replacement tree; only the intervals matched above are carried over
+        for (auto &index : indices) {
+            intervals.add(
+                    (this->m_data.start(index) - oldBase) + address, // start, moved from the old base to the new one
+                    (this->m_data.end(index) - oldBase) + address, // end, moved by the same amount
+                    this->m_data.data(index)
+            );
        }

        this->m_data = std::move(intervals);
+        this->m_data.index(); // the new tree has to be indexed before overlap() can be used on it

        Provider::setBaseAddress(address);
    }

    void IntelHexProvider::readRaw(u64 offset, void *buffer, size_t size) {
-        auto intervals = this->m_data.findOverlapping(offset, (offset + size) - 1);
+        // Positions of every stored record matched for the requested range [offset, offset + size - 1]
+        std::vector<size_t> indices;
+        this->m_data.overlap(offset, (offset + size) - 1, indices);

        std::memset(buffer, 0x00, size);
        auto bytes = reinterpret_cast<u8*>(buffer);
-        for (const auto &interval : intervals) {
-            for (u32 i = std::max(interval.start, offset); i <= interval.stop && (i - offset) < size; i++) {
-                bytes[i - offset] =  interval.value[i - interval.start];
+        for (const auto &index : indices) {
+            const auto start = this->m_data.start(index);
+            const auto end   = this->m_data.end(index);
+            // Bind by reference: data() returns a reference into the tree, a by-value copy would duplicate the record on every read
+            const auto &data = this->m_data.data(index);
+
+            // 64-bit counter so that addresses above 4 GiB are not truncated; `end` is treated as inclusive
+            for (u64 i = std::max(start, offset); i <= end && (i - offset) < size; i++) {
+                bytes[i - offset] = data[i - start];
            }
        }
    }
@ -203,15 +214,15 @@ namespace hex::plugin::builtin {
            return false;

        u64 maxAddress = 0x00;
-        decltype(this->m_data)::interval_vector intervals;
        for (auto &[address, bytes] : data) {
            auto endAddress = (address + bytes.size()) - 1;
-            intervals.emplace_back(address, endAddress, std::move(bytes));
+            this->m_data.add(address, endAddress, std::move(bytes));

            if (endAddress > maxAddress)
                maxAddress = endAddress;
        }
-        this->m_data = std::move(intervals);
+        this->m_data.index();
+
        this->m_dataSize = maxAddress + 1;
        this->m_dataValid = true;

@ -254,17 +265,22 @@ namespace hex::plugin::builtin {
    }

    std::pair<Region, bool> IntelHexProvider::getRegionValidity(u64 address) const {
-        auto intervals = this->m_data.findOverlapping(address, address);
-        if (intervals.empty()) {
+        std::vector<size_t> indices; // positions of the stored intervals matched at `address`
+        this->m_data.overlap(address, address, indices);
+        if (indices.empty()) { // nothing stored at this address: defer to the base implementation
            return Provider::getRegionValidity(address);
        }

-        auto closestInterval = intervals.front();
-        for (const auto &interval : intervals) {
-            if (interval.start < closestInterval.start)
-                closestInterval = interval;
+        auto closestIndex = indices.front();
+        for (const auto &index : indices) { // keep the match with the smallest start address
+            if (this->m_data.start(index) < this->m_data.start(closestIndex))
+                closestIndex = index;
        }
-        return { Region { closestInterval.start, (closestInterval.stop - closestInterval.start) + 1}, true };
+
+        auto start = this->m_data.start(closestIndex);
+        auto end   = this->m_data.end(closestIndex);
+
+        return { Region { start, (end - start) + 1 }, true }; // `end` is treated as inclusive, hence the + 1
    }

    void IntelHexProvider::loadSettings(const nlohmann::json &settings) {
--- a/plugins/builtin/source/content/providers/motorola_srec_provider.cpp
+++ b/plugins/builtin/source/content/providers/motorola_srec_provider.cpp
@ -180,15 +180,15 @@ namespace hex::plugin::builtin {
            return false;

        u64 maxAddress = 0x00;
-        decltype(this->m_data)::interval_vector intervals;
        for (auto &[address, bytes] : data) {
            auto endAddress = (address + bytes.size()) - 1;
-            intervals.emplace_back(address, endAddress, std::move(bytes));
+            this->m_data.add(address, endAddress, std::move(bytes));

            if (endAddress > maxAddress)
                maxAddress = endAddress;
        }
-        this->m_data = std::move(intervals);
+        this->m_data.index();
+
        this->m_dataSize = maxAddress + 1;
        this->m_dataValid = true;

--- a/plugins/builtin/source/content/views/view_find.cpp
+++ b/plugins/builtin/source/content/views/view_find.cpp
@ -22,9 +22,8 @@ namespace hex::plugin::builtin {
            if (this->m_searchTask.isRunning())
                return { };

-            auto provider = ImHexApi::Provider::get();
-
-            if (!this->m_occurrenceTree[provider].findOverlapping(address, address).empty())
+            std::vector<size_t> occurrences;
+            if (this->m_occurrenceTree->overlap(address, address, occurrences))
                return HighlightColor();
            else
                return std::nullopt;
@ -36,10 +35,8 @@ namespace hex::plugin::builtin {
            if (this->m_searchTask.isRunning())
                return;

-            auto provider = ImHexApi::Provider::get();
-
-            auto occurrences = this->m_occurrenceTree[provider].findOverlapping(address, address);
-            if (occurrences.empty())
+            std::vector<size_t> occurrences;
+            if (!this->m_occurrenceTree->overlap(address, address, occurrences))
                return;

            ImGui::BeginTooltip();
@ -51,7 +48,10 @@ namespace hex::plugin::builtin {
                    ImGui::TableNextColumn();

                    {
-                        const auto value = this->decodeValue(ImHexApi::Provider::get(), occurrence.value, 256);
+                        auto start = this->m_occurrenceTree->start(occurrence);
+                        auto end = this->m_occurrenceTree->end(occurrence) - 1;
+                        const auto &bytes = this->m_occurrenceTree->data(occurrence);
+                        const auto value = this->decodeValue(ImHexApi::Provider::get(), bytes, 256);

                        ImGui::ColorButton("##color", ImColor(HighlightColor()));
                        ImGui::SameLine(0, 10);
@ -65,7 +65,7 @@ namespace hex::plugin::builtin {
                                ImGui::TableNextColumn();
                                ImGui::TextFormatted("{}: ", "hex.builtin.common.region"_lang);
                                ImGui::TableNextColumn();
-                                ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", occurrence.value.region.getStartAddress(), occurrence.value.region.getEndAddress());
+                                ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", start, end);

                                auto demangledValue = llvm::demangle(value);

@ -494,28 +494,27 @@ namespace hex::plugin::builtin {
            switch (settings.mode) {
                using enum SearchSettings::Mode;
                case Strings:
-                    this->m_foundOccurrences[provider] = searchStrings(task, provider, searchRegion, settings.strings);
+                    this->m_foundOccurrences.get(provider) = searchStrings(task, provider, searchRegion, settings.strings);
                    break;
                case Sequence:
-                    this->m_foundOccurrences[provider] = searchSequence(task, provider, searchRegion, settings.bytes);
+                    this->m_foundOccurrences.get(provider) = searchSequence(task, provider, searchRegion, settings.bytes);
                    break;
                case Regex:
-                    this->m_foundOccurrences[provider] = searchRegex(task, provider, searchRegion, settings.regex);
+                    this->m_foundOccurrences.get(provider) = searchRegex(task, provider, searchRegion, settings.regex);
                    break;
                case BinaryPattern:
-                    this->m_foundOccurrences[provider] = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
+                    this->m_foundOccurrences.get(provider) = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
                    break;
                case Value:
-                    this->m_foundOccurrences[provider] = searchValue(task, provider, searchRegion, settings.value);
+                    this->m_foundOccurrences.get(provider) = searchValue(task, provider, searchRegion, settings.value);
                    break;
            }

-            this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
+            this->m_sortedOccurrences.get(provider) = this->m_foundOccurrences.get(provider);

-            OccurrenceTree::interval_vector intervals;
-            for (const auto &occurrence : this->m_foundOccurrences[provider])
-                intervals.emplace_back(occurrence.region.getStartAddress(), occurrence.region.getEndAddress(), occurrence);
-            this->m_occurrenceTree[provider] = std::move(intervals);
+            for (const auto &occurrence : this->m_foundOccurrences.get(provider))
+                this->m_occurrenceTree->add(occurrence.region.getStartAddress(), occurrence.region.getEndAddress() + 1, occurrence);
+            this->m_occurrenceTree->index();
        });
    }

@ -800,14 +799,14 @@ namespace hex::plugin::builtin {
                ImGui::EndDisabled();

                ImGui::SameLine();
-                ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences[provider].size());
+                ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences->size());

-                ImGui::BeginDisabled(this->m_foundOccurrences[provider].empty());
+                ImGui::BeginDisabled(this->m_foundOccurrences->empty());
                {
                    if (ImGui::Button("hex.builtin.view.find.search.reset"_lang)) {
-                        this->m_foundOccurrences[provider].clear();
-                        this->m_sortedOccurrences[provider].clear();
-                        this->m_occurrenceTree[provider].clear();
+                        this->m_foundOccurrences->clear();
+                        this->m_sortedOccurrences->clear();
+                        *this->m_occurrenceTree = {};
                    }
                }
                ImGui::EndDisabled();
@ -818,25 +817,25 @@ namespace hex::plugin::builtin {
            ImGui::Separator();
            ImGui::NewLine();

-            auto &currOccurrences = this->m_sortedOccurrences[provider];
+            auto &currOccurrences = *this->m_sortedOccurrences;

            ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x);
-            auto prevFilterLength = this->m_currFilter[provider].length();
-            if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, this->m_currFilter[provider])) {
-                if (prevFilterLength > this->m_currFilter[provider].length())
-                    this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
+            auto prevFilterLength = this->m_currFilter->length();
+            if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, *this->m_currFilter)) {
+                if (prevFilterLength > this->m_currFilter->length())
+                    *this->m_sortedOccurrences = *this->m_foundOccurrences;

                if (this->m_filterTask.isRunning())
                    this->m_filterTask.interrupt();

-                if (!this->m_currFilter[provider].empty()) {
+                if (!this->m_currFilter->empty()) {
                    this->m_filterTask = TaskManager::createTask("Filtering", currOccurrences.size(), [this, provider, &currOccurrences](Task &task) {
                        u64 progress = 0;
                        currOccurrences.erase(std::remove_if(currOccurrences.begin(), currOccurrences.end(), [this, provider, &task, &progress](const auto &region) {
                            task.update(progress);
                            progress += 1;

-                            return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter[provider]);
+                            return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter.get(provider));
                        }), currOccurrences.end());
                    });
                }
--- a/plugins/builtin/source/content/views/view_hashes.cpp
+++ b/plugins/builtin/source/content/views/view_hashes.cpp
@ -18,7 +18,7 @@ namespace hex::plugin::builtin {

            auto selection = ImHexApi::HexEditor::getSelection();

-            if (ImGui::GetIO().KeyShift) {
+            if (selection.has_value() && ImGui::GetIO().KeyShift) {
                auto &hashFunctions = this->m_hashFunctions.get(selection->getProvider());
                if (!hashFunctions.empty() && selection.has_value() && selection->overlaps(Region { address, size })) {
                    ImGui::BeginTooltip();