build: Switch to better interval tree implementation

2024-11-28 09:30:51 +01:00 · 2023-05-07 23:27:43 +02:00 · 2023-05-07 23:27:43 +02:00 · 5a6e5d2255
commit 5a6e5d2255
parent 82111617a4
13 changed files with 308 additions and 430 deletions
--- a/cmake/build_helpers.cmake
+++ b/cmake/build_helpers.cmake
@ -410,7 +410,7 @@ endfunction()
 macro(setupCompilerWarnings target)
    set(IMHEX_COMMON_FLAGS "-Wall -Wextra -Wpedantic -Werror")
-    set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds")
+    set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds -Wno-dangling-reference")
    set(CMAKE_C_FLAGS    "${CMAKE_C_FLAGS}    ${IMHEX_C_FLAGS}")
    set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS}  ${IMHEX_C_FLAGS}")
--- a/lib/external/intervaltree/LICENSE
+++ b/lib/external/intervaltree/LICENSE
@ -1,19 +1,23 @@
-Copyright (c) 2011 Erik Garrison
+The MIT License
-Permission is hereby granted, free of charge, to any person obtaining a copy of
+Copyright (c) 2019     Dana-Farber Cancer Institute
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 of the Software, and to permit persons to whom the Software is furnished to do
 so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
+Permission is hereby granted, free of charge, to any person obtaining
-copies or substantial portions of the Software.
+a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+The above copyright notice and this permission notice shall be
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+included in all copies or substantial portions of the Software.
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/lib/external/intervaltree/README.md
+++ b/lib/external/intervaltree/README.md
@ -1,37 +1,133 @@
-# intervaltree
+## Introduction
-## Overview
+cgranges is a small C library for genomic interval overlap queries: given a
-
+genomic region *r* and a set of regions *R*, finding all regions in *R* that
-An interval tree can be used to efficiently find a set of numeric intervals overlapping or containing another interval.
+overlap *r*. Although this library is based on [interval tree][itree], a well
-
+known data structure, the core algorithm of cgranges is distinct from all
-This library provides a basic implementation of an interval tree using C++ templates, allowing the insertion of arbitrary types into the tree.
+existing implementations to the best of our knowledge.  Specifically, the
 interval tree in cgranges is implicitly encoded as a plain sorted array
 (similar to [binary heap][bheap] but packed differently). Tree
 traversal is achieved by jumping between array indices. This treatment makes
 cgranges very efficient and compact in memory. The core algorithm can be
 implemented in ~50 lines of C++ code, much shorter than others as well. Please
 see the code comments in [cpp/IITree.h](cpp/IITree.h) for details.
 ## Usage
-Add `#include "IntervalTree.h"` to the source files in which you will use the interval tree.
+### Test with BED coverage
-To make an IntervalTree to contain objects of class T, use:
+For testing purposes, this repo implements the [bedtools coverage][bedcov] tool
 with cgranges. The source code is located in the [test/](test) directory. You
 can compile and run the test with:
 ```sh
 cd test && make
 ./bedcov-cr test1.bed test2.bed
 ```
 The first BED file is loaded into RAM and indexed. The depth and the breadth of
 coverage of each region in the second file is computed by query against the
 index of the first file.
-```c++
+The [test/](test) directory also contains a few other implementations based on
-vector<Interval<T> > intervals;
+[IntervalTree.h][ekg-itree] in C++, [quicksect][quicksect] in Cython and
-T a, b, c;
+[ncls][ncls] in Cython. The table below shows timing and peak memory on two
-intervals.push_back(Interval<T>(2, 10, a));
+test BEDs available in the release page. The first BED contains GenCode
-intervals.push_back(Interval<T>(3, 4, b));
+annotations with ~1.2 million lines, mixing all types of features. The second
-intervals.push_back(Interval<T>(20, 100, c));
+contains ~10 million direct-RNA mappings. Time1a/Mem1a indexes the GenCode BED
-IntervalTree<T> tree;
+into memory. Time1b adds whole chromosome intervals to the GenCode BED when
-tree = IntervalTree<T>(intervals);
+indexing. Time2/Mem2 indexes the RNA-mapping BED into memory. Numbers are
 averaged over 5 runs.
 |Algo.   |Lang. |Cov|Program         |Time1a|Time1b|Mem1a   |Time2 |Mem2    |
 |:-------|:-----|:-:|:---------------|-----:|-----:|-------:|-----:|-------:|
 |IAITree |C     |Y  |cgranges        |9.0s  |13.9s |19.1MB  |4.6s  |138.4MB |
 |IAITree |C++   |Y  |cpp/iitree.h    |11.1s |24.5s |22.4MB  |5.8s  |160.4MB |
 |CITree  |C++   |Y  |IntervalTree.h  |17.4s |17.4s |27.2MB  |10.5s |179.5MB |
 |IAITree |C     |N  |cgranges        |7.6s  |13.0s |19.1MB  |4.1s  |138.4MB |
 |AIList  |C     |N  |3rd-party/AIList|7.9s  |8.1s  |14.4MB  |6.5s  |104.8MB |
 |NCList  |C     |N  |3rd-party/NCList|13.0s |13.4s |21.4MB  |10.6s |183.0MB |
 |AITree  |C     |N  |3rd-party/AITree|16.8s |18.4s |73.4MB  |27.3s |546.4MB |
 |IAITree |Cython|N  |cgranges        |56.6s |63.9s |23.4MB  |43.9s |143.1MB |
 |binning |C++   |Y  |bedtools        |201.9s|280.4s|478.5MB |149.1s|3438.1MB|
 Here, IAITree = implicit augmented interval tree, used by cgranges;
 CITree = centered interval tree, used by [Erik Garrison's
 IntervalTree][ekg-itree]; AIList = augmented interval list, by [Feng et
 al][ailist]; NCList = nested containment list, taken from [ncls][ncls] by Feng
 et al; AITree = augmented interval tree, from [kerneltree][kerneltree].
 "Cov" indicates whether the program calculates breadth of coverage.
 Comments:
 * AIList keeps start and end only. IAITree and CITree additionally store a
  4-byte "ID" field per interval to reference the source of interval. This is
  partly why AIList uses the least memory.
 * IAITree is more sensitive to the worst case: the presence of an interval
  spanning the whole chromosome.
 * IAITree uses an efficient radix sort. CITree uses std::sort from STL, which
  is ok. AIList and NCList use qsort from libc, which is slow. Faster sorting
  leads to faster indexing.
 * IAITree in C++ uses identical core algorithm to the C version, but limited by
  its APIs, it wastes time on memory locality and management. CITree has a
  similar issue.
 * Computing coverage is better done when the returned list of intervals is
  sorted by start. IAITree returns a sorted list. CITree doesn't. Not sure about
  others. Computing coverage takes a couple of seconds. Sorting will be slower.
 * Printing intervals also takes a noticeable fraction of time. Custom printf
  equivalent would be faster.
 * IAITree+Cython is a wrapper around the C version of cgranges. Cython adds
  significant overhead.
 * Bedtools is designed for a variety of applications in addition to computing
  coverage. It may keep other information in its internal data structure. This
  micro-benchmark may be unfair to bedtools.
 * In general, the performance is affected a lot by subtle implementation
  details. CITree, IAITree, NCList and AIList are all broadly comparable in
  performance. AITree is not recommended when indexed intervals are immutable.
 ### Use cgranges as a C library
 ```c
 cgranges_t *cr = cr_init(); // initialize a cgranges_t object
 cr_add(cr, "chr1", 20, 30, 0); // add a genomic interval
 cr_add(cr, "chr2", 10, 30, 1);
 cr_add(cr, "chr1", 10, 25, 2);
 cr_index(cr); // index
 int64_t i, n, *b = 0, max_b = 0;
 n = cr_overlap(cr, "chr1", 15, 22, &b, &max_b); // overlap query; output array b[] can be reused
 for (i = 0; i < n; ++i) // traverse overlapping intervals
 	printf("%d\t%d\t%d\n", cr_start(cr, b[i]), cr_end(cr, b[i]), cr_label(cr, b[i]));
 free(b); // b[] is allocated by malloc() inside cr_overlap(), so needs to be freed with free()
 cr_destroy(cr);
 ```
-Now, it's possible to query the tree and obtain a set of intervals which are contained within the start and stop coordinates.
+### Use IITree as a C++ library
-```c++
+```cpp
-vector<Interval<T> > results;
+IITree<int, int> tree;
-tree.findContained(start, stop, results);
+tree.add(12, 34, 0); // add an interval
-cout << "found " << results.size() << " overlapping intervals" << endl;
+tree.add(0, 23, 1);
 tree.add(34, 56, 2);
 tree.index(); // index
 std::vector<size_t> a;
 tree.overlap(22, 25, a); // retrieve overlaps
 for (size_t i = 0; i < a.size(); ++i)
 	printf("%d\t%d\t%d\n", tree.start(a[i]), tree.end(a[i]), tree.data(a[i]));
 ```
-The function IntervalTree::findOverlapping provides a method to find all those intervals which are contained or partially overlap the interval (start, stop).
+[bedcov]: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html
-
+[ekg-itree]: https://github.com/ekg/intervaltree
-### Author: Erik Garrison <erik.garrison@gmail.com>
+[quicksect]: https://github.com/brentp/quicksect
-
+[ncls]: https://github.com/hunt-genes/ncls
-### License: MIT
+[citree]: https://en.wikipedia.org/wiki/Interval_tree#Centered_interval_tree
 [itree]: https://en.wikipedia.org/wiki/Interval_tree
 [bheap]: https://en.wikipedia.org/wiki/Binary_heap
 [ailist]: https://www.biorxiv.org/content/10.1101/593657v1
 [kerneltree]: https://github.com/biocore-ntnu/kerneltree
--- a/lib/external/intervaltree/include/IITree.h
+++ b/lib/external/intervaltree/include/IITree.h
@ -0,0 +1,88 @@
 #pragma once
 #include <vector>
 #include <algorithm>
 #include <cstring>
 #include <cstdlib>
 /// Implicit interval tree stored as a flat array.
 ///
 /// Intervals are collected with add(), arranged by index() and queried with overlap().
 /// Both interval ends are treated as inclusive: an interval [st, en] matches a query
 /// [qst, qen] when `qst <= en && st <= qen`.
 /// index() must be called after the last add() and before the first overlap().
 template<typename S, typename T> // "S" is a scalar type; "T" is the type of data associated with each interval
 class IITree {
    // Capacity of the explicit traversal stacks. At most one entry per tree level (plus one)
    // is alive at any time, so 64 slots cover every tree addressable with a 64 bit size_t.
    static constexpr size_t StackSize = 64;

    /// One entry of the explicit traversal stack.
    struct StackCell {
        size_t x; // node index inside `a`
        int w;    // progress marker: 0 = left child not handled yet, 1 = left handled, 2 = both handled
        StackCell() { } // deliberately leaves the members unset; slots are always written before being read
        StackCell(size_t x_, int w_) : x(x_), w(w_) { }
    };
    /// A stored interval plus the largest `en` found in the subtree rooted at it.
    struct Interval {
        S st, en, max;
        T data;
        Interval() = default;
        Interval(const S &s, const S &e, const T &d) : st(s), en(e), max(e), data(d) { }
    };
    /// Orders intervals by ascending start.
    struct IntervalLess {
        bool operator()(const Interval &intervalA, const Interval &intervalB) const { return intervalA.st < intervalB.st; }
    };
    std::vector<Interval> a;

    /// Copies the start-sorted array `a` into `b` in breadth-first (Eytzinger) order, so that the
    /// children of node k live at 2k+1 and 2k+2. `i` is the next unread element of `a`, `k` the
    /// slot of `b` being filled; returns the updated `i`.
    size_t layout_recur(Interval *b, size_t i = 0, size_t k = 0) { // see https://algorithmica.org/en/eytzinger
        if (k < a.size()) {
            i = layout_recur(b, i, (k<<1) + 1);
            b[k] = a[i++];
            i = layout_recur(b, i, (k<<1) + 2);
        }
        return i;
    }

    /// Sets Interval::max for all `n` nodes of `interval`. Children are completed before their
    /// parent by walking the tree with an explicit stack.
    void index_BFS(Interval *interval, size_t n) { // set Interval::max
        if (n == 0) return; // no root to visit; the walk below would write interval[0]
        int t = 0;
        StackCell stack[StackSize];
        stack[t++] = StackCell(0, 0);
        while (t) {
            StackCell z = stack[--t];
            size_t k = z.x, l = k<<1|1, r = l + 1;
            if (z.w == 2) { // Interval::max for both children are computed
                interval[k].max = interval[k].en;
                if (l < n && interval[k].max < interval[l].max) interval[k].max = interval[l].max;
                if (r < n && interval[k].max < interval[r].max) interval[k].max = interval[r].max;
            } else { // go down into the two children: w == 0 selects the left one, w == 1 the right one
                stack[t++] = StackCell(k, z.w + 1);
                if (l + z.w < n)
                    stack[t++] = StackCell(l + z.w, 0);
            }
        }
    }
 public:
    /// Queues the interval [s, e] with payload `d`. The tree has to be (re-)indexed afterwards.
    void add(const S &s, const S &e, const T &d) { a.push_back(Interval(s, e, d)); }

    /// Sorts the queued intervals by start, lays them out as an implicit tree and computes the
    /// per-subtree maxima used for pruning. Safe to call on an empty tree.
    void index() {
        if (a.empty()) return;
        std::sort(a.begin(), a.end(), IntervalLess());
        std::vector<Interval> b(a.size());
        layout_recur(b.data());
        a.swap(b); // adopt the laid-out copy without another element-wise copy
        index_BFS(a.data(), a.size());
    }

    /// Collects the indices of all intervals that share at least one point with [st, en]
    /// (both ends inclusive) into `out`, replacing its previous content.
    /// The indices are emitted in ascending order of interval start and can be passed to
    /// start(), end() and data().
    /// @return true if at least one interval was found
    bool overlap(const S &st, const S &en, std::vector<size_t> &out) const {
        out.clear();
        if (a.empty()) return false;

        const size_t n = a.size();
        StackCell stack[StackSize];
        int t = 0;
        stack[t++] = StackCell(0, 0); // push the root; this is a top down traversal
        while (t) { // in-order walk, hence out[] is filled in ascending start order
            const StackCell z = stack[--t];
            const size_t l = (z.x<<1) + 1, r = l + 1;
            const Interval &node = a[z.x];
            if (l >= n) { // a leaf node
                if (st <= node.en && node.st <= en) out.push_back(z.x);
            } else if (z.w == 0) { // if left child not processed
                stack[t++] = StackCell(z.x, 1); // re-add node z.x, but mark the left child having been processed
                if (!(a[l].max < st)) // skip the left subtree when everything in it ends before the query starts
                    stack[t++] = StackCell(l, 0);
            } else if (node.st <= en) { // nodes further right start even later, so stop once st > en
                if (st <= node.en) out.push_back(z.x); // test if z.x overlaps the query; if yes, append to out[]
                if (r < n) stack[t++] = StackCell(r, 0);
            }
        }
        return !out.empty();
    }

    /// Number of stored intervals.
    size_t size(void) const { return a.size(); }
    /// Start of the interval with index `i` (as reported by overlap()).
    const S &start(size_t i) const { return a[i].st; }
    /// End of the interval with index `i`.
    const S &end(size_t i) const { return a[i].en; }
    /// Payload of the interval with index `i`.
    const T &data(size_t i) const { return a[i].data; }
 };
--- a/lib/external/intervaltree/include/IntervalTree.h
+++ b/lib/external/intervaltree/include/IntervalTree.h
@ -1,325 +0,0 @@
 #ifndef __INTERVAL_TREE_H
 #define __INTERVAL_TREE_H
 #include <vector>
 #include <algorithm>
 #include <iostream>
 #include <memory>
 #include <cassert>
 #include <limits>
 #ifdef USE_INTERVAL_TREE_NAMESPACE
 namespace interval_tree {
 #endif
 /// A numeric range with an attached value.
 /// The constructor orders the two bounds, so `start <= stop` holds for every instance it creates.
 template <class Scalar, typename Value>
 class Interval {
 public:
    Scalar start;
    Scalar stop;
    Value value;
    /// @param s one bound of the range
    /// @param e the other bound; the smaller of the two becomes `start`
    /// @param v value stored alongside the range
    Interval(const Scalar& s, const Scalar& e, const Value& v)
    : start(std::min(s, e))
    , stop(std::max(s, e))
    , value(v)
    {}
 };

 /// Returns the lower bound of `i`.
 /// The result type is the scalar type of the interval, not its value type.
 template <class Scalar, typename Value>
 Scalar intervalStart(const Interval<Scalar,Value>& i) {
    return i.start;
 }

 /// Returns the upper bound of `i`.
 /// The result type is the scalar type of the interval, not its value type.
 template <class Scalar, typename Value>
 Scalar intervalStop(const Interval<Scalar, Value>& i) {
    return i.stop;
 }

 /// Writes `i` as `Interval(<start>, <stop>): <value>`.
 template <class Scalar, typename Value>
 std::ostream& operator<<(std::ostream& out, const Interval<Scalar, Value>& i) {
    out << "Interval(" << i.start << ", " << i.stop << "): " << i.value;
    return out;
 }
 template <class Scalar, class Value>
 class IntervalTree {
 public:
    typedef Interval<Scalar, Value> interval;
    typedef std::vector<interval> interval_vector;
    struct IntervalStartCmp {
        bool operator()(const interval& a, const interval& b) {
            return a.start < b.start;
        }
    };
    struct IntervalStopCmp {
        bool operator()(const interval& a, const interval& b) {
            return a.stop < b.stop;
        }
    };
    IntervalTree()
        : left(nullptr)
        , right(nullptr)
        , center(0)
    {}
    ~IntervalTree() = default;
    std::unique_ptr<IntervalTree> clone() const {
        return std::unique_ptr<IntervalTree>(new IntervalTree(*this));
    }
    IntervalTree(const IntervalTree& other)
    :   intervals(other.intervals),
        left(other.left ? other.left->clone() : nullptr),
        right(other.right ? other.right->clone() : nullptr),
        center(other.center)
    {}
    IntervalTree& operator=(IntervalTree&&) = default;
    IntervalTree(IntervalTree&&) = default;
    IntervalTree& operator=(const IntervalTree& other) {
        center = other.center;
        intervals = other.intervals;
        left = other.left ? other.left->clone() : nullptr;
        right = other.right ? other.right->clone() : nullptr;
        return *this;
    }
    IntervalTree(
            interval_vector&& ivals,
            std::size_t depth = 16,
            std::size_t minbucket = 64,
            std::size_t maxbucket = 512, 
            Scalar leftextent = 0,
            Scalar rightextent = 0)
      : left(nullptr)
      , right(nullptr)
    {
        --depth;
        const auto minmaxStop = std::minmax_element(ivals.begin(), ivals.end(), 
                                                    IntervalStopCmp());
        const auto minmaxStart = std::minmax_element(ivals.begin(), ivals.end(), 
                                                     IntervalStartCmp());
        if (!ivals.empty()) {
            center = (minmaxStart.first->start + minmaxStop.second->stop) / 2;
        }
        if (leftextent == 0 && rightextent == 0) {
            // sort intervals by start
            std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
        } else {
            assert(std::is_sorted(ivals.begin(), ivals.end(), IntervalStartCmp()));
        }
        if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) {
            std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
            intervals = std::move(ivals);
            assert(is_valid().first);
            return;
        } else {
            Scalar leftp = 0;
            Scalar rightp = 0;
            if (leftextent || rightextent) {
                leftp = leftextent;
                rightp = rightextent;
            } else {
                leftp = ivals.front().start;
                rightp = std::max_element(ivals.begin(), ivals.end(),
                                          IntervalStopCmp())->stop;
            }
            interval_vector lefts;
            interval_vector rights;
            for (typename interval_vector::const_iterator i = ivals.begin(); 
                 i != ivals.end(); ++i) {
                const interval& interval = *i;
                if (interval.stop < center) {
                    lefts.push_back(interval);
                } else if (interval.start > center) {
                    rights.push_back(interval);
                } else {
                    assert(interval.start <= center);
                    assert(center <= interval.stop);
                    intervals.push_back(interval);
                }
            }
            if (!lefts.empty()) {
                left.reset(new IntervalTree(std::move(lefts), 
                                            depth, minbucket, maxbucket,
                                            leftp, center));
            }
            if (!rights.empty()) {
                right.reset(new IntervalTree(std::move(rights), 
                                             depth, minbucket, maxbucket, 
                                             center, rightp));
            }
        }
        assert(is_valid().first);
    }
    // Call f on all intervals near the range [start, stop]:
    template <class UnaryFunction>
    void visit_near(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
        if (!intervals.empty() && ! (stop < intervals.front().start)) {
            for (auto & i : intervals) {
              f(i);
            }
        }
        if (left && start <= center) {
            left->visit_near(start, stop, f);
        }
        if (right && stop >= center) {
            right->visit_near(start, stop, f);
        }
    }
    // Call f on all intervals crossing pos
    template <class UnaryFunction>
    void visit_overlapping(const Scalar& pos, UnaryFunction f) const {
        visit_overlapping(pos, pos, f);
    }
    // Call f on all intervals overlapping [start, stop]
    template <class UnaryFunction>
    void visit_overlapping(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
        auto filterF = [&](const interval& interval) {
            if (interval.stop >= start && interval.start <= stop) {
                // Only apply f if overlapping
                f(interval);
            }
        };
        visit_near(start, stop, filterF);
    }
    // Call f on all intervals contained within [start, stop]
    template <class UnaryFunction>
    void visit_contained(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
        auto filterF = [&](const interval& interval) {
            if (start <= interval.start && interval.stop <= stop) {
                f(interval);
            }
        };
        visit_near(start, stop, filterF);
    }
    interval_vector findOverlapping(const Scalar& start, const Scalar& stop) const {
        interval_vector result;
        visit_overlapping(start, stop,
                          [&](const interval& interval) { 
                            result.emplace_back(interval); 
                          });
        return result;
    }
    interval_vector findContained(const Scalar& start, const Scalar& stop) const {
        interval_vector result;
        visit_contained(start, stop,
                        [&](const interval& interval) { 
                          result.push_back(interval); 
                        });
        return result;
    }
    bool empty() const {
        if (left && !left->empty()) {
            return false;
        }
        if (!intervals.empty()) { 
            return false;
        }
        if (right && !right->empty()) {
            return false;
        }
        return true;
    }
    template <class UnaryFunction>
    void visit_all(UnaryFunction f) const {
        if (left) {
            left->visit_all(f);
        }
        std::for_each(intervals.begin(), intervals.end(), f);
        if (right) {
            right->visit_all(f);
        }
    }
    std::pair<Scalar, Scalar> extentBruitForce() const {
        struct Extent {
            std::pair<Scalar, Scalar> x = {std::numeric_limits<Scalar>::max(),
                                                       std::numeric_limits<Scalar>::min() };
            void operator()(const interval & interval) {
                x.first  = std::min(x.first,  interval.start);
                x.second = std::max(x.second, interval.stop);
            }
                                                                };
                                            Extent extent;
        visit_all([&](const interval & interval) { extent(interval); });
        return extent.x;
                                            }
    // Check all constraints.
    // If first is false, second is invalid.
    std::pair<bool, std::pair<Scalar, Scalar>> is_valid() const {
        const auto minmaxStop = std::minmax_element(intervals.begin(), intervals.end(), 
                                                    IntervalStopCmp());
        const auto minmaxStart = std::minmax_element(intervals.begin(), intervals.end(), 
                                                     IntervalStartCmp());
        std::pair<bool, std::pair<Scalar, Scalar>> result = {true, { std::numeric_limits<Scalar>::max(),
                                                                     std::numeric_limits<Scalar>::min() }};
        if (!intervals.empty()) {
            result.second.first   = std::min(result.second.first,  minmaxStart.first->start);
            result.second.second  = std::min(result.second.second, minmaxStop.second->stop);
        }
        if (left) {
            auto valid = left->is_valid();
            result.first &= valid.first;
            result.second.first   = std::min(result.second.first,  valid.second.first);
            result.second.second  = std::min(result.second.second, valid.second.second);
            if (!result.first) { return result; }
            if (valid.second.second >= center) {
                result.first = false;
                return result;
            }
        }
        if (right) {
            auto valid = right->is_valid();
            result.first &= valid.first;
            result.second.first   = std::min(result.second.first,  valid.second.first);
            result.second.second  = std::min(result.second.second, valid.second.second);
            if (!result.first) { return result; }
            if (valid.second.first <= center) { 
                result.first = false;
                return result;
            }
        }
        if (!std::is_sorted(intervals.begin(), intervals.end(), IntervalStartCmp())) {
            result.first = false;
        }
        return result;        
    }
    void clear() {
        left.reset();
        right.reset();
        intervals.clear();
        center = 0;
    }
 private:
    interval_vector intervals;
    std::unique_ptr<IntervalTree> left;
    std::unique_ptr<IntervalTree> right;
    Scalar center;
 };
 #ifdef USE_INTERVAL_TREE_NAMESPACE
 }
 #endif
 #endif
--- a/lib/external/pattern_language
+++ b/lib/external/pattern_language
@ -1 +1 @@
-Subproject commit 9a687a5364ea27aa838f499afedb8e231f238a40
+Subproject commit 20a21a7de0db4be0b63553ebac90950dbf2a58fe
--- a/plugins/builtin/include/content/providers/intel_hex_provider.hpp
+++ b/plugins/builtin/include/content/providers/intel_hex_provider.hpp
@ -2,7 +2,7 @@
 #include <hex/providers/provider.hpp>
-#include <IntervalTree.h>
+#include <IITree.h>
 namespace hex::plugin::builtin {
@ -44,7 +44,7 @@ namespace hex::plugin::builtin {
    protected:
        bool m_dataValid = false;
        size_t m_dataSize = 0x00;
-        interval_tree::IntervalTree<u64, std::vector<u8>> m_data;
+        IITree<u64, std::vector<u8>> m_data;
        std::fs::path m_sourceFilePath;
    };
--- a/plugins/builtin/include/content/views/view_diff.hpp
+++ b/plugins/builtin/include/content/views/view_diff.hpp
@ -12,7 +12,7 @@
 #include "ui/hex_editor.hpp"
-#include <IntervalTree.h>
+#include <IITree.h>
 namespace hex::plugin::builtin {
--- a/plugins/builtin/include/content/views/view_find.hpp
+++ b/plugins/builtin/include/content/views/view_find.hpp
@ -9,7 +9,7 @@
 #include <atomic>
 #include <vector>
-#include <IntervalTree.h>
+#include <IITree.h>
 namespace hex::plugin::builtin {
@ -94,11 +94,11 @@ namespace hex::plugin::builtin {
        } m_searchSettings, m_decodeSettings;
-        using OccurrenceTree = interval_tree::IntervalTree<u64, Occurrence>;
+        using OccurrenceTree = IITree<u64, Occurrence>;
-        std::map<prv::Provider*, std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
+        PerProvider<std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
-        std::map<prv::Provider*, OccurrenceTree> m_occurrenceTree;
+        PerProvider<OccurrenceTree> m_occurrenceTree;
-        std::map<prv::Provider*, std::string> m_currFilter;
+        PerProvider<std::string> m_currFilter;
        TaskHolder m_searchTask, m_filterTask;
        bool m_settingsValid = false;
--- a/plugins/builtin/source/content/providers/intel_hex_provider.cpp
+++ b/plugins/builtin/source/content/providers/intel_hex_provider.cpp
@ -161,26 +161,37 @@ namespace hex::plugin::builtin {
    // Moves the loaded data to a new base address: every interval found between the old base
    // address and old base + actual size is re-inserted into a fresh tree, shifted by the
    // difference between the old and the new base, before the provider's base address is updated.
    void IntelHexProvider::setBaseAddress(u64 address) {
        const auto previousBase = this->getBaseAddress();

        std::vector<size_t> hits;
        this->m_data.overlap(previousBase, previousBase + this->getActualSize(), hits);

        IITree<u64, std::vector<u8>> rebased;
        for (const size_t hit : hits) {
            const auto shiftedStart = (this->m_data.start(hit) - previousBase) + address;
            const auto shiftedEnd   = (this->m_data.end(hit)   - previousBase) + address;
            rebased.add(shiftedStart, shiftedEnd, this->m_data.data(hit));
        }

        // The new tree has to be indexed before it can answer overlap queries.
        this->m_data = std::move(rebased);
        this->m_data.index();

        Provider::setBaseAddress(address);
    }
    // Fills `buffer` with the `size` bytes located at [offset, offset + size).
    // Bytes that are not covered by any stored interval are returned as 0x00.
    void IntelHexProvider::readRaw(u64 offset, void *buffer, size_t size) {
        // Nothing to copy; also keeps (offset + size) - 1 below from wrapping around for offset 0.
        if (size == 0)
            return;

        std::vector<size_t> indices;
        this->m_data.overlap(offset, (offset + size) - 1, indices);

        std::memset(buffer, 0x00, size);

        auto bytes = reinterpret_cast<u8*>(buffer);
        for (const auto &index : indices) {
            const auto start = this->m_data.start(index);
            const auto end   = this->m_data.end(index);
            const auto &data = this->m_data.data(index);    // bound by reference: no copy of the byte vector per interval

            // The counter is as wide as the addresses; a 32 bit counter would truncate addresses above 4 GiB.
            for (u64 i = std::max(start, offset); i <= end && (i - offset) < size; i++) {
                bytes[i - offset] = data[i - start];
            }
        }
    }
@ -203,15 +214,15 @@ namespace hex::plugin::builtin {
            return false;
        u64 maxAddress = 0x00;
        decltype(this->m_data)::interval_vector intervals;
        for (auto &[address, bytes] : data) {
            auto endAddress = (address + bytes.size()) - 1;
-            intervals.emplace_back(address, endAddress, std::move(bytes));
+            this->m_data.add(address, endAddress, std::move(bytes));
            if (endAddress > maxAddress)
                maxAddress = endAddress;
        }
-        this->m_data = std::move(intervals);
+        this->m_data.index();
        this->m_dataSize = maxAddress + 1;
        this->m_dataValid = true;
@ -254,17 +265,22 @@ namespace hex::plugin::builtin {
    }
    // Reports the data region that contains `address`.
    // When no stored interval covers the address the decision is delegated to the base class;
    // otherwise the covering interval with the smallest start address is returned as a valid region.
    std::pair<Region, bool> IntelHexProvider::getRegionValidity(u64 address) const {
        std::vector<size_t> hits;
        this->m_data.overlap(address, address, hits);

        if (hits.empty())
            return Provider::getRegionValidity(address);

        // Keep the first hit with the lowest start address.
        size_t best = hits.front();
        for (size_t i = 1; i < hits.size(); i++) {
            if (this->m_data.start(hits[i]) < this->m_data.start(best))
                best = hits[i];
        }

        const auto regionStart = this->m_data.start(best);
        const auto regionEnd   = this->m_data.end(best);
        return { Region { regionStart, (regionEnd - regionStart) + 1 }, true };
    }
    void IntelHexProvider::loadSettings(const nlohmann::json &settings) {
--- a/plugins/builtin/source/content/providers/motorola_srec_provider.cpp
+++ b/plugins/builtin/source/content/providers/motorola_srec_provider.cpp
@ -180,15 +180,15 @@ namespace hex::plugin::builtin {
            return false;
        u64 maxAddress = 0x00;
        decltype(this->m_data)::interval_vector intervals;
        for (auto &[address, bytes] : data) {
            auto endAddress = (address + bytes.size()) - 1;
-            intervals.emplace_back(address, endAddress, std::move(bytes));
+            this->m_data.add(address, endAddress, std::move(bytes));
            if (endAddress > maxAddress)
                maxAddress = endAddress;
        }
-        this->m_data = std::move(intervals);
+        this->m_data.index();
        this->m_dataSize = maxAddress + 1;
        this->m_dataValid = true;
--- a/plugins/builtin/source/content/views/view_find.cpp
+++ b/plugins/builtin/source/content/views/view_find.cpp
@ -22,9 +22,8 @@ namespace hex::plugin::builtin {
            if (this->m_searchTask.isRunning())
                return { };
-            auto provider = ImHexApi::Provider::get();
+            std::vector<size_t> occurrences;
-
+            if (this->m_occurrenceTree->overlap(address, address, occurrences))
-            if (!this->m_occurrenceTree[provider].findOverlapping(address, address).empty())
                return HighlightColor();
            else
                return std::nullopt;
@ -36,10 +35,8 @@ namespace hex::plugin::builtin {
            if (this->m_searchTask.isRunning())
                return;
-            auto provider = ImHexApi::Provider::get();
+            std::vector<size_t> occurrences;
-
+            if (!this->m_occurrenceTree->overlap(address, address, occurrences))
-            auto occurrences = this->m_occurrenceTree[provider].findOverlapping(address, address);
-            if (occurrences.empty())
                return;
            ImGui::BeginTooltip();
@ -51,7 +48,10 @@ namespace hex::plugin::builtin {
                    ImGui::TableNextColumn();
                    {
-                        const auto value = this->decodeValue(ImHexApi::Provider::get(), occurrence.value, 256);
+                        auto start = this->m_occurrenceTree->start(occurrence);
+                        auto end = this->m_occurrenceTree->end(occurrence) - 1;
+                        const auto &bytes = this->m_occurrenceTree->data(occurrence);
+                        const auto value = this->decodeValue(ImHexApi::Provider::get(), bytes, 256);
                        ImGui::ColorButton("##color", ImColor(HighlightColor()));
                        ImGui::SameLine(0, 10);
@ -65,7 +65,7 @@ namespace hex::plugin::builtin {
                                ImGui::TableNextColumn();
                                ImGui::TextFormatted("{}: ", "hex.builtin.common.region"_lang);
                                ImGui::TableNextColumn();
-                                ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", occurrence.value.region.getStartAddress(), occurrence.value.region.getEndAddress());
+                                ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", start, end);
                                auto demangledValue = llvm::demangle(value);
@ -494,28 +494,27 @@ namespace hex::plugin::builtin {
            switch (settings.mode) {
                using enum SearchSettings::Mode;
                case Strings:
-                    this->m_foundOccurrences[provider] = searchStrings(task, provider, searchRegion, settings.strings);
+                    this->m_foundOccurrences.get(provider) = searchStrings(task, provider, searchRegion, settings.strings);
                    break;
                case Sequence:
-                    this->m_foundOccurrences[provider] = searchSequence(task, provider, searchRegion, settings.bytes);
+                    this->m_foundOccurrences.get(provider) = searchSequence(task, provider, searchRegion, settings.bytes);
                    break;
                case Regex:
-                    this->m_foundOccurrences[provider] = searchRegex(task, provider, searchRegion, settings.regex);
+                    this->m_foundOccurrences.get(provider) = searchRegex(task, provider, searchRegion, settings.regex);
                    break;
                case BinaryPattern:
-                    this->m_foundOccurrences[provider] = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
+                    this->m_foundOccurrences.get(provider) = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
                    break;
                case Value:
-                    this->m_foundOccurrences[provider] = searchValue(task, provider, searchRegion, settings.value);
+                    this->m_foundOccurrences.get(provider) = searchValue(task, provider, searchRegion, settings.value);
                    break;
            }
-            this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
+            this->m_sortedOccurrences.get(provider) = this->m_foundOccurrences.get(provider);
-            OccurrenceTree::interval_vector intervals;
+            for (const auto &occurrence : this->m_foundOccurrences.get(provider))
-            for (const auto &occurrence : this->m_foundOccurrences[provider])
+                this->m_occurrenceTree->add(occurrence.region.getStartAddress(), occurrence.region.getEndAddress() + 1, occurrence);
-                intervals.emplace_back(occurrence.region.getStartAddress(), occurrence.region.getEndAddress(), occurrence);
+            this->m_occurrenceTree->index();
-            this->m_occurrenceTree[provider] = std::move(intervals);
        });
    }
@ -800,14 +799,14 @@ namespace hex::plugin::builtin {
                ImGui::EndDisabled();
                ImGui::SameLine();
-                ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences[provider].size());
+                ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences->size());
-                ImGui::BeginDisabled(this->m_foundOccurrences[provider].empty());
+                ImGui::BeginDisabled(this->m_foundOccurrences->empty());
                {
                    if (ImGui::Button("hex.builtin.view.find.search.reset"_lang)) {
-                        this->m_foundOccurrences[provider].clear();
+                        this->m_foundOccurrences->clear();
-                        this->m_sortedOccurrences[provider].clear();
+                        this->m_sortedOccurrences->clear();
-                        this->m_occurrenceTree[provider].clear();
+                        *this->m_occurrenceTree = {};
                    }
                }
                ImGui::EndDisabled();
@ -818,25 +817,25 @@ namespace hex::plugin::builtin {
            ImGui::Separator();
            ImGui::NewLine();
-            auto &currOccurrences = this->m_sortedOccurrences[provider];
+            auto &currOccurrences = *this->m_sortedOccurrences;
            ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x);
-            auto prevFilterLength = this->m_currFilter[provider].length();
+            auto prevFilterLength = this->m_currFilter->length();
-            if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, this->m_currFilter[provider])) {
+            if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, *this->m_currFilter)) {
-                if (prevFilterLength > this->m_currFilter[provider].length())
+                if (prevFilterLength > this->m_currFilter->length())
-                    this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
+                    *this->m_sortedOccurrences = *this->m_foundOccurrences;
                if (this->m_filterTask.isRunning())
                    this->m_filterTask.interrupt();
-                if (!this->m_currFilter[provider].empty()) {
+                if (!this->m_currFilter->empty()) {
                    this->m_filterTask = TaskManager::createTask("Filtering", currOccurrences.size(), [this, provider, &currOccurrences](Task &task) {
                        u64 progress = 0;
                        currOccurrences.erase(std::remove_if(currOccurrences.begin(), currOccurrences.end(), [this, provider, &task, &progress](const auto &region) {
                            task.update(progress);
                            progress += 1;
-                            return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter[provider]);
+                            return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter.get(provider));
                        }), currOccurrences.end());
                    });
                }
--- a/plugins/builtin/source/content/views/view_hashes.cpp
+++ b/plugins/builtin/source/content/views/view_hashes.cpp
@ -18,7 +18,7 @@ namespace hex::plugin::builtin {
            auto selection = ImHexApi::HexEditor::getSelection();
-            if (ImGui::GetIO().KeyShift) {
+            if (selection.has_value() && ImGui::GetIO().KeyShift) {
                auto &hashFunctions = this->m_hashFunctions.get(selection->getProvider());
                if (!hashFunctions.empty() && selection.has_value() && selection->overlaps(Region { address, size })) {
                    ImGui::BeginTooltip();
@ -1 +1 @@
-Subproject commit 9a687a5364ea27aa838f499afedb8e231f238a40
+Subproject commit 20a21a7de0db4be0b63553ebac90950dbf2a58fe