1
0
mirror of synced 2024-11-28 09:30:51 +01:00

build: Switch to better interval tree implementation

This commit is contained in:
WerWolv 2023-05-07 23:27:43 +02:00
parent 82111617a4
commit 5a6e5d2255
13 changed files with 308 additions and 430 deletions

View File

@ -410,7 +410,7 @@ endfunction()
macro(setupCompilerWarnings target)
set(IMHEX_COMMON_FLAGS "-Wall -Wextra -Wpedantic -Werror")
set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds")
set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds -Wno-dangling-reference")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${IMHEX_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IMHEX_C_FLAGS}")

View File

@ -1,19 +1,23 @@
Copyright (c) 2011 Erik Garrison
The MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
Copyright (c) 2019 Dana-Farber Cancer Institute
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,37 +1,133 @@
# intervaltree
## Introduction
## Overview
An interval tree can be used to efficiently find a set of numeric intervals overlapping or containing another interval.
This library provides a basic implementation of an interval tree using C++ templates, allowing the insertion of arbitrary types into the tree.
cgranges is a small C library for genomic interval overlap queries: given a
genomic region *r* and a set of regions *R*, finding all regions in *R* that
overlap *r*. Although this library is based on [interval tree][itree], a well
known data structure, the core algorithm of cgranges is distinct from all
existing implementations to the best of our knowledge. Specifically, the
interval tree in cgranges is implicitly encoded as a plain sorted array
(similar to [binary heap][bheap] but packed differently). Tree
traversal is achieved by jumping between array indices. This treatment makes
cgranges very efficient and compact in memory. The core algorithm can be
implemented in ~50 lines of C++ code, much shorter than others as well. Please
see the code comments in [cpp/IITree.h](cpp/IITree.h) for details.
## Usage
Add `#include "IntervalTree.h"` to the source files in which you will use the interval tree.
### Test with BED coverage
To make an IntervalTree to contain objects of class T, use:
For testing purposes, this repo implements the [bedtools coverage][bedcov] tool
with cgranges. The source code is located in the [test/](test) directory. You
can compile and run the test with:
```sh
cd test && make
./bedcov-cr test1.bed test2.bed
```
The first BED file is loaded into RAM and indexed. The depth and the breadth of
coverage of each region in the second file is computed by query against the
index of the first file.
```c++
vector<Interval<T> > intervals;
T a, b, c;
intervals.push_back(Interval<T>(2, 10, a));
intervals.push_back(Interval<T>(3, 4, b));
intervals.push_back(Interval<T>(20, 100, c));
IntervalTree<T> tree;
tree = IntervalTree<T>(intervals);
The [test/](test) directory also contains a few other implementations based on
[IntervalTree.h][ekg-itree] in C++, [quicksect][quicksect] in Cython and
[ncls][ncls] in Cython. The table below shows timing and peak memory on two
test BEDs available in the release page. The first BED contains GenCode
annotations with ~1.2 million lines, mixing all types of features. The second
contains ~10 million direct-RNA mappings. Time1a/Mem1a indexes the GenCode BED
into memory. Time1b adds whole chromosome intervals to the GenCode BED when
indexing. Time2/Mem2 indexes the RNA-mapping BED into memory. Numbers are
averaged over 5 runs.
|Algo. |Lang. |Cov|Program |Time1a|Time1b|Mem1a |Time2 |Mem2 |
|:-------|:-----|:-:|:---------------|-----:|-----:|-------:|-----:|-------:|
|IAITree |C |Y |cgranges |9.0s |13.9s |19.1MB |4.6s |138.4MB |
|IAITree |C++ |Y |cpp/iitree.h |11.1s |24.5s |22.4MB |5.8s |160.4MB |
|CITree |C++ |Y |IntervalTree.h |17.4s |17.4s |27.2MB |10.5s |179.5MB |
|IAITree |C |N |cgranges |7.6s |13.0s |19.1MB |4.1s |138.4MB |
|AIList |C |N |3rd-party/AIList|7.9s |8.1s |14.4MB |6.5s |104.8MB |
|NCList |C |N |3rd-party/NCList|13.0s |13.4s |21.4MB |10.6s |183.0MB |
|AITree |C |N |3rd-party/AITree|16.8s |18.4s |73.4MB |27.3s |546.4MB |
|IAITree |Cython|N |cgranges |56.6s |63.9s |23.4MB |43.9s |143.1MB |
|binning |C++ |Y |bedtools |201.9s|280.4s|478.5MB |149.1s|3438.1MB|
Here, IAITree = implicit augmented interval tree, used by cgranges;
CITree = centered interval tree, used by [Erik Garrison's
IntervalTree][ekg-itree]; AIList = augmented interval list, by [Feng et
al][ailist]; NCList = nested containment list, taken from [ncls][ncls] by Feng
et al; AITree = augmented interval tree, from [kerneltree][kerneltree].
"Cov" indicates whether the program calculates breadth of coverage.
Comments:
* AIList keeps start and end only. IAITree and CITree additionally store a
4-byte "ID" field per interval to reference the source of interval. This is
partly why AIList uses the least memory.
* IAITree is more sensitive to the worst case: the presence of an interval
spanning the whole chromosome.
* IAITree uses an efficient radix sort. CITree uses std::sort from STL, which
is ok. AIList and NCList use qsort from libc, which is slow. Faster sorting
leads to faster indexing.
* IAITree in C++ uses identical core algorithm to the C version, but limited by
its APIs, it wastes time on memory locality and management. CITree has a
similar issue.
* Computing coverage is better done when the returned list of intervals is
sorted by start. IAITree returns a sorted list. CITree doesn't. Not sure about
others. Computing coverage takes a couple of seconds. Sorting will be slower.
* Printing intervals also takes a noticeable fraction of time. Custom printf
equivalent would be faster.
* IAITree+Cython is a wrapper around the C version of cgranges. Cython adds
significant overhead.
* Bedtools is designed for a variety of applications in addition to computing
coverage. It may keep other information in its internal data structure. This
micro-benchmark may be unfair to bedtools.
* In general, the performance is affected a lot by subtle implementation
details. CITree, IAITree, NCList and AIList are all broadly comparable in
performance. AITree is not recommended when indexed intervals are immutable.
### Use cgranges as a C library
```c
cgranges_t *cr = cr_init(); // initialize a cgranges_t object
cr_add(cr, "chr1", 20, 30, 0); // add a genomic interval
cr_add(cr, "chr2", 10, 30, 1);
cr_add(cr, "chr1", 10, 25, 2);
cr_index(cr); // index
int64_t i, n, *b = 0, max_b = 0;
n = cr_overlap(cr, "chr1", 15, 22, &b, &max_b); // overlap query; output array b[] can be reused
for (i = 0; i < n; ++i) // traverse overlapping intervals
printf("%d\t%d\t%d\n", cr_start(cr, b[i]), cr_end(cr, b[i]), cr_label(cr, b[i]));
free(b); // b[] is allocated by malloc() inside cr_overlap(), so needs to be freed with free()
cr_destroy(cr);
```
Now, it's possible to query the tree and obtain a set of intervals which are contained within the start and stop coordinates.
### Use IITree as a C++ library
```c++
vector<Interval<T> > results;
tree.findContained(start, stop, results);
cout << "found " << results.size() << " overlapping intervals" << endl;
```cpp
IITree<int, int> tree;
tree.add(12, 34, 0); // add an interval
tree.add(0, 23, 1);
tree.add(34, 56, 2);
tree.index(); // index
std::vector<size_t> a;
tree.overlap(22, 25, a); // retrieve overlaps
for (size_t i = 0; i < a.size(); ++i)
printf("%d\t%d\t%d\n", tree.start(a[i]), tree.end(a[i]), tree.data(a[i]));
```
The function IntervalTree::findOverlapping provides a method to find all those intervals which are contained or partially overlap the interval (start, stop).
### Author: Erik Garrison <erik.garrison@gmail.com>
### License: MIT
[bedcov]: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html
[ekg-itree]: https://github.com/ekg/intervaltree
[quicksect]: https://github.com/brentp/quicksect
[ncls]: https://github.com/hunt-genes/ncls
[citree]: https://en.wikipedia.org/wiki/Interval_tree#Centered_interval_tree
[itree]: https://en.wikipedia.org/wiki/Interval_tree
[bheap]: https://en.wikipedia.org/wiki/Binary_heap
[ailist]: https://www.biorxiv.org/content/10.1101/593657v1
[kerneltree]: https://github.com/biocore-ntnu/kerneltree

View File

@ -0,0 +1,88 @@
#pragma once
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <utility>
#include <vector>
// Implicit interval tree: all intervals live in one std::vector that is
// arranged as a binary search tree in array form (children of node k are at
// 2k+1 and 2k+2, see https://algorithmica.org/en/eytzinger).
//
// "S" is a scalar type; "T" is the type of data associated with each interval.
//
// Usage: add() any number of intervals, call index() once, then query with
// overlap(). Indices handed out by overlap() refer to the indexed layout and
// are only meaningful until the next index() call.
//
// Bound semantics, as implemented by overlap(): a stored interval (s, e)
// matches a query (st, en) when `st < e && s <= en`, i.e. the stored end is
// treated as exclusive while the query end is treated as inclusive.
template<typename S, typename T>
class IITree {
    // One entry of the explicit traversal stack that replaces recursion.
    struct StackCell {
        size_t x; // node index
        int w;    // traversal state; 0 if the left child hasn't been processed yet
        StackCell() {} // intentionally leaves the members uninitialised (stack slots are always assigned before use)
        StackCell(size_t x_, int w_) : x(x_), w(w_) {}
    };

    struct Interval {
        S st, en, max; // max: largest `en` found in the subtree rooted at this node (set by index_BFS)
        T data;
        Interval() = default;
        Interval(const S &s, const S &e, const T &d) : st(s), en(e), max(e), data(d) { }
    };

    // Orders intervals by start coordinate only.
    struct IntervalLess {
        bool operator()(const Interval &intervalA, const Interval &intervalB) const { return intervalA.st < intervalB.st; }
    };

    // Node k sits at depth floor(log2(k + 1)), so with a 64-bit size_t no
    // traversal below ever needs more than 64 pending cells.
    static constexpr size_t MaxStackDepth = 64;

    std::vector<Interval> a;

    // In-order walk over the implicit tree that fills `b` from the sorted
    // contents of `a`: the i-th smallest interval is placed at the i-th node
    // visited in-order. Elements are moved out of `a`, which the caller
    // discards afterwards. Returns the next unused index into `a`.
    size_t layout_recur(Interval *b, size_t i = 0, size_t k = 0) {
        if (k < a.size()) {
            i = layout_recur(b, i, (k << 1) + 1);
            b[k] = std::move(a[i++]);
            i = layout_recur(b, i, (k << 1) + 2);
        }
        return i;
    }

    // Sets Interval::max for every node. Despite the name this is a
    // post-order traversal: a node is finalised (w == 2) only after both of
    // its children have been finalised.
    void index_BFS(Interval *interval, size_t n) {
        if (n == 0) return; // no root to visit; interval[0] would be out of bounds

        int t = 0;
        std::array<StackCell, MaxStackDepth> stack;
        stack[t++] = StackCell(0, 0);
        while (t) {
            const StackCell z = stack[--t];
            const size_t k = z.x, l = k << 1 | 1, r = l + 1;
            if (z.w == 2) { // Interval::max of both children has been computed
                interval[k].max = interval[k].en;
                if (l < n && interval[k].max < interval[l].max) interval[k].max = interval[l].max;
                if (r < n && interval[k].max < interval[r].max) interval[k].max = interval[r].max;
            } else { // descend into the left (w == 0) or right (w == 1) child
                stack[t++] = StackCell(k, z.w + 1);
                if (l + z.w < n)
                    stack[t++] = StackCell(l + z.w, 0);
            }
        }
    }

public:
    // Appends an interval; the tree must be (re-)indexed before querying.
    void add(const S &s, const S &e, const T &d) { a.push_back(Interval(s, e, d)); }

    // Sorts the intervals by start, rearranges them into the implicit tree
    // layout and computes the per-subtree maxima used for pruning.
    // Safe to call on an empty tree.
    void index() {
        if (a.empty()) return;

        std::sort(a.begin(), a.end(), IntervalLess());
        std::vector<Interval> b(a.size());
        layout_recur(b.data());
        a = std::move(b); // adopt the new layout without copying the payloads again
        index_BFS(a.data(), a.size());
    }

    // Collects into `out` the indices of all stored intervals matching the
    // query (st, en); see the class comment for the exact bound semantics.
    // `out` is cleared first and ends up ordered by interval start.
    // Returns true when at least one interval was found.
    bool overlap(const S &st, const S &en, std::vector<size_t> &out) const {
        out.clear();
        if (a.empty()) return false;

        int t = 0;
        std::array<StackCell, MaxStackDepth> stack;
        stack[t++] = StackCell(0, 0); // push the root; this is a top down traversal
        while (t) { // in-order visiting keeps the numbers in out[] sorted by start
            const StackCell z = stack[--t];
            const size_t l = (z.x << 1) + 1, r = l + 1;
            if (l >= a.size()) { // a leaf node
                if (st < a[z.x].en && a[z.x].st <= en) out.push_back(z.x);
            } else if (z.w == 0) { // left child not processed yet
                stack[t++] = StackCell(z.x, 1); // re-add node z.x, marking the left child as processed
                if (a[l].max > st) // skip the left subtree if nothing in it ends after st
                    stack[t++] = StackCell(l, 0);
            } else if (a[z.x].st <= en) { // otherwise this node and everything to its right start after en
                if (st < a[z.x].en) out.push_back(z.x); // test if z.x overlaps the query; if yes, append to out[]
                if (r < a.size()) stack[t++] = StackCell(r, 0);
            }
        }
        return !out.empty();
    }

    size_t size(void) const { return a.size(); }
    const S &start(size_t i) const { return a[i].st; }
    const S &end(size_t i) const { return a[i].en; }
    const T &data(size_t i) const { return a[i].data; }
};

View File

@ -1,325 +0,0 @@
#ifndef __INTERVAL_TREE_H
#define __INTERVAL_TREE_H
#include <vector>
#include <algorithm>
#include <iostream>
#include <memory>
#include <cassert>
#include <limits>
#ifdef USE_INTERVAL_TREE_NAMESPACE
namespace interval_tree {
#endif
// A [start, stop] range of Scalar coordinates carrying a user-defined Value.
template <class Scalar, typename Value>
class Interval {
public:
    Scalar start;
    Scalar stop;
    Value value;

    // The two endpoints may be passed in either order; the smaller one
    // becomes `start` and the larger one becomes `stop`.
    Interval(const Scalar& s, const Scalar& e, const Value& v)
    : start(std::min(s, e))
    , stop(std::max(s, e))
    , value(v)
    {}
};

// Returns the start coordinate of `i`.
// The result type is Scalar (the coordinate type), not Value: returning Value
// only compiled when the two types happened to be convertible.
template <class Scalar, typename Value>
Scalar intervalStart(const Interval<Scalar,Value>& i) {
    return i.start;
}

// Returns the stop coordinate of `i` (see intervalStart for the result type).
template <class Scalar, typename Value>
Scalar intervalStop(const Interval<Scalar, Value>& i) {
    return i.stop;
}

// Streams the interval as "Interval(<start>, <stop>): <value>".
template <class Scalar, typename Value>
std::ostream& operator<<(std::ostream& out, const Interval<Scalar, Value>& i) {
    out << "Interval(" << i.start << ", " << i.stop << "): " << i.value;
    return out;
}
template <class Scalar, class Value>
class IntervalTree {
public:
typedef Interval<Scalar, Value> interval;
typedef std::vector<interval> interval_vector;
struct IntervalStartCmp {
bool operator()(const interval& a, const interval& b) {
return a.start < b.start;
}
};
struct IntervalStopCmp {
bool operator()(const interval& a, const interval& b) {
return a.stop < b.stop;
}
};
IntervalTree()
: left(nullptr)
, right(nullptr)
, center(0)
{}
~IntervalTree() = default;
std::unique_ptr<IntervalTree> clone() const {
return std::unique_ptr<IntervalTree>(new IntervalTree(*this));
}
IntervalTree(const IntervalTree& other)
: intervals(other.intervals),
left(other.left ? other.left->clone() : nullptr),
right(other.right ? other.right->clone() : nullptr),
center(other.center)
{}
IntervalTree& operator=(IntervalTree&&) = default;
IntervalTree(IntervalTree&&) = default;
IntervalTree& operator=(const IntervalTree& other) {
center = other.center;
intervals = other.intervals;
left = other.left ? other.left->clone() : nullptr;
right = other.right ? other.right->clone() : nullptr;
return *this;
}
IntervalTree(
interval_vector&& ivals,
std::size_t depth = 16,
std::size_t minbucket = 64,
std::size_t maxbucket = 512,
Scalar leftextent = 0,
Scalar rightextent = 0)
: left(nullptr)
, right(nullptr)
{
--depth;
const auto minmaxStop = std::minmax_element(ivals.begin(), ivals.end(),
IntervalStopCmp());
const auto minmaxStart = std::minmax_element(ivals.begin(), ivals.end(),
IntervalStartCmp());
if (!ivals.empty()) {
center = (minmaxStart.first->start + minmaxStop.second->stop) / 2;
}
if (leftextent == 0 && rightextent == 0) {
// sort intervals by start
std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
} else {
assert(std::is_sorted(ivals.begin(), ivals.end(), IntervalStartCmp()));
}
if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) {
std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
intervals = std::move(ivals);
assert(is_valid().first);
return;
} else {
Scalar leftp = 0;
Scalar rightp = 0;
if (leftextent || rightextent) {
leftp = leftextent;
rightp = rightextent;
} else {
leftp = ivals.front().start;
rightp = std::max_element(ivals.begin(), ivals.end(),
IntervalStopCmp())->stop;
}
interval_vector lefts;
interval_vector rights;
for (typename interval_vector::const_iterator i = ivals.begin();
i != ivals.end(); ++i) {
const interval& interval = *i;
if (interval.stop < center) {
lefts.push_back(interval);
} else if (interval.start > center) {
rights.push_back(interval);
} else {
assert(interval.start <= center);
assert(center <= interval.stop);
intervals.push_back(interval);
}
}
if (!lefts.empty()) {
left.reset(new IntervalTree(std::move(lefts),
depth, minbucket, maxbucket,
leftp, center));
}
if (!rights.empty()) {
right.reset(new IntervalTree(std::move(rights),
depth, minbucket, maxbucket,
center, rightp));
}
}
assert(is_valid().first);
}
// Call f on all intervals near the range [start, stop]:
template <class UnaryFunction>
void visit_near(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
if (!intervals.empty() && ! (stop < intervals.front().start)) {
for (auto & i : intervals) {
f(i);
}
}
if (left && start <= center) {
left->visit_near(start, stop, f);
}
if (right && stop >= center) {
right->visit_near(start, stop, f);
}
}
// Call f on all intervals crossing pos
template <class UnaryFunction>
void visit_overlapping(const Scalar& pos, UnaryFunction f) const {
visit_overlapping(pos, pos, f);
}
// Call f on all intervals overlapping [start, stop]
template <class UnaryFunction>
void visit_overlapping(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
auto filterF = [&](const interval& interval) {
if (interval.stop >= start && interval.start <= stop) {
// Only apply f if overlapping
f(interval);
}
};
visit_near(start, stop, filterF);
}
// Call f on all intervals contained within [start, stop]
template <class UnaryFunction>
void visit_contained(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
auto filterF = [&](const interval& interval) {
if (start <= interval.start && interval.stop <= stop) {
f(interval);
}
};
visit_near(start, stop, filterF);
}
interval_vector findOverlapping(const Scalar& start, const Scalar& stop) const {
interval_vector result;
visit_overlapping(start, stop,
[&](const interval& interval) {
result.emplace_back(interval);
});
return result;
}
interval_vector findContained(const Scalar& start, const Scalar& stop) const {
interval_vector result;
visit_contained(start, stop,
[&](const interval& interval) {
result.push_back(interval);
});
return result;
}
bool empty() const {
if (left && !left->empty()) {
return false;
}
if (!intervals.empty()) {
return false;
}
if (right && !right->empty()) {
return false;
}
return true;
}
template <class UnaryFunction>
void visit_all(UnaryFunction f) const {
if (left) {
left->visit_all(f);
}
std::for_each(intervals.begin(), intervals.end(), f);
if (right) {
right->visit_all(f);
}
}
std::pair<Scalar, Scalar> extentBruitForce() const {
struct Extent {
std::pair<Scalar, Scalar> x = {std::numeric_limits<Scalar>::max(),
std::numeric_limits<Scalar>::min() };
void operator()(const interval & interval) {
x.first = std::min(x.first, interval.start);
x.second = std::max(x.second, interval.stop);
}
};
Extent extent;
visit_all([&](const interval & interval) { extent(interval); });
return extent.x;
}
// Check all constraints.
// If first is false, second is invalid.
std::pair<bool, std::pair<Scalar, Scalar>> is_valid() const {
const auto minmaxStop = std::minmax_element(intervals.begin(), intervals.end(),
IntervalStopCmp());
const auto minmaxStart = std::minmax_element(intervals.begin(), intervals.end(),
IntervalStartCmp());
std::pair<bool, std::pair<Scalar, Scalar>> result = {true, { std::numeric_limits<Scalar>::max(),
std::numeric_limits<Scalar>::min() }};
if (!intervals.empty()) {
result.second.first = std::min(result.second.first, minmaxStart.first->start);
result.second.second = std::min(result.second.second, minmaxStop.second->stop);
}
if (left) {
auto valid = left->is_valid();
result.first &= valid.first;
result.second.first = std::min(result.second.first, valid.second.first);
result.second.second = std::min(result.second.second, valid.second.second);
if (!result.first) { return result; }
if (valid.second.second >= center) {
result.first = false;
return result;
}
}
if (right) {
auto valid = right->is_valid();
result.first &= valid.first;
result.second.first = std::min(result.second.first, valid.second.first);
result.second.second = std::min(result.second.second, valid.second.second);
if (!result.first) { return result; }
if (valid.second.first <= center) {
result.first = false;
return result;
}
}
if (!std::is_sorted(intervals.begin(), intervals.end(), IntervalStartCmp())) {
result.first = false;
}
return result;
}
void clear() {
left.reset();
right.reset();
intervals.clear();
center = 0;
}
private:
interval_vector intervals;
std::unique_ptr<IntervalTree> left;
std::unique_ptr<IntervalTree> right;
Scalar center;
};
#ifdef USE_INTERVAL_TREE_NAMESPACE
}
#endif
#endif

@ -1 +1 @@
Subproject commit 9a687a5364ea27aa838f499afedb8e231f238a40
Subproject commit 20a21a7de0db4be0b63553ebac90950dbf2a58fe

View File

@ -2,7 +2,7 @@
#include <hex/providers/provider.hpp>
#include <IntervalTree.h>
#include <IITree.h>
namespace hex::plugin::builtin {
@ -44,7 +44,7 @@ namespace hex::plugin::builtin {
protected:
bool m_dataValid = false;
size_t m_dataSize = 0x00;
interval_tree::IntervalTree<u64, std::vector<u8>> m_data;
IITree<u64, std::vector<u8>> m_data;
std::fs::path m_sourceFilePath;
};

View File

@ -12,7 +12,7 @@
#include "ui/hex_editor.hpp"
#include <IntervalTree.h>
#include <IITree.h>
namespace hex::plugin::builtin {

View File

@ -9,7 +9,7 @@
#include <atomic>
#include <vector>
#include <IntervalTree.h>
#include <IITree.h>
namespace hex::plugin::builtin {
@ -94,11 +94,11 @@ namespace hex::plugin::builtin {
} m_searchSettings, m_decodeSettings;
using OccurrenceTree = interval_tree::IntervalTree<u64, Occurrence>;
using OccurrenceTree = IITree<u64, Occurrence>;
std::map<prv::Provider*, std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
std::map<prv::Provider*, OccurrenceTree> m_occurrenceTree;
std::map<prv::Provider*, std::string> m_currFilter;
PerProvider<std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
PerProvider<OccurrenceTree> m_occurrenceTree;
PerProvider<std::string> m_currFilter;
TaskHolder m_searchTask, m_filterTask;
bool m_settingsValid = false;

View File

@ -161,26 +161,37 @@ namespace hex::plugin::builtin {
void IntelHexProvider::setBaseAddress(u64 address) {
auto oldBase = this->getBaseAddress();
auto intervals = this->m_data.findOverlapping(oldBase, oldBase + this->getActualSize());
std::vector<size_t> indices;
this->m_data.overlap(oldBase, oldBase + this->getActualSize(), indices);
for (auto &interval : intervals) {
interval.start = (interval.start - oldBase) + address;
interval.stop = (interval.stop - oldBase) + address;
IITree<u64, std::vector<u8>> intervals;
for (auto &index : indices) {
intervals.add(
(this->m_data.start(index) - oldBase) + address,
(this->m_data.end(index) - oldBase) + address,
this->m_data.data(index)
);
}
this->m_data = std::move(intervals);
this->m_data.index();
Provider::setBaseAddress(address);
}
void IntelHexProvider::readRaw(u64 offset, void *buffer, size_t size) {
auto intervals = this->m_data.findOverlapping(offset, (offset + size) - 1);
std::vector<size_t> indices;
this->m_data.overlap(offset, (offset + size) - 1, indices);
std::memset(buffer, 0x00, size);
auto bytes = reinterpret_cast<u8*>(buffer);
for (const auto &interval : intervals) {
for (u32 i = std::max(interval.start, offset); i <= interval.stop && (i - offset) < size; i++) {
bytes[i - offset] = interval.value[i - interval.start];
for (const auto &index : indices) {
auto start = this->m_data.start(index);
auto end = this->m_data.end(index);
auto data = this->m_data.data(index);
for (u32 i = std::max(start, offset); i <= end && (i - offset) < size; i++) {
bytes[i - offset] = data[i - start];
}
}
}
@ -203,15 +214,15 @@ namespace hex::plugin::builtin {
return false;
u64 maxAddress = 0x00;
decltype(this->m_data)::interval_vector intervals;
for (auto &[address, bytes] : data) {
auto endAddress = (address + bytes.size()) - 1;
intervals.emplace_back(address, endAddress, std::move(bytes));
this->m_data.add(address, endAddress, std::move(bytes));
if (endAddress > maxAddress)
maxAddress = endAddress;
}
this->m_data = std::move(intervals);
this->m_data.index();
this->m_dataSize = maxAddress + 1;
this->m_dataValid = true;
@ -254,17 +265,22 @@ namespace hex::plugin::builtin {
}
std::pair<Region, bool> IntelHexProvider::getRegionValidity(u64 address) const {
auto intervals = this->m_data.findOverlapping(address, address);
if (intervals.empty()) {
std::vector<size_t> indices;
this->m_data.overlap(address, address, indices);
if (indices.empty()) {
return Provider::getRegionValidity(address);
}
auto closestInterval = intervals.front();
for (const auto &interval : intervals) {
if (interval.start < closestInterval.start)
closestInterval = interval;
auto closestIndex = indices.front();
for (const auto &index : indices) {
if (this->m_data.start(index) < this->m_data.start(closestIndex))
closestIndex = index;
}
return { Region { closestInterval.start, (closestInterval.stop - closestInterval.start) + 1}, true };
auto start = this->m_data.start(closestIndex);
auto end = this->m_data.end(closestIndex);
return { Region { start, (end - start) + 1 }, true };
}
void IntelHexProvider::loadSettings(const nlohmann::json &settings) {

View File

@ -180,15 +180,15 @@ namespace hex::plugin::builtin {
return false;
u64 maxAddress = 0x00;
decltype(this->m_data)::interval_vector intervals;
for (auto &[address, bytes] : data) {
auto endAddress = (address + bytes.size()) - 1;
intervals.emplace_back(address, endAddress, std::move(bytes));
this->m_data.add(address, endAddress, std::move(bytes));
if (endAddress > maxAddress)
maxAddress = endAddress;
}
this->m_data = std::move(intervals);
this->m_data.index();
this->m_dataSize = maxAddress + 1;
this->m_dataValid = true;

View File

@ -22,9 +22,8 @@ namespace hex::plugin::builtin {
if (this->m_searchTask.isRunning())
return { };
auto provider = ImHexApi::Provider::get();
if (!this->m_occurrenceTree[provider].findOverlapping(address, address).empty())
std::vector<size_t> occurrences;
if (this->m_occurrenceTree->overlap(address, address, occurrences))
return HighlightColor();
else
return std::nullopt;
@ -36,10 +35,8 @@ namespace hex::plugin::builtin {
if (this->m_searchTask.isRunning())
return;
auto provider = ImHexApi::Provider::get();
auto occurrences = this->m_occurrenceTree[provider].findOverlapping(address, address);
if (occurrences.empty())
std::vector<size_t> occurrences;
if (!this->m_occurrenceTree->overlap(address, address, occurrences))
return;
ImGui::BeginTooltip();
@ -51,7 +48,10 @@ namespace hex::plugin::builtin {
ImGui::TableNextColumn();
{
const auto value = this->decodeValue(ImHexApi::Provider::get(), occurrence.value, 256);
auto start = this->m_occurrenceTree->start(occurrence);
auto end = this->m_occurrenceTree->end(occurrence) - 1;
const auto &bytes = this->m_occurrenceTree->data(occurrence);
const auto value = this->decodeValue(ImHexApi::Provider::get(), bytes, 256);
ImGui::ColorButton("##color", ImColor(HighlightColor()));
ImGui::SameLine(0, 10);
@ -65,7 +65,7 @@ namespace hex::plugin::builtin {
ImGui::TableNextColumn();
ImGui::TextFormatted("{}: ", "hex.builtin.common.region"_lang);
ImGui::TableNextColumn();
ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", occurrence.value.region.getStartAddress(), occurrence.value.region.getEndAddress());
ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", start, end);
auto demangledValue = llvm::demangle(value);
@ -494,28 +494,27 @@ namespace hex::plugin::builtin {
switch (settings.mode) {
using enum SearchSettings::Mode;
case Strings:
this->m_foundOccurrences[provider] = searchStrings(task, provider, searchRegion, settings.strings);
this->m_foundOccurrences.get(provider) = searchStrings(task, provider, searchRegion, settings.strings);
break;
case Sequence:
this->m_foundOccurrences[provider] = searchSequence(task, provider, searchRegion, settings.bytes);
this->m_foundOccurrences.get(provider) = searchSequence(task, provider, searchRegion, settings.bytes);
break;
case Regex:
this->m_foundOccurrences[provider] = searchRegex(task, provider, searchRegion, settings.regex);
this->m_foundOccurrences.get(provider) = searchRegex(task, provider, searchRegion, settings.regex);
break;
case BinaryPattern:
this->m_foundOccurrences[provider] = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
this->m_foundOccurrences.get(provider) = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
break;
case Value:
this->m_foundOccurrences[provider] = searchValue(task, provider, searchRegion, settings.value);
this->m_foundOccurrences.get(provider) = searchValue(task, provider, searchRegion, settings.value);
break;
}
this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
this->m_sortedOccurrences.get(provider) = this->m_foundOccurrences.get(provider);
OccurrenceTree::interval_vector intervals;
for (const auto &occurrence : this->m_foundOccurrences[provider])
intervals.emplace_back(occurrence.region.getStartAddress(), occurrence.region.getEndAddress(), occurrence);
this->m_occurrenceTree[provider] = std::move(intervals);
for (const auto &occurrence : this->m_foundOccurrences.get(provider))
this->m_occurrenceTree->add(occurrence.region.getStartAddress(), occurrence.region.getEndAddress() + 1, occurrence);
this->m_occurrenceTree->index();
});
}
@ -800,14 +799,14 @@ namespace hex::plugin::builtin {
ImGui::EndDisabled();
ImGui::SameLine();
ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences[provider].size());
ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences->size());
ImGui::BeginDisabled(this->m_foundOccurrences[provider].empty());
ImGui::BeginDisabled(this->m_foundOccurrences->empty());
{
if (ImGui::Button("hex.builtin.view.find.search.reset"_lang)) {
this->m_foundOccurrences[provider].clear();
this->m_sortedOccurrences[provider].clear();
this->m_occurrenceTree[provider].clear();
this->m_foundOccurrences->clear();
this->m_sortedOccurrences->clear();
*this->m_occurrenceTree = {};
}
}
ImGui::EndDisabled();
@ -818,25 +817,25 @@ namespace hex::plugin::builtin {
ImGui::Separator();
ImGui::NewLine();
auto &currOccurrences = this->m_sortedOccurrences[provider];
auto &currOccurrences = *this->m_sortedOccurrences;
ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x);
auto prevFilterLength = this->m_currFilter[provider].length();
if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, this->m_currFilter[provider])) {
if (prevFilterLength > this->m_currFilter[provider].length())
this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
auto prevFilterLength = this->m_currFilter->length();
if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, *this->m_currFilter)) {
if (prevFilterLength > this->m_currFilter->length())
*this->m_sortedOccurrences = *this->m_foundOccurrences;
if (this->m_filterTask.isRunning())
this->m_filterTask.interrupt();
if (!this->m_currFilter[provider].empty()) {
if (!this->m_currFilter->empty()) {
this->m_filterTask = TaskManager::createTask("Filtering", currOccurrences.size(), [this, provider, &currOccurrences](Task &task) {
u64 progress = 0;
currOccurrences.erase(std::remove_if(currOccurrences.begin(), currOccurrences.end(), [this, provider, &task, &progress](const auto &region) {
task.update(progress);
progress += 1;
return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter[provider]);
return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter.get(provider));
}), currOccurrences.end());
});
}

View File

@ -18,7 +18,7 @@ namespace hex::plugin::builtin {
auto selection = ImHexApi::HexEditor::getSelection();
if (ImGui::GetIO().KeyShift) {
if (selection.has_value() && ImGui::GetIO().KeyShift) {
auto &hashFunctions = this->m_hashFunctions.get(selection->getProvider());
if (!hashFunctions.empty() && selection.has_value() && selection->overlaps(Region { address, size })) {
ImGui::BeginTooltip();