build: Switch to better interval tree implementation
This commit is contained in:
parent
82111617a4
commit
5a6e5d2255
@ -410,7 +410,7 @@ endfunction()
|
||||
|
||||
macro(setupCompilerWarnings target)
|
||||
set(IMHEX_COMMON_FLAGS "-Wall -Wextra -Wpedantic -Werror")
|
||||
set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds")
|
||||
set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds -Wno-dangling-reference")
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${IMHEX_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IMHEX_C_FLAGS}")
|
||||
|
34
lib/external/intervaltree/LICENSE
vendored
34
lib/external/intervaltree/LICENSE
vendored
@ -1,19 +1,23 @@
|
||||
Copyright (c) 2011 Erik Garrison
|
||||
The MIT License
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
Copyright (c) 2019 Dana-Farber Cancer Institute
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
148
lib/external/intervaltree/README.md
vendored
148
lib/external/intervaltree/README.md
vendored
@ -1,37 +1,133 @@
|
||||
# intervaltree
|
||||
## Introduction
|
||||
|
||||
## Overview
|
||||
|
||||
An interval tree can be used to efficiently find a set of numeric intervals overlapping or containing another interval.
|
||||
|
||||
This library provides a basic implementation of an interval tree using C++ templates, allowing the insertion of arbitrary types into the tree.
|
||||
cgranges is a small C library for genomic interval overlap queries: given a
|
||||
genomic region *r* and a set of regions *R*, finding all regions in *R* that
|
||||
overlaps *r*. Although this library is based on [interval tree][itree], a well
|
||||
known data structure, the core algorithm of cgranges is distinct from all
|
||||
existing implementations to the best of our knowledge. Specifically, the
|
||||
interval tree in cgranges is implicitly encoded as a plain sorted array
|
||||
(similar to [binary heap][bheap] but packed differently). Tree
|
||||
traversal is achieved by jumping between array indices. This treatment makes
|
||||
cgranges very efficient and compact in memory. The core algorithm can be
|
||||
implemented in ~50 lines of C++ code, much shorter than others as well. Please
|
||||
see the code comments in [cpp/IITree.h](cpp/IITree.h) for details.
|
||||
|
||||
## Usage
|
||||
|
||||
Add `#include "IntervalTree.h"` to the source files in which you will use the interval tree.
|
||||
### Test with BED coverage
|
||||
|
||||
To make an IntervalTree to contain objects of class T, use:
|
||||
For testing purposes, this repo implements the [bedtools coverage][bedcov] tool
|
||||
with cgranges. The source code is located in the [test/](test) directory. You
|
||||
can compile and run the test with:
|
||||
```sh
|
||||
cd test && make
|
||||
./bedcov-cr test1.bed test2.bed
|
||||
```
|
||||
The first BED file is loaded into RAM and indexed. The depth and the breadth of
|
||||
coverage of each region in the second file is computed by query against the
|
||||
index of the first file.
|
||||
|
||||
```c++
|
||||
vector<Interval<T> > intervals;
|
||||
T a, b, c;
|
||||
intervals.push_back(Interval<T>(2, 10, a));
|
||||
intervals.push_back(Interval<T>(3, 4, b));
|
||||
intervals.push_back(Interval<T>(20, 100, c));
|
||||
IntervalTree<T> tree;
|
||||
tree = IntervalTree<T>(intervals);
|
||||
The [test/](test) directory also contains a few other implementations based on
|
||||
[IntervalTree.h][ekg-itree] in C++, [quicksect][quicksect] in Cython and
|
||||
[ncls][ncls] in Cython. The table below shows timing and peak memory on two
|
||||
test BEDs available in the release page. The first BED contains GenCode
|
||||
annotations with ~1.2 million lines, mixing all types of features. The second
|
||||
contains ~10 million direct-RNA mappings. Time1a/Mem1a indexes the GenCode BED
|
||||
into memory. Time1b adds whole chromosome intervals to the GenCode BED when
|
||||
indexing. Time2/Mem2 indexes the RNA-mapping BED into memory. Numbers are
|
||||
averaged over 5 runs.
|
||||
|
||||
|Algo. |Lang. |Cov|Program |Time1a|Time1b|Mem1a |Time2 |Mem2 |
|
||||
|:-------|:-----|:-:|:---------------|-----:|-----:|-------:|-----:|-------:|
|
||||
|IAITree |C |Y |cgranges |9.0s |13.9s |19.1MB |4.6s |138.4MB |
|
||||
|IAITree |C++ |Y |cpp/iitree.h |11.1s |24.5s |22.4MB |5.8s |160.4MB |
|
||||
|CITree |C++ |Y |IntervalTree.h |17.4s |17.4s |27.2MB |10.5s |179.5MB |
|
||||
|IAITree |C |N |cgranges |7.6s |13.0s |19.1MB |4.1s |138.4MB |
|
||||
|AIList |C |N |3rd-party/AIList|7.9s |8.1s |14.4MB |6.5s |104.8MB |
|
||||
|NCList |C |N |3rd-party/NCList|13.0s |13.4s |21.4MB |10.6s |183.0MB |
|
||||
|AITree |C |N |3rd-party/AITree|16.8s |18.4s |73.4MB |27.3s |546.4MB |
|
||||
|IAITree |Cython|N |cgranges |56.6s |63.9s |23.4MB |43.9s |143.1MB |
|
||||
|binning |C++ |Y |bedtools |201.9s|280.4s|478.5MB |149.1s|3438.1MB|
|
||||
|
||||
Here, IAITree = implicit augmented interval tree, used by cgranges;
|
||||
CITree = centered interval tree, used by [Erik Garrison's
|
||||
IntervalTree][itree]; AIList = augmented interval list, by [Feng et
|
||||
al][ailist]; NCList = nested containment list, taken from [ncls][ncls] by Feng
|
||||
et al; AITree = augmented interval tree, from [kerneltree][kerneltree].
|
||||
"Cov" indicates whether the program calculates breadth of coverage.
|
||||
Comments:
|
||||
|
||||
* AIList keeps start and end only. IAITree and CITree additionally store a
|
||||
4-byte "ID" field per interval to reference the source of interval. This is
|
||||
partly why AIList uses the least memory.
|
||||
|
||||
* IAITree is more sensitive to the worst case: the presence of an interval
|
||||
spanning the whole chromosome.
|
||||
|
||||
* IAITree uses an efficient radix sort. CITree uses std::sort from STL, which
|
||||
is ok. AIList and NCList use qsort from libc, which is slow. Faster sorting
|
||||
leads to faster indexing.
|
||||
|
||||
* IAITree in C++ uses identical core algorithm to the C version, but limited by
|
||||
its APIs, it wastes time on memory locality and management. CITree has a
|
||||
similar issue.
|
||||
|
||||
* Computing coverage is better done when the returned list of intervals is
|
||||
start sorted. IAITree returns sorted list. CITree doesn't. Not sure about
|
||||
others. Computing coverage takes a couple of seconds. Sorting will be slower.
|
||||
|
||||
* Printing intervals also takes a noticeable fraction of time. Custom printf
|
||||
equivalent would be faster.
|
||||
|
||||
* IAITree+Cython is a wrapper around the C version of cgranges. Cython adds
|
||||
significant overhead.
|
||||
|
||||
* Bedtools is designed for a variety of applications in addition to computing
|
||||
coverage. It may keep other information in its internal data structure. This
|
||||
micro-benchmark may be unfair to bedtools.
|
||||
|
||||
* In general, the performance is affected a lot by subtle implementation
|
||||
details. CITree, IAITree, NCList and AIList are all broadly comparable in
|
||||
performance. AITree is not recommended when indexed intervals are immutable.
|
||||
|
||||
### Use cgranges as a C library
|
||||
|
||||
```c
|
||||
cgranges_t *cr = cr_init(); // initialize a cgranges_t object
|
||||
cr_add(cr, "chr1", 20, 30, 0); // add a genomic interval
|
||||
cr_add(cr, "chr2", 10, 30, 1);
|
||||
cr_add(cr, "chr1", 10, 25, 2);
|
||||
cr_index(cr); // index
|
||||
|
||||
int64_t i, n, *b = 0, max_b = 0;
|
||||
n = cr_overlap(cr, "chr1", 15, 22, &b, &max_b); // overlap query; output array b[] can be reused
|
||||
for (i = 0; i < n; ++i) // traverse overlapping intervals
|
||||
printf("%d\t%d\t%d\n", cr_start(cr, b[i]), cr_end(cr, b[i]), cr_label(cr, b[i]));
|
||||
free(b); // b[] is allocated by malloc() inside cr_overlap(), so needs to be freed with free()
|
||||
|
||||
cr_destroy(cr);
|
||||
```
|
||||
|
||||
Now, it's possible to query the tree and obtain a set of intervals which are contained within the start and stop coordinates.
|
||||
### Use IITree as a C++ library
|
||||
|
||||
```c++
|
||||
vector<Interval<T> > results;
|
||||
tree.findContained(start, stop, results);
|
||||
cout << "found " << results.size() << " overlapping intervals" << endl;
|
||||
```cpp
|
||||
IITree<int, int> tree;
|
||||
tree.add(12, 34, 0); // add an interval
|
||||
tree.add(0, 23, 1);
|
||||
tree.add(34, 56, 2);
|
||||
tree.index(); // index
|
||||
std::vector<size_t> a;
|
||||
tree.overlap(22, 25, a); // retrieve overlaps
|
||||
for (size_t i = 0; i < a.size(); ++i)
|
||||
printf("%d\t%d\t%d\n", tree.start(a[i]), tree.end(a[i]), tree.data(a[i]));
|
||||
```
|
||||
|
||||
The function IntervalTree::findOverlapping provides a method to find all those intervals which are contained or partially overlap the interval (start, stop).
|
||||
|
||||
### Author: Erik Garrison <erik.garrison@gmail.com>
|
||||
|
||||
### License: MIT
|
||||
[bedcov]: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html
|
||||
[ekg-itree]: https://github.com/ekg/intervaltree
|
||||
[quicksect]: https://github.com/brentp/quicksect
|
||||
[ncls]: https://github.com/hunt-genes/ncls
|
||||
[citree]: https://en.wikipedia.org/wiki/Interval_tree#Centered_interval_tree
|
||||
[itree]: https://en.wikipedia.org/wiki/Interval_tree
|
||||
[bheap]: https://en.wikipedia.org/wiki/Binary_heap
|
||||
[ailist]: https://www.biorxiv.org/content/10.1101/593657v1
|
||||
[kerneltree]: https://github.com/biocore-ntnu/kerneltree
|
88
lib/external/intervaltree/include/IITree.h
vendored
Normal file
88
lib/external/intervaltree/include/IITree.h
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
||||
/**
 * Implicit interval tree: all intervals live in one std::vector that is
 * laid out as a binary tree (children of node k are 2k+1 and 2k+2), so no
 * per-node allocation or pointers are needed.
 *
 * Usage: add() every interval, call index() once, then query with overlap().
 * overlap() relies on the layout and the per-node `max` values produced by
 * index(), so index() has to be called again after further add() calls.
 *
 * An interval [st_i, en_i) stored in the tree is reported for a query
 * (st, en) when `st < en_i && st_i <= en`.
 */
template<typename S, typename T> // "S" is a scalar type; "T" is the type of data associated with each interval
class IITree {
    // One entry of the explicit stack that replaces recursion in the traversals.
    struct StackCell {
        size_t x; // node (index into a[])
        int w;    // w: 0 if left child hasn't been processed
        StackCell() {}
        StackCell(size_t x_, int w_) : x(x_), w(w_) {}
    };

    struct Interval {
        S st, en, max; // start, end, and the largest `en` in the subtree rooted at this node
        T data;
        Interval() = default;
        Interval(const S &s, const S &e, const T &d) : st(s), en(e), max(e), data(d) { }
    };

    // Orders intervals by start coordinate only.
    struct IntervalLess {
        bool operator()(const Interval &intervalA, const Interval &intervalB) const { return intervalA.st < intervalB.st; }
    };

    std::vector<Interval> a;

    // In-order walk over the implicit tree that copies the start-sorted a[]
    // into b[] in tree layout; see https://algorithmica.org/en/eytzinger
    // Returns the next unread position in a[].
    size_t layout_recur(Interval *b, size_t i = 0, size_t k = 0) {
        if (k < a.size()) {
            i = layout_recur(b, i, (k<<1) + 1);
            b[k] = a[i++];
            i = layout_recur(b, i, (k<<1) + 2);
        }
        return i;
    }

    // Sets Interval::max for every node, children before their parent.
    void index_BFS(Interval *interval, size_t n) {
        // Nothing to do for an empty tree; without this guard the loop below
        // would end up writing interval[0], which does not exist.
        if (n == 0) return;

        int t = 0;
        StackCell stack[64];
        stack[t++] = StackCell(0, 0);
        while (t) {
            StackCell z = stack[--t];
            size_t k = z.x, l = k<<1|1, r = l + 1;
            if (z.w == 2) { // Interval::max for both children are computed
                interval[k].max = interval[k].en;
                if (l < n && interval[k].max < interval[l].max) interval[k].max = interval[l].max;
                if (r < n && interval[k].max < interval[r].max) interval[k].max = interval[r].max;
            } else { // go down into the two children
                stack[t++] = StackCell(k, z.w + 1);
                if (l + z.w < n)
                    stack[t++] = StackCell(l + z.w, 0);
            }
        }
    }

public:
    /// Appends the interval (s, e) carrying payload d. Not searchable until index() is called.
    void add(const S &s, const S &e, const T &d) { a.push_back(Interval(s, e, d)); }

    /// Sorts the stored intervals by start, rearranges them into tree layout
    /// and computes the per-node maxima. Safe to call on an empty tree.
    void index() {
        std::sort(a.begin(), a.end(), IntervalLess());
        std::vector<Interval> b(a.size());
        layout_recur(b.data());
        a.swap(b); // adopt the new layout without copying every interval a second time
        index_BFS(a.data(), a.size());
    }

    /// Collects into `out` the indices of all stored intervals matching the
    /// query (st, en); `out` is cleared first. The indices are valid arguments
    /// for start(), end() and data().
    /// @return true if at least one interval was found.
    bool overlap(const S &st, const S &en, std::vector<size_t> &out) const {
        int t = 0;
        StackCell stack[64];
        out.clear();
        if (a.empty()) return false;

        stack[t++] = StackCell(0, 0); // push the root; this is a top down traversal
        while (t) { // in-order traversal, so hits are appended in order of increasing start
            StackCell z = stack[--t];
            size_t l = (z.x<<1) + 1, r = l + 1;
            if (l >= a.size()) { // a leaf node
                if (st < a[z.x].en && a[z.x].st <= en) out.push_back(z.x);
            } else if (z.w == 0) { // if left child not processed
                stack[t++] = StackCell(z.x, 1); // re-add node z.x, but mark the left child having been processed
                // only descend if something in the left subtree ends after the query start
                if (l < a.size() && a[l].max > st)
                    stack[t++] = StackCell(l, 0);
            } else if (a[z.x].st <= en) { // need to push the right child
                if (st < a[z.x].en) out.push_back(z.x); // test if z.x overlaps the query; if yes, append to out[]
                if (r < a.size()) stack[t++] = StackCell(r, 0);
            }
        }
        return !out.empty();
    }

    /// Number of stored intervals.
    size_t size() const { return a.size(); }
    /// Start coordinate of the interval at index i.
    const S &start(size_t i) const { return a[i].st; }
    /// End coordinate of the interval at index i.
    const S &end(size_t i) const { return a[i].en; }
    /// Payload of the interval at index i.
    const T &data(size_t i) const { return a[i].data; }
};
|
325
lib/external/intervaltree/include/IntervalTree.h
vendored
325
lib/external/intervaltree/include/IntervalTree.h
vendored
@ -1,325 +0,0 @@
|
||||
#ifndef __INTERVAL_TREE_H
|
||||
#define __INTERVAL_TREE_H
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
#ifdef USE_INTERVAL_TREE_NAMESPACE
|
||||
namespace interval_tree {
|
||||
#endif
|
||||
// A value tagged with a range. The two endpoints may be passed in either
// order; the smaller one is stored as `start` and the larger one as `stop`.
template <class Scalar, typename Value>
class Interval {
public:
    Scalar start;
    Scalar stop;
    Value value;

    Interval(const Scalar& s, const Scalar& e, const Value& v)
        : start((e < s) ? e : s)   // smaller endpoint
        , stop((s < e) ? e : s)    // larger endpoint
        , value(v)
    {}
};
|
||||
|
||||
template <class Scalar, typename Value>
|
||||
Value intervalStart(const Interval<Scalar,Value>& i) {
|
||||
return i.start;
|
||||
}
|
||||
|
||||
template <class Scalar, typename Value>
|
||||
Value intervalStop(const Interval<Scalar, Value>& i) {
|
||||
return i.stop;
|
||||
}
|
||||
|
||||
template <class Scalar, typename Value>
|
||||
std::ostream& operator<<(std::ostream& out, const Interval<Scalar, Value>& i) {
|
||||
out << "Interval(" << i.start << ", " << i.stop << "): " << i.value;
|
||||
return out;
|
||||
}
|
||||
|
||||
template <class Scalar, class Value>
|
||||
class IntervalTree {
|
||||
public:
|
||||
typedef Interval<Scalar, Value> interval;
|
||||
typedef std::vector<interval> interval_vector;
|
||||
|
||||
|
||||
struct IntervalStartCmp {
|
||||
bool operator()(const interval& a, const interval& b) {
|
||||
return a.start < b.start;
|
||||
}
|
||||
};
|
||||
|
||||
struct IntervalStopCmp {
|
||||
bool operator()(const interval& a, const interval& b) {
|
||||
return a.stop < b.stop;
|
||||
}
|
||||
};
|
||||
|
||||
IntervalTree()
|
||||
: left(nullptr)
|
||||
, right(nullptr)
|
||||
, center(0)
|
||||
{}
|
||||
|
||||
~IntervalTree() = default;
|
||||
|
||||
std::unique_ptr<IntervalTree> clone() const {
|
||||
return std::unique_ptr<IntervalTree>(new IntervalTree(*this));
|
||||
}
|
||||
|
||||
IntervalTree(const IntervalTree& other)
|
||||
: intervals(other.intervals),
|
||||
left(other.left ? other.left->clone() : nullptr),
|
||||
right(other.right ? other.right->clone() : nullptr),
|
||||
center(other.center)
|
||||
{}
|
||||
|
||||
IntervalTree& operator=(IntervalTree&&) = default;
|
||||
IntervalTree(IntervalTree&&) = default;
|
||||
|
||||
IntervalTree& operator=(const IntervalTree& other) {
|
||||
center = other.center;
|
||||
intervals = other.intervals;
|
||||
left = other.left ? other.left->clone() : nullptr;
|
||||
right = other.right ? other.right->clone() : nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntervalTree(
|
||||
interval_vector&& ivals,
|
||||
std::size_t depth = 16,
|
||||
std::size_t minbucket = 64,
|
||||
std::size_t maxbucket = 512,
|
||||
Scalar leftextent = 0,
|
||||
Scalar rightextent = 0)
|
||||
: left(nullptr)
|
||||
, right(nullptr)
|
||||
{
|
||||
--depth;
|
||||
const auto minmaxStop = std::minmax_element(ivals.begin(), ivals.end(),
|
||||
IntervalStopCmp());
|
||||
const auto minmaxStart = std::minmax_element(ivals.begin(), ivals.end(),
|
||||
IntervalStartCmp());
|
||||
if (!ivals.empty()) {
|
||||
center = (minmaxStart.first->start + minmaxStop.second->stop) / 2;
|
||||
}
|
||||
if (leftextent == 0 && rightextent == 0) {
|
||||
// sort intervals by start
|
||||
std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
|
||||
} else {
|
||||
assert(std::is_sorted(ivals.begin(), ivals.end(), IntervalStartCmp()));
|
||||
}
|
||||
if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) {
|
||||
std::sort(ivals.begin(), ivals.end(), IntervalStartCmp());
|
||||
intervals = std::move(ivals);
|
||||
assert(is_valid().first);
|
||||
return;
|
||||
} else {
|
||||
Scalar leftp = 0;
|
||||
Scalar rightp = 0;
|
||||
|
||||
if (leftextent || rightextent) {
|
||||
leftp = leftextent;
|
||||
rightp = rightextent;
|
||||
} else {
|
||||
leftp = ivals.front().start;
|
||||
rightp = std::max_element(ivals.begin(), ivals.end(),
|
||||
IntervalStopCmp())->stop;
|
||||
}
|
||||
|
||||
interval_vector lefts;
|
||||
interval_vector rights;
|
||||
|
||||
for (typename interval_vector::const_iterator i = ivals.begin();
|
||||
i != ivals.end(); ++i) {
|
||||
const interval& interval = *i;
|
||||
if (interval.stop < center) {
|
||||
lefts.push_back(interval);
|
||||
} else if (interval.start > center) {
|
||||
rights.push_back(interval);
|
||||
} else {
|
||||
assert(interval.start <= center);
|
||||
assert(center <= interval.stop);
|
||||
intervals.push_back(interval);
|
||||
}
|
||||
}
|
||||
|
||||
if (!lefts.empty()) {
|
||||
left.reset(new IntervalTree(std::move(lefts),
|
||||
depth, minbucket, maxbucket,
|
||||
leftp, center));
|
||||
}
|
||||
if (!rights.empty()) {
|
||||
right.reset(new IntervalTree(std::move(rights),
|
||||
depth, minbucket, maxbucket,
|
||||
center, rightp));
|
||||
}
|
||||
}
|
||||
assert(is_valid().first);
|
||||
}
|
||||
|
||||
// Call f on all intervals near the range [start, stop]:
|
||||
template <class UnaryFunction>
|
||||
void visit_near(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
|
||||
if (!intervals.empty() && ! (stop < intervals.front().start)) {
|
||||
for (auto & i : intervals) {
|
||||
f(i);
|
||||
}
|
||||
}
|
||||
if (left && start <= center) {
|
||||
left->visit_near(start, stop, f);
|
||||
}
|
||||
if (right && stop >= center) {
|
||||
right->visit_near(start, stop, f);
|
||||
}
|
||||
}
|
||||
|
||||
// Call f on all intervals crossing pos
|
||||
template <class UnaryFunction>
|
||||
void visit_overlapping(const Scalar& pos, UnaryFunction f) const {
|
||||
visit_overlapping(pos, pos, f);
|
||||
}
|
||||
|
||||
// Call f on all intervals overlapping [start, stop]
|
||||
template <class UnaryFunction>
|
||||
void visit_overlapping(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
|
||||
auto filterF = [&](const interval& interval) {
|
||||
if (interval.stop >= start && interval.start <= stop) {
|
||||
// Only apply f if overlapping
|
||||
f(interval);
|
||||
}
|
||||
};
|
||||
visit_near(start, stop, filterF);
|
||||
}
|
||||
|
||||
// Call f on all intervals contained within [start, stop]
|
||||
template <class UnaryFunction>
|
||||
void visit_contained(const Scalar& start, const Scalar& stop, UnaryFunction f) const {
|
||||
auto filterF = [&](const interval& interval) {
|
||||
if (start <= interval.start && interval.stop <= stop) {
|
||||
f(interval);
|
||||
}
|
||||
};
|
||||
visit_near(start, stop, filterF);
|
||||
}
|
||||
|
||||
interval_vector findOverlapping(const Scalar& start, const Scalar& stop) const {
|
||||
interval_vector result;
|
||||
visit_overlapping(start, stop,
|
||||
[&](const interval& interval) {
|
||||
result.emplace_back(interval);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
interval_vector findContained(const Scalar& start, const Scalar& stop) const {
|
||||
interval_vector result;
|
||||
visit_contained(start, stop,
|
||||
[&](const interval& interval) {
|
||||
result.push_back(interval);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
bool empty() const {
|
||||
if (left && !left->empty()) {
|
||||
return false;
|
||||
}
|
||||
if (!intervals.empty()) {
|
||||
return false;
|
||||
}
|
||||
if (right && !right->empty()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class UnaryFunction>
|
||||
void visit_all(UnaryFunction f) const {
|
||||
if (left) {
|
||||
left->visit_all(f);
|
||||
}
|
||||
std::for_each(intervals.begin(), intervals.end(), f);
|
||||
if (right) {
|
||||
right->visit_all(f);
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<Scalar, Scalar> extentBruitForce() const {
|
||||
struct Extent {
|
||||
std::pair<Scalar, Scalar> x = {std::numeric_limits<Scalar>::max(),
|
||||
std::numeric_limits<Scalar>::min() };
|
||||
void operator()(const interval & interval) {
|
||||
x.first = std::min(x.first, interval.start);
|
||||
x.second = std::max(x.second, interval.stop);
|
||||
}
|
||||
};
|
||||
Extent extent;
|
||||
|
||||
visit_all([&](const interval & interval) { extent(interval); });
|
||||
return extent.x;
|
||||
}
|
||||
|
||||
// Check all constraints.
|
||||
// If first is false, second is invalid.
|
||||
std::pair<bool, std::pair<Scalar, Scalar>> is_valid() const {
|
||||
const auto minmaxStop = std::minmax_element(intervals.begin(), intervals.end(),
|
||||
IntervalStopCmp());
|
||||
const auto minmaxStart = std::minmax_element(intervals.begin(), intervals.end(),
|
||||
IntervalStartCmp());
|
||||
|
||||
std::pair<bool, std::pair<Scalar, Scalar>> result = {true, { std::numeric_limits<Scalar>::max(),
|
||||
std::numeric_limits<Scalar>::min() }};
|
||||
if (!intervals.empty()) {
|
||||
result.second.first = std::min(result.second.first, minmaxStart.first->start);
|
||||
result.second.second = std::min(result.second.second, minmaxStop.second->stop);
|
||||
}
|
||||
if (left) {
|
||||
auto valid = left->is_valid();
|
||||
result.first &= valid.first;
|
||||
result.second.first = std::min(result.second.first, valid.second.first);
|
||||
result.second.second = std::min(result.second.second, valid.second.second);
|
||||
if (!result.first) { return result; }
|
||||
if (valid.second.second >= center) {
|
||||
result.first = false;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
if (right) {
|
||||
auto valid = right->is_valid();
|
||||
result.first &= valid.first;
|
||||
result.second.first = std::min(result.second.first, valid.second.first);
|
||||
result.second.second = std::min(result.second.second, valid.second.second);
|
||||
if (!result.first) { return result; }
|
||||
if (valid.second.first <= center) {
|
||||
result.first = false;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
if (!std::is_sorted(intervals.begin(), intervals.end(), IntervalStartCmp())) {
|
||||
result.first = false;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
left.reset();
|
||||
right.reset();
|
||||
intervals.clear();
|
||||
center = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
interval_vector intervals;
|
||||
std::unique_ptr<IntervalTree> left;
|
||||
std::unique_ptr<IntervalTree> right;
|
||||
Scalar center;
|
||||
};
|
||||
#ifdef USE_INTERVAL_TREE_NAMESPACE
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
2
lib/external/pattern_language
vendored
2
lib/external/pattern_language
vendored
@ -1 +1 @@
|
||||
Subproject commit 9a687a5364ea27aa838f499afedb8e231f238a40
|
||||
Subproject commit 20a21a7de0db4be0b63553ebac90950dbf2a58fe
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <hex/providers/provider.hpp>
|
||||
|
||||
#include <IntervalTree.h>
|
||||
#include <IITree.h>
|
||||
|
||||
namespace hex::plugin::builtin {
|
||||
|
||||
@ -44,7 +44,7 @@ namespace hex::plugin::builtin {
|
||||
protected:
|
||||
bool m_dataValid = false;
|
||||
size_t m_dataSize = 0x00;
|
||||
interval_tree::IntervalTree<u64, std::vector<u8>> m_data;
|
||||
IITree<u64, std::vector<u8>> m_data;
|
||||
|
||||
std::fs::path m_sourceFilePath;
|
||||
};
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "ui/hex_editor.hpp"
|
||||
|
||||
#include <IntervalTree.h>
|
||||
#include <IITree.h>
|
||||
|
||||
namespace hex::plugin::builtin {
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
|
||||
#include <IntervalTree.h>
|
||||
#include <IITree.h>
|
||||
|
||||
namespace hex::plugin::builtin {
|
||||
|
||||
@ -94,11 +94,11 @@ namespace hex::plugin::builtin {
|
||||
|
||||
} m_searchSettings, m_decodeSettings;
|
||||
|
||||
using OccurrenceTree = interval_tree::IntervalTree<u64, Occurrence>;
|
||||
using OccurrenceTree = IITree<u64, Occurrence>;
|
||||
|
||||
std::map<prv::Provider*, std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
|
||||
std::map<prv::Provider*, OccurrenceTree> m_occurrenceTree;
|
||||
std::map<prv::Provider*, std::string> m_currFilter;
|
||||
PerProvider<std::vector<Occurrence>> m_foundOccurrences, m_sortedOccurrences;
|
||||
PerProvider<OccurrenceTree> m_occurrenceTree;
|
||||
PerProvider<std::string> m_currFilter;
|
||||
|
||||
TaskHolder m_searchTask, m_filterTask;
|
||||
bool m_settingsValid = false;
|
||||
|
@ -161,26 +161,37 @@ namespace hex::plugin::builtin {
|
||||
void IntelHexProvider::setBaseAddress(u64 address) {
|
||||
auto oldBase = this->getBaseAddress();
|
||||
|
||||
auto intervals = this->m_data.findOverlapping(oldBase, oldBase + this->getActualSize());
|
||||
std::vector<size_t> indices;
|
||||
this->m_data.overlap(oldBase, oldBase + this->getActualSize(), indices);
|
||||
|
||||
for (auto &interval : intervals) {
|
||||
interval.start = (interval.start - oldBase) + address;
|
||||
interval.stop = (interval.stop - oldBase) + address;
|
||||
IITree<u64, std::vector<u8>> intervals;
|
||||
for (auto &index : indices) {
|
||||
intervals.add(
|
||||
(this->m_data.start(index) - oldBase) + address,
|
||||
(this->m_data.end(index) - oldBase) + address,
|
||||
this->m_data.data(index)
|
||||
);
|
||||
}
|
||||
|
||||
this->m_data = std::move(intervals);
|
||||
this->m_data.index();
|
||||
|
||||
Provider::setBaseAddress(address);
|
||||
}
|
||||
|
||||
void IntelHexProvider::readRaw(u64 offset, void *buffer, size_t size) {
|
||||
auto intervals = this->m_data.findOverlapping(offset, (offset + size) - 1);
|
||||
std::vector<size_t> indices;
|
||||
this->m_data.overlap(offset, (offset + size) - 1, indices);
|
||||
|
||||
std::memset(buffer, 0x00, size);
|
||||
auto bytes = reinterpret_cast<u8*>(buffer);
|
||||
for (const auto &interval : intervals) {
|
||||
for (u32 i = std::max(interval.start, offset); i <= interval.stop && (i - offset) < size; i++) {
|
||||
bytes[i - offset] = interval.value[i - interval.start];
|
||||
for (const auto &index : indices) {
|
||||
auto start = this->m_data.start(index);
|
||||
auto end = this->m_data.end(index);
|
||||
auto data = this->m_data.data(index);
|
||||
|
||||
for (u32 i = std::max(start, offset); i <= end && (i - offset) < size; i++) {
|
||||
bytes[i - offset] = data[i - start];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -203,15 +214,15 @@ namespace hex::plugin::builtin {
|
||||
return false;
|
||||
|
||||
u64 maxAddress = 0x00;
|
||||
decltype(this->m_data)::interval_vector intervals;
|
||||
for (auto &[address, bytes] : data) {
|
||||
auto endAddress = (address + bytes.size()) - 1;
|
||||
intervals.emplace_back(address, endAddress, std::move(bytes));
|
||||
this->m_data.add(address, endAddress, std::move(bytes));
|
||||
|
||||
if (endAddress > maxAddress)
|
||||
maxAddress = endAddress;
|
||||
}
|
||||
this->m_data = std::move(intervals);
|
||||
this->m_data.index();
|
||||
|
||||
this->m_dataSize = maxAddress + 1;
|
||||
this->m_dataValid = true;
|
||||
|
||||
@ -254,17 +265,22 @@ namespace hex::plugin::builtin {
|
||||
}
|
||||
|
||||
std::pair<Region, bool> IntelHexProvider::getRegionValidity(u64 address) const {
|
||||
auto intervals = this->m_data.findOverlapping(address, address);
|
||||
if (intervals.empty()) {
|
||||
std::vector<size_t> indices;
|
||||
this->m_data.overlap(address, address, indices);
|
||||
if (indices.empty()) {
|
||||
return Provider::getRegionValidity(address);
|
||||
}
|
||||
|
||||
auto closestInterval = intervals.front();
|
||||
for (const auto &interval : intervals) {
|
||||
if (interval.start < closestInterval.start)
|
||||
closestInterval = interval;
|
||||
auto closestIndex = indices.front();
|
||||
for (const auto &index : indices) {
|
||||
if (this->m_data.start(index) < this->m_data.start(closestIndex))
|
||||
closestIndex = index;
|
||||
}
|
||||
return { Region { closestInterval.start, (closestInterval.stop - closestInterval.start) + 1}, true };
|
||||
|
||||
auto start = this->m_data.start(closestIndex);
|
||||
auto end = this->m_data.end(closestIndex);
|
||||
|
||||
return { Region { start, (end - start) + 1 }, true };
|
||||
}
|
||||
|
||||
void IntelHexProvider::loadSettings(const nlohmann::json &settings) {
|
||||
|
@ -180,15 +180,15 @@ namespace hex::plugin::builtin {
|
||||
return false;
|
||||
|
||||
u64 maxAddress = 0x00;
|
||||
decltype(this->m_data)::interval_vector intervals;
|
||||
for (auto &[address, bytes] : data) {
|
||||
auto endAddress = (address + bytes.size()) - 1;
|
||||
intervals.emplace_back(address, endAddress, std::move(bytes));
|
||||
this->m_data.add(address, endAddress, std::move(bytes));
|
||||
|
||||
if (endAddress > maxAddress)
|
||||
maxAddress = endAddress;
|
||||
}
|
||||
this->m_data = std::move(intervals);
|
||||
this->m_data.index();
|
||||
|
||||
this->m_dataSize = maxAddress + 1;
|
||||
this->m_dataValid = true;
|
||||
|
||||
|
@ -22,9 +22,8 @@ namespace hex::plugin::builtin {
|
||||
if (this->m_searchTask.isRunning())
|
||||
return { };
|
||||
|
||||
auto provider = ImHexApi::Provider::get();
|
||||
|
||||
if (!this->m_occurrenceTree[provider].findOverlapping(address, address).empty())
|
||||
std::vector<size_t> occurrences;
|
||||
if (this->m_occurrenceTree->overlap(address, address, occurrences))
|
||||
return HighlightColor();
|
||||
else
|
||||
return std::nullopt;
|
||||
@ -36,10 +35,8 @@ namespace hex::plugin::builtin {
|
||||
if (this->m_searchTask.isRunning())
|
||||
return;
|
||||
|
||||
auto provider = ImHexApi::Provider::get();
|
||||
|
||||
auto occurrences = this->m_occurrenceTree[provider].findOverlapping(address, address);
|
||||
if (occurrences.empty())
|
||||
std::vector<size_t> occurrences;
|
||||
if (!this->m_occurrenceTree->overlap(address, address, occurrences))
|
||||
return;
|
||||
|
||||
ImGui::BeginTooltip();
|
||||
@ -51,7 +48,10 @@ namespace hex::plugin::builtin {
|
||||
ImGui::TableNextColumn();
|
||||
|
||||
{
|
||||
const auto value = this->decodeValue(ImHexApi::Provider::get(), occurrence.value, 256);
|
||||
auto start = this->m_occurrenceTree->start(occurrence);
|
||||
auto end = this->m_occurrenceTree->end(occurrence) - 1;
|
||||
const auto &bytes = this->m_occurrenceTree->data(occurrence);
|
||||
const auto value = this->decodeValue(ImHexApi::Provider::get(), bytes, 256);
|
||||
|
||||
ImGui::ColorButton("##color", ImColor(HighlightColor()));
|
||||
ImGui::SameLine(0, 10);
|
||||
@ -65,7 +65,7 @@ namespace hex::plugin::builtin {
|
||||
ImGui::TableNextColumn();
|
||||
ImGui::TextFormatted("{}: ", "hex.builtin.common.region"_lang);
|
||||
ImGui::TableNextColumn();
|
||||
ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", occurrence.value.region.getStartAddress(), occurrence.value.region.getEndAddress());
|
||||
ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", start, end);
|
||||
|
||||
auto demangledValue = llvm::demangle(value);
|
||||
|
||||
@ -494,28 +494,27 @@ namespace hex::plugin::builtin {
|
||||
switch (settings.mode) {
|
||||
using enum SearchSettings::Mode;
|
||||
case Strings:
|
||||
this->m_foundOccurrences[provider] = searchStrings(task, provider, searchRegion, settings.strings);
|
||||
this->m_foundOccurrences.get(provider) = searchStrings(task, provider, searchRegion, settings.strings);
|
||||
break;
|
||||
case Sequence:
|
||||
this->m_foundOccurrences[provider] = searchSequence(task, provider, searchRegion, settings.bytes);
|
||||
this->m_foundOccurrences.get(provider) = searchSequence(task, provider, searchRegion, settings.bytes);
|
||||
break;
|
||||
case Regex:
|
||||
this->m_foundOccurrences[provider] = searchRegex(task, provider, searchRegion, settings.regex);
|
||||
this->m_foundOccurrences.get(provider) = searchRegex(task, provider, searchRegion, settings.regex);
|
||||
break;
|
||||
case BinaryPattern:
|
||||
this->m_foundOccurrences[provider] = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
|
||||
this->m_foundOccurrences.get(provider) = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern);
|
||||
break;
|
||||
case Value:
|
||||
this->m_foundOccurrences[provider] = searchValue(task, provider, searchRegion, settings.value);
|
||||
this->m_foundOccurrences.get(provider) = searchValue(task, provider, searchRegion, settings.value);
|
||||
break;
|
||||
}
|
||||
|
||||
this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
|
||||
this->m_sortedOccurrences.get(provider) = this->m_foundOccurrences.get(provider);
|
||||
|
||||
OccurrenceTree::interval_vector intervals;
|
||||
for (const auto &occurrence : this->m_foundOccurrences[provider])
|
||||
intervals.emplace_back(occurrence.region.getStartAddress(), occurrence.region.getEndAddress(), occurrence);
|
||||
this->m_occurrenceTree[provider] = std::move(intervals);
|
||||
for (const auto &occurrence : this->m_foundOccurrences.get(provider))
|
||||
this->m_occurrenceTree->add(occurrence.region.getStartAddress(), occurrence.region.getEndAddress() + 1, occurrence);
|
||||
this->m_occurrenceTree->index();
|
||||
});
|
||||
}
|
||||
|
||||
@ -800,14 +799,14 @@ namespace hex::plugin::builtin {
|
||||
ImGui::EndDisabled();
|
||||
|
||||
ImGui::SameLine();
|
||||
ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences[provider].size());
|
||||
ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences->size());
|
||||
|
||||
ImGui::BeginDisabled(this->m_foundOccurrences[provider].empty());
|
||||
ImGui::BeginDisabled(this->m_foundOccurrences->empty());
|
||||
{
|
||||
if (ImGui::Button("hex.builtin.view.find.search.reset"_lang)) {
|
||||
this->m_foundOccurrences[provider].clear();
|
||||
this->m_sortedOccurrences[provider].clear();
|
||||
this->m_occurrenceTree[provider].clear();
|
||||
this->m_foundOccurrences->clear();
|
||||
this->m_sortedOccurrences->clear();
|
||||
*this->m_occurrenceTree = {};
|
||||
}
|
||||
}
|
||||
ImGui::EndDisabled();
|
||||
@ -818,25 +817,25 @@ namespace hex::plugin::builtin {
|
||||
ImGui::Separator();
|
||||
ImGui::NewLine();
|
||||
|
||||
auto &currOccurrences = this->m_sortedOccurrences[provider];
|
||||
auto &currOccurrences = *this->m_sortedOccurrences;
|
||||
|
||||
ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x);
|
||||
auto prevFilterLength = this->m_currFilter[provider].length();
|
||||
if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, this->m_currFilter[provider])) {
|
||||
if (prevFilterLength > this->m_currFilter[provider].length())
|
||||
this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider];
|
||||
auto prevFilterLength = this->m_currFilter->length();
|
||||
if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, *this->m_currFilter)) {
|
||||
if (prevFilterLength > this->m_currFilter->length())
|
||||
*this->m_sortedOccurrences = *this->m_foundOccurrences;
|
||||
|
||||
if (this->m_filterTask.isRunning())
|
||||
this->m_filterTask.interrupt();
|
||||
|
||||
if (!this->m_currFilter[provider].empty()) {
|
||||
if (!this->m_currFilter->empty()) {
|
||||
this->m_filterTask = TaskManager::createTask("Filtering", currOccurrences.size(), [this, provider, &currOccurrences](Task &task) {
|
||||
u64 progress = 0;
|
||||
currOccurrences.erase(std::remove_if(currOccurrences.begin(), currOccurrences.end(), [this, provider, &task, &progress](const auto ®ion) {
|
||||
task.update(progress);
|
||||
progress += 1;
|
||||
|
||||
return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter[provider]);
|
||||
return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter.get(provider));
|
||||
}), currOccurrences.end());
|
||||
});
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ namespace hex::plugin::builtin {
|
||||
|
||||
auto selection = ImHexApi::HexEditor::getSelection();
|
||||
|
||||
if (ImGui::GetIO().KeyShift) {
|
||||
if (selection.has_value() && ImGui::GetIO().KeyShift) {
|
||||
auto &hashFunctions = this->m_hashFunctions.get(selection->getProvider());
|
||||
if (!hashFunctions.empty() && selection.has_value() && selection->overlaps(Region { address, size })) {
|
||||
ImGui::BeginTooltip();
|
||||
|
Loading…
Reference in New Issue
Block a user