feat: Added chunk based entropy analysis to information view (#933)
Issue: https://github.com/WerWolv/ImHex/issues/522 Implementation of chunk based entropy analysis in diagram.hpp available from the data information view and in the pattern language. --------- Co-authored-by: WerWolv <werwolv98@gmail.com>
This commit is contained in:
parent
505c1bc274
commit
069221757f
2
lib/external/pattern_language
vendored
2
lib/external/pattern_language
vendored
@ -1 +1 @@
|
|||||||
Subproject commit cb4b5a14ae98027b9cf14e3235de0e91a34786cd
|
Subproject commit 572a481803aa19c5bd0c3ccce08d63666d01499c
|
@ -27,6 +27,20 @@ struct ImVec2;
|
|||||||
|
|
||||||
namespace hex {
|
namespace hex {
|
||||||
|
|
||||||
|
/**
 * @brief Down-samples a vector by picking elements at a fixed stride
 *
 * The stride is `data.size() / count`, truncated to an integer and never smaller than 1.
 * Because of the truncation the result can hold somewhat more than `count` elements
 * (e.g. 10 elements sampled with `count == 4` use a stride of 2 and yield 5 elements).
 *
 * @param data  Values to sample from
 * @param count Approximate number of elements wanted in the result
 * @return The sampled elements in their original order; empty if `data` is empty or `count` is 0
 */
template<typename T>
std::vector<T> sampleData(const std::vector<T> &data, size_t count) {
    // Nothing to sample. This also keeps the stride computation below from dividing by zero,
    // which would yield an infinite value whose conversion to size_t is undefined
    if (data.empty() || count == 0)
        return { };

    size_t stride = std::max(1.0, double(data.size()) / count);

    std::vector<T> result;
    result.reserve(count);

    for (size_t i = 0; i < data.size(); i += stride)
        result.push_back(data[i]);

    return result;
}
|
||||||
|
|
||||||
float operator""_scaled(long double value);
|
float operator""_scaled(long double value);
|
||||||
float operator""_scaled(unsigned long long value);
|
float operator""_scaled(unsigned long long value);
|
||||||
ImVec2 scaled(const ImVec2 &vector);
|
ImVec2 scaled(const ImVec2 &vector);
|
||||||
|
@ -3,9 +3,15 @@
|
|||||||
#include <hex.hpp>
|
#include <hex.hpp>
|
||||||
|
|
||||||
#include <imgui.h>
|
#include <imgui.h>
|
||||||
|
#include <implot.h>
|
||||||
|
|
||||||
|
#include <hex/providers/provider.hpp>
|
||||||
|
#include <hex/providers/buffered_reader.hpp>
|
||||||
|
|
||||||
#define IMGUI_DEFINE_MATH_OPERATORS
|
#define IMGUI_DEFINE_MATH_OPERATORS
|
||||||
#include <imgui_internal.h>
|
#include <imgui_internal.h>
|
||||||
|
|
||||||
|
#include <hex/helpers/logger.hpp>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
namespace hex {
|
namespace hex {
|
||||||
@ -130,6 +136,28 @@ namespace hex {
|
|||||||
this->m_processing = false;
|
this->m_processing = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prepare for an iterative analysis of `size` bytes that will be fed in through update()
void reset(u64 size) {
    // Flag the analysis as running until update() has received the last byte
    this->m_processing = true;

    // Drop the previous content and make room for `size` zero-initialized bytes
    this->m_buffer.assign(size, 0x00);

    this->m_fileSize  = size;
    this->m_byteCount = 0;
}
|
||||||
|
|
||||||
|
// Feed one byte to the analysis; once the announced number of bytes has been
// received the buffer is sampled and the diagram data is computed
void update(u8 byte) {
    // The buffer is already full, ignore any additional byte
    if (this->m_byteCount >= this->m_fileSize)
        return;

    this->m_buffer[this->m_byteCount] = byte;
    this->m_byteCount += 1;

    // Still waiting for more bytes
    if (this->m_byteCount != this->m_fileSize)
        return;

    // Last byte received: keep only a sample of the data and process it
    this->m_buffer = getSampleSelection(this->m_buffer, this->m_sampleSize);
    processImpl();
    this->m_processing = false;
}
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void processImpl() {
|
void processImpl() {
|
||||||
this->m_glowBuffer.resize(this->m_buffer.size());
|
this->m_glowBuffer.resize(this->m_buffer.size());
|
||||||
@ -142,7 +170,7 @@ namespace hex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < (this->m_buffer.empty() ? 0 : this->m_buffer.size() - 1); i++) {
|
for (size_t i = 0; i < (this->m_buffer.empty() ? 0 : this->m_buffer.size() - 1); i++) {
|
||||||
this->m_glowBuffer[i] = std::min(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F);
|
this->m_glowBuffer[i] = std::min<float>(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->m_opacity = (log10(float(this->m_sampleSize)) / log10(float(m_highestCount))) / 10.0F;
|
this->m_opacity = (log10(float(this->m_sampleSize)) / log10(float(m_highestCount))) / 10.0F;
|
||||||
@ -151,6 +179,10 @@ namespace hex {
|
|||||||
private:
|
private:
|
||||||
size_t m_sampleSize;
|
size_t m_sampleSize;
|
||||||
|
|
||||||
|
// The number of bytes processed and the size of
|
||||||
|
// the file to analyze (useful for iterative analysis)
|
||||||
|
u64 m_byteCount;
|
||||||
|
u64 m_fileSize;
|
||||||
std::vector<u8> m_buffer;
|
std::vector<u8> m_buffer;
|
||||||
std::vector<float> m_glowBuffer;
|
std::vector<float> m_glowBuffer;
|
||||||
float m_opacity = 0.0F;
|
float m_opacity = 0.0F;
|
||||||
@ -158,7 +190,6 @@ namespace hex {
|
|||||||
std::atomic<bool> m_processing = false;
|
std::atomic<bool> m_processing = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class DiagramLayeredDistribution {
|
class DiagramLayeredDistribution {
|
||||||
public:
|
public:
|
||||||
DiagramLayeredDistribution(size_t sampleSize = 0x9000) : m_sampleSize(sampleSize) { }
|
DiagramLayeredDistribution(size_t sampleSize = 0x9000) : m_sampleSize(sampleSize) { }
|
||||||
@ -200,6 +231,27 @@ namespace hex {
|
|||||||
this->m_processing = false;
|
this->m_processing = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prepare for an iterative analysis of `size` bytes that will be fed in through update()
void reset(u64 size) {
    // Flag the analysis as running until update() has received the last byte
    this->m_processing = true;

    // Drop the previous content and make room for `size` zero-initialized bytes
    this->m_buffer.assign(size, 0x00);

    this->m_fileSize  = size;
    this->m_byteCount = 0;
}
|
||||||
|
|
||||||
|
// Feed one byte to the analysis; once the announced number of bytes has been
// received the buffer is sampled and the diagram data is computed
void update(u8 byte) {
    // The buffer is already full, ignore any additional byte
    if (this->m_byteCount >= this->m_fileSize)
        return;

    this->m_buffer[this->m_byteCount] = byte;
    this->m_byteCount += 1;

    // Still waiting for more bytes
    if (this->m_byteCount != this->m_fileSize)
        return;

    // Last byte received: keep only a sample of the data and process it
    this->m_buffer = getSampleSelection(this->m_buffer, this->m_sampleSize);
    processImpl();
    this->m_processing = false;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void processImpl() {
|
void processImpl() {
|
||||||
this->m_glowBuffer.resize(this->m_buffer.size());
|
this->m_glowBuffer.resize(this->m_buffer.size());
|
||||||
@ -212,7 +264,7 @@ namespace hex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < (this->m_buffer.empty() ? 0 : this->m_buffer.size() - 1); i++) {
|
for (size_t i = 0; i < (this->m_buffer.empty() ? 0 : this->m_buffer.size() - 1); i++) {
|
||||||
this->m_glowBuffer[i] = std::min(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F);
|
this->m_glowBuffer[i] = std::min<float>(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->m_opacity = (log10(float(this->m_sampleSize)) / log10(float(m_highestCount))) / 10.0F;
|
this->m_opacity = (log10(float(this->m_sampleSize)) / log10(float(m_highestCount))) / 10.0F;
|
||||||
@ -220,6 +272,11 @@ namespace hex {
|
|||||||
private:
|
private:
|
||||||
size_t m_sampleSize;
|
size_t m_sampleSize;
|
||||||
|
|
||||||
|
// The number of bytes processed and the size of
|
||||||
|
// the file to analyze (useful for iterative analysis)
|
||||||
|
u64 m_byteCount;
|
||||||
|
u64 m_fileSize;
|
||||||
|
|
||||||
std::vector<u8> m_buffer;
|
std::vector<u8> m_buffer;
|
||||||
std::vector<float> m_glowBuffer;
|
std::vector<float> m_glowBuffer;
|
||||||
float m_opacity = 0.0F;
|
float m_opacity = 0.0F;
|
||||||
@ -227,4 +284,619 @@ namespace hex {
|
|||||||
std::atomic<bool> m_processing = false;
|
std::atomic<bool> m_processing = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class DiagramChunkBasedEntropyAnalysis {
|
||||||
|
public:
|
||||||
|
DiagramChunkBasedEntropyAnalysis(u64 blockSize = 256, size_t sampleSize = 0x1000) : m_blockSize(blockSize), m_sampleSize(sampleSize) { }
|
||||||
|
|
||||||
|
void draw(ImVec2 size, ImPlotFlags flags, bool updateHandle = false) {
|
||||||
|
|
||||||
|
if (!this->m_processing && ImPlot::BeginPlot("##ChunkBasedAnalysis", size, flags)) {
|
||||||
|
ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.view.information.entropy"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock);
|
||||||
|
|
||||||
|
// Set the axis limit to [first block : last block]
|
||||||
|
ImPlot::SetupAxesLimits(
|
||||||
|
this->m_startAddress / this->m_blockSize,
|
||||||
|
this->m_endAddress / this->m_blockSize,
|
||||||
|
-0.1F, 1.1F, ImGuiCond_Always
|
||||||
|
);
|
||||||
|
|
||||||
|
// Draw the plot
|
||||||
|
ImPlot::PlotLine("##ChunkBasedAnalysisLine", this->m_xBlockEntropy.data(), this->m_yBlockEntropy.data(), this->m_blockCount);
|
||||||
|
|
||||||
|
// The parameter updateHandle is used when using the pattern language since we don't have a provider
|
||||||
|
// but just a set of bytes we won't be able to use the drag bar correctly.
|
||||||
|
if (updateHandle) {
|
||||||
|
// Set a draggable line on the plot
|
||||||
|
if (ImPlot::DragLineX(1, &this->m_handlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) {
|
||||||
|
// The line was dragged, update the position in the hex editor
|
||||||
|
|
||||||
|
// Clamp the value between the start/end of the region to analyze
|
||||||
|
this->m_handlePosition = std::clamp<double>(
|
||||||
|
this->m_handlePosition,
|
||||||
|
std::ceil(this->m_startAddress / double(this->m_blockSize)),
|
||||||
|
std::floor(this->m_endAddress / double(this->m_blockSize)));
|
||||||
|
|
||||||
|
// Compute the position inside hex editor
|
||||||
|
u64 address = u64(std::max<double>(this->m_handlePosition * this->m_blockSize, 0)) + this->m_baseAddress;
|
||||||
|
address = std::min<u64>(address, this->m_baseAddress + this->m_fileSize - 1);
|
||||||
|
ImHexApi::HexEditor::setSelection(address, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ImPlot::EndPlot();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(prv::Provider *provider, u64 chunkSize, u64 startAddress, u64 endAddress) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_chunkSize = chunkSize;
|
||||||
|
this->m_startAddress = startAddress;
|
||||||
|
this->m_endAddress = endAddress;
|
||||||
|
|
||||||
|
this->m_baseAddress = provider->getBaseAddress();
|
||||||
|
this->m_fileSize = provider->getSize();
|
||||||
|
|
||||||
|
// Get a file reader
|
||||||
|
auto reader = prv::BufferedReader(provider);
|
||||||
|
std::vector<u8> bytes = reader.read(this->m_startAddress, this->m_endAddress - this->m_startAddress);
|
||||||
|
|
||||||
|
this->processImpl(bytes);
|
||||||
|
|
||||||
|
// Set the diagram handle position to the start of the plot
|
||||||
|
this->m_handlePosition = this->m_startAddress / double(this->m_blockSize);
|
||||||
|
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(std::vector<u8> buffer, u64 chunkSize) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes (use buffer size as end address)
|
||||||
|
this->m_chunkSize = chunkSize;
|
||||||
|
this->m_startAddress = 0;
|
||||||
|
this->m_endAddress = buffer.size();
|
||||||
|
|
||||||
|
this->m_baseAddress = 0;
|
||||||
|
this->m_fileSize = buffer.size();
|
||||||
|
|
||||||
|
this->processImpl(buffer);
|
||||||
|
|
||||||
|
// Set the diagram handle position to the start of the plot
|
||||||
|
this->m_handlePosition = this->m_startAddress / double(this->m_blockSize);
|
||||||
|
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the entropy analysis
|
||||||
|
void reset(u64 chunkSize, u64 startAddress, u64 endAddress, u64 baseAddress, u64 size) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_chunkSize = chunkSize;
|
||||||
|
this->m_startAddress = startAddress;
|
||||||
|
this->m_endAddress = endAddress;
|
||||||
|
this->m_baseAddress = baseAddress;
|
||||||
|
this->m_fileSize = size;
|
||||||
|
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
|
||||||
|
// Reset and resize the array
|
||||||
|
this->m_yBlockEntropy.clear();
|
||||||
|
this->m_yBlockEntropy.resize(((this->m_endAddress - this->m_startAddress) / this->m_chunkSize) + 1);
|
||||||
|
|
||||||
|
this->m_byteCount = 0;
|
||||||
|
this->m_blockCount = 0;
|
||||||
|
|
||||||
|
// Set the diagram handle position to the start of the plot
|
||||||
|
this->m_handlePosition = this->m_startAddress / double(this->m_blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process one byte at the time
|
||||||
|
void update(u8 byte) {
|
||||||
|
u64 totalBlock = std::ceil((this->m_endAddress - this->m_startAddress) / this->m_chunkSize);
|
||||||
|
|
||||||
|
// Check if there is still some
|
||||||
|
if (this->m_blockCount < totalBlock) {
|
||||||
|
// Increment the occurrence of the current byte
|
||||||
|
this->m_blockValueCounts[byte]++;
|
||||||
|
|
||||||
|
this->m_byteCount++;
|
||||||
|
// Check if we processed one complete chunk, if so compute the entropy and start analysing the next chunk
|
||||||
|
if (((this->m_byteCount % this->m_chunkSize) == 0) || this->m_byteCount == (this->m_endAddress - this->m_startAddress)) [[unlikely]] {
|
||||||
|
this->m_yBlockEntropy[this->m_blockCount] = calculateEntropy(this->m_blockValueCounts, this->m_chunkSize);
|
||||||
|
|
||||||
|
this->m_blockCount += 1;
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we processed the last block, if so setup the X axis part of the data
|
||||||
|
if (this->m_blockCount == totalBlock) {
|
||||||
|
processFinalize();
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method used to compute the entropy of a block of size `blockSize`
|
||||||
|
// using the bytes occurrences from `valueCounts` array.
|
||||||
|
double calculateEntropy(std::array<ImU64, 256> &valueCounts, size_t blockSize) {
|
||||||
|
double entropy = 0;
|
||||||
|
|
||||||
|
for (auto count : valueCounts) {
|
||||||
|
if (count == 0) [[unlikely]]
|
||||||
|
continue;
|
||||||
|
|
||||||
|
double probability = static_cast<double>(count) / blockSize;
|
||||||
|
|
||||||
|
entropy += probability * std::log2(probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::min<double>(1.0, (-entropy) / 8); // log2(256) = 8
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the highest entropy value among all of the blocks
|
||||||
|
double getHighestBlockEntropy() {
|
||||||
|
double highestBlockEntropy = 0.0f;
|
||||||
|
if (!this->m_yBlockEntropy.empty())
|
||||||
|
highestBlockEntropy = *std::max_element(this->m_yBlockEntropy.begin(), this->m_yBlockEntropy.end());
|
||||||
|
return highestBlockEntropy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the number of blocks that have been processed
|
||||||
|
u64 getSize() {
|
||||||
|
return this->m_yBlockEntropy.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the size of the chunk used for this analysis
|
||||||
|
u64 getChunkSize() {
|
||||||
|
return this->m_chunkSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHandlePosition(u64 filePosition) {
|
||||||
|
this->m_handlePosition = filePosition / double(this->m_blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Private method used to factorize the process public method
|
||||||
|
void processImpl(std::vector<u8> bytes) {
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
|
||||||
|
// Reset and resize the array
|
||||||
|
this->m_yBlockEntropy.clear();
|
||||||
|
this->m_yBlockEntropy.resize(std::ceil((this->m_endAddress - this->m_startAddress) / this->m_chunkSize));
|
||||||
|
|
||||||
|
this->m_byteCount = 0;
|
||||||
|
this->m_blockCount = 0;
|
||||||
|
|
||||||
|
// Loop over each byte of the file (or a part of it)
|
||||||
|
for (u8 byte: bytes) {
|
||||||
|
// Increment the occurrence of the current byte
|
||||||
|
this->m_blockValueCounts[byte]++;
|
||||||
|
|
||||||
|
this->m_byteCount++;
|
||||||
|
// Check if we processed one complete chunk, if so compute the entropy and start analysing the next chunk
|
||||||
|
if (((this->m_byteCount % this->m_chunkSize) == 0) || this->m_byteCount == bytes.size() * 8) [[unlikely]] {
|
||||||
|
this->m_yBlockEntropy[this->m_blockCount] = calculateEntropy(this->m_blockValueCounts, this->m_chunkSize);
|
||||||
|
|
||||||
|
this->m_blockCount += 1;
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
processFinalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void processFinalize() {
|
||||||
|
// Only save at most m_sampleSize elements of the result
|
||||||
|
this->m_yBlockEntropy = sampleData(this->m_yBlockEntropy, std::min<size_t>(this->m_blockCount, this->m_sampleSize));
|
||||||
|
|
||||||
|
size_t stride = std::max(1.0, double(
|
||||||
|
std::ceil((this->m_endAddress - this->m_startAddress) / this->m_blockSize) / this->m_yBlockEntropy.size())) + 1;
|
||||||
|
|
||||||
|
this->m_blockCount = this->m_yBlockEntropy.size();
|
||||||
|
|
||||||
|
// The m_xBlockEntropy attribute is used to specify the position of entropy values
|
||||||
|
// in the plot when the Y axis doesn't start at 0
|
||||||
|
this->m_xBlockEntropy.clear();
|
||||||
|
this->m_xBlockEntropy.resize(this->m_blockCount);
|
||||||
|
for (u64 i = 0; i < this->m_blockCount; ++i)
|
||||||
|
this->m_xBlockEntropy[i] = (this->m_startAddress / this->m_blockSize) + stride*i;
|
||||||
|
--this->m_blockCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Variables used to store the parameters to process
|
||||||
|
|
||||||
|
// Chunk's size for entropy analysis
|
||||||
|
u64 m_chunkSize;
|
||||||
|
u64 m_startAddress;
|
||||||
|
u64 m_endAddress;
|
||||||
|
// Start / size of the file
|
||||||
|
u64 m_baseAddress;
|
||||||
|
u64 m_fileSize;
|
||||||
|
// The size of the blocks (for diagram drawing)
|
||||||
|
u64 m_blockSize;
|
||||||
|
|
||||||
|
// Position of the handle inside the plot
|
||||||
|
double m_handlePosition = 0.0;
|
||||||
|
|
||||||
|
// Hold the number of block that have been processed
|
||||||
|
// during the chunk based entropy analysis
|
||||||
|
u64 m_blockCount;
|
||||||
|
|
||||||
|
// Hold the number of bytes that have been processed
|
||||||
|
// during the analysis (useful for the iterative analysis)
|
||||||
|
u64 m_byteCount;
|
||||||
|
|
||||||
|
// Array used to hold the occurrences of each byte
|
||||||
|
// (useful for the iterative analysis)
|
||||||
|
std::array<ImU64, 256> m_blockValueCounts;
|
||||||
|
|
||||||
|
// Variable to hold the result of the chunk based
|
||||||
|
// entropy analysis
|
||||||
|
std::vector<double> m_xBlockEntropy;
|
||||||
|
std::vector<double> m_yBlockEntropy;
|
||||||
|
|
||||||
|
// Sampling size, number of elements displayed in the plot,
|
||||||
|
// avoid showing to many data because it decreased the frame rate
|
||||||
|
size_t m_sampleSize;
|
||||||
|
|
||||||
|
std::atomic<bool> m_processing = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DiagramByteDistribution {
|
||||||
|
public:
|
||||||
|
|
||||||
|
void draw(ImVec2 size, ImPlotFlags flags) {
|
||||||
|
|
||||||
|
if (!this->m_processing && ImPlot::BeginPlot("##distribution", size, flags)) {
|
||||||
|
ImPlot::SetupAxes("hex.builtin.common.value"_lang, "hex.builtin.common.count"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock | ImPlotAxisFlags_LogScale);
|
||||||
|
ImPlot::SetupAxesLimits(0, 256, 1, double(*std::max_element(this->m_valueCounts.begin(), this->m_valueCounts.end())) * 1.1F, ImGuiCond_Always);
|
||||||
|
|
||||||
|
constexpr static auto x = [] {
|
||||||
|
std::array<ImU64, 256> result { 0 };
|
||||||
|
std::iota(result.begin(), result.end(), 0);
|
||||||
|
return result;
|
||||||
|
}();
|
||||||
|
|
||||||
|
ImPlot::PlotBars<ImU64>("##bytes", x.data(), this->m_valueCounts.data(), x.size(), 1.0);
|
||||||
|
ImPlot::EndPlot();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(prv::Provider *provider, u64 startAddress, u64 endAddress) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_startAddress = startAddress;
|
||||||
|
this->m_endAddress = endAddress;
|
||||||
|
|
||||||
|
// Get a file reader
|
||||||
|
auto reader = prv::BufferedReader(provider);
|
||||||
|
std::vector<u8> bytes = reader.read(this->m_startAddress, this->m_endAddress - this->m_startAddress);
|
||||||
|
|
||||||
|
this->processImpl(bytes);
|
||||||
|
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(std::vector<u8> buffer) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_startAddress = 0;
|
||||||
|
this->m_endAddress = buffer.size();
|
||||||
|
|
||||||
|
this->processImpl(buffer);
|
||||||
|
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the byte distribution array
|
||||||
|
void reset() {
|
||||||
|
this->m_processing = true;
|
||||||
|
this->m_valueCounts.fill(0);
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process one byte at the time
|
||||||
|
void update(u8 byte) {
|
||||||
|
this->m_processing = true;
|
||||||
|
this->m_valueCounts[byte]++;
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return byte distribution array in it's current state
|
||||||
|
std::array<ImU64, 256> & get() {
|
||||||
|
return this->m_valueCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Private method used to factorize the process public method
|
||||||
|
void processImpl(std::vector<u8> bytes) {
|
||||||
|
// Reset the array
|
||||||
|
this->m_valueCounts.fill(0);
|
||||||
|
// Loop over each byte of the file (or a part of it)
|
||||||
|
// Increment the occurrence of the current byte
|
||||||
|
for (u8 byte : bytes)
|
||||||
|
this->m_valueCounts[byte]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Variables used to store the parameters to process
|
||||||
|
u64 m_startAddress;
|
||||||
|
u64 m_endAddress;
|
||||||
|
|
||||||
|
// Hold the result of the byte distribution analysis
|
||||||
|
std::array<ImU64, 256> m_valueCounts;
|
||||||
|
std::atomic<bool> m_processing = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DiagramByteTypesDistribution {
|
||||||
|
public:
|
||||||
|
DiagramByteTypesDistribution(u64 blockSize = 256, size_t sampleSize = 0x1000) : m_blockSize(blockSize), m_sampleSize(sampleSize){ }
|
||||||
|
|
||||||
|
void draw(ImVec2 size, ImPlotFlags flags, bool updateHandle = false) {
|
||||||
|
// Draw the result of the analysis
|
||||||
|
if (!this->m_processing && ImPlot::BeginPlot("##byte_types", size, flags)) {
|
||||||
|
ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.common.percentage"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock);
|
||||||
|
ImPlot::SetupAxesLimits(this->m_startAddress / this->m_blockSize, this->m_endAddress / this->m_blockSize, -0.1F, 100.1F, ImGuiCond_Always);
|
||||||
|
ImPlot::SetupLegend(ImPlotLocation_South, ImPlotLegendFlags_Horizontal | ImPlotLegendFlags_Outside);
|
||||||
|
|
||||||
|
constexpr static std::array Names = { "iscntrl", "isprint", "isspace", "isblank",
|
||||||
|
"isgraph", "ispunct", "isalnum", "isalpha",
|
||||||
|
"isupper", "islower", "isdigit", "isxdigit"
|
||||||
|
};
|
||||||
|
|
||||||
|
for (u32 i = 0; i < Names.size(); i++) {
|
||||||
|
ImPlot::PlotLine(Names[i], this->m_xBlockTypeDistributions.data(), this->m_yBlockTypeDistributions[i].data(), this->m_blockCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The parameter updateHandle is used when using the pattern language since we don't have a provider
|
||||||
|
// but just a set of bytes we won't be able to use the drag bar correctly.
|
||||||
|
if (updateHandle) {
|
||||||
|
// Set a draggable line on the plot
|
||||||
|
if (ImPlot::DragLineX(1, &this->m_handlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) {
|
||||||
|
// The line was dragged, update the position in the hex editor
|
||||||
|
|
||||||
|
// Clamp the value between the start/end of the region to analyze
|
||||||
|
this->m_handlePosition = std::clamp<double>(
|
||||||
|
this->m_handlePosition,
|
||||||
|
std::ceil(this->m_startAddress / double(this->m_blockSize)),
|
||||||
|
std::floor(this->m_endAddress / double(this->m_blockSize)));
|
||||||
|
|
||||||
|
// Compute the position inside hex editor
|
||||||
|
u64 address = u64(std::max<double>(this->m_handlePosition * this->m_blockSize, 0)) + this->m_baseAddress;
|
||||||
|
address = std::min<u64>(address, this->m_baseAddress + this->m_fileSize - 1);
|
||||||
|
ImHexApi::HexEditor::setSelection(address, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ImPlot::EndPlot();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(prv::Provider *provider, u64 startAddress, u64 endAddress) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_startAddress = startAddress;
|
||||||
|
this->m_endAddress = endAddress;
|
||||||
|
this->m_baseAddress = provider->getBaseAddress();
|
||||||
|
this->m_fileSize = provider->getSize();
|
||||||
|
|
||||||
|
// Get a file reader
|
||||||
|
auto reader = prv::BufferedReader(provider);
|
||||||
|
std::vector<u8> bytes = reader.read(this->m_startAddress, this->m_endAddress - this->m_startAddress);
|
||||||
|
|
||||||
|
this->processImpl(bytes);
|
||||||
|
|
||||||
|
// Set the diagram handle position to the start of the plot
|
||||||
|
this->m_handlePosition = this->m_startAddress / double(this->m_blockSize);
|
||||||
|
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void process(std::vector<u8> buffer, u64 baseAddress, u64 fileSize) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_startAddress = 0;
|
||||||
|
this->m_endAddress = buffer.size();
|
||||||
|
this->m_baseAddress = baseAddress;
|
||||||
|
this->m_fileSize = fileSize;
|
||||||
|
|
||||||
|
this->processImpl(buffer);
|
||||||
|
|
||||||
|
// Set the diagram handle position to the start of the plot
|
||||||
|
this->m_handlePosition = this->m_startAddress / double(this->m_blockSize);
|
||||||
|
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the byte type distribution analysis
|
||||||
|
void reset(u64 startAddress, u64 endAddress, u64 baseAddress, u64 size) {
|
||||||
|
this->m_processing = true;
|
||||||
|
|
||||||
|
// Update attributes
|
||||||
|
this->m_startAddress = startAddress;
|
||||||
|
this->m_endAddress = endAddress;
|
||||||
|
this->m_baseAddress = baseAddress;
|
||||||
|
this->m_fileSize = size;
|
||||||
|
|
||||||
|
this->m_byteCount = 0;
|
||||||
|
this->m_blockCount = 0;
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
|
||||||
|
// Reset and resize the array
|
||||||
|
this->m_yBlockTypeDistributions.fill({});
|
||||||
|
for (auto &blockDistribution : this->m_yBlockTypeDistributions)
|
||||||
|
blockDistribution.resize(((this->m_endAddress - this->m_startAddress) / this->m_blockSize) + 1);
|
||||||
|
|
||||||
|
// Set the diagram handle position to the start of the plot
|
||||||
|
this->m_handlePosition = this->m_startAddress / double(this->m_blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process one byte at the time
|
||||||
|
void update(u8 byte) {
|
||||||
|
u64 totalBlock = std::ceil((this->m_endAddress - this->m_startAddress) / this->m_blockSize);
|
||||||
|
// Check if there is still some block to process
|
||||||
|
if (this->m_blockCount < totalBlock) {
|
||||||
|
|
||||||
|
this->m_blockValueCounts[byte]++;
|
||||||
|
|
||||||
|
this->m_byteCount++;
|
||||||
|
if (((this->m_byteCount % this->m_blockSize) == 0) || this->m_byteCount == (this->m_endAddress - this->m_startAddress)) [[unlikely]] {
|
||||||
|
auto typeDist = calculateTypeDistribution(this->m_blockValueCounts, this->m_blockSize);
|
||||||
|
for (u8 i = 0; i < typeDist.size(); i++)
|
||||||
|
this->m_yBlockTypeDistributions[i][this->m_blockCount] = typeDist[i] * 100;
|
||||||
|
|
||||||
|
this->m_blockCount += 1;
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we processed the last block, if so setup the X axis part of the data
|
||||||
|
if (this->m_blockCount == totalBlock) {
|
||||||
|
|
||||||
|
processFinalize();
|
||||||
|
this->m_processing = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the percentage of plain text character inside the analyzed region
|
||||||
|
double getPlainTextCharacterPercentage() {
|
||||||
|
double plainTextPercentage = std::reduce(this->m_yBlockTypeDistributions[2].begin(), this->m_yBlockTypeDistributions[2].end()) / this->m_yBlockTypeDistributions[2].size();
|
||||||
|
return plainTextPercentage + std::reduce(this->m_yBlockTypeDistributions[4].begin(), this->m_yBlockTypeDistributions[4].end()) / this->m_yBlockTypeDistributions[4].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHandlePosition(u64 filePosition) {
|
||||||
|
this->m_handlePosition = filePosition / double(this->m_blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::array<float, 12> calculateTypeDistribution(std::array<ImU64, 256> &valueCounts, size_t blockSize) {
|
||||||
|
std::array<ImU64, 12> counts = {};
|
||||||
|
|
||||||
|
for (u16 value = 0x00; value < u16(valueCounts.size()); value++) {
|
||||||
|
const auto &count = valueCounts[value];
|
||||||
|
|
||||||
|
if (count == 0) [[unlikely]]
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (std::iscntrl(value))
|
||||||
|
counts[0] += count;
|
||||||
|
if (std::isprint(value))
|
||||||
|
counts[1] += count;
|
||||||
|
if (std::isspace(value))
|
||||||
|
counts[2] += count;
|
||||||
|
if (std::isblank(value))
|
||||||
|
counts[3] += count;
|
||||||
|
if (std::isgraph(value))
|
||||||
|
counts[4] += count;
|
||||||
|
if (std::ispunct(value))
|
||||||
|
counts[5] += count;
|
||||||
|
if (std::isalnum(value))
|
||||||
|
counts[6] += count;
|
||||||
|
if (std::isalpha(value))
|
||||||
|
counts[7] += count;
|
||||||
|
if (std::isupper(value))
|
||||||
|
counts[8] += count;
|
||||||
|
if (std::islower(value))
|
||||||
|
counts[9] += count;
|
||||||
|
if (std::isdigit(value))
|
||||||
|
counts[10] += count;
|
||||||
|
if (std::isxdigit(value))
|
||||||
|
counts[11] += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<float, 12> distribution = {};
|
||||||
|
for (u32 i = 0; i < distribution.size(); i++)
|
||||||
|
distribution[i] = static_cast<float>(counts[i]) / blockSize;
|
||||||
|
|
||||||
|
return distribution;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Private method used to factorize the process public method
|
||||||
|
void processImpl(std::vector<u8> bytes) {
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
|
||||||
|
this->m_yBlockTypeDistributions.fill({});
|
||||||
|
for (auto &blockDistribution : this->m_yBlockTypeDistributions)
|
||||||
|
blockDistribution.resize(((this->m_endAddress - this->m_startAddress) / this->m_blockSize) + 1);
|
||||||
|
|
||||||
|
this->m_byteCount = 0;
|
||||||
|
this->m_blockCount = 0;
|
||||||
|
|
||||||
|
// Loop over each byte of the file (or a part of it)
|
||||||
|
for (u64 i = 0; i < bytes.size(); ++i) {
|
||||||
|
this->m_blockValueCounts[bytes[i]]++;
|
||||||
|
|
||||||
|
this->m_byteCount++;
|
||||||
|
if (((this->m_byteCount % this->m_blockSize) == 0) || this->m_byteCount == (this->m_endAddress - this->m_startAddress)) [[unlikely]] {
|
||||||
|
auto typeDist = calculateTypeDistribution(this->m_blockValueCounts, this->m_blockSize);
|
||||||
|
for (u8 i = 0; i < typeDist.size(); i++)
|
||||||
|
this->m_yBlockTypeDistributions[i][this->m_blockCount] = typeDist[i] * 100;
|
||||||
|
|
||||||
|
this->m_blockCount += 1;
|
||||||
|
this->m_blockValueCounts = { 0 };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
processFinalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void processFinalize() {
|
||||||
|
// Only save at most m_sampleSize elements of the result
|
||||||
|
for (u8 i = 0; i < this->m_yBlockTypeDistributions.size(); ++i)
|
||||||
|
this->m_yBlockTypeDistributions[i] = sampleData(this->m_yBlockTypeDistributions[i], std::min<size_t>(this->m_blockCount, this->m_sampleSize));
|
||||||
|
|
||||||
|
size_t stride = std::max(1.0, double(this->m_blockCount / this->m_yBlockTypeDistributions[0].size())) + 1;
|
||||||
|
this->m_blockCount = this->m_yBlockTypeDistributions[0].size();
|
||||||
|
|
||||||
|
// The m_xBlockTypeDistributions attribute is used to specify the position of entropy
|
||||||
|
// values in the plot when the Y axis doesn't start at 0
|
||||||
|
this->m_xBlockTypeDistributions.clear();
|
||||||
|
this->m_xBlockTypeDistributions.resize(this->m_blockCount);
|
||||||
|
for (u64 i = 0; i < this->m_blockCount; ++i)
|
||||||
|
this->m_xBlockTypeDistributions[i] = (this->m_startAddress / this->m_blockSize) + stride*i;
|
||||||
|
--this->m_blockCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Variables used to store the parameters to process
|
||||||
|
|
||||||
|
// The size of the block we are considering for the analysis
|
||||||
|
u64 m_blockSize;
|
||||||
|
u64 m_startAddress;
|
||||||
|
u64 m_endAddress;
|
||||||
|
// Start / size of the file
|
||||||
|
u64 m_baseAddress;
|
||||||
|
u64 m_fileSize;
|
||||||
|
|
||||||
|
// Position of the handle inside the plot
|
||||||
|
double m_handlePosition = 0.0;
|
||||||
|
|
||||||
|
// Hold the number of block that have been processed
|
||||||
|
// during the chunk based entropy analysis
|
||||||
|
u64 m_blockCount;
|
||||||
|
|
||||||
|
// Hold the number of bytes that have been processed
|
||||||
|
// during the analysis (useful for the iterative analysis)
|
||||||
|
u64 m_byteCount;
|
||||||
|
|
||||||
|
// Sampling size, number of elements displayed in the plot,
|
||||||
|
// avoid showing too much data because it decreases the frame rate
|
||||||
|
size_t m_sampleSize;
|
||||||
|
|
||||||
|
// Array used to hold the occurrences of each byte
|
||||||
|
// (useful for the iterative analysis)
|
||||||
|
std::array<ImU64, 256> m_blockValueCounts;
|
||||||
|
|
||||||
|
// The m_xBlockTypeDistributions attribute is used to specify the position of
|
||||||
|
// the values in the plot when the Y axis doesn't start at 0
|
||||||
|
std::vector<float> m_xBlockTypeDistributions;
|
||||||
|
// Hold the result of the byte distribution analysis
|
||||||
|
std::array<std::vector<float>, 12> m_yBlockTypeDistributions;
|
||||||
|
std::atomic<bool> m_processing = false;
|
||||||
|
};
|
||||||
}
|
}
|
@ -26,13 +26,7 @@ namespace hex::plugin::builtin {
|
|||||||
double m_averageEntropy = -1.0;
|
double m_averageEntropy = -1.0;
|
||||||
double m_highestBlockEntropy = -1.0;
|
double m_highestBlockEntropy = -1.0;
|
||||||
double m_plainTextCharacterPercentage = -1.0;
|
double m_plainTextCharacterPercentage = -1.0;
|
||||||
std::vector<double> m_blockEntropy;
|
|
||||||
std::array<std::vector<float>, 12> m_blockTypeDistributions;
|
|
||||||
std::atomic<u64> m_processedBlockCount = 0;
|
|
||||||
|
|
||||||
double m_diagramHandlePosition = 0.0;
|
|
||||||
|
|
||||||
std::array<ImU64, 256> m_valueCounts = { 0 };
|
|
||||||
TaskHolder m_analyzerTask;
|
TaskHolder m_analyzerTask;
|
||||||
|
|
||||||
Region m_analyzedRegion = { 0, 0 };
|
Region m_analyzedRegion = { 0, 0 };
|
||||||
@ -42,8 +36,16 @@ namespace hex::plugin::builtin {
|
|||||||
|
|
||||||
DiagramDigram m_digram;
|
DiagramDigram m_digram;
|
||||||
DiagramLayeredDistribution m_layeredDistribution;
|
DiagramLayeredDistribution m_layeredDistribution;
|
||||||
|
DiagramByteDistribution m_byteDistribution;
|
||||||
|
DiagramByteTypesDistribution m_byteTypesDistribution;
|
||||||
|
DiagramChunkBasedEntropyAnalysis m_chunkBasedEntropy;
|
||||||
|
|
||||||
void analyze();
|
void analyze();
|
||||||
|
|
||||||
|
// User controlled input (referenced by ImGui)
|
||||||
|
int m_inputChunkSize = 0;
|
||||||
|
int m_inputStartAddress = 0;
|
||||||
|
int m_inputEndAddress = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
@ -20,6 +20,8 @@
|
|||||||
|
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
|
||||||
|
#include <content/helpers/diagrams.hpp>
|
||||||
|
|
||||||
namespace hex::plugin::builtin {
|
namespace hex::plugin::builtin {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -36,20 +38,6 @@ namespace hex::plugin::builtin {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns an evenly strided subset of `data`, starting with its first element.
//
// `count` is the requested number of samples. The stride is data.size() / count
// rounded down (never below 1), so the result can hold somewhat more than
// `count` elements when the size is not an exact multiple of it; inputs that
// already have `count` elements or fewer are returned unchanged.
// An empty input or a `count` of 0 yields an empty vector (a zero count used to
// divide by zero and convert the resulting infinity to size_t, which is undefined).
template<typename T>
std::vector<T> sampleData(const std::vector<T> &data, size_t count) {
    if (data.empty() || count == 0)
        return {};

    // Integer arithmetic gives the same stride as the former double division,
    // without the floating point round trip
    const size_t stride = std::max<size_t>(1, data.size() / count);

    std::vector<T> result;
    result.reserve((data.size() + stride - 1) / stride);    // exact number of picked elements

    for (size_t i = 0; i < data.size(); i += stride) {
        result.push_back(data[i]);
    }

    return result;
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -455,6 +443,21 @@ namespace hex::plugin::builtin {
|
|||||||
(waveData.size() / sampleRate) / 60, (waveData.size() / sampleRate) % 60);
|
(waveData.size() / sampleRate) / 60, (waveData.size() / sampleRate) % 60);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pattern language visualizer drawing a chunk based entropy plot.
// arguments[0] is converted to the pattern whose bytes are analysed and
// arguments[1] to the unsigned chunk size handed to the analyzer.
void drawChunkBasedEntropyVisualizer(pl::ptrn::Pattern &, pl::ptrn::Iteratable &, bool shouldReset, std::span<const pl::core::Token::Literal> arguments) {
    // Function-local static: the computed result is kept between calls so it
    // does not have to be recalculated on every frame
    static DiagramChunkBasedEntropyAnalysis analyzer;

    // Recompute only when a reset is requested
    if (shouldReset) {
        auto analysedPattern = arguments[0].toPattern();
        auto requestedChunkSize = arguments[1].toUnsigned();

        analyzer.process(analysedPattern->getBytes(), requestedChunkSize);
    }

    // Draw the stored result
    const ImVec2 plotSize(400, 250);
    analyzer.draw(plotSize, ImPlotFlags_NoChild | ImPlotFlags_CanvasOnly);
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerPatternLanguageVisualizers() {
|
void registerPatternLanguageVisualizers() {
|
||||||
@ -465,6 +468,7 @@ namespace hex::plugin::builtin {
|
|||||||
ContentRegistry::PatternLanguage::addVisualizer("disassembler", drawDisassemblyVisualizer, 4);
|
ContentRegistry::PatternLanguage::addVisualizer("disassembler", drawDisassemblyVisualizer, 4);
|
||||||
ContentRegistry::PatternLanguage::addVisualizer("3d", draw3DVisualizer, 2);
|
ContentRegistry::PatternLanguage::addVisualizer("3d", draw3DVisualizer, 2);
|
||||||
ContentRegistry::PatternLanguage::addVisualizer("sound", drawSoundVisualizer, 3);
|
ContentRegistry::PatternLanguage::addVisualizer("sound", drawSoundVisualizer, 3);
|
||||||
|
ContentRegistry::PatternLanguage::addVisualizer("chunk_entropy", drawChunkBasedEntropyVisualizer, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -26,17 +26,19 @@ namespace hex::plugin::builtin {
|
|||||||
this->m_plainTextCharacterPercentage = -1.0;
|
this->m_plainTextCharacterPercentage = -1.0;
|
||||||
this->m_averageEntropy = -1.0;
|
this->m_averageEntropy = -1.0;
|
||||||
this->m_highestBlockEntropy = -1.0;
|
this->m_highestBlockEntropy = -1.0;
|
||||||
this->m_blockEntropy.clear();
|
|
||||||
this->m_blockSize = 0;
|
this->m_blockSize = 0;
|
||||||
this->m_valueCounts.fill(0x00);
|
|
||||||
this->m_dataMimeType.clear();
|
this->m_dataMimeType.clear();
|
||||||
this->m_dataDescription.clear();
|
this->m_dataDescription.clear();
|
||||||
this->m_analyzedRegion = { 0, 0 };
|
this->m_analyzedRegion = { 0, 0 };
|
||||||
});
|
});
|
||||||
|
|
||||||
EventManager::subscribe<EventRegionSelected>(this, [this](Region region) {
|
EventManager::subscribe<EventRegionSelected>(this, [this](Region region) {
|
||||||
if (this->m_blockSize != 0)
|
// Set the position of the diagram relative to the place where
|
||||||
this->m_diagramHandlePosition = region.getStartAddress() / double(this->m_blockSize);
|
// the user clicked inside the hex editor view
|
||||||
|
if (this->m_blockSize != 0) {
|
||||||
|
this->m_byteTypesDistribution.setHandlePosition(region.getStartAddress());
|
||||||
|
this->m_chunkBasedEntropy.setHandlePosition(region.getStartAddress());
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
EventManager::subscribe<EventProviderDeleted>(this, [this](const auto*) {
|
EventManager::subscribe<EventProviderDeleted>(this, [this](const auto*) {
|
||||||
@ -61,70 +63,27 @@ namespace hex::plugin::builtin {
|
|||||||
EventManager::unsubscribe<EventProviderDeleted>(this);
|
EventManager::unsubscribe<EventProviderDeleted>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
static double calculateEntropy(std::array<ImU64, 256> &valueCounts, size_t blockSize) {
|
|
||||||
double entropy = 0;
|
|
||||||
|
|
||||||
for (auto count : valueCounts) {
|
|
||||||
if (count == 0) [[unlikely]]
|
|
||||||
continue;
|
|
||||||
|
|
||||||
double probability = static_cast<double>(count) / blockSize;
|
|
||||||
|
|
||||||
entropy += probability * std::log2(probability);
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::min(1.0, (-entropy) / 8); // log2(256) = 8
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::array<float, 12> calculateTypeDistribution(std::array<ImU64, 256> &valueCounts, size_t blockSize) {
|
|
||||||
std::array<ImU64, 12> counts = {};
|
|
||||||
|
|
||||||
for (u16 value = 0x00; value < u16(valueCounts.size()); value++) {
|
|
||||||
const auto &count = valueCounts[value];
|
|
||||||
|
|
||||||
if (count == 0) [[unlikely]]
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (std::iscntrl(value))
|
|
||||||
counts[0] += count;
|
|
||||||
if (std::isprint(value))
|
|
||||||
counts[1] += count;
|
|
||||||
if (std::isspace(value))
|
|
||||||
counts[2] += count;
|
|
||||||
if (std::isblank(value))
|
|
||||||
counts[3] += count;
|
|
||||||
if (std::isgraph(value))
|
|
||||||
counts[4] += count;
|
|
||||||
if (std::ispunct(value))
|
|
||||||
counts[5] += count;
|
|
||||||
if (std::isalnum(value))
|
|
||||||
counts[6] += count;
|
|
||||||
if (std::isalpha(value))
|
|
||||||
counts[7] += count;
|
|
||||||
if (std::isupper(value))
|
|
||||||
counts[8] += count;
|
|
||||||
if (std::islower(value))
|
|
||||||
counts[9] += count;
|
|
||||||
if (std::isdigit(value))
|
|
||||||
counts[10] += count;
|
|
||||||
if (std::isxdigit(value))
|
|
||||||
counts[11] += count;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::array<float, 12> distribution = {};
|
|
||||||
for (u32 i = 0; i < distribution.size(); i++)
|
|
||||||
distribution[i] = static_cast<float>(counts[i]) / blockSize;
|
|
||||||
|
|
||||||
return distribution;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ViewInformation::analyze() {
|
void ViewInformation::analyze() {
|
||||||
this->m_analyzerTask = TaskManager::createTask("hex.builtin.view.information.analyzing", 0, [this](auto &task) {
|
this->m_analyzerTask = TaskManager::createTask("hex.builtin.view.information.analyzing", 0, [this](auto &task) {
|
||||||
auto provider = ImHexApi::Provider::get();
|
auto provider = ImHexApi::Provider::get();
|
||||||
|
|
||||||
task.setMaxValue(provider->getSize());
|
if ((this->m_inputChunkSize <= 0)
|
||||||
|
|| (this->m_inputStartAddress < 0)
|
||||||
|
|| (this->m_inputStartAddress >= this->m_inputEndAddress)
|
||||||
|
|| ((size_t) this->m_inputEndAddress > provider->getSize())) {
|
||||||
|
// Invalid parameters, set default one
|
||||||
|
this->m_inputChunkSize = 256;
|
||||||
|
this->m_inputStartAddress = 0;
|
||||||
|
this->m_inputEndAddress = provider->getSize();
|
||||||
|
}
|
||||||
|
|
||||||
this->m_analyzedRegion = { provider->getBaseAddress(), provider->getBaseAddress() + provider->getSize() };
|
task.setMaxValue(this->m_inputEndAddress - this->m_inputStartAddress);
|
||||||
|
|
||||||
|
// Modify the analyzed region
|
||||||
|
this->m_analyzedRegion = {
|
||||||
|
provider->getBaseAddress() + this->m_inputStartAddress,
|
||||||
|
size_t(this->m_inputEndAddress - this->m_inputStartAddress)
|
||||||
|
};
|
||||||
|
|
||||||
{
|
{
|
||||||
magic::compile();
|
magic::compile();
|
||||||
@ -133,66 +92,48 @@ namespace hex::plugin::builtin {
|
|||||||
this->m_dataMimeType = magic::getMIMEType(provider);
|
this->m_dataMimeType = magic::getMIMEType(provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->m_dataValid = true;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
this->m_blockSize = std::max<u32>(std::ceil(provider->getSize() / 2048.0F), 256);
|
this->m_blockSize = std::max<u32>(std::ceil(provider->getSize() / 2048.0F), 256);
|
||||||
|
|
||||||
std::array<ImU64, 256> blockValueCounts = { 0 };
|
|
||||||
|
|
||||||
const auto blockCount = (provider->getSize() / this->m_blockSize) + 1;
|
|
||||||
|
|
||||||
this->m_blockTypeDistributions.fill({});
|
|
||||||
this->m_blockEntropy.clear();
|
|
||||||
this->m_blockEntropy.resize(blockCount);
|
|
||||||
for (auto &blockDistribution : this->m_blockTypeDistributions)
|
|
||||||
blockDistribution.resize(blockCount);
|
|
||||||
|
|
||||||
this->m_valueCounts.fill(0);
|
|
||||||
this->m_processedBlockCount = 0;
|
|
||||||
this->m_averageEntropy = -1.0;
|
this->m_averageEntropy = -1.0;
|
||||||
this->m_highestBlockEntropy = -1.0;
|
this->m_highestBlockEntropy = -1.0;
|
||||||
this->m_plainTextCharacterPercentage = -1.0;
|
this->m_plainTextCharacterPercentage = -1.0;
|
||||||
|
|
||||||
this->m_digram.process(provider, this->m_analyzedRegion.getStartAddress(), this->m_analyzedRegion.getSize());
|
// Setup / start each analysis
|
||||||
this->m_layeredDistribution.process(provider, this->m_analyzedRegion.getStartAddress(), this->m_analyzedRegion.getSize());
|
|
||||||
|
|
||||||
|
this->m_byteDistribution.reset();
|
||||||
|
this->m_digram.reset(this->m_inputEndAddress - this->m_inputStartAddress);
|
||||||
|
this->m_layeredDistribution.reset(this->m_inputEndAddress - this->m_inputStartAddress);
|
||||||
|
this->m_byteTypesDistribution.reset(this->m_inputStartAddress, this->m_inputEndAddress,
|
||||||
|
provider->getBaseAddress(), provider->getSize());
|
||||||
|
this->m_chunkBasedEntropy.reset(this->m_inputChunkSize, this->m_inputStartAddress, this->m_inputEndAddress,
|
||||||
|
provider->getBaseAddress(), provider->getSize());
|
||||||
|
|
||||||
|
// Create a handle to the file
|
||||||
auto reader = prv::BufferedReader(provider);
|
auto reader = prv::BufferedReader(provider);
|
||||||
reader.setEndAddress(provider->getBaseAddress() + provider->getSize());
|
reader.seek(provider->getBaseAddress() + this->m_inputStartAddress);
|
||||||
|
reader.setEndAddress(provider->getBaseAddress() + this->m_inputEndAddress);
|
||||||
|
|
||||||
u64 count = 0;
|
u64 count = 0;
|
||||||
|
|
||||||
|
// Loop over each byte of the [part of the] file and update each analysis
|
||||||
|
// one byte at the time in order to process the file only once
|
||||||
for (u8 byte : reader) {
|
for (u8 byte : reader) {
|
||||||
this->m_valueCounts[byte]++;
|
this->m_byteDistribution.update(byte);
|
||||||
blockValueCounts[byte]++;
|
this->m_byteTypesDistribution.update(byte);
|
||||||
|
this->m_chunkBasedEntropy.update(byte);
|
||||||
count++;
|
this->m_layeredDistribution.update(byte);
|
||||||
if (((count % this->m_blockSize) == 0) || count == provider->getSize()) [[unlikely]] {
|
this->m_digram.update(byte);
|
||||||
this->m_blockEntropy[this->m_processedBlockCount] = calculateEntropy(blockValueCounts, this->m_blockSize);
|
++count;
|
||||||
|
|
||||||
{
|
|
||||||
auto typeDist = calculateTypeDistribution(blockValueCounts, this->m_blockSize);
|
|
||||||
for (u8 i = 0; i < typeDist.size(); i++)
|
|
||||||
this->m_blockTypeDistributions[i][this->m_processedBlockCount] = typeDist[i] * 100;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
this->m_processedBlockCount += 1;
|
|
||||||
blockValueCounts = { 0 };
|
|
||||||
task.update(count);
|
task.update(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->m_averageEntropy = this->m_chunkBasedEntropy.calculateEntropy(this->m_byteDistribution.get(), this->m_inputEndAddress - this->m_inputStartAddress);
|
||||||
|
this->m_highestBlockEntropy = this->m_chunkBasedEntropy.getHighestBlockEntropy();
|
||||||
|
this->m_plainTextCharacterPercentage = this->m_byteTypesDistribution.getPlainTextCharacterPercentage();
|
||||||
}
|
}
|
||||||
|
|
||||||
this->m_averageEntropy = calculateEntropy(this->m_valueCounts, provider->getSize());
|
this->m_dataValid = true;
|
||||||
if (!this->m_blockEntropy.empty())
|
|
||||||
this->m_highestBlockEntropy = *std::max_element(this->m_blockEntropy.begin(), this->m_blockEntropy.end());
|
|
||||||
else
|
|
||||||
this->m_highestBlockEntropy = 0;
|
|
||||||
|
|
||||||
this->m_plainTextCharacterPercentage = std::reduce(this->m_blockTypeDistributions[2].begin(), this->m_blockTypeDistributions[2].end()) / this->m_blockTypeDistributions[2].size();
|
|
||||||
this->m_plainTextCharacterPercentage += std::reduce(this->m_blockTypeDistributions[4].begin(), this->m_blockTypeDistributions[4].end()) / this->m_blockTypeDistributions[4].size();
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,6 +145,16 @@ namespace hex::plugin::builtin {
|
|||||||
if (ImHexApi::Provider::isValid() && provider->isReadable()) {
|
if (ImHexApi::Provider::isValid() && provider->isReadable()) {
|
||||||
ImGui::BeginDisabled(this->m_analyzerTask.isRunning());
|
ImGui::BeginDisabled(this->m_analyzerTask.isRunning());
|
||||||
{
|
{
|
||||||
|
ImGui::Header("hex.builtin.view.disassembler.settings.header"_lang);
|
||||||
|
|
||||||
|
ImGui::InputInt("hex.builtin.view.information.block_size"_lang, &this->m_inputChunkSize, ImGuiInputTextFlags_CharsDecimal);
|
||||||
|
|
||||||
|
// Clamp the values since the user can Ctrl+Click to transform the slider into an input
|
||||||
|
ImGui::SliderInt("hex.builtin.common.begin"_lang, &this->m_inputStartAddress, 0, provider->getSize(), "%d", ImGuiSliderFlags_AlwaysClamp);
|
||||||
|
|
||||||
|
// Clamp the values since the user can Ctrl+Click to transform the slider into an input
|
||||||
|
ImGui::SliderInt("hex.builtin.common.end"_lang, &this->m_inputEndAddress, 0, provider->getSize(), "%d", ImGuiSliderFlags_AlwaysClamp);
|
||||||
|
|
||||||
if (ImGui::Button("hex.builtin.view.information.analyze"_lang, ImVec2(ImGui::GetContentRegionAvail().x, 0)))
|
if (ImGui::Button("hex.builtin.view.information.analyze"_lang, ImVec2(ImGui::GetContentRegionAvail().x, 0)))
|
||||||
this->analyze();
|
this->analyze();
|
||||||
}
|
}
|
||||||
@ -215,7 +166,7 @@ namespace hex::plugin::builtin {
|
|||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->m_dataValid) {
|
if (!this->m_analyzerTask.isRunning() && this->m_dataValid) {
|
||||||
|
|
||||||
// Analyzed region
|
// Analyzed region
|
||||||
ImGui::Header("hex.builtin.view.information.region"_lang, true);
|
ImGui::Header("hex.builtin.view.information.region"_lang, true);
|
||||||
@ -279,70 +230,33 @@ namespace hex::plugin::builtin {
|
|||||||
ImGui::PushStyleColor(ImGuiCol_FrameBg, ImGui::GetColorU32(ImGuiCol_WindowBg));
|
ImGui::PushStyleColor(ImGuiCol_FrameBg, ImGui::GetColorU32(ImGuiCol_WindowBg));
|
||||||
ImPlot::PushStyleColor(ImPlotCol_FrameBg, ImGui::GetColorU32(ImGuiCol_WindowBg));
|
ImPlot::PushStyleColor(ImPlotCol_FrameBg, ImGui::GetColorU32(ImGuiCol_WindowBg));
|
||||||
|
|
||||||
|
// Display byte distribution analysis
|
||||||
ImGui::TextUnformatted("hex.builtin.view.information.distribution"_lang);
|
ImGui::TextUnformatted("hex.builtin.view.information.distribution"_lang);
|
||||||
if (ImPlot::BeginPlot("##distribution", ImVec2(-1, 0), ImPlotFlags_NoChild | ImPlotFlags_NoLegend | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect)) {
|
this->m_byteDistribution.draw(
|
||||||
ImPlot::SetupAxes("hex.builtin.common.value"_lang, "hex.builtin.common.count"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock | ImPlotAxisFlags_LogScale);
|
ImVec2(-1, 0),
|
||||||
ImPlot::SetupAxesLimits(0, 256, 1, double(*std::max_element(this->m_valueCounts.begin(), this->m_valueCounts.end())) * 1.1F, ImGuiCond_Always);
|
ImPlotFlags_NoChild | ImPlotFlags_NoLegend | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect
|
||||||
|
);
|
||||||
static auto x = [] {
|
|
||||||
std::array<ImU64, 256> result { 0 };
|
|
||||||
std::iota(result.begin(), result.end(), 0);
|
|
||||||
return result;
|
|
||||||
}();
|
|
||||||
|
|
||||||
ImPlot::PlotBars<ImU64>("##bytes", x.data(), this->m_valueCounts.data(), x.size(), 1.0);
|
|
||||||
|
|
||||||
ImPlot::EndPlot();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Display byte types distribution analysis
|
||||||
ImGui::TextUnformatted("hex.builtin.view.information.byte_types"_lang);
|
ImGui::TextUnformatted("hex.builtin.view.information.byte_types"_lang);
|
||||||
if (ImPlot::BeginPlot("##byte_types", ImVec2(-1, 0), ImPlotFlags_NoChild | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect | ImPlotFlags_AntiAliased)) {
|
this->m_byteTypesDistribution.draw(
|
||||||
ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.common.percentage"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock);
|
ImVec2(-1, 0),
|
||||||
ImPlot::SetupAxesLimits(0, this->m_blockTypeDistributions[0].size(), -0.1F, 100.1F, ImGuiCond_Always);
|
ImPlotFlags_NoChild | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect | ImPlotFlags_AntiAliased,
|
||||||
ImPlot::SetupLegend(ImPlotLocation_South, ImPlotLegendFlags_Horizontal | ImPlotLegendFlags_Outside);
|
true
|
||||||
|
);
|
||||||
constexpr static std::array Names = { "iscntrl", "isprint", "isspace", "isblank", "isgraph", "ispunct", "isalnum", "isalpha", "isupper", "islower", "isdigit", "isxdigit" };
|
|
||||||
|
|
||||||
for (u32 i = 0; i < 12; i++) {
|
|
||||||
ImPlot::PlotLine(Names[i], this->m_blockTypeDistributions[i].data(), this->m_processedBlockCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ImPlot::DragLineX(1, &this->m_diagramHandlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) {
|
|
||||||
u64 address = u64(std::max<double>(this->m_diagramHandlePosition, 0) * this->m_blockSize) + provider->getBaseAddress();
|
|
||||||
address = std::min(address, provider->getBaseAddress() + provider->getSize() - 1);
|
|
||||||
ImHexApi::HexEditor::setSelection(address, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ImPlot::EndPlot();
|
|
||||||
}
|
|
||||||
|
|
||||||
ImGui::NewLine();
|
|
||||||
|
|
||||||
|
// Display chunk based entropy analysis
|
||||||
ImGui::TextUnformatted("hex.builtin.view.information.entropy"_lang);
|
ImGui::TextUnformatted("hex.builtin.view.information.entropy"_lang);
|
||||||
|
this->m_chunkBasedEntropy.draw(
|
||||||
|
ImVec2(-1, 0),
|
||||||
|
ImPlotFlags_NoChild | ImPlotFlags_CanvasOnly | ImPlotFlags_AntiAliased,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
if (ImPlot::BeginPlot("##entropy", ImVec2(-1, 0), ImPlotFlags_NoChild | ImPlotFlags_CanvasOnly | ImPlotFlags_AntiAliased)) {
|
|
||||||
ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.view.information.entropy"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock);
|
|
||||||
ImPlot::SetupAxesLimits(0, this->m_blockEntropy.size(), -0.1F, 1.1F, ImGuiCond_Always);
|
|
||||||
|
|
||||||
ImPlot::PlotLine("##entropy_line", this->m_blockEntropy.data(), this->m_processedBlockCount);
|
|
||||||
|
|
||||||
if (ImPlot::DragLineX(1, &this->m_diagramHandlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) {
|
|
||||||
u64 address = u64(std::max<double>(this->m_diagramHandlePosition, 0) * this->m_blockSize) + provider->getBaseAddress();
|
|
||||||
address = std::min(address, provider->getBaseAddress() + provider->getSize() - 1);
|
|
||||||
ImHexApi::HexEditor::setSelection(address, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ImPlot::EndPlot();
|
|
||||||
}
|
|
||||||
ImPlot::PopStyleColor();
|
ImPlot::PopStyleColor();
|
||||||
ImGui::PopStyleColor();
|
ImGui::PopStyleColor();
|
||||||
|
|
||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
|
|
||||||
this->m_diagramHandlePosition = std::clamp<double>(
|
|
||||||
this->m_diagramHandlePosition,
|
|
||||||
this->m_analyzedRegion.getStartAddress() / double(this->m_blockSize),
|
|
||||||
this->m_analyzedRegion.getEndAddress() / double(this->m_blockSize));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Entropy information
|
// Entropy information
|
||||||
@ -355,7 +269,7 @@ namespace hex::plugin::builtin {
|
|||||||
ImGui::TableNextColumn();
|
ImGui::TableNextColumn();
|
||||||
ImGui::TextFormatted("{}", "hex.builtin.view.information.block_size"_lang);
|
ImGui::TextFormatted("{}", "hex.builtin.view.information.block_size"_lang);
|
||||||
ImGui::TableNextColumn();
|
ImGui::TableNextColumn();
|
||||||
ImGui::TextFormatted("hex.builtin.view.information.block_size.desc"_lang, this->m_blockEntropy.size(), this->m_blockSize);
|
ImGui::TextFormatted("hex.builtin.view.information.block_size.desc"_lang, this->m_chunkBasedEntropy.getSize(), this->m_chunkBasedEntropy.getChunkSize());
|
||||||
|
|
||||||
ImGui::TableNextColumn();
|
ImGui::TableNextColumn();
|
||||||
ImGui::TextFormatted("{}", "hex.builtin.view.information.file_entropy"_lang);
|
ImGui::TextFormatted("{}", "hex.builtin.view.information.file_entropy"_lang);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user