1
0
mirror of synced 2025-01-09 21:21:38 +01:00

1112 lines
43 KiB
C++
Raw Normal View History

#pragma once
#include <hex.hpp>
#include <imgui.h>
#include <implot.h>
#include <hex/api/imhex_api.hpp>
#include <hex/api/localization_manager.hpp>
#include <hex/providers/provider.hpp>
#include <hex/providers/buffered_reader.hpp>
#include <hex/helpers/utils.hpp>
#include <imgui_internal.h>
#include <atomic>
#include <implot_internal.h>
#include <random>
#include <hex/ui/imgui_imhex_extensions.h>
namespace hex {
2023-11-10 20:47:08 +01:00
namespace impl {
2023-11-10 20:47:08 +01:00
/**
 * @brief Axis label formatter that prints a tick value as an unsigned integer
 *
 * @param value    Tick value to format
 * @param buffer   Destination buffer for the label text
 * @param size     Capacity of @p buffer in bytes
 * @param userData printf-style format string with a single `unsigned long long`
 *                 conversion (the callers in this file pass e.g. "0x%04llX")
 * @return The value returned by snprintf
 */
inline int IntegerAxisFormatter(double value, char* buffer, int size, void *userData) {
    // Converting a negative, NaN or out-of-range double to an unsigned integer is
    // undefined behaviour, so clamp the value into the representable range first
    constexpr double UpperLimit = 18446744073709551616.0;    // 2^64

    unsigned long long integer = 0;
    if (value >= UpperLimit)
        integer = ~0ULL;
    else if (value >= 1.0)
        integer = static_cast<unsigned long long>(value);

    // The format strings use the `ll` length modifier, so the argument has to be
    // an `unsigned long long` regardless of how the platform defines 64 bit integers
    return snprintf(buffer, size, static_cast<const char*>(userData), integer);
}
2023-11-10 20:47:08 +01:00
/**
 * @brief Reads a randomly chosen sample out of a provider region
 *
 * A region smaller than @p sampleSize is read completely. Otherwise
 * ceil(sqrt(sampleSize)) sequences of at most ceil(sqrt(sampleSize)) bytes each are
 * read from random offsets and concatenated in ascending offset order.
 *
 * @param provider   Provider to read the data from
 * @param address    Start address of the region
 * @param size       Size of the region in bytes
 * @param sampleSize Approximate number of bytes the sample should contain
 * @return The sampled bytes
 */
inline std::vector<u8> getSampleSelection(prv::Provider *provider, u64 address, size_t size, size_t sampleSize) {
    std::vector<u8> buffer;

    // The region is smaller than the requested sample, simply use all of it
    if (size < sampleSize) {
        buffer.resize(size);
        provider->read(address, buffer.data(), size);
        return buffer;
    }

    const size_t sequenceCount = std::ceil(std::sqrt(sampleSize));

    std::random_device randomDevice;
    std::mt19937_64 random(randomDevice());

    // Keyed by offset so that iterating the map yields the sequences in region order
    std::map<u64, std::vector<u8>> orderedData;
    for (size_t i = 0; i < sequenceCount; i++) {
        const u64 offset = random() % size;

        // Sequences close to the end of the region are truncated
        std::vector<u8> sequence(std::min<size_t>(sequenceCount, size - offset));
        provider->read(address + offset, sequence.data(), sequence.size());

        orderedData.emplace(offset, std::move(sequence));
    }

    buffer.reserve(sampleSize);

    u64 lastEnd = 0x00;
    for (const auto &[offset, sequence] : orderedData) {
        // Drop the bytes the previous sequence shares with this one so nothing is sampled twice
        if (offset < lastEnd)
            buffer.resize(buffer.size() - (lastEnd - offset));

        // Append the sequence. Assigning it instead would throw away everything collected so far
        buffer.insert(buffer.end(), sequence.begin(), sequence.end());
        lastEnd = offset + sequence.size();
    }

    return buffer;
}
2023-11-10 20:47:08 +01:00
/**
 * @brief Picks a randomly chosen sample out of a byte buffer
 *
 * A buffer smaller than @p sampleSize is returned unchanged. Otherwise
 * ceil(sqrt(sampleSize)) sequences of at most ceil(sqrt(sampleSize)) bytes each are
 * copied from random offsets and concatenated in ascending offset order.
 *
 * @param inputBuffer Data to sample from
 * @param sampleSize  Approximate number of bytes the sample should contain
 * @return The sampled bytes
 */
inline std::vector<u8> getSampleSelection(const std::vector<u8> &inputBuffer, size_t sampleSize) {
    // The input is smaller than the requested sample, simply use all of it
    if (inputBuffer.size() < sampleSize)
        return inputBuffer;

    const size_t sequenceCount = std::ceil(std::sqrt(sampleSize));

    std::random_device randomDevice;
    std::mt19937_64 random(randomDevice());

    // Keyed by offset so that iterating the map yields the sequences in buffer order
    std::map<u64, std::vector<u8>> orderedData;
    for (size_t i = 0; i < sequenceCount; i++) {
        const u64 offset = random() % inputBuffer.size();

        // Sequences close to the end of the buffer are truncated
        const size_t length = std::min<size_t>(sequenceCount, inputBuffer.size() - offset);
        const u8 *first = inputBuffer.data() + offset;

        orderedData.emplace(offset, std::vector<u8>(first, first + length));
    }

    std::vector<u8> buffer;
    buffer.reserve(sampleSize);

    u64 lastEnd = 0x00;
    for (const auto &[offset, sequence] : orderedData) {
        // Drop the bytes the previous sequence shares with this one so nothing is sampled twice
        if (offset < lastEnd)
            buffer.resize(buffer.size() - (lastEnd - offset));

        // Append the sequence. Assigning it instead would throw away everything collected so far
        buffer.insert(buffer.end(), sequence.begin(), sequence.end());
        lastEnd = offset + sequence.size();
    }

    return buffer;
}
}
class DiagramDigram {
public:
explicit DiagramDigram() { }
void draw(ImVec2 size) {
if (!m_processing) {
if (!m_textureValid) {
std::vector<u32> pixels;
pixels.resize(0x100 * 0x100, 0x00);
for (size_t i = 0; i < (m_buffer.empty() ? 0 : m_buffer.size() - 1); i++) {
const u8 x = m_buffer[i];
const u8 y = m_buffer[i + 1];
auto color = ImLerp(
ImColor(0xFF, 0x6D, 0x01).Value,
ImColor(0x01, 0x93, 0xFF).Value,
float(i) / m_buffer.size()) + ImVec4(m_glowBuffer[i], m_glowBuffer[i], m_glowBuffer[i], 0.0F);
color.w = m_opacity;
auto &pixel = pixels[x * 0xFF + y];
pixel = ImAlphaBlendColors(pixel, ImColor(color));
}
m_texture = ImGuiExt::Texture::fromBitmap(reinterpret_cast<u8*>(pixels.data()), pixels.size() * 4, 0xFF, 0xFF, m_filter);
m_textureValid = m_texture.isValid();
}
}
ImGui::PushStyleColor(ImGuiCol_ChildBg, ImU32(ImColor(0, 0, 0)));
if (ImGui::BeginChild("##digram", size, ImGuiChildFlags_Border)) {
auto drawList = ImGui::GetWindowDrawList();
if (m_textureValid) {
auto pos = ImGui::GetWindowPos() + ImVec2(size.x * 0.025F, size.y * 0.025F);
drawList->AddImage(m_texture, pos, pos + size * 0.95F);
}
}
ImGui::EndChild();
ImGui::PopStyleColor();
}
void process(prv::Provider *provider, u64 address, size_t size) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_buffer = impl::getSampleSelection(provider, address, size, m_sampleSize == 0 ? size : m_sampleSize);
processImpl();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
void process(const std::vector<u8> &buffer) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_buffer = impl::getSampleSelection(buffer, m_sampleSize == 0 ? buffer.size() : m_sampleSize);
processImpl();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
void reset(u64 size) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_buffer.clear();
m_buffer.reserve(m_sampleSize == 0 ? size : m_sampleSize);
2023-12-19 13:10:25 +01:00
m_byteCount = 0;
m_fileSize = size;
m_textureValid = false;
}
void update(u8 byte) {
// Check if there is some space left
2023-12-19 13:10:25 +01:00
if (m_byteCount < m_fileSize) {
if (m_sampleSize == 0 || (m_byteCount % u64(std::ceil(double(m_fileSize) / double(m_sampleSize)))) == 0)
2023-12-19 13:10:25 +01:00
m_buffer.push_back(byte);
++m_byteCount;
if (m_byteCount == m_fileSize) {
processImpl();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
}
}
void setFiltering(ImGuiExt::Texture::Filter filter) {
m_filter = filter;
}
void setBrightness(float brightness) {
m_brightness = brightness;
}
void setSampleSize(size_t sampleSize) {
m_sampleSize = sampleSize;
}
private:
void processImpl() {
2023-12-19 13:10:25 +01:00
m_glowBuffer.resize(m_buffer.size());
std::map<u64, size_t> heatMap;
2023-12-19 13:10:25 +01:00
for (size_t i = 0; i < (m_buffer.empty() ? 0 : m_buffer.size() - 1); i++) {
auto count = ++heatMap[m_buffer[i] << 8 | heatMap[i + 1]];
2023-12-19 13:10:25 +01:00
m_highestCount = std::max(m_highestCount, count);
}
2023-12-19 13:10:25 +01:00
for (size_t i = 0; i < (m_buffer.empty() ? 0 : m_buffer.size() - 1); i++) {
m_glowBuffer[i] = std::min<float>(0.2F + (float(heatMap[m_buffer[i] << 8 | m_buffer[i + 1]]) / float(m_highestCount / 1000)), 1.0F);
}
m_opacity = (log10(float(m_sampleSize == 0 ? m_buffer.size() : m_sampleSize)) / log10(float(m_highestCount))) / (100.0F * (1.0F - m_brightness));
}
private:
ImGuiExt::Texture::Filter m_filter = ImGuiExt::Texture::Filter::Nearest;
float m_brightness = 0.5F;
2023-11-10 20:47:08 +01:00
size_t m_sampleSize = 0;
// The number of bytes processed and the size of
// the file to analyze (useful for iterative analysis)
2023-11-10 20:47:08 +01:00
u64 m_byteCount = 0;
u64 m_fileSize = 0;
std::vector<u8> m_buffer;
std::vector<float> m_glowBuffer;
float m_opacity = 0.0F;
size_t m_highestCount = 0;
std::atomic<bool> m_processing = false;
bool m_textureValid = false;
ImGuiExt::Texture m_texture;
};
class DiagramLayeredDistribution {
public:
explicit DiagramLayeredDistribution() { }
void draw(ImVec2 size) {
if (!m_processing) {
if (!m_textureValid) {
std::vector<u32> pixels;
pixels.resize(0x100 * 0x100, 0x00);
for (size_t i = 0; i < (m_buffer.empty() ? 0 : m_buffer.size() - 1); i++) {
const u8 x = m_buffer[i];
const u8 y = (float(i) / m_buffer.size()) * 0xFF;
auto color = ImLerp(ImColor(0xFF, 0x6D, 0x01).Value, ImColor(0x01, 0x93, 0xFF).Value, float(i) / m_buffer.size()) + ImVec4(m_glowBuffer[i], m_glowBuffer[i], m_glowBuffer[i], 0.0F);
color.w = m_opacity;
auto &pixel = pixels[x * 0xFF + y];
pixel = ImAlphaBlendColors(pixel, ImColor(color));
}
m_texture = ImGuiExt::Texture::fromBitmap(reinterpret_cast<u8*>(pixels.data()), pixels.size() * 4, 0xFF, 0xFF, m_filter);
m_textureValid = m_texture.isValid();
}
}
ImGui::PushStyleColor(ImGuiCol_ChildBg, ImU32(ImColor(0, 0, 0)));
if (ImGui::BeginChild("##layered_distribution", size, ImGuiChildFlags_Border)) {
auto drawList = ImGui::GetWindowDrawList();
if (m_textureValid) {
const auto pos = ImGui::GetWindowPos() + ImVec2(size.x * 0.025F, size.y * 0.025F);
drawList->AddImage(m_texture, pos, pos + size * 0.95F);
}
}
ImGui::EndChild();
ImGui::PopStyleColor();
}
void process(prv::Provider *provider, u64 address, size_t size) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_buffer = impl::getSampleSelection(provider, address, size, m_sampleSize == 0 ? size : m_sampleSize);
processImpl();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
void process(const std::vector<u8> &buffer) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_buffer = impl::getSampleSelection(buffer, m_sampleSize == 0 ? buffer.size() : m_sampleSize);
processImpl();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
void reset(u64 size) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_buffer.clear();
m_buffer.reserve(m_sampleSize == 0 ? size : m_sampleSize);
2023-12-19 13:10:25 +01:00
m_byteCount = 0;
m_fileSize = size;
m_textureValid = false;
}
void update(u8 byte) {
// Check if there is some space left
2023-12-19 13:10:25 +01:00
if (m_byteCount < m_fileSize) {
if (m_sampleSize == 0 || (m_byteCount % u64(std::ceil(double(m_fileSize) / double(m_sampleSize)))) == 0)
2023-12-19 13:10:25 +01:00
m_buffer.push_back(byte);
++m_byteCount;
if (m_byteCount == m_fileSize) {
processImpl();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
}
}
void setFiltering(ImGuiExt::Texture::Filter filter) {
m_filter = filter;
}
void setBrightness(float brightness) {
m_brightness = brightness;
}
void setSampleSize(size_t sampleSize) {
m_sampleSize = sampleSize;
}
private:
void processImpl() {
2023-12-19 13:10:25 +01:00
m_glowBuffer.resize(m_buffer.size());
std::map<u64, size_t> heatMap;
2023-12-19 13:10:25 +01:00
for (size_t i = 0; i < (m_buffer.empty() ? 0 : m_buffer.size() - 1); i++) {
auto count = ++heatMap[m_buffer[i] << 8 | heatMap[i + 1]];
2023-12-19 13:10:25 +01:00
m_highestCount = std::max(m_highestCount, count);
}
2023-12-19 13:10:25 +01:00
for (size_t i = 0; i < (m_buffer.empty() ? 0 : m_buffer.size() - 1); i++) {
m_glowBuffer[i] = std::min<float>(0.2F + (float(heatMap[m_buffer[i] << 8 | m_buffer[i + 1]]) / float(m_highestCount / 1000)), 1.0F);
}
m_opacity = (log10(float(m_sampleSize == 0 ? m_buffer.size() : m_sampleSize)) / log10(float(m_highestCount))) / (100.0F * (1.0F - m_brightness));
}
private:
ImGuiExt::Texture::Filter m_filter = ImGuiExt::Texture::Filter::Nearest;
float m_brightness = 0.5F;
2023-11-10 20:47:08 +01:00
size_t m_sampleSize = 0;
// The number of bytes processed and the size of
// the file to analyze (useful for iterative analysis)
2023-11-10 20:47:08 +01:00
u64 m_byteCount = 0;
u64 m_fileSize = 0;
std::vector<u8> m_buffer;
std::vector<float> m_glowBuffer;
float m_opacity = 0.0F;
size_t m_highestCount = 0;
std::atomic<bool> m_processing = false;
bool m_textureValid = false;
ImGuiExt::Texture m_texture;
};
class DiagramChunkBasedEntropyAnalysis {
public:
2023-11-10 20:47:08 +01:00
explicit DiagramChunkBasedEntropyAnalysis(u64 blockSize = 256, size_t sampleSize = 0x1000) : m_blockSize(blockSize), m_sampleSize(sampleSize) { }
void draw(ImVec2 size, ImPlotFlags flags, bool updateHandle = false) {
2023-12-19 13:10:25 +01:00
if (!m_processing && ImPlot::BeginPlot("##ChunkBasedAnalysis", size, flags)) {
ImPlot::SetupAxes("hex.ui.common.address"_lang, "hex.builtin.information_section.info_analysis.entropy"_lang,
ImPlotAxisFlags_Lock | ImPlotAxisFlags_NoHighlight | ImPlotAxisFlags_NoSideSwitch,
ImPlotAxisFlags_Lock | ImPlotAxisFlags_NoHighlight | ImPlotAxisFlags_NoSideSwitch);
2023-11-10 20:47:08 +01:00
ImPlot::SetupAxisFormat(ImAxis_X1, impl::IntegerAxisFormatter, (void*)("0x%04llX"));
ImPlot::SetupMouseText(ImPlotLocation_NorthEast);
// Set the axis limit to [first block : last block]
ImPlot::SetupAxesLimits(
2023-12-19 13:10:25 +01:00
m_xBlockEntropy.empty() ? 0 : m_xBlockEntropy.front(),
m_xBlockEntropy.empty() ? 0 : m_xBlockEntropy.back(),
-0.1F,
1.1F,
ImGuiCond_Always);
// Draw the plot
2023-12-19 13:10:25 +01:00
ImPlot::PlotLine("##ChunkBasedAnalysisLine", m_xBlockEntropy.data(), m_yBlockEntropySampled.data(), m_xBlockEntropy.size());
// The parameter updateHandle is used when using the pattern language since we don't have a provider
// but just a set of bytes, we won't be able to use the drag bar correctly.
if (updateHandle) {
// Set a draggable line on the plot
2023-12-19 13:10:25 +01:00
if (ImPlot::DragLineX(1, &m_handlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) {
// The line was dragged, update the position in the hex editor
// Clamp the value between the start/end of the region to analyze
2023-12-19 13:10:25 +01:00
m_handlePosition = std::clamp<double>(
m_handlePosition,
m_startAddress,
m_endAddress);
// Compute the position inside hex editor
2023-12-19 13:10:25 +01:00
u64 address = u64(std::max<double>(m_handlePosition, 0)) + m_baseAddress;
address = std::min<u64>(address, m_baseAddress + m_fileSize - 1);
ImHexApi::HexEditor::setSelection(address, 1);
}
}
ImPlot::EndPlot();
}
}
void process(prv::Provider *provider, u64 chunkSize, u64 startAddress, u64 endAddress) {
2023-12-19 13:10:25 +01:00
m_processing = true;
// Update attributes
2023-12-19 13:10:25 +01:00
m_chunkSize = chunkSize;
m_startAddress = startAddress;
m_endAddress = endAddress;
2023-12-19 13:10:25 +01:00
m_baseAddress = provider->getBaseAddress();
m_fileSize = provider->getSize();
// Get a file reader
auto reader = prv::ProviderReader(provider);
2023-12-19 13:10:25 +01:00
std::vector<u8> bytes = reader.read(m_startAddress, m_endAddress - m_startAddress);
this->processImpl(bytes);
// Set the diagram handle position to the start of the plot
2023-12-19 13:10:25 +01:00
m_handlePosition = m_startAddress;
2023-12-19 13:10:25 +01:00
m_processing = false;
}
2023-11-10 20:47:08 +01:00
void process(const std::vector<u8> &buffer, u64 chunkSize) {
2023-12-19 13:10:25 +01:00
m_processing = true;
// Update attributes (use buffer size as end address)
2023-12-19 13:10:25 +01:00
m_chunkSize = chunkSize;
m_startAddress = 0;
m_endAddress = buffer.size();
2023-12-19 13:10:25 +01:00
m_baseAddress = 0;
m_fileSize = buffer.size();
this->processImpl(buffer);
// Set the diagram handle position to the start of the plot
2023-12-19 13:10:25 +01:00
m_handlePosition = m_startAddress;
2023-12-19 13:10:25 +01:00
m_processing = false;
}
// Reset the entropy analysis
void reset(u64 chunkSize, u64 startAddress, u64 endAddress, u64 baseAddress, u64 size) {
2023-12-19 13:10:25 +01:00
m_processing = true;
// Update attributes
2023-12-19 13:10:25 +01:00
m_chunkSize = chunkSize;
m_startAddress = startAddress;
m_endAddress = endAddress;
m_baseAddress = baseAddress;
m_fileSize = size;
2023-12-19 13:10:25 +01:00
m_blockValueCounts = { 0 };
// Reset and resize the array
2023-12-19 13:10:25 +01:00
m_yBlockEntropy.clear();
2023-12-19 13:10:25 +01:00
m_byteCount = 0;
m_blockCount = 0;
// Set the diagram handle position to the start of the plot
2023-12-19 13:10:25 +01:00
m_handlePosition = m_startAddress;
}
// Process one byte at the time
void update(u8 byte) {
2023-12-19 13:10:25 +01:00
u64 totalBlock = std::ceil((m_endAddress - m_startAddress) / m_chunkSize);
// Check if there is still some
2023-12-19 13:10:25 +01:00
if (m_blockCount < totalBlock) {
// Increment the occurrence of the current byte
2023-12-19 13:10:25 +01:00
m_blockValueCounts[byte]++;
2023-12-19 13:10:25 +01:00
m_byteCount++;
// Check if we processed one complete chunk, if so compute the entropy and start analysing the next chunk
2023-12-19 13:10:25 +01:00
if (((m_byteCount % m_chunkSize) == 0) || m_byteCount == (m_endAddress - m_startAddress)) [[unlikely]] {
m_yBlockEntropy.push_back(calculateEntropy(m_blockValueCounts, m_chunkSize));
2023-12-19 13:10:25 +01:00
m_blockCount += 1;
m_blockValueCounts = { 0 };
}
// Check if we processed the last block, if so setup the X axis part of the data
2023-12-19 13:10:25 +01:00
if (m_blockCount == totalBlock) {
processFinalize();
2023-12-19 13:10:25 +01:00
m_processing = false;
}
}
}
// Method used to compute the entropy of a block of size `blockSize`
// using the byte occurrences from `valueCounts` array.
static double calculateEntropy(const std::array<ImU64, 256> &valueCounts, size_t blockSize) {
double entropy = 0;
u8 processedValueCount = 0;
2023-11-10 20:47:08 +01:00
for (const auto count : valueCounts) {
if (count == 0) [[unlikely]]
continue;
processedValueCount += 1;
double probability = static_cast<double>(count) / blockSize;
entropy += probability * std::log2(probability);
}
if (processedValueCount == 1)
return 0.0;
return std::min<double>(1.0, (-entropy) / 8); // log2(256) = 8
}
// Return the highest entropy value among all of the blocks
double getHighestEntropyBlockValue() {
2024-02-24 23:34:29 +01:00
double result = 0.0F;
2023-12-19 13:10:25 +01:00
if (!m_yBlockEntropy.empty())
result = *std::ranges::max_element(m_yBlockEntropy);
return result;
}
// Return the highest entropy value among all of the blocks
u64 getHighestEntropyBlockAddress() {
u64 address = 0x00;
2023-12-19 13:10:25 +01:00
if (!m_yBlockEntropy.empty())
address = (std::ranges::max_element(m_yBlockEntropy) - m_yBlockEntropy.begin()) * m_blockSize;
2023-12-19 13:10:25 +01:00
return m_startAddress + address;
}
// Return the highest entropy value among all of the blocks
double getLowestEntropyBlockValue() {
2024-02-24 23:34:29 +01:00
double result = 0.0F;
2023-12-19 13:10:25 +01:00
if (m_yBlockEntropy.size() > 1)
result = *std::min_element(m_yBlockEntropy.begin(), m_yBlockEntropy.end() - 1);
return result;
}
// Return the highest entropy value among all of the blocks
u64 getLowestEntropyBlockAddress() {
u64 address = 0x00;
2023-12-19 13:10:25 +01:00
if (m_yBlockEntropy.size() > 1)
address = (std::min_element(m_yBlockEntropy.begin(), m_yBlockEntropy.end() - 1) - m_yBlockEntropy.begin()) * m_blockSize;
return m_startAddress + address;
}
// Return the number of blocks that have been processed
2023-11-10 20:47:08 +01:00
u64 getSize() const {
2023-12-19 13:10:25 +01:00
return m_yBlockEntropySampled.size();
}
// Return the size of the chunk used for this analysis
2023-11-10 20:47:08 +01:00
u64 getChunkSize() const {
2023-12-19 13:10:25 +01:00
return m_chunkSize;
}
void setHandlePosition(u64 filePosition) {
2023-12-19 13:10:25 +01:00
m_handlePosition = filePosition;
}
private:
// Private method used to factorize the process public method
2023-11-10 20:47:08 +01:00
void processImpl(const std::vector<u8> &bytes) {
2023-12-19 13:10:25 +01:00
m_blockValueCounts = { 0 };
// Reset and resize the array
2023-12-19 13:10:25 +01:00
m_yBlockEntropy.clear();
2023-12-19 13:10:25 +01:00
m_byteCount = 0;
m_blockCount = 0;
// Loop over each byte of the file (or a part of it)
for (u8 byte: bytes) {
// Increment the occurrence of the current byte
2023-12-19 13:10:25 +01:00
m_blockValueCounts[byte]++;
2023-12-19 13:10:25 +01:00
m_byteCount++;
// Check if we processed one complete chunk, if so compute the entropy and start analysing the next chunk
2023-12-19 13:10:25 +01:00
if (((m_byteCount % m_chunkSize) == 0) || m_byteCount == bytes.size() * 8) [[unlikely]] {
m_yBlockEntropy.push_back(calculateEntropy(m_blockValueCounts, m_chunkSize));
2023-12-19 13:10:25 +01:00
m_blockCount += 1;
m_blockValueCounts = { 0 };
}
}
processFinalize();
}
void processFinalize() {
// Only save at most m_sampleSize elements of the result
2023-12-19 13:10:25 +01:00
m_yBlockEntropySampled = sampleData(m_yBlockEntropy, std::min<size_t>(m_blockCount + 1, m_sampleSize));
2023-12-19 13:10:25 +01:00
if (!m_yBlockEntropySampled.empty())
m_yBlockEntropySampled.push_back(m_yBlockEntropySampled.back());
double stride = std::max(1.0, double(
2023-12-19 13:10:25 +01:00
double(std::ceil((m_endAddress - m_startAddress)) / m_blockSize) / m_yBlockEntropySampled.size()));
2023-12-19 13:10:25 +01:00
m_blockCount = m_yBlockEntropySampled.size() - 1;
// The m_xBlockEntropy attribute is used to specify the position of entropy values
// in the plot when the Y axis doesn't start at 0
2023-12-19 13:10:25 +01:00
m_xBlockEntropy.clear();
m_xBlockEntropy.resize(m_blockCount);
for (u64 i = 0; i < m_blockCount; ++i)
m_xBlockEntropy[i] = ((m_startAddress / m_blockSize) + stride * i) * m_blockSize;
m_xBlockEntropy.push_back(m_endAddress);
}
private:
// Variables used to store the parameters to process
// Chunk's size for entropy analysis
u64 m_chunkSize = 0;
u64 m_startAddress = 0x00;
u64 m_endAddress = 0x00;
// Start / size of the file
u64 m_baseAddress = 0x00;
u64 m_fileSize = 0;
// The size of the blocks (for diagram drawing)
u64 m_blockSize = 0;
// Position of the handle inside the plot
double m_handlePosition = 0.0;
// Hold the number of blocks that have been processed
// during the chunk-based entropy analysis
u64 m_blockCount = 0;
// Hold the number of bytes that have been processed
// during the analysis (useful for the iterative analysis)
u64 m_byteCount = 0;
// Array used to hold the occurrences of each byte
// (useful for the iterative analysis)
std::array<ImU64, 256> m_blockValueCounts = {};
// Variable to hold the result of the chunk-based
// entropy analysis
std::vector<double> m_xBlockEntropy;
std::vector<double> m_yBlockEntropy, m_yBlockEntropySampled;
// Sampling size, number of elements displayed in the plot,
// avoid showing to many data because it decreased the frame rate
size_t m_sampleSize = 0;
std::atomic<bool> m_processing = false;
};
class DiagramByteDistribution {
public:
2023-11-10 20:47:08 +01:00
DiagramByteDistribution() = default;
void draw(ImVec2 size, ImPlotFlags flags) {
2023-12-19 13:10:25 +01:00
if (!m_processing && ImPlot::BeginPlot("##distribution", size, flags)) {
ImPlot::SetupAxes("hex.ui.common.value"_lang, "hex.ui.common.count"_lang,
ImPlotAxisFlags_Lock | ImPlotAxisFlags_NoHighlight | ImPlotAxisFlags_NoSideSwitch,
ImPlotAxisFlags_Lock | ImPlotAxisFlags_NoHighlight | ImPlotAxisFlags_NoSideSwitch);
ImPlot::SetupAxisScale(ImAxis_Y1, ImPlotScale_Log10);
ImPlot::SetupAxesLimits(-1, 256, 1, double(*std::ranges::max_element(m_valueCounts)) * 1.1F, ImGuiCond_Always);
2023-11-10 20:47:08 +01:00
ImPlot::SetupAxisFormat(ImAxis_X1, impl::IntegerAxisFormatter, (void*)("0x%02llX"));
ImPlot::SetupAxisTicks(ImAxis_X1, 0, 255, 17);
ImPlot::SetupMouseText(ImPlotLocation_NorthEast);
constexpr static auto x = [] {
std::array<ImU64, 256> result { 0 };
std::iota(result.begin(), result.end(), 0);
return result;
}();
2023-12-19 13:10:25 +01:00
ImPlot::PlotBars<ImU64>("##bytes", x.data(), m_valueCounts.data(), x.size(), 1);
ImPlot::EndPlot();
}
}
void process(prv::Provider *provider, u64 startAddress, u64 endAddress) {
2023-12-19 13:10:25 +01:00
m_processing = true;
// Update attributes
2023-12-19 13:10:25 +01:00
m_startAddress = startAddress;
m_endAddress = endAddress;
// Get a file reader
auto reader = prv::ProviderReader(provider);
2023-12-19 13:10:25 +01:00
std::vector<u8> bytes = reader.read(m_startAddress, m_endAddress - m_startAddress);
this->processImpl(bytes);
2023-12-19 13:10:25 +01:00
m_processing = false;
}
2023-11-10 20:47:08 +01:00
void process(const std::vector<u8> &buffer) {
2023-12-19 13:10:25 +01:00
m_processing = true;
// Update attributes
2023-12-19 13:10:25 +01:00
m_startAddress = 0;
m_endAddress = buffer.size();
this->processImpl(buffer);
2023-12-19 13:10:25 +01:00
m_processing = false;
}
// Reset the byte distribution array
void reset() {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_valueCounts.fill(0);
m_processing = false;
}
// Process one byte at the time
void update(u8 byte) {
2023-12-19 13:10:25 +01:00
m_processing = true;
m_valueCounts[byte]++;
m_processing = false;
}
// Return byte distribution array in it's current state
std::array<ImU64, 256> & get() {
2023-12-19 13:10:25 +01:00
return m_valueCounts;
}
private:
// Private method used to factorize the process public method
2023-11-10 20:47:08 +01:00
void processImpl(const std::vector<u8> &bytes) {
// Reset the array
2023-12-19 13:10:25 +01:00
m_valueCounts.fill(0);
// Loop over each byte of the file (or a part of it)
// Increment the occurrence of the current byte
for (u8 byte : bytes)
2023-12-19 13:10:25 +01:00
m_valueCounts[byte]++;
}
private:
// Variables used to store the parameters to process
2023-11-10 20:47:08 +01:00
u64 m_startAddress = 0;
u64 m_endAddress = 0;
// Hold the result of the byte distribution analysis
std::array<ImU64, 256> m_valueCounts = { };
std::atomic<bool> m_processing = false;
};
class DiagramByteTypesDistribution {
private:
// Address range that draw() highlights as a colored rectangle on the plot
struct AnnotationRegion {
// Name that is localized and shown as annotation text while the rectangle is hovered
UnlocalizedString unlocalizedName;
// Range covered by the rectangle, it becomes the hex editor selection when the rectangle is clicked
Region region;
// Color of the rectangle and of the annotation text box
ImColor color;
};
// Label that draw() attaches to a value on one of the plot axes
struct Tag {
// Name that is localized and used as the tag text
UnlocalizedString unlocalizedName;
// Axis value the tag is placed at
ImU64 value;
// Axis the tag belongs to, draw() handles ImAxis_X1 and ImAxis_Y1
ImAxis axis;
// Style color index that is resolved through ImGui::GetStyleColorVec4()
ImGuiCol color;
};
public:
2023-11-10 20:47:08 +01:00
// Stores the block size (number of bytes per distribution value) and the sample size used by the analysis
explicit DiagramByteTypesDistribution(u64 blockSize = 256, size_t sampleSize = 0x1000) : m_blockSize(blockSize), m_sampleSize(sampleSize){ }
/**
 * @brief Draws the byte type distribution plot
 * @param size         Size of the plot
 * @param flags        ImPlot flags for the plot
 * @param updateHandle Whether to show a draggable handle that updates the hex editor selection
 */
void draw(ImVec2 size, ImPlotFlags flags, bool updateHandle = false) {
    // Nothing is drawn while an analysis is running
    if (m_processing)
        return;

    if (!ImPlot::BeginPlot("##byte_types", size, flags))
        return;

    const ImPlotAxisFlags axisFlags = ImPlotAxisFlags_Lock | ImPlotAxisFlags_NoHighlight | ImPlotAxisFlags_NoSideSwitch;
    ImPlot::SetupAxes("hex.ui.common.address"_lang, "hex.ui.common.percentage"_lang, axisFlags, axisFlags);

    // The X axis spans from the first to the last position of the analysis
    ImPlot::SetupAxesLimits(
            m_xBlockTypeDistributions.empty() ? 0 : m_xBlockTypeDistributions.front(),
            m_xBlockTypeDistributions.empty() ? 0 : m_xBlockTypeDistributions.back(),
            -0.1F,
            100.1F,
            ImGuiCond_Always);
    ImPlot::SetupLegend(ImPlotLocation_South, ImPlotLegendFlags_Horizontal | ImPlotLegendFlags_Outside);
    ImPlot::SetupAxisFormat(ImAxis_X1, impl::IntegerAxisFormatter, (void*)("0x%04llX"));
    ImPlot::SetupMouseText(ImPlotLocation_NorthEast);

    // One line per character class
    constexpr static std::array Names = {
        "iscntrl", "isprint", "isspace", "isblank",
        "isgraph", "ispunct", "isalnum", "isalpha",
        "isupper", "islower", "isdigit", "isxdigit"
    };

    for (u32 line = 0; line < Names.size(); line++) {
        ImPlot::PlotLine(Names[line], m_xBlockTypeDistributions.data(), m_yBlockTypeDistributionsSampled[line].data(), m_xBlockTypeDistributions.size());
    }

    if (m_showAnnotations) {
        // Every annotated region is a rectangle over the full height of the plot
        u32 rectId = 1;
        for (const auto &annotation : m_annotationRegions) {
            double left   = annotation.region.getStartAddress();
            double right  = annotation.region.getEndAddress();
            double bottom = 0.0F;
            double top    = 100.0F;

            ImPlot::DragRect(rectId, &left, &bottom, &right, &top, annotation.color, ImPlotDragToolFlags_NoFit | ImPlotDragToolFlags_NoInputs);

            const auto topLeft     = ImPlot::PlotToPixels(left, top);
            const auto bottomRight = ImPlot::PlotToPixels(right, bottom);
            const auto cursor      = ImPlot::PixelsToPlot(ImGui::GetMousePos());

            // Show the name of the hovered region and select it on click
            if (ImGui::IsMouseHoveringRect(topLeft, bottomRight)) {
                ImPlot::Annotation(left + (right - left) / 2, cursor.y, annotation.color, ImVec2(), false, "%s", Lang(annotation.unlocalizedName).get());

                if (ImGui::IsMouseClicked(ImGuiMouseButton_Left))
                    ImHexApi::HexEditor::setSelection(annotation.region);
            }

            rectId += 1;
        }

        for (const auto &tag : m_tags) {
            if (tag.axis == ImAxis_X1) {
                ImPlot::TagX(tag.value, ImGui::GetStyleColorVec4(tag.color), "%s", Lang(tag.unlocalizedName).get());
            } else if (tag.axis == ImAxis_Y1) {
                ImPlot::TagY(tag.value, ImGui::GetStyleColorVec4(tag.color), "%s", Lang(tag.unlocalizedName).get());
            }
        }
    }

    // The parameter updateHandle is used when using the pattern language since we don't have a provider
    // but just a set of bytes, we won't be able to use the drag bar correctly.
    if (updateHandle && ImPlot::DragLineX(1, &m_handlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) {
        // The handle was dragged: keep it inside the analyzed range
        m_handlePosition = std::clamp<double>(
                m_handlePosition,
                m_startAddress,
                m_endAddress);

        // Translate the handle position into an address and select it in the hex editor
        u64 selection = u64(std::max<double>(m_handlePosition, 0)) + m_baseAddress;
        selection = std::min<u64>(selection, m_baseAddress + m_fileSize - 1);
        ImHexApi::HexEditor::setSelection(selection, 1);
    }

    ImPlot::EndPlot();
}
/**
 * @brief Analyzes a range of a provider in one go
 * @param provider     Provider to read from
 * @param startAddress First address of the range
 * @param endAddress   Address one past the end of the range
 */
void process(prv::Provider *provider, u64 startAddress, u64 endAddress) {
    m_processing = true;

    // Remember the analyzed range and the layout of the provider
    m_startAddress = startAddress;
    m_endAddress   = endAddress;
    m_baseAddress  = provider->getBaseAddress();
    m_fileSize     = provider->getSize();

    // Read the whole range at once and analyze it
    prv::ProviderReader reader(provider);
    const std::vector<u8> data = reader.read(m_startAddress, m_endAddress - m_startAddress);
    this->processImpl(data);

    // The handle starts at the beginning of the plot
    m_handlePosition = m_startAddress;

    m_processing = false;
}
2023-11-10 20:47:08 +01:00
void process(const std::vector<u8> &buffer, u64 baseAddress, u64 fileSize) {
2023-12-19 13:10:25 +01:00
m_processing = true;
// Update attributes
2023-12-19 13:10:25 +01:00
m_startAddress = 0;
m_endAddress = buffer.size();
m_baseAddress = baseAddress;
m_fileSize = fileSize;
this->processImpl(buffer);
// Set the diagram handle position to the start of the plot
2023-12-19 13:10:25 +01:00
m_handlePosition = m_startAddress;
2023-12-19 13:10:25 +01:00
m_processing = false;
}
// Reset the byte type distribution analysis
/**
 * @brief Prepares an iterative analysis that is fed through update()
 * @param startAddress First address of the range
 * @param endAddress   Address one past the end of the range
 * @param baseAddress  Base address used when the handle position is turned into a selection
 * @param size         File size used to limit the selection address
 */
void reset(u64 startAddress, u64 endAddress, u64 baseAddress, u64 size) {
    m_processing = true;

    // Remember the analyzed range and the layout of the data
    m_startAddress = startAddress;
    m_endAddress   = endAddress;
    m_baseAddress  = baseAddress;
    m_fileSize     = size;

    // Forget the progress and the results of the previous analysis
    m_byteCount  = 0;
    m_blockCount = 0;
    m_blockValueCounts = { 0 };
    m_yBlockTypeDistributions.fill({});

    // The handle starts at the beginning of the plot
    m_handlePosition = m_startAddress;

    m_annotationRegions.clear();
}
// Process one byte at the time
/**
 * @brief Feeds the next byte of an iterative analysis
 *
 * Once a block is complete (or the last byte of the range was received) its type
 * distribution is stored and, if it looks like plain text or like a run of similar
 * bytes, an annotation region is added. After the last block the analysis is finalized.
 *
 * @param byte Next byte of the data
 */
void update(u8 byte) {
    // A block size of zero cannot be analyzed and would divide by zero below
    if (m_blockSize == 0)
        return;

    // Number of blocks inside the range, rounded up so that a trailing partial block is
    // included. Applying std::ceil to the result of an integer division does not round up
    const u64 totalBytes = m_endAddress - m_startAddress;
    const u64 totalBlock = (totalBytes / m_blockSize) + ((totalBytes % m_blockSize) != 0 ? 1 : 0);

    // Check if there is still some block to process
    if (m_blockCount >= totalBlock)
        return;

    m_blockValueCounts[byte]++;
    m_byteCount++;

    if (((m_byteCount % m_blockSize) == 0) || m_byteCount == totalBytes) [[unlikely]] {
        auto typeDist = calculateTypeDistribution(m_blockValueCounts, m_blockSize);
        for (size_t i = 0; i < typeDist.size(); i++)
            m_yBlockTypeDistributions[i].push_back(typeDist[i] * 100);

        // Index 2 holds the isspace share, index 4 the isgraph share of the block
        // NOTE(review): the regions start at m_byteCount, which already points behind the
        // block that was just analyzed and does not include m_startAddress - confirm that this is intended
        if (m_yBlockTypeDistributions[2].back() + m_yBlockTypeDistributions[4].back() >= 95) {
            this->addRegion("hex.ui.diagram.byte_type_distribution.plain_text", Region { m_byteCount, m_blockSize }, 0x80FF00FF);
        } else if (std::ranges::any_of(m_blockValueCounts, [&](auto count) { return count >= m_blockSize * 0.95F; })) {
            this->addRegion("hex.ui.diagram.byte_type_distribution.similar_bytes", Region { m_byteCount, m_blockSize }, 0x8000FF00);
        }

        m_blockCount += 1;
        m_blockValueCounts = { 0 };
    }

    // Check if we processed the last block, if so setup the X axis part of the data
    if (m_blockCount == totalBlock) {
        processFinalize();
        m_processing = false;
    }
}
// Return the percentage of plain text character inside the analyzed region
/**
 * @brief Returns the percentage of plain text characters inside the analyzed region
 * @return Sum of the average isspace and the average isgraph percentage over all blocks,
 *         or -1.0 if no block has been analyzed
 */
double getPlainTextCharacterPercentage() {
    auto &spaceShares = m_yBlockTypeDistributions[2];
    auto &graphShares = m_yBlockTypeDistributions[4];

    if (spaceShares.empty() || graphShares.empty())
        return -1.0;

    // Average both character classes over all blocks and add them up
    double plainTextPercentage = std::reduce(spaceShares.begin(), spaceShares.end()) / spaceShares.size();
    return plainTextPercentage + std::reduce(graphShares.begin(), graphShares.end()) / graphShares.size();
}
// Move the handle of the plot to the given position
void setHandlePosition(u64 filePosition) {
    this->m_handlePosition = filePosition;
}
void enableAnnotations(bool enabled) {
m_showAnnotations = enabled;
}
private:
/**
 * @brief Computes the share of every character class inside a block
 *
 * The classes are, in this order: iscntrl, isprint, isspace, isblank, isgraph,
 * ispunct, isalnum, isalpha, isupper, islower, isdigit and isxdigit.
 *
 * @param valueCounts Number of occurrences of every byte value inside the block
 * @param blockSize   Divisor used to turn the class totals into shares
 * @return For every class the number of matching bytes divided by @p blockSize
 */
static std::array<float, 12> calculateTypeDistribution(const std::array<ImU64, 256> &valueCounts, size_t blockSize) {
    std::array<ImU64, 12> totals = {};

    for (size_t value = 0; value < valueCounts.size(); ++value) {
        const ImU64 occurrences = valueCounts[value];
        if (occurrences == 0) [[unlikely]]
            continue;

        // Which of the classes the current byte value belongs to
        const int character = static_cast<int>(value);
        const std::array<bool, 12> matches = {
            std::iscntrl(character)  != 0,
            std::isprint(character)  != 0,
            std::isspace(character)  != 0,
            std::isblank(character)  != 0,
            std::isgraph(character)  != 0,
            std::ispunct(character)  != 0,
            std::isalnum(character)  != 0,
            std::isalpha(character)  != 0,
            std::isupper(character)  != 0,
            std::islower(character)  != 0,
            std::isdigit(character)  != 0,
            std::isxdigit(character) != 0
        };

        for (size_t type = 0; type < matches.size(); ++type) {
            if (matches[type])
                totals[type] += occurrences;
        }
    }

    // Turn the totals into shares of the block
    std::array<float, 12> distribution = {};
    for (size_t type = 0; type < distribution.size(); ++type)
        distribution[type] = static_cast<float>(totals[type]) / blockSize;

    return distribution;
}
// Private method used to factorize the process public method
2023-11-10 20:47:08 +01:00
void processImpl(const std::vector<u8> &bytes) {
2023-12-19 13:10:25 +01:00
m_blockValueCounts = { 0 };
2023-12-19 13:10:25 +01:00
m_yBlockTypeDistributions.fill({});
m_byteCount = 0;
m_blockCount = 0;
// Loop over each byte of the file (or a part of it)
for (u8 byte : bytes) {
2023-12-19 13:10:25 +01:00
m_blockValueCounts[byte]++;
2023-12-19 13:10:25 +01:00
m_byteCount++;
if (((m_byteCount % m_blockSize) == 0) || m_byteCount == (m_endAddress - m_startAddress)) [[unlikely]] {
auto typeDist = calculateTypeDistribution(m_blockValueCounts, m_blockSize);
2023-04-08 23:34:46 +02:00
for (size_t i = 0; i < typeDist.size(); i++)
2023-12-19 13:10:25 +01:00
m_yBlockTypeDistributions[i].push_back(typeDist[i] * 100);
2023-12-19 13:10:25 +01:00
m_blockCount += 1;
m_blockValueCounts = { 0 };
}
}
processFinalize();
}
void processFinalize() {
// Only save at most m_sampleSize elements of the result
2023-12-19 13:10:25 +01:00
for (size_t i = 0; i < m_yBlockTypeDistributions.size(); ++i) {
m_yBlockTypeDistributionsSampled[i] = sampleData(m_yBlockTypeDistributions[i], std::min<size_t>(m_blockCount + 1, m_sampleSize));
2023-12-19 13:10:25 +01:00
if (!m_yBlockTypeDistributionsSampled[i].empty())
m_yBlockTypeDistributionsSampled[i].push_back(m_yBlockTypeDistributionsSampled[i].back());
}
2023-12-19 13:10:25 +01:00
double stride = std::max(1.0, double(m_blockCount) / m_yBlockTypeDistributionsSampled[0].size());
m_blockCount = m_yBlockTypeDistributionsSampled[0].size() - 1;
// The m_xBlockTypeDistributions attribute is used to specify the position of entropy
// values in the plot when the Y axis doesn't start at 0
2023-12-19 13:10:25 +01:00
m_xBlockTypeDistributions.clear();
m_xBlockTypeDistributions.resize(m_blockCount);
for (u64 i = 0; i < m_blockCount; ++i)
m_xBlockTypeDistributions[i] = m_startAddress + (stride * i * m_blockSize);
m_xBlockTypeDistributions.push_back(m_endAddress);
}
void addRegion(const UnlocalizedString &name, Region region, ImColor color) {
const auto existingRegion = std::ranges::find_if(m_annotationRegions, [this, &region](const AnnotationRegion &annotation) {
auto difference = i64(region.getEndAddress()) - i64(annotation.region.getEndAddress());
return difference > 0 && difference < i64(m_blockSize * 32);
});
if (existingRegion != m_annotationRegions.end()) {
existingRegion->region.size += region.size;
} else {
m_annotationRegions.push_back({ name, region, color });
}
}
void addTag(const UnlocalizedString &name, u64 value, ImAxis axis, ImGuiCol color) {
m_tags.push_back({ name, value, axis, color });
}
private:
// Parameters of the analysis

// Size, in bytes, of the blocks the analysis works on
u64 m_blockSize{0};
// Bounds of the analyzed range
u64 m_startAddress{0};
u64 m_endAddress{0};

// Start / size of the file
u64 m_baseAddress{0};
u64 m_fileSize{0};

// Position of the handle inside the plot
double m_handlePosition{0.0};

// Number of blocks that have been processed so far by the analysis
u64 m_blockCount{0};

// Number of bytes that have been processed so far by the analysis
// (useful for the iterative analysis)
u64 m_byteCount{0};

// Sampling size, i.e. the number of elements displayed in the plot;
// showing too many data points decreases the frame rate
size_t m_sampleSize{0};

// Occurrences of each byte value inside the block currently being processed
// (useful for the iterative analysis)
std::array<ImU64, 256> m_blockValueCounts{};

// Positions of the values in the plot (X coordinates), needed because
// the plotted range does not have to start at 0
std::vector<float> m_xBlockTypeDistributions;

// Result of the byte type distribution analysis: one series per character class,
// first for every block, then downsampled for plotting
std::array<std::vector<float>, 12> m_yBlockTypeDistributions;
std::array<std::vector<float>, 12> m_yBlockTypeDistributionsSampled;

// Set to false once the iterative analysis has processed its last block
std::atomic<bool> m_processing{false};

// Annotated regions and tags collected for the plot
std::vector<AnnotationRegion> m_annotationRegions;
std::vector<Tag> m_tags;
bool m_showAnnotations{true};
};
}