feat: Add ignore case and UTF16 search options to sequence searching
This commit is contained in:
parent
c7ab4a4569
commit
96db2074c6
9
.gdbinit
9
.gdbinit
@ -6,4 +6,11 @@ skip -rfu ^__gnu_debug::
|
|||||||
skip -rfu ^ImGui::
|
skip -rfu ^ImGui::
|
||||||
|
|
||||||
# Trigger breakpoint when execution reaches triggerSafeShutdown()
|
# Trigger breakpoint when execution reaches triggerSafeShutdown()
|
||||||
break triggerSafeShutdown
|
break triggerSafeShutdown
|
||||||
|
|
||||||
|
# Print backtrace after execution jumped to an invalid address
|
||||||
|
define fixbt
|
||||||
|
set $pc = *(void **)$rsp
|
||||||
|
set $rsp = $rsp + 8
|
||||||
|
bt
|
||||||
|
end
|
@ -78,6 +78,8 @@ namespace hex {
|
|||||||
[[nodiscard]] std::string encodeByteString(const std::vector<u8> &bytes);
|
[[nodiscard]] std::string encodeByteString(const std::vector<u8> &bytes);
|
||||||
[[nodiscard]] std::vector<u8> decodeByteString(const std::string &string);
|
[[nodiscard]] std::vector<u8> decodeByteString(const std::string &string);
|
||||||
|
|
||||||
|
std::wstring utf8ToUtf16(const std::string& utf8);
|
||||||
|
|
||||||
[[nodiscard]] constexpr u64 extract(u8 from, u8 to, const std::unsigned_integral auto &value) {
|
[[nodiscard]] constexpr u64 extract(u8 from, u8 to, const std::unsigned_integral auto &value) {
|
||||||
if (from < to) std::swap(from, to);
|
if (from < to) std::swap(from, to);
|
||||||
|
|
||||||
|
@ -485,6 +485,70 @@ namespace hex {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Converts a UTF-8 encoded byte string into UTF-16 code units
 *
 * Code points up to U+FFFF are emitted as a single code unit, everything
 * above as a high/low surrogate pair. Each code unit is stored in one
 * wchar_t element of the result, regardless of how wide wchar_t is.
 *
 * @param utf8 UTF-8 encoded input
 * @return The UTF-16 code units, or an empty string if the input is malformed
 *         (stray continuation byte, invalid lead byte, truncated sequence,
 *         overlong encoding, encoded surrogate or code point above U+10FFFF).
 *         An empty input also yields an empty string.
 */
std::wstring utf8ToUtf16(const std::string& utf8) {
    std::wstring utf16;
    // Every UTF-8 byte produces at most one UTF-16 code unit
    utf16.reserve(utf8.size());

    for (size_t byteIndex = 0; byteIndex < utf8.size();) {
        const auto leadByte = static_cast<unsigned char>(utf8[byteIndex]);
        byteIndex += 1;

        char32_t codePoint          = 0;
        char32_t smallestCodePoint  = 0;    // Lowest value that legitimately needs this sequence length
        size_t   continuationCount  = 0;

        if (leadByte <= 0x7F) {
            codePoint = leadByte;
        } else if (leadByte <= 0xBF) {
            // Continuation byte without a preceding lead byte
            return { };
        } else if (leadByte <= 0xDF) {
            codePoint         = leadByte & 0x1F;
            continuationCount = 1;
            smallestCodePoint = 0x80;
        } else if (leadByte <= 0xEF) {
            codePoint         = leadByte & 0x0F;
            continuationCount = 2;
            smallestCodePoint = 0x800;
        } else if (leadByte <= 0xF7) {
            codePoint         = leadByte & 0x07;
            continuationCount = 3;
            smallestCodePoint = 0x10000;
        } else {
            // 0xF8..0xFF never appear in UTF-8
            return { };
        }

        for (size_t i = 0; i < continuationCount; i += 1) {
            // Sequence is cut off by the end of the input
            if (byteIndex == utf8.size())
                return { };

            const auto continuation = static_cast<unsigned char>(utf8[byteIndex]);
            if (continuation < 0x80 || continuation > 0xBF)
                return { };

            codePoint = (codePoint << 6) | (continuation & 0x3F);
            byteIndex += 1;
        }

        // Overlong encodings (a code point stored in more bytes than necessary) are
        // ill-formed UTF-8 and must not be decoded
        if (codePoint < smallestCodePoint)
            return { };

        // Surrogates are reserved for UTF-16 and are not valid scalar values
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            return { };

        if (codePoint > 0x10FFFF)
            return { };

        if (codePoint <= 0xFFFF) {
            utf16 += static_cast<wchar_t>(codePoint);
        } else {
            // Split the 20 bits above the BMP into a high and a low surrogate
            const char32_t offset = codePoint - 0x10000;
            utf16 += static_cast<wchar_t>(0xD800 + (offset >> 10));
            utf16 += static_cast<wchar_t>(0xDC00 + (offset & 0x3FF));
        }
    }

    return utf16;
}
|
||||||
|
|
||||||
float float16ToFloat32(u16 float16) {
|
float float16ToFloat32(u16 float16) {
|
||||||
u32 sign = float16 >> 15;
|
u32 sign = float16 >> 15;
|
||||||
u32 exponent = (float16 >> 10) & 0x1F;
|
u32 exponent = (float16 >> 10) & 0x1F;
|
||||||
|
@ -63,6 +63,9 @@ namespace hex::plugin::builtin {
|
|||||||
|
|
||||||
struct Sequence {
|
struct Sequence {
|
||||||
std::string sequence;
|
std::string sequence;
|
||||||
|
|
||||||
|
StringType type = StringType::ASCII;
|
||||||
|
bool ignoreCase = false;
|
||||||
} bytes;
|
} bytes;
|
||||||
|
|
||||||
struct Regex {
|
struct Regex {
|
||||||
|
@ -895,6 +895,7 @@
|
|||||||
"hex.builtin.view.find.search.reset": "Reset",
|
"hex.builtin.view.find.search.reset": "Reset",
|
||||||
"hex.builtin.view.find.searching": "Searching...",
|
"hex.builtin.view.find.searching": "Searching...",
|
||||||
"hex.builtin.view.find.sequences": "Sequences",
|
"hex.builtin.view.find.sequences": "Sequences",
|
||||||
|
"hex.builtin.view.find.sequences.ignore_case": "Ignore case",
|
||||||
"hex.builtin.view.find.shortcut.select_all": "Select All Occurrences",
|
"hex.builtin.view.find.shortcut.select_all": "Select All Occurrences",
|
||||||
"hex.builtin.view.find.strings": "Strings",
|
"hex.builtin.view.find.strings": "Strings",
|
||||||
"hex.builtin.view.find.strings.chars": "Characters",
|
"hex.builtin.view.find.strings.chars": "Characters",
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <hex/providers/buffered_reader.hpp>
|
#include <hex/providers/buffered_reader.hpp>
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <ranges>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -23,7 +24,7 @@ namespace hex::plugin::builtin {
|
|||||||
if (m_searchTask.isRunning())
|
if (m_searchTask.isRunning())
|
||||||
return { };
|
return { };
|
||||||
|
|
||||||
if (!m_occurrenceTree->overlapping({ address, address + size }).empty())
|
if (!m_occurrenceTree->overlapping({ address, address }).empty())
|
||||||
return HighlightColor();
|
return HighlightColor();
|
||||||
else
|
else
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
@ -258,23 +259,74 @@ namespace hex::plugin::builtin {
|
|||||||
reader.seek(searchRegion.getStartAddress());
|
reader.seek(searchRegion.getStartAddress());
|
||||||
reader.setEndAddress(searchRegion.getEndAddress());
|
reader.setEndAddress(searchRegion.getEndAddress());
|
||||||
|
|
||||||
auto bytes = hex::decodeByteString(settings.sequence);
|
auto input = hex::decodeByteString(settings.sequence);
|
||||||
|
if (input.empty())
|
||||||
if (bytes.empty())
|
|
||||||
return { };
|
return { };
|
||||||
|
|
||||||
|
std::vector<u8> bytes;
|
||||||
|
Occurrence::DecodeType decodeType = Occurrence::DecodeType::Binary;
|
||||||
|
std::endian endian;
|
||||||
|
switch (settings.type) {
|
||||||
|
default:
|
||||||
|
case SearchSettings::StringType::ASCII:
|
||||||
|
bytes = input;
|
||||||
|
decodeType = Occurrence::DecodeType::ASCII;
|
||||||
|
endian = std::endian::native;
|
||||||
|
break;
|
||||||
|
case SearchSettings::StringType::UTF16LE: {
|
||||||
|
auto wString = hex::utf8ToUtf16({ input.begin(), input.end() });
|
||||||
|
|
||||||
|
bytes.resize(wString.size() * 2);
|
||||||
|
std::memcpy(bytes.data(), wString.data(), bytes.size());
|
||||||
|
decodeType = Occurrence::DecodeType::UTF16;
|
||||||
|
endian = std::endian::little;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SearchSettings::StringType::UTF16BE: {
|
||||||
|
auto wString = hex::utf8ToUtf16({ input.begin(), input.end() });
|
||||||
|
|
||||||
|
bytes.resize(wString.size() * 2);
|
||||||
|
std::memcpy(bytes.data(), wString.data(), bytes.size());
|
||||||
|
decodeType = Occurrence::DecodeType::UTF16;
|
||||||
|
endian = std::endian::big;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < bytes.size(); i += 2)
|
||||||
|
std::swap(bytes[i], bytes[i + 1]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto occurrence = reader.begin();
|
auto occurrence = reader.begin();
|
||||||
u64 progress = 0;
|
u64 progress = 0;
|
||||||
|
|
||||||
|
auto searchPredicate = [&] -> bool(*)(u8, u8) {
|
||||||
|
if (!settings.ignoreCase)
|
||||||
|
return [](u8 left, u8 right) -> bool {
|
||||||
|
return left == right;
|
||||||
|
};
|
||||||
|
else
|
||||||
|
return [](u8 left, u8 right) -> bool {
|
||||||
|
if (std::isupper(left))
|
||||||
|
left = std::tolower(left);
|
||||||
|
if (std::isupper(right))
|
||||||
|
right = std::tolower(right);
|
||||||
|
|
||||||
|
return left == right;
|
||||||
|
};
|
||||||
|
}();
|
||||||
|
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
task.update(progress);
|
task.update(progress);
|
||||||
|
|
||||||
occurrence = std::search(reader.begin(), reader.end(), std::boyer_moore_horspool_searcher(bytes.begin(), bytes.end()));
|
occurrence = std::search(reader.begin(), reader.end(), std::default_searcher(bytes.begin(), bytes.end(), searchPredicate));
|
||||||
if (occurrence == reader.end())
|
if (occurrence == reader.end())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
auto address = occurrence.getAddress();
|
auto address = occurrence.getAddress();
|
||||||
reader.seek(address + 1);
|
reader.seek(address + 1);
|
||||||
results.push_back(Occurrence{ Region { address, bytes.size() }, Occurrence::DecodeType::Binary, std::endian::native, false });
|
results.push_back(Occurrence{ Region { address, bytes.size() }, decodeType, endian, false });
|
||||||
progress = address - searchRegion.getStartAddress();
|
progress = address - searchRegion.getStartAddress();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -497,6 +549,8 @@ namespace hex::plugin::builtin {
|
|||||||
|
|
||||||
case Value:
|
case Value:
|
||||||
case Strings:
|
case Strings:
|
||||||
|
case Sequence:
|
||||||
|
case Regex:
|
||||||
{
|
{
|
||||||
switch (occurrence.decodeType) {
|
switch (occurrence.decodeType) {
|
||||||
using enum Occurrence::DecodeType;
|
using enum Occurrence::DecodeType;
|
||||||
@ -523,8 +577,6 @@ namespace hex::plugin::builtin {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Sequence:
|
|
||||||
case Regex:
|
|
||||||
case BinaryPattern:
|
case BinaryPattern:
|
||||||
result = hex::encodeByteString(bytes);
|
result = hex::encodeByteString(bytes);
|
||||||
break;
|
break;
|
||||||
@ -661,6 +713,18 @@ namespace hex::plugin::builtin {
|
|||||||
|
|
||||||
ImGuiExt::InputTextIcon("hex.builtin.common.value"_lang, ICON_VS_SYMBOL_KEY, settings.sequence);
|
ImGuiExt::InputTextIcon("hex.builtin.common.value"_lang, ICON_VS_SYMBOL_KEY, settings.sequence);
|
||||||
|
|
||||||
|
if (ImGui::BeginCombo("hex.builtin.common.type"_lang, StringTypes[std::to_underlying(settings.type)].c_str())) {
|
||||||
|
for (size_t i = 0; i < StringTypes.size() - 2; i++) {
|
||||||
|
auto type = static_cast<SearchSettings::StringType>(i);
|
||||||
|
|
||||||
|
if (ImGui::Selectable(StringTypes[i].c_str(), type == settings.type))
|
||||||
|
settings.type = type;
|
||||||
|
}
|
||||||
|
ImGui::EndCombo();
|
||||||
|
}
|
||||||
|
|
||||||
|
ImGui::Checkbox("hex.builtin.view.find.sequences.ignore_case"_lang, &settings.ignoreCase);
|
||||||
|
|
||||||
m_settingsValid = !settings.sequence.empty() && !hex::decodeByteString(settings.sequence).empty();
|
m_settingsValid = !settings.sequence.empty() && !hex::decodeByteString(settings.sequence).empty();
|
||||||
|
|
||||||
ImGui::EndTabItem();
|
ImGui::EndTabItem();
|
||||||
|
Loading…
Reference in New Issue
Block a user