feat: Add ignore case and UTF16 search options to sequence searching
This commit is contained in:
parent
c7ab4a4569
commit
96db2074c6
7
.gdbinit
7
.gdbinit
@ -7,3 +7,10 @@ skip -rfu ^ImGui::
|
||||
|
||||
# Trigger breakpoint when execution reaches triggerSafeShutdown()
|
||||
break triggerSafeShutdown
|
||||
|
||||
# Print a backtrace after execution jumped to an invalid address.
# The commands load the program counter from the pointer-sized value on top of
# the stack, advance the stack pointer by 8 bytes and then print the backtrace.
# NOTE(review): $rsp and the 8-byte step are x86-64 specific; presumably the
# popped value is the return address pushed by the faulting call - confirm.
define fixbt
    set var $pc = *(void **) $rsp
    set var $rsp = $rsp + 8
    backtrace
end
|
@ -78,6 +78,8 @@ namespace hex {
|
||||
[[nodiscard]] std::string encodeByteString(const std::vector<u8> &bytes);
|
||||
[[nodiscard]] std::vector<u8> decodeByteString(const std::string &string);
|
||||
|
||||
std::wstring utf8ToUtf16(const std::string& utf8);
|
||||
|
||||
[[nodiscard]] constexpr u64 extract(u8 from, u8 to, const std::unsigned_integral auto &value) {
|
||||
if (from < to) std::swap(from, to);
|
||||
|
||||
|
@ -485,6 +485,70 @@ namespace hex {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * @brief Converts a UTF-8 encoded string into a sequence of UTF-16 code units
 *
 * Every element of the returned string holds exactly one UTF-16 code unit, even
 * on platforms where wchar_t is wider than 16 bits. Code points above U+FFFF are
 * emitted as a high/low surrogate pair.
 *
 * @param utf8 UTF-8 encoded input
 * @return The UTF-16 code units, or an empty string if the input is empty or is
 *         not well-formed UTF-8 (stray or missing continuation bytes, overlong
 *         encodings, encoded surrogates, values above U+10FFFF). Callers cannot
 *         tell these two cases apart from the result alone.
 */
std::wstring utf8ToUtf16(const std::string& utf8) {
    std::wstring utf16;
    // A UTF-8 string never produces more UTF-16 code units than it has bytes
    utf16.reserve(utf8.size());

    for (size_t byteIndex = 0; byteIndex < utf8.size();) {
        const auto leadByte = static_cast<unsigned char>(utf8[byteIndex]);
        byteIndex += 1;

        char32_t codePoint          = 0;
        size_t   continuationCount  = 0;
        // Smallest code point that legitimately needs this sequence length
        char32_t minimumCodePoint   = 0;

        if (leadByte <= 0x7F) {
            codePoint = leadByte;
        } else if (leadByte <= 0xBF) {
            // Continuation byte without a preceding lead byte
            return { };
        } else if (leadByte <= 0xDF) {
            codePoint         = leadByte & 0x1F;
            continuationCount = 1;
            minimumCodePoint  = 0x80;
        } else if (leadByte <= 0xEF) {
            codePoint         = leadByte & 0x0F;
            continuationCount = 2;
            minimumCodePoint  = 0x800;
        } else if (leadByte <= 0xF7) {
            codePoint         = leadByte & 0x07;
            continuationCount = 3;
            minimumCodePoint  = 0x10000;
        } else {
            return { };
        }

        for (size_t index = 0; index < continuationCount; index += 1) {
            // Sequence is cut off by the end of the input
            if (byteIndex == utf8.size())
                return { };

            const auto continuationByte = static_cast<unsigned char>(utf8[byteIndex]);
            if (continuationByte < 0x80 || continuationByte > 0xBF)
                return { };

            codePoint = (codePoint << 6) | (continuationByte & 0x3F);
            byteIndex += 1;
        }

        // Overlong encoding: the value would have fit into a shorter sequence
        if (codePoint < minimumCodePoint)
            return { };
        // Surrogates are reserved for UTF-16 and must not appear in UTF-8
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            return { };
        if (codePoint > 0x10FFFF)
            return { };

        if (codePoint <= 0xFFFF) {
            utf16 += static_cast<wchar_t>(codePoint);
        } else {
            // Split the supplementary code point into a surrogate pair
            codePoint -= 0x10000;
            utf16 += static_cast<wchar_t>((codePoint >> 10) + 0xD800);
            utf16 += static_cast<wchar_t>((codePoint & 0x3FF) + 0xDC00);
        }
    }

    return utf16;
}
|
||||
|
||||
float float16ToFloat32(u16 float16) {
|
||||
u32 sign = float16 >> 15;
|
||||
u32 exponent = (float16 >> 10) & 0x1F;
|
||||
|
@ -63,6 +63,9 @@ namespace hex::plugin::builtin {
|
||||
|
||||
struct Sequence {
|
||||
std::string sequence;
|
||||
|
||||
StringType type = StringType::ASCII;
|
||||
bool ignoreCase = false;
|
||||
} bytes;
|
||||
|
||||
struct Regex {
|
||||
|
@ -895,6 +895,7 @@
|
||||
"hex.builtin.view.find.search.reset": "Reset",
|
||||
"hex.builtin.view.find.searching": "Searching...",
|
||||
"hex.builtin.view.find.sequences": "Sequences",
|
||||
"hex.builtin.view.find.sequences.ignore_case": "Ignore case",
|
||||
"hex.builtin.view.find.shortcut.select_all": "Select All Occurrences",
|
||||
"hex.builtin.view.find.strings": "Strings",
|
||||
"hex.builtin.view.find.strings.chars": "Characters",
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <hex/providers/buffered_reader.hpp>
|
||||
|
||||
#include <array>
|
||||
#include <ranges>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
@ -23,7 +24,7 @@ namespace hex::plugin::builtin {
|
||||
if (m_searchTask.isRunning())
|
||||
return { };
|
||||
|
||||
if (!m_occurrenceTree->overlapping({ address, address + size }).empty())
|
||||
if (!m_occurrenceTree->overlapping({ address, address }).empty())
|
||||
return HighlightColor();
|
||||
else
|
||||
return std::nullopt;
|
||||
@ -258,23 +259,74 @@ namespace hex::plugin::builtin {
|
||||
reader.seek(searchRegion.getStartAddress());
|
||||
reader.setEndAddress(searchRegion.getEndAddress());
|
||||
|
||||
auto bytes = hex::decodeByteString(settings.sequence);
|
||||
|
||||
if (bytes.empty())
|
||||
auto input = hex::decodeByteString(settings.sequence);
|
||||
if (input.empty())
|
||||
return { };
|
||||
|
||||
std::vector<u8> bytes;
|
||||
Occurrence::DecodeType decodeType = Occurrence::DecodeType::Binary;
|
||||
std::endian endian;
|
||||
switch (settings.type) {
|
||||
default:
|
||||
case SearchSettings::StringType::ASCII:
|
||||
bytes = input;
|
||||
decodeType = Occurrence::DecodeType::ASCII;
|
||||
endian = std::endian::native;
|
||||
break;
|
||||
case SearchSettings::StringType::UTF16LE: {
|
||||
auto wString = hex::utf8ToUtf16({ input.begin(), input.end() });
|
||||
|
||||
bytes.resize(wString.size() * 2);
|
||||
std::memcpy(bytes.data(), wString.data(), bytes.size());
|
||||
decodeType = Occurrence::DecodeType::UTF16;
|
||||
endian = std::endian::little;
|
||||
|
||||
break;
|
||||
}
|
||||
case SearchSettings::StringType::UTF16BE: {
|
||||
auto wString = hex::utf8ToUtf16({ input.begin(), input.end() });
|
||||
|
||||
bytes.resize(wString.size() * 2);
|
||||
std::memcpy(bytes.data(), wString.data(), bytes.size());
|
||||
decodeType = Occurrence::DecodeType::UTF16;
|
||||
endian = std::endian::big;
|
||||
|
||||
for (size_t i = 0; i < bytes.size(); i += 2)
|
||||
std::swap(bytes[i], bytes[i + 1]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto occurrence = reader.begin();
|
||||
u64 progress = 0;
|
||||
|
||||
auto searchPredicate = [&] -> bool(*)(u8, u8) {
|
||||
if (!settings.ignoreCase)
|
||||
return [](u8 left, u8 right) -> bool {
|
||||
return left == right;
|
||||
};
|
||||
else
|
||||
return [](u8 left, u8 right) -> bool {
|
||||
if (std::isupper(left))
|
||||
left = std::tolower(left);
|
||||
if (std::isupper(right))
|
||||
right = std::tolower(right);
|
||||
|
||||
return left == right;
|
||||
};
|
||||
}();
|
||||
|
||||
|
||||
while (true) {
|
||||
task.update(progress);
|
||||
|
||||
occurrence = std::search(reader.begin(), reader.end(), std::boyer_moore_horspool_searcher(bytes.begin(), bytes.end()));
|
||||
occurrence = std::search(reader.begin(), reader.end(), std::default_searcher(bytes.begin(), bytes.end(), searchPredicate));
|
||||
if (occurrence == reader.end())
|
||||
break;
|
||||
|
||||
auto address = occurrence.getAddress();
|
||||
reader.seek(address + 1);
|
||||
results.push_back(Occurrence{ Region { address, bytes.size() }, Occurrence::DecodeType::Binary, std::endian::native, false });
|
||||
results.push_back(Occurrence{ Region { address, bytes.size() }, decodeType, endian, false });
|
||||
progress = address - searchRegion.getStartAddress();
|
||||
}
|
||||
|
||||
@ -497,6 +549,8 @@ namespace hex::plugin::builtin {
|
||||
|
||||
case Value:
|
||||
case Strings:
|
||||
case Sequence:
|
||||
case Regex:
|
||||
{
|
||||
switch (occurrence.decodeType) {
|
||||
using enum Occurrence::DecodeType;
|
||||
@ -523,8 +577,6 @@ namespace hex::plugin::builtin {
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Sequence:
|
||||
case Regex:
|
||||
case BinaryPattern:
|
||||
result = hex::encodeByteString(bytes);
|
||||
break;
|
||||
@ -661,6 +713,18 @@ namespace hex::plugin::builtin {
|
||||
|
||||
ImGuiExt::InputTextIcon("hex.builtin.common.value"_lang, ICON_VS_SYMBOL_KEY, settings.sequence);
|
||||
|
||||
if (ImGui::BeginCombo("hex.builtin.common.type"_lang, StringTypes[std::to_underlying(settings.type)].c_str())) {
|
||||
for (size_t i = 0; i < StringTypes.size() - 2; i++) {
|
||||
auto type = static_cast<SearchSettings::StringType>(i);
|
||||
|
||||
if (ImGui::Selectable(StringTypes[i].c_str(), type == settings.type))
|
||||
settings.type = type;
|
||||
}
|
||||
ImGui::EndCombo();
|
||||
}
|
||||
|
||||
ImGui::Checkbox("hex.builtin.view.find.sequences.ignore_case"_lang, &settings.ignoreCase);
|
||||
|
||||
m_settingsValid = !settings.sequence.empty() && !hex::decodeByteString(settings.sequence).empty();
|
||||
|
||||
ImGui::EndTabItem();
|
||||
|
Loading…
Reference in New Issue
Block a user