feat: Added support for scanning binaries for UTF-8 strings
This commit is contained in:
parent
f1d9642cf6
commit
e2c302836f
@@ -1024,7 +1024,7 @@ namespace hex {
|
|||||||
|
|
||||||
struct FindOccurrence {
|
struct FindOccurrence {
|
||||||
Region region;
|
Region region;
|
||||||
enum class DecodeType { ASCII, Binary, UTF16, Unsigned, Signed, Float, Double } decodeType;
|
enum class DecodeType { ASCII, UTF8, Binary, UTF16, Unsigned, Signed, Float, Double } decodeType;
|
||||||
std::endian endian = std::endian::native;
|
std::endian endian = std::endian::native;
|
||||||
bool selected;
|
bool selected;
|
||||||
};
|
};
|
||||||
|
@@ -42,7 +42,7 @@ namespace hex::plugin::builtin {
|
|||||||
Value
|
Value
|
||||||
} mode = Mode::Strings;
|
} mode = Mode::Strings;
|
||||||
|
|
||||||
enum class StringType : int { ASCII = 0, UTF16LE = 1, UTF16BE = 2, ASCII_UTF16LE = 3, ASCII_UTF16BE = 4 };
|
enum class StringType : int { ASCII = 0, UTF8 = 1, UTF16LE = 2, UTF16BE = 3, ASCII_UTF16LE = 4, ASCII_UTF16BE = 5 };
|
||||||
|
|
||||||
struct Strings {
|
struct Strings {
|
||||||
int minLength = 5;
|
int minLength = 5;
|
||||||
|
@@ -202,6 +202,8 @@ namespace hex::plugin::builtin {
|
|||||||
const auto [decodeType, endian] = [&] -> std::pair<Occurrence::DecodeType, std::endian> {
|
const auto [decodeType, endian] = [&] -> std::pair<Occurrence::DecodeType, std::endian> {
|
||||||
if (settings.type == ASCII)
|
if (settings.type == ASCII)
|
||||||
return { Occurrence::DecodeType::ASCII, std::endian::native };
|
return { Occurrence::DecodeType::ASCII, std::endian::native };
|
||||||
|
if (settings.type == UTF8)
|
||||||
|
return { Occurrence::DecodeType::UTF8, std::endian::native };
|
||||||
else if (settings.type == SearchSettings::StringType::UTF16BE)
|
else if (settings.type == SearchSettings::StringType::UTF16BE)
|
||||||
return { Occurrence::DecodeType::UTF16, std::endian::big };
|
return { Occurrence::DecodeType::UTF16, std::endian::big };
|
||||||
else if (settings.type == SearchSettings::StringType::UTF16LE)
|
else if (settings.type == SearchSettings::StringType::UTF16LE)
|
||||||
@@ -210,11 +212,13 @@ namespace hex::plugin::builtin {
|
|||||||
return { Occurrence::DecodeType::Binary, std::endian::native };
|
return { Occurrence::DecodeType::Binary, std::endian::native };
|
||||||
}();
|
}();
|
||||||
|
|
||||||
size_t countedCharacters = 0;
|
i64 countedCharacters = 0;
|
||||||
u64 startAddress = reader.begin().getAddress();
|
u64 startAddress = reader.begin().getAddress();
|
||||||
u64 endAddress = reader.end().getAddress();
|
u64 endAddress = reader.end().getAddress();
|
||||||
|
|
||||||
u64 progress = 0;
|
u64 progress = 0;
|
||||||
|
u64 codePointWidth = 0;
|
||||||
|
i8 remainingCharacters = 0;
|
||||||
for (u8 byte : reader) {
|
for (u8 byte : reader) {
|
||||||
bool validChar =
|
bool validChar =
|
||||||
(settings.lowerCaseLetters && std::islower(byte)) ||
|
(settings.lowerCaseLetters && std::islower(byte)) ||
|
||||||
@@ -233,6 +237,42 @@ namespace hex::plugin::builtin {
|
|||||||
// Check if first byte of UTF-16 encoded string is 0x00
|
// Check if first byte of UTF-16 encoded string is 0x00
|
||||||
if (countedCharacters % 2 == 0)
|
if (countedCharacters % 2 == 0)
|
||||||
validChar = byte == 0x00;
|
validChar = byte == 0x00;
|
||||||
|
} else if (settings.type == UTF8) {
|
||||||
|
if ((byte & 0b1000'0000) == 0b0000'0000) {
|
||||||
|
// ASCII range
|
||||||
|
codePointWidth = 1;
|
||||||
|
remainingCharacters = 0;
|
||||||
|
validChar = true;
|
||||||
|
} else if ((byte & 0b1100'0000) == 0b1000'0000) {
|
||||||
|
// Continuation mark
|
||||||
|
|
||||||
|
if (remainingCharacters > 0) {
|
||||||
|
remainingCharacters -= 1;
|
||||||
|
validChar = true;
|
||||||
|
} else {
|
||||||
|
countedCharacters -= std::max<i64>(0, codePointWidth - (remainingCharacters + 1));
|
||||||
|
codePointWidth = 0;
|
||||||
|
remainingCharacters = 0;
|
||||||
|
validChar = false;
|
||||||
|
}
|
||||||
|
} else if ((byte & 0b1110'0000) == 0b1100'0000) {
|
||||||
|
// Two bytes
|
||||||
|
codePointWidth = 2;
|
||||||
|
remainingCharacters = codePointWidth - 1;
|
||||||
|
validChar = true;
|
||||||
|
} else if ((byte & 0b1111'0000) == 0b1110'0000) {
|
||||||
|
// Three bytes
|
||||||
|
codePointWidth = 3;
|
||||||
|
remainingCharacters = codePointWidth - 1;
|
||||||
|
validChar = true;
|
||||||
|
} else if ((byte & 0b1111'1000) == 0b1111'0000) {
|
||||||
|
// Four bytes
|
||||||
|
codePointWidth = 4;
|
||||||
|
remainingCharacters = codePointWidth - 1;
|
||||||
|
validChar = true;
|
||||||
|
} else {
|
||||||
|
validChar = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task.update(progress);
|
task.update(progress);
|
||||||
@@ -240,9 +280,9 @@ namespace hex::plugin::builtin {
|
|||||||
if (validChar)
|
if (validChar)
|
||||||
countedCharacters++;
|
countedCharacters++;
|
||||||
if (!validChar || startAddress + countedCharacters == endAddress) {
|
if (!validChar || startAddress + countedCharacters == endAddress) {
|
||||||
if (countedCharacters >= size_t(settings.minLength)) {
|
if (countedCharacters >= settings.minLength) {
|
||||||
if (!settings.nullTermination || byte == 0x00) {
|
if (!settings.nullTermination || byte == 0x00) {
|
||||||
results.push_back(Occurrence { Region { startAddress, countedCharacters }, decodeType, endian, false });
|
results.push_back(Occurrence { Region { startAddress, size_t(countedCharacters) }, decodeType, endian, false });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -563,6 +603,11 @@ namespace hex::plugin::builtin {
|
|||||||
case ASCII:
|
case ASCII:
|
||||||
result = hex::encodeByteString(bytes);
|
result = hex::encodeByteString(bytes);
|
||||||
break;
|
break;
|
||||||
|
case UTF8:
|
||||||
|
result = std::string(bytes.begin(), bytes.end());
|
||||||
|
result = wolv::util::replaceStrings(result, "\n", "");
|
||||||
|
result = wolv::util::replaceStrings(result, "\r", "");
|
||||||
|
break;
|
||||||
case UTF16:
|
case UTF16:
|
||||||
for (size_t i = occurrence.endian == std::endian::little ? 0 : 1; i < bytes.size(); i += 2)
|
for (size_t i = occurrence.endian == std::endian::little ? 0 : 1; i < bytes.size(); i += 2)
|
||||||
result += hex::encodeByteString({ bytes[i] });
|
result += hex::encodeByteString({ bytes[i] });
|
||||||
@@ -667,8 +712,9 @@ namespace hex::plugin::builtin {
|
|||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
|
|
||||||
if (ImGui::BeginTabBar("SearchMethods")) {
|
if (ImGui::BeginTabBar("SearchMethods")) {
|
||||||
const std::array<std::string, 5> StringTypes = {
|
const std::array<std::string, 6> StringTypes = {
|
||||||
"hex.ui.common.encoding.ascii"_lang,
|
"hex.ui.common.encoding.ascii"_lang,
|
||||||
|
"hex.ui.common.encoding.utf8"_lang,
|
||||||
"hex.ui.common.encoding.utf16le"_lang,
|
"hex.ui.common.encoding.utf16le"_lang,
|
||||||
"hex.ui.common.encoding.utf16be"_lang,
|
"hex.ui.common.encoding.utf16be"_lang,
|
||||||
hex::format("{} + {}", "hex.ui.common.encoding.ascii"_lang, "hex.ui.common.encoding.utf16le"_lang),
|
hex::format("{} + {}", "hex.ui.common.encoding.ascii"_lang, "hex.ui.common.encoding.utf16le"_lang),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user