From c468801c6e311425d7bf38a8a667e1badb1cb83c Mon Sep 17 00:00:00 2001 From: WerWolv Date: Wed, 3 Aug 2022 11:38:36 +0200 Subject: [PATCH] feat: Added support for ASCII + UTF16 string extraction Closes #641 --- .../include/content/views/view_find.hpp | 19 ++- .../source/content/views/view_find.cpp | 158 +++++++++++------- 2 files changed, 111 insertions(+), 66 deletions(-) diff --git a/plugins/builtin/include/content/views/view_find.hpp b/plugins/builtin/include/content/views/view_find.hpp index bdefb9e4b..daa524dd2 100644 --- a/plugins/builtin/include/content/views/view_find.hpp +++ b/plugins/builtin/include/content/views/view_find.hpp @@ -19,6 +19,11 @@ namespace hex::plugin::builtin { private: + struct Occurrence { + Region region; + enum class DecodeType { ASCII, Binary, UTF16LE, UTF16BE } decodeType; + }; + struct BinaryPattern { u8 mask, value; }; @@ -35,7 +40,7 @@ namespace hex::plugin::builtin { struct Strings { int minLength = 5; - enum class Type : int { ASCII = 0, UTF16LE = 1, UTF16BE = 2 } type = Type::ASCII; + enum class Type : int { ASCII = 0, UTF16LE = 1, UTF16BE = 2, ASCII_UTF16LE = 3, ASCII_UTF16BE = 4 } type = Type::ASCII; bool nullTermination = false; bool m_lowerCaseLetters = true; @@ -61,21 +66,21 @@ namespace hex::plugin::builtin { } binaryPattern; } m_searchSettings, m_decodeSettings; - std::map> m_foundRegions, m_sortedRegions; + std::map> m_foundOccurrences, m_sortedOccurrences; std::atomic m_searchRunning; bool m_settingsValid = false; std::string m_currFilter; private: - static std::vector searchStrings(Task &&task, prv::Provider *provider, Region searchRegion, SearchSettings::Strings settings); - static std::vector searchSequence(Task &&task, prv::Provider *provider, Region searchRegion, SearchSettings::Bytes settings); - static std::vector searchRegex(Task &&task, prv::Provider *provider, Region searchRegion, SearchSettings::Regex settings); - static std::vector searchBinaryPattern(Task &&task, prv::Provider *provider, Region searchRegion, SearchSettings::BinaryPattern settings); + static std::vector searchStrings(Task &task, prv::Provider *provider, Region searchRegion, SearchSettings::Strings settings); + static std::vector searchSequence(Task &task, prv::Provider *provider, Region searchRegion, SearchSettings::Bytes settings); + static std::vector searchRegex(Task &task, prv::Provider *provider, Region searchRegion, SearchSettings::Regex settings); + static std::vector searchBinaryPattern(Task &task, prv::Provider *provider, Region searchRegion, SearchSettings::BinaryPattern settings); static std::vector parseBinaryPatternString(std::string string); void runSearch(); - std::string decodeValue(prv::Provider *provider, Region region); + std::string decodeValue(prv::Provider *provider, Occurrence occurrence) const; }; } \ No newline at end of file diff --git a/plugins/builtin/source/content/views/view_find.cpp b/plugins/builtin/source/content/views/view_find.cpp index 74a4957ca..262ea88f6 100644 --- a/plugins/builtin/source/content/views/view_find.cpp +++ b/plugins/builtin/source/content/views/view_find.cpp @@ -16,17 +16,17 @@ namespace hex::plugin::builtin { ViewFind::ViewFind() : View("hex.builtin.view.find.name") { const static auto HighlightColor = [] { return (ImGui::GetCustomColorU32(ImGuiCustomCol_ToolbarPurple) & 0x00FFFFFF) | 0x70000000; }; - const static auto FindRegion = [this](u64 address) -> std::optional { - auto ®ions = this->m_foundRegions[ImHexApi::Provider::get()]; + const static auto FindOccurrence = [this](u64 address) -> std::optional { + auto &occurrences = this->m_foundOccurrences[ImHexApi::Provider::get()]; - auto it = std::upper_bound(regions.begin(), regions.end(), address, [](u64 address, Region ®ion) { - return address < region.getStartAddress(); + auto it = std::upper_bound(occurrences.begin(), occurrences.end(), address, [](u64 address, Occurrence &occurrence) { + return address < occurrence.region.getStartAddress(); }); - if (it != regions.begin()) + if (it != occurrences.begin()) it--; - if (it != regions.end() && it->getStartAddress() <= address && address <= it->getEndAddress()) + if (it != occurrences.end() && it->region.getStartAddress() <= address && address <= it->region.getEndAddress()) return *it; else return std::nullopt; @@ -35,7 +35,7 @@ namespace hex::plugin::builtin { ImHexApi::HexEditor::addBackgroundHighlightingProvider([](u64 address, const u8* data, size_t size) -> std::optional { hex::unused(data, size); - if (FindRegion(address).has_value()) + if (FindOccurrence(address).has_value()) return HighlightColor(); else return std::nullopt; @@ -44,19 +44,19 @@ namespace hex::plugin::builtin { ImHexApi::HexEditor::addTooltipProvider([this](u64 address, const u8* data, size_t size) { hex::unused(data, size); - auto region = FindRegion(address); - if (!region.has_value()) + auto occurrence = FindOccurrence(address); + if (!occurrence.has_value()) return; ImGui::BeginTooltip(); - ImGui::PushID(®ion); + ImGui::PushID(&occurrence); if (ImGui::BeginTable("##tooltips", 1, ImGuiTableFlags_RowBg | ImGuiTableFlags_NoClip)) { ImGui::TableNextRow(); ImGui::TableNextColumn(); { - const auto value = this->decodeValue(ImHexApi::Provider::get(), region.value()); + const auto value = this->decodeValue(ImHexApi::Provider::get(), occurrence.value()); ImGui::ColorButton("##color", ImColor(HighlightColor())); ImGui::SameLine(0, 10); @@ -70,7 +70,7 @@ namespace hex::plugin::builtin { ImGui::TableNextColumn(); ImGui::TextFormatted("{}: ", "hex.builtin.common.region"_lang.get()); ImGui::TableNextColumn(); - ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", region->getStartAddress(), region->getEndAddress()); + ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", occurrence->region.getStartAddress(), occurrence->region.getEndAddress()); auto demangledValue = llvm::demangle(value); @@ -157,13 +157,46 @@ namespace hex::plugin::builtin { } - std::vector ViewFind::searchStrings(Task &&task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::Strings settings) { - std::vector results; + std::vector ViewFind::searchStrings(Task &task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::Strings settings) { + using enum SearchSettings::Strings::Type; + + std::vector results; + + if (settings.type == ASCII_UTF16BE || settings.type == ASCII_UTF16LE) { + auto newSettings = settings; + + newSettings.type = ASCII; + auto asciiResults = searchStrings(task, provider, searchRegion, newSettings); + std::copy(asciiResults.begin(), asciiResults.end(), std::back_inserter(results)); + + if (settings.type == ASCII_UTF16BE) { + newSettings.type = UTF16BE; + auto utf16Results = searchStrings(task, provider, searchRegion, newSettings); + std::copy(utf16Results.begin(), utf16Results.end(), std::back_inserter(results)); + } else if (settings.type == ASCII_UTF16LE) { + newSettings.type = UTF16LE; + auto utf16Results = searchStrings(task, provider, searchRegion, newSettings); + std::copy(utf16Results.begin(), utf16Results.end(), std::back_inserter(results)); + } + + return results; + } auto reader = prv::BufferedReader(provider); reader.seek(searchRegion.getStartAddress()); reader.setEndAddress(searchRegion.getEndAddress()); + const Occurrence::DecodeType decodeType = [&]{ + if (settings.type == ASCII) + return Occurrence::DecodeType::ASCII; + else if (settings.type == SearchSettings::Strings::Type::UTF16BE) + return Occurrence::DecodeType::UTF16BE; + else if (settings.type == SearchSettings::Strings::Type::UTF16LE) + return Occurrence::DecodeType::UTF16LE; + else + return Occurrence::DecodeType::Binary; + }(); + size_t countedCharacters = 0; u64 startAddress = reader.begin().getAddress(); for (u8 byte : reader) { @@ -176,11 +209,11 @@ namespace hex::plugin::builtin { (settings.m_symbols && std::ispunct(byte)) || (settings.m_lineFeeds && byte == '\n'); - if (settings.type == SearchSettings::Strings::Type::UTF16LE) { + if (settings.type == UTF16LE) { // Check if second byte of UTF-16 encoded string is 0x00 if (countedCharacters % 2 == 1) validChar = byte == 0x00; - } else if (settings.type == SearchSettings::Strings::Type::UTF16BE) { + } else if (settings.type == UTF16BE) { // Check if first byte of UTF-16 encoded string is 0x00 if (countedCharacters % 2 == 0) validChar = byte == 0x00; @@ -191,7 +224,7 @@ namespace hex::plugin::builtin { else { if (countedCharacters >= size_t(settings.minLength)) { if (!(settings.nullTermination && byte != 0x00)) { - results.push_back(Region { startAddress, countedCharacters }); + results.push_back(Occurrence { Region { startAddress, countedCharacters }, decodeType }); } } @@ -204,8 +237,8 @@ namespace hex::plugin::builtin { return results; } - std::vector ViewFind::searchSequence(Task &&task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::Bytes settings) { - std::vector results; + std::vector ViewFind::searchSequence(Task &task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::Bytes settings) { + std::vector results; auto reader = prv::BufferedReader(provider); reader.seek(searchRegion.getStartAddress()); @@ -220,15 +253,15 @@ namespace hex::plugin::builtin { auto address = occurrence.getAddress(); reader.seek(address + sequence.size()); - results.push_back(Region { address, sequence.size() }); + results.push_back(Occurrence{ Region { address, sequence.size() }, Occurrence::DecodeType::Binary }); task.update(address - searchRegion.getStartAddress()); } return results; } - std::vector ViewFind::searchRegex(Task &&task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::Regex settings) { - auto stringRegions = searchStrings(std::move(task), provider, searchRegion, SearchSettings::Strings { + std::vector ViewFind::searchRegex(Task &task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::Regex settings) { + auto stringOccurrences = searchStrings(task, provider, searchRegion, SearchSettings::Strings { .minLength = 1, .type = SearchSettings::Strings::Type::ASCII, .m_lowerCaseLetters = true, @@ -240,21 +273,21 @@ namespace hex::plugin::builtin { .m_lineFeeds = true }); - std::vector result; + std::vector result; std::regex regex(settings.pattern); - for (const auto ®ion : stringRegions) { - std::string string(region.getSize(), '\x00'); - provider->read(region.getStartAddress(), string.data(), region.getSize()); + for (const auto &occurrence : stringOccurrences) { + std::string string(occurrence.region.getSize(), '\x00'); + provider->read(occurrence.region.getStartAddress(), string.data(), occurrence.region.getSize()); if (std::regex_match(string, regex)) - result.push_back(region); + result.push_back(occurrence); } return result; } - std::vector ViewFind::searchBinaryPattern(Task &&task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::BinaryPattern settings) { - std::vector results; + std::vector ViewFind::searchBinaryPattern(Task &task, prv::Provider *provider, hex::Region searchRegion, SearchSettings::BinaryPattern settings) { + std::vector results; auto reader = prv::BufferedReader(provider); reader.seek(searchRegion.getStartAddress()); @@ -267,7 +300,7 @@ namespace hex::plugin::builtin { if ((byte & settings.pattern[matchedBytes].mask) == settings.pattern[matchedBytes].value) { matchedBytes++; if (matchedBytes == settings.pattern.size()) { - results.push_back(Region { address - (patternSize - 1), patternSize }); + results.push_back(Occurrence { Region { address - (patternSize - 1), patternSize }, Occurrence::DecodeType::Binary }); task.update(address); matchedBytes = 0; } @@ -299,27 +332,27 @@ namespace hex::plugin::builtin { switch (settings.mode) { using enum SearchSettings::Mode; case Strings: - this->m_foundRegions[provider] = searchStrings(std::move(task), provider, searchRegion, settings.strings); + this->m_foundOccurrences[provider] = searchStrings(task, provider, searchRegion, settings.strings); break; case Sequence: - this->m_foundRegions[provider] = searchSequence(std::move(task), provider, searchRegion, settings.bytes); + this->m_foundOccurrences[provider] = searchSequence(task, provider, searchRegion, settings.bytes); break; case Regex: - this->m_foundRegions[provider] = searchRegex(std::move(task), provider, searchRegion, settings.regex); + this->m_foundOccurrences[provider] = searchRegex(task, provider, searchRegion, settings.regex); break; case BinaryPattern: - this->m_foundRegions[provider] = searchBinaryPattern(std::move(task), provider, searchRegion, settings.binaryPattern); + this->m_foundOccurrences[provider] = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern); break; } - this->m_sortedRegions = this->m_foundRegions; + this->m_sortedOccurrences = this->m_foundOccurrences; this->m_searchRunning = false; }).detach(); } - std::string ViewFind::decodeValue(prv::Provider *provider, hex::Region region) { - std::vector bytes(std::min(region.getSize(), 128)); - provider->read(region.getStartAddress(), bytes.data(), bytes.size()); + std::string ViewFind::decodeValue(prv::Provider *provider, Occurrence occurrence) const { + std::vector bytes(std::min(occurrence.region.getSize(), 128)); + provider->read(occurrence.region.getStartAddress(), bytes.data(), bytes.size()); std::string result; switch (this->m_decodeSettings.mode) { @@ -327,10 +360,9 @@ namespace hex::plugin::builtin { case Strings: { - auto &settings = this->m_decodeSettings.strings; - - switch (settings.type) { - using enum SearchSettings::Strings::Type; + switch (occurrence.decodeType) { + using enum Occurrence::DecodeType; + case Binary: case ASCII: result = hex::encodeByteString(bytes); break; @@ -342,6 +374,7 @@ namespace hex::plugin::builtin { for (size_t i = 1; i < bytes.size(); i += 2) result += hex::encodeByteString({ bytes[i] }); break; + } } break; @@ -392,7 +425,14 @@ namespace hex::plugin::builtin { if (settings.minLength < 1) settings.minLength = 1; - const std::array StringTypes = { "hex.builtin.common.encoding.ascii"_lang,"hex.builtin.common.encoding.utf16le"_lang, "hex.builtin.common.encoding.utf16be"_lang }; + const std::array StringTypes = { + "hex.builtin.common.encoding.ascii"_lang, + "hex.builtin.common.encoding.utf16le"_lang, + "hex.builtin.common.encoding.utf16be"_lang, + hex::format("{} + {}", "hex.builtin.common.encoding.ascii"_lang, "hex.builtin.common.encoding.utf16le"_lang), + hex::format("{} + {}", "hex.builtin.common.encoding.ascii"_lang, "hex.builtin.common.encoding.utf16be"_lang) + }; + if (ImGui::BeginCombo("hex.builtin.common.type"_lang, StringTypes[std::to_underlying(settings.type)].c_str())) { for (size_t i = 0; i < StringTypes.size(); i++) { auto type = static_cast(i); @@ -479,7 +519,7 @@ namespace hex::plugin::builtin { ImGui::EndDisabled(); ImGui::SameLine(); - ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundRegions[provider].size()); + ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences[provider].size()); } ImGui::EndDisabled(); @@ -487,15 +527,15 @@ namespace hex::plugin::builtin { ImGui::Separator(); ImGui::NewLine(); - auto &currRegion = this->m_sortedRegions[provider]; + auto &currOccurrences = this->m_sortedOccurrences[provider]; ImGui::PushItemWidth(ImGui::GetContentRegionAvailWidth()); if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, this->m_currFilter)) { - this->m_sortedRegions = this->m_foundRegions; + this->m_sortedOccurrences = this->m_foundOccurrences; - currRegion.erase(std::remove_if(currRegion.begin(), currRegion.end(), [this, provider](const auto ®ion) { + currOccurrences.erase(std::remove_if(currOccurrences.begin(), currOccurrences.end(), [this, provider](const auto ®ion) { return !this->decodeValue(provider, region).contains(this->m_currFilter); - }), currRegion.end()); + }), currOccurrences.end()); } ImGui::PopItemWidth(); @@ -508,17 +548,17 @@ namespace hex::plugin::builtin { auto sortSpecs = ImGui::TableGetSortSpecs(); if (sortSpecs->SpecsDirty) { - std::sort(currRegion.begin(), currRegion.end(), [this, &sortSpecs, provider](Region &left, Region &right) -> bool { + std::sort(currOccurrences.begin(), currOccurrences.end(), [this, &sortSpecs, provider](Occurrence &left, Occurrence &right) -> bool { if (sortSpecs->Specs->ColumnUserID == ImGui::GetID("offset")) { if (sortSpecs->Specs->SortDirection == ImGuiSortDirection_Ascending) - return left.getStartAddress() > right.getStartAddress(); + return left.region.getStartAddress() > right.region.getStartAddress(); else - return left.getStartAddress() < right.getStartAddress(); + return left.region.getStartAddress() < right.region.getStartAddress(); } else if (sortSpecs->Specs->ColumnUserID == ImGui::GetID("size")) { if (sortSpecs->Specs->SortDirection == ImGuiSortDirection_Ascending) - return left.getSize() > right.getSize(); + return left.region.getSize() > right.region.getSize(); else - return left.getSize() < right.getSize(); + return left.region.getSize() < right.region.getSize(); } else if (sortSpecs->Specs->ColumnUserID == ImGui::GetID("value")) { if (sortSpecs->Specs->SortDirection == ImGuiSortDirection_Ascending) return this->decodeValue(provider, left) > this->decodeValue(provider, right); @@ -535,18 +575,18 @@ namespace hex::plugin::builtin { ImGui::TableHeadersRow(); ImGuiListClipper clipper; - clipper.Begin(currRegion.size(), ImGui::GetTextLineHeightWithSpacing()); + clipper.Begin(currOccurrences.size(), ImGui::GetTextLineHeightWithSpacing()); while (clipper.Step()) { - for (size_t i = clipper.DisplayStart; i < std::min(clipper.DisplayEnd, currRegion.size()); i++) { - auto &foundItem = currRegion[i]; + for (size_t i = clipper.DisplayStart; i < std::min(clipper.DisplayEnd, currOccurrences.size()); i++) { + auto &foundItem = currOccurrences[i]; ImGui::TableNextRow(); ImGui::TableNextColumn(); - ImGui::TextFormatted("0x{:08X}", foundItem.getStartAddress()); + ImGui::TextFormatted("0x{:08X}", foundItem.region.getStartAddress()); ImGui::TableNextColumn(); - ImGui::TextFormatted("{}", hex::toByteString(foundItem.getSize())); + ImGui::TextFormatted("{}", hex::toByteString(foundItem.region.getSize())); ImGui::TableNextColumn(); ImGui::PushID(i); @@ -555,7 +595,7 @@ namespace hex::plugin::builtin { ImGui::TextFormatted("{}", value); ImGui::SameLine(); if (ImGui::Selectable("##line", false, ImGuiSelectableFlags_SpanAllColumns)) - ImHexApi::HexEditor::setSelection(foundItem.getStartAddress(), foundItem.getSize()); + ImHexApi::HexEditor::setSelection(foundItem.region.getStartAddress(), foundItem.region.getSize()); drawContextMenu(value); ImGui::PopID();