From 4029c333edb5821eff698c4e73cba73ac9e854c7 Mon Sep 17 00:00:00 2001 From: WerWolv Date: Sun, 2 May 2021 20:13:37 +0200 Subject: [PATCH] patterns: Added UTF-16 character type and string parsing --- .../include/hex/lang/pattern_data.hpp | 47 +++++++++++++++++++ plugins/libimhex/include/hex/lang/token.hpp | 3 ++ plugins/libimhex/source/lang/evaluator.cpp | 25 ++++++---- plugins/libimhex/source/lang/lexer.cpp | 2 + source/views/view_pattern.cpp | 2 +- 5 files changed, 70 insertions(+), 9 deletions(-) diff --git a/plugins/libimhex/include/hex/lang/pattern_data.hpp b/plugins/libimhex/include/hex/lang/pattern_data.hpp index adbc1ed9b..455d0edfe 100644 --- a/plugins/libimhex/include/hex/lang/pattern_data.hpp +++ b/plugins/libimhex/include/hex/lang/pattern_data.hpp @@ -10,6 +10,8 @@ #include #include +#include +#include #include #include @@ -478,6 +480,27 @@ namespace hex::lang { } }; + class PatternDataCharacter16 : public PatternData { + public: + explicit PatternDataCharacter16(u64 offset, u32 color = 0) + : PatternData(offset, 2, color) { } + + PatternData* clone() override { + return new PatternDataCharacter16(*this); + } + + void createEntry(prv::Provider* &provider) override { + char16_t character; + provider->read(this->getOffset(), &character, 2); + + this->createDefaultEntry(hex::format("'{0}'", std::wstring_convert, char16_t>{}.to_bytes(character))); + } + + [[nodiscard]] std::string getFormattedName() const override { + return "char16"; + } + }; + class PatternDataString : public PatternData { public: PatternDataString(u64 offset, size_t size, u32 color = 0) @@ -500,6 +523,30 @@ namespace hex::lang { } }; + class PatternDataString16 : public PatternData { + public: + PatternDataString16(u64 offset, size_t size, u32 color = 0) + : PatternData(offset, size, color) { } + + PatternData* clone() override { + return new PatternDataString16(*this); + } + + void createEntry(prv::Provider* &provider) override { + std::u16string buffer(this->getSize() + 1, 0x00); + provider->read(this->getOffset(), buffer.data(), this->getSize()); + buffer[this->getSize()] = '\0'; + + auto utf8String = std::wstring_convert, char16_t>{}.to_bytes(buffer); + + this->createDefaultEntry(hex::format("\"{0}\"", utf8String)) ; + } + + [[nodiscard]] std::string getFormattedName() const override { + return "String16"; + } + }; + class PatternDataArray : public PatternData { public: PatternDataArray(u64 offset, size_t size, u32 color = 0) diff --git a/plugins/libimhex/include/hex/lang/token.hpp b/plugins/libimhex/include/hex/lang/token.hpp index 7bbf51a16..2680e5176 100644 --- a/plugins/libimhex/include/hex/lang/token.hpp +++ b/plugins/libimhex/include/hex/lang/token.hpp @@ -78,6 +78,7 @@ namespace hex::lang { Unsigned128Bit = 0x100, Signed128Bit = 0x101, Character = 0x13, + Character16 = 0x23, Boolean = 0x14, Float = 0x42, Double = 0x82, @@ -145,6 +146,7 @@ namespace hex::lang { case ValueType::Float: return IntegerLiteral(type, static_cast(value)); case ValueType::Double: return IntegerLiteral(type, static_cast(value)); case ValueType::Character: return IntegerLiteral(type, static_cast(value)); + case ValueType::Character16: return IntegerLiteral(type, static_cast(value)); default: __builtin_unreachable(); } }, literal); @@ -165,6 +167,7 @@ namespace hex::lang { case ValueType::Float: return "float"; case ValueType::Double: return "double"; case ValueType::Character: return "char"; + case ValueType::Character16: return "char16"; default: return "< ??? >"; } } diff --git a/plugins/libimhex/source/lang/evaluator.cpp b/plugins/libimhex/source/lang/evaluator.cpp index 16385cfe2..39090f863 100644 --- a/plugins/libimhex/source/lang/evaluator.cpp +++ b/plugins/libimhex/source/lang/evaluator.cpp @@ -306,6 +306,7 @@ namespace hex::lang { CHECK_TYPE(Token::ValueType::Unsigned8Bit); CHECK_TYPE(Token::ValueType::Signed8Bit); CHECK_TYPE(Token::ValueType::Character); + CHECK_TYPE(Token::ValueType::Character16); CHECK_TYPE(Token::ValueType::Boolean); DEFAULT_TYPE(Token::ValueType::Signed32Bit); @@ -475,6 +476,8 @@ namespace hex::lang { if (type == Token::ValueType::Character) pattern = new PatternDataCharacter(this->m_currOffset); + else if (type == Token::ValueType::Character16) + pattern = new PatternDataCharacter16(this->m_currOffset); else if (type == Token::ValueType::Boolean) pattern = new PatternDataBoolean(this->m_currOffset); else if (Token::isUnsigned(type)) @@ -777,14 +780,18 @@ namespace hex::lang { } } } else { - u8 currByte = 0x00; - u64 offset = startOffset; + if (auto typeDecl = dynamic_cast(node->getType()); typeDecl != nullptr) { + if (auto builtinType = dynamic_cast(typeDecl->getType()); builtinType != nullptr) { + std::vector bytes(Token::getTypeSize(builtinType->getType()), 0x00); + u64 offset = startOffset; - do { - this->m_provider->read(offset, &currByte, sizeof(u8)); - offset += sizeof(u8); - arraySize += sizeof(u8); - } while (currByte != 0x00 && offset < this->m_provider->getSize()); + do { + this->m_provider->read(offset, bytes.data(), bytes.size()); + offset += bytes.size(); + arraySize++; + } while (!std::all_of(bytes.begin(), bytes.end(), [](u8 byte){ return byte == 0x00; }) && offset < this->m_provider->getSize()); + } + } } std::vector entries; @@ -819,8 +826,10 @@ namespace hex::lang { if (entries.empty()) { pattern = new PatternDataPadding(startOffset, 0); } - else if (dynamic_cast(entries[0])) + else if (dynamic_cast(entries[0]) != nullptr) pattern = new PatternDataString(startOffset, (this->m_currOffset - startOffset), color.value_or(0)); + else if (dynamic_cast(entries[0]) != nullptr) + pattern = new PatternDataString16(startOffset, (this->m_currOffset - startOffset), color.value_or(0)); else { if (node->getSize() == nullptr) this->getConsole().abortEvaluation("no bounds provided for array"); diff --git a/plugins/libimhex/source/lang/lexer.cpp b/plugins/libimhex/source/lang/lexer.cpp index 9f587abc3..1210b7b3d 100644 --- a/plugins/libimhex/source/lang/lexer.cpp +++ b/plugins/libimhex/source/lang/lexer.cpp @@ -450,6 +450,8 @@ namespace hex::lang { tokens.emplace_back(TOKEN(ValueType, Double)); else if (identifier == "char") tokens.emplace_back(TOKEN(ValueType, Character)); + else if (identifier == "char16") + tokens.emplace_back(TOKEN(ValueType, Character16)); else if (identifier == "bool") tokens.emplace_back(TOKEN(ValueType, Boolean)); else if (identifier == "padding") diff --git a/source/views/view_pattern.cpp b/source/views/view_pattern.cpp index 115341059..38d37737f 100644 --- a/source/views/view_pattern.cpp +++ b/source/views/view_pattern.cpp @@ -23,7 +23,7 @@ namespace hex { static std::pair builtInTypes[] = { { "u8", 1 }, { "u16", 2 }, { "u32", 4 }, { "u64", 8 }, { "u128", 16 }, { "s8", 1 }, { "s16", 2 }, { "s32", 4 }, { "s64", 8 }, { "s128", 16 }, - { "float", 4 }, { "double", 8 }, { "char", 1 }, { "bool", 1 }, { "padding", 1 } + { "float", 4 }, { "double", 8 }, { "char", 1 }, { "char16", 2 }, { "bool", 1 }, { "padding", 1 } }; for (const auto &[name, size] : builtInTypes) {