patterns: Added UTF-16 character type and string parsing
This commit is contained in:
parent
0c6bd73996
commit
4029c333ed
@ -10,6 +10,8 @@
|
||||
#include <hex/views/view.hpp>
|
||||
|
||||
#include <cstring>
|
||||
#include <codecvt>
|
||||
#include <locale>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
@ -478,6 +480,27 @@ namespace hex::lang {
|
||||
}
|
||||
};
|
||||
|
||||
class PatternDataCharacter16 : public PatternData {
|
||||
public:
|
||||
explicit PatternDataCharacter16(u64 offset, u32 color = 0)
|
||||
: PatternData(offset, 2, color) { }
|
||||
|
||||
PatternData* clone() override {
|
||||
return new PatternDataCharacter16(*this);
|
||||
}
|
||||
|
||||
void createEntry(prv::Provider* &provider) override {
|
||||
char16_t character;
|
||||
provider->read(this->getOffset(), &character, 2);
|
||||
|
||||
this->createDefaultEntry(hex::format("'{0}'", std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.to_bytes(character)));
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string getFormattedName() const override {
|
||||
return "char16";
|
||||
}
|
||||
};
|
||||
|
||||
class PatternDataString : public PatternData {
|
||||
public:
|
||||
PatternDataString(u64 offset, size_t size, u32 color = 0)
|
||||
@ -500,6 +523,30 @@ namespace hex::lang {
|
||||
}
|
||||
};
|
||||
|
||||
class PatternDataString16 : public PatternData {
|
||||
public:
|
||||
PatternDataString16(u64 offset, size_t size, u32 color = 0)
|
||||
: PatternData(offset, size, color) { }
|
||||
|
||||
PatternData* clone() override {
|
||||
return new PatternDataString16(*this);
|
||||
}
|
||||
|
||||
void createEntry(prv::Provider* &provider) override {
|
||||
std::u16string buffer(this->getSize() + 1, 0x00);
|
||||
provider->read(this->getOffset(), buffer.data(), this->getSize());
|
||||
buffer[this->getSize()] = '\0';
|
||||
|
||||
auto utf8String = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.to_bytes(buffer);
|
||||
|
||||
this->createDefaultEntry(hex::format("\"{0}\"", utf8String)) ;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string getFormattedName() const override {
|
||||
return "String16";
|
||||
}
|
||||
};
|
||||
|
||||
class PatternDataArray : public PatternData {
|
||||
public:
|
||||
PatternDataArray(u64 offset, size_t size, u32 color = 0)
|
||||
|
@ -78,6 +78,7 @@ namespace hex::lang {
|
||||
Unsigned128Bit = 0x100,
|
||||
Signed128Bit = 0x101,
|
||||
Character = 0x13,
|
||||
Character16 = 0x23,
|
||||
Boolean = 0x14,
|
||||
Float = 0x42,
|
||||
Double = 0x82,
|
||||
@ -145,6 +146,7 @@ namespace hex::lang {
|
||||
case ValueType::Float: return IntegerLiteral(type, static_cast<float>(value));
|
||||
case ValueType::Double: return IntegerLiteral(type, static_cast<double>(value));
|
||||
case ValueType::Character: return IntegerLiteral(type, static_cast<char>(value));
|
||||
case ValueType::Character16: return IntegerLiteral(type, static_cast<char16_t>(value));
|
||||
default: __builtin_unreachable();
|
||||
}
|
||||
}, literal);
|
||||
@ -165,6 +167,7 @@ namespace hex::lang {
|
||||
case ValueType::Float: return "float";
|
||||
case ValueType::Double: return "double";
|
||||
case ValueType::Character: return "char";
|
||||
case ValueType::Character16: return "char16";
|
||||
default: return "< ??? >";
|
||||
}
|
||||
}
|
||||
|
@ -306,6 +306,7 @@ namespace hex::lang {
|
||||
CHECK_TYPE(Token::ValueType::Unsigned8Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed8Bit);
|
||||
CHECK_TYPE(Token::ValueType::Character);
|
||||
CHECK_TYPE(Token::ValueType::Character16);
|
||||
CHECK_TYPE(Token::ValueType::Boolean);
|
||||
DEFAULT_TYPE(Token::ValueType::Signed32Bit);
|
||||
|
||||
@ -475,6 +476,8 @@ namespace hex::lang {
|
||||
|
||||
if (type == Token::ValueType::Character)
|
||||
pattern = new PatternDataCharacter(this->m_currOffset);
|
||||
else if (type == Token::ValueType::Character16)
|
||||
pattern = new PatternDataCharacter16(this->m_currOffset);
|
||||
else if (type == Token::ValueType::Boolean)
|
||||
pattern = new PatternDataBoolean(this->m_currOffset);
|
||||
else if (Token::isUnsigned(type))
|
||||
@ -777,14 +780,18 @@ namespace hex::lang {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
u8 currByte = 0x00;
|
||||
u64 offset = startOffset;
|
||||
if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(node->getType()); typeDecl != nullptr) {
|
||||
if (auto builtinType = dynamic_cast<ASTNodeBuiltinType*>(typeDecl->getType()); builtinType != nullptr) {
|
||||
std::vector<u8> bytes(Token::getTypeSize(builtinType->getType()), 0x00);
|
||||
u64 offset = startOffset;
|
||||
|
||||
do {
|
||||
this->m_provider->read(offset, &currByte, sizeof(u8));
|
||||
offset += sizeof(u8);
|
||||
arraySize += sizeof(u8);
|
||||
} while (currByte != 0x00 && offset < this->m_provider->getSize());
|
||||
do {
|
||||
this->m_provider->read(offset, bytes.data(), bytes.size());
|
||||
offset += bytes.size();
|
||||
arraySize++;
|
||||
} while (!std::all_of(bytes.begin(), bytes.end(), [](u8 byte){ return byte == 0x00; }) && offset < this->m_provider->getSize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<PatternData*> entries;
|
||||
@ -819,8 +826,10 @@ namespace hex::lang {
|
||||
if (entries.empty()) {
|
||||
pattern = new PatternDataPadding(startOffset, 0);
|
||||
}
|
||||
else if (dynamic_cast<PatternDataCharacter*>(entries[0]))
|
||||
else if (dynamic_cast<PatternDataCharacter*>(entries[0]) != nullptr)
|
||||
pattern = new PatternDataString(startOffset, (this->m_currOffset - startOffset), color.value_or(0));
|
||||
else if (dynamic_cast<PatternDataCharacter16*>(entries[0]) != nullptr)
|
||||
pattern = new PatternDataString16(startOffset, (this->m_currOffset - startOffset), color.value_or(0));
|
||||
else {
|
||||
if (node->getSize() == nullptr)
|
||||
this->getConsole().abortEvaluation("no bounds provided for array");
|
||||
|
@ -450,6 +450,8 @@ namespace hex::lang {
|
||||
tokens.emplace_back(TOKEN(ValueType, Double));
|
||||
else if (identifier == "char")
|
||||
tokens.emplace_back(TOKEN(ValueType, Character));
|
||||
else if (identifier == "char16")
|
||||
tokens.emplace_back(TOKEN(ValueType, Character16));
|
||||
else if (identifier == "bool")
|
||||
tokens.emplace_back(TOKEN(ValueType, Boolean));
|
||||
else if (identifier == "padding")
|
||||
|
@ -23,7 +23,7 @@ namespace hex {
|
||||
static std::pair<const char* const, size_t> builtInTypes[] = {
|
||||
{ "u8", 1 }, { "u16", 2 }, { "u32", 4 }, { "u64", 8 }, { "u128", 16 },
|
||||
{ "s8", 1 }, { "s16", 2 }, { "s32", 4 }, { "s64", 8 }, { "s128", 16 },
|
||||
{ "float", 4 }, { "double", 8 }, { "char", 1 }, { "bool", 1 }, { "padding", 1 }
|
||||
{ "float", 4 }, { "double", 8 }, { "char", 1 }, { "char16", 2 }, { "bool", 1 }, { "padding", 1 }
|
||||
};
|
||||
|
||||
for (const auto &[name, size] : builtInTypes) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user