1
0
mirror of synced 2024-11-15 03:27:40 +01:00
ImHex/source/lang/evaluator.cpp
WerWolv 78ef07cf0f
Pattern Language rewrite (#111)
* Initial parser rewrite effort

Lexer and Token cleanup, Parser started over

* Greatly improved parser syntax

* Reimplemented using declarations and variable placement parsing

* Added back unions and structs

* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)

* Code style improvement

* Implemented arrays and fixed memory issues

* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns

* Fixed builtin types, arrays and reimplemented strings

* Improved error messages

* Made character a distinct type, used for chars and strings

* Implemented padding, fixed arrays

* Added bitfields

* Added rvalue parsing, no evaluating yet

* Added .idea folder to gitignore

* Fixed build on MacOS

* Added custom implementation of integral concept if not available

* Rebased onto master

* Fixed array variable decl crash

* Added rvalues and dot syntax

* Lower case all pattern language error messages

* Fixed typo in variable name

* Fixed bug where preprocessor would not ignore commented out directives

* Reimplemented pointers

* Fixed rebase issues
2021-01-02 20:27:11 +01:00

437 lines
20 KiB
C++

#include "lang/evaluator.hpp"
#include "lang/token.hpp"
#include <bit>
#include <algorithm>
#include <ranges>
#include <unistd.h>
namespace hex::lang {
Evaluator::Evaluator(prv::Provider* &provider, std::endian defaultDataEndian)
: m_provider(provider), m_defaultDataEndian(defaultDataEndian) {
}
// Resolves the identifier path of an rvalue against the member patterns that are
// currently in scope and reads the integer it refers to from the data provider.
//
// node:    rvalue whose path is resolved one identifier at a time; every identifier
//          but the last must name a struct or union pattern.
// returns: a newly allocated integer literal tagged Signed128Bit; the caller is
//          responsible for deleting it.
// Raises an evaluate error when an identifier cannot be found, when a member of a
// non-struct/union pattern is accessed, or when the resolved pattern is neither an
// unsigned nor a signed integer pattern.
ASTNodeIntegerLiteral* Evaluator::evaluateRValue(ASTNodeRValue *node) {
    const std::vector<PatternData*>* currMembers = this->m_currMembers;
    PatternData *currPattern = nullptr;

    for (const auto &identifier : node->getPath()) {
        // Descend into the pattern resolved by the previous identifier, if there is one
        if (auto structPattern = dynamic_cast<PatternDataStruct*>(currPattern); structPattern != nullptr)
            currMembers = &structPattern->getMembers();
        else if (auto unionPattern = dynamic_cast<PatternDataUnion*>(currPattern); unionPattern != nullptr)
            currMembers = &unionPattern->getMembers();
        else if (currPattern != nullptr)
            throwEvaluateError("tried to access member of a non-struct/union type", node->getLineNumber());

        // m_currMembers is null while no struct/union is being evaluated; there is
        // nothing to search in that case, so report the identifier as unknown
        // instead of dereferencing a null pointer.
        if (currMembers == nullptr)
            throwEvaluateError(hex::format("could not find identifier '%s'", identifier.c_str()), node->getLineNumber());

        auto candidate = std::find_if(currMembers->begin(), currMembers->end(), [&](const auto &member) {
            return member->getVariableName() == identifier;
        });

        if (candidate != currMembers->end())
            currPattern = *candidate;
        else
            throwEvaluateError(hex::format("could not find identifier '%s'", identifier.c_str()), node->getLineNumber());
    }

    if (auto unsignedPattern = dynamic_cast<PatternDataUnsigned*>(currPattern); unsignedPattern != nullptr) {
        // The buffer starts zeroed, so bytes beyond the pattern's size stay zero
        s128 value = 0;
        this->m_provider->read(unsignedPattern->getOffset(), &value, unsignedPattern->getSize());

        return new ASTNodeIntegerLiteral(value, Token::ValueType::Signed128Bit);
    } else if (auto signedPattern = dynamic_cast<PatternDataSigned*>(currPattern); signedPattern != nullptr) {
        s128 value = 0;
        this->m_provider->read(signedPattern->getOffset(), &value, signedPattern->getSize());

        // Widen from the pattern's bit width to the full 128 bits
        return new ASTNodeIntegerLiteral(signExtend(value, signedPattern->getSize() * 8, 128), Token::ValueType::Signed128Bit);
    } else
        throwEvaluateError("tried to use non-integer value in numeric expression", node->getLineNumber());
}
// Applies a binary operator to two integer literals and returns the result as a
// newly allocated literal (owned by the caller).
//
// left, right: operands; their stored values are visited together.
// op:          operator to apply (+, -, *, /, <<, >>, &, ^, |).
// The result's value type is the first type in the priority list below that either
// operand carries, falling back to Signed32Bit.
// Raises an evaluate error for unsupported operators and for division by zero.
ASTNodeIntegerLiteral* Evaluator::evaluateOperator(ASTNodeIntegerLiteral *left, ASTNodeIntegerLiteral *right, Token::Operator op) {
    return std::visit([&](auto &&leftValue, auto &&rightValue) -> ASTNodeIntegerLiteral* {

        auto newType = [&] {
            #define CHECK_TYPE(type) if (left->getType() == (type) || right->getType() == (type)) return (type)
            #define DEFAULT_TYPE(type) return (type)

            CHECK_TYPE(Token::ValueType::Double);
            CHECK_TYPE(Token::ValueType::Float);
            CHECK_TYPE(Token::ValueType::Unsigned128Bit);
            CHECK_TYPE(Token::ValueType::Signed128Bit);
            CHECK_TYPE(Token::ValueType::Unsigned64Bit);
            CHECK_TYPE(Token::ValueType::Signed64Bit);
            CHECK_TYPE(Token::ValueType::Unsigned32Bit);
            CHECK_TYPE(Token::ValueType::Signed32Bit);
            CHECK_TYPE(Token::ValueType::Unsigned16Bit);
            CHECK_TYPE(Token::ValueType::Signed16Bit);
            CHECK_TYPE(Token::ValueType::Unsigned8Bit);
            CHECK_TYPE(Token::ValueType::Signed8Bit);
            CHECK_TYPE(Token::ValueType::Character);
            DEFAULT_TYPE(Token::ValueType::Signed32Bit);

            #undef CHECK_TYPE
            #undef DEFAULT_TYPE
        }();

        switch (op) {
            case Token::Operator::Plus:
                return new ASTNodeIntegerLiteral(leftValue + rightValue, newType);
            case Token::Operator::Minus:
                return new ASTNodeIntegerLiteral(leftValue - rightValue, newType);
            case Token::Operator::Star:
                return new ASTNodeIntegerLiteral(leftValue * rightValue, newType);
            case Token::Operator::Slash:
                // Integer division by zero is undefined behaviour and the divisor comes
                // from user-written patterns, so report it as an evaluation error.
                if (rightValue == 0)
                    throwEvaluateError("division by zero in mathematical expression", right->getLineNumber());
                return new ASTNodeIntegerLiteral(leftValue / rightValue, newType);
            case Token::Operator::ShiftLeft:
                return new ASTNodeIntegerLiteral(leftValue << rightValue, newType);
            case Token::Operator::ShiftRight:
                return new ASTNodeIntegerLiteral(leftValue >> rightValue, newType);
            case Token::Operator::BitAnd:
                return new ASTNodeIntegerLiteral(leftValue & rightValue, newType);
            case Token::Operator::BitXor:
                return new ASTNodeIntegerLiteral(leftValue ^ rightValue, newType);
            case Token::Operator::BitOr:
                return new ASTNodeIntegerLiteral(leftValue | rightValue, newType);
            default: throwEvaluateError("invalid operator used in mathematical expression", left->getLineNumber());
        }

    }, left->getValue(), right->getValue());
}
// Evaluates a binary numeric expression to a single integer literal.
//
// Each operand may be an integer literal (used directly), a nested numeric
// expression (evaluated recursively) or an rvalue (read through evaluateRValue).
// returns: a newly allocated literal produced by evaluateOperator; owned by the caller.
// Raises an evaluate error if an operand is none of the supported node kinds.
ASTNodeIntegerLiteral* Evaluator::evaluateMathematicalExpression(ASTNodeNumericExpression *node) {
    ASTNodeIntegerLiteral *leftInteger = nullptr;
    ASTNodeIntegerLiteral *rightInteger = nullptr;

    // Literals created while evaluating sub-expressions and rvalues are temporaries
    // of this call and must be freed once the operator has been applied (or an error
    // is raised). Literal operands taken straight from the AST are not tracked here
    // and are therefore never deleted.
    ASTNodeIntegerLiteral *ownedLeft = nullptr;
    ASTNodeIntegerLiteral *ownedRight = nullptr;
    SCOPE_EXIT( delete ownedLeft; delete ownedRight; );

    if (auto leftExprLiteral = dynamic_cast<ASTNodeIntegerLiteral*>(node->getLeftOperand()); leftExprLiteral != nullptr)
        leftInteger = leftExprLiteral;
    else if (auto leftExprExpression = dynamic_cast<ASTNodeNumericExpression*>(node->getLeftOperand()); leftExprExpression != nullptr)
        leftInteger = ownedLeft = evaluateMathematicalExpression(leftExprExpression);
    else if (auto leftExprRvalue = dynamic_cast<ASTNodeRValue*>(node->getLeftOperand()); leftExprRvalue != nullptr)
        leftInteger = ownedLeft = evaluateRValue(leftExprRvalue);
    else
        throwEvaluateError("invalid expression. Expected integer literal", node->getLineNumber());

    if (auto rightExprLiteral = dynamic_cast<ASTNodeIntegerLiteral*>(node->getRightOperand()); rightExprLiteral != nullptr)
        rightInteger = rightExprLiteral;
    else if (auto rightExprExpression = dynamic_cast<ASTNodeNumericExpression*>(node->getRightOperand()); rightExprExpression != nullptr)
        rightInteger = ownedRight = evaluateMathematicalExpression(rightExprExpression);
    else if (auto rightExprRvalue = dynamic_cast<ASTNodeRValue*>(node->getRightOperand()); rightExprRvalue != nullptr)
        rightInteger = ownedRight = evaluateRValue(rightExprRvalue);
    else
        throwEvaluateError("invalid expression. Expected integer literal", node->getLineNumber());

    return evaluateOperator(leftInteger, rightInteger, node->getOperator());
}
// Creates the pattern for a builtin type at the current offset and moves the
// current offset forward by the size of that type.
//
// returns: a newly allocated character, unsigned, signed or floating point pattern
//          whose type name is the builtin type's name.
// Raises an evaluate error if the type falls in none of those categories.
PatternData* Evaluator::evaluateBuiltinType(ASTNodeBuiltinType *node) {
    auto &builtinType = node->getType();
    auto byteCount = Token::getTypeSize(builtinType);

    PatternData *result = nullptr;

    // The character check comes first, exactly as the category checks are ordered
    if (builtinType == Token::ValueType::Character) {
        result = new PatternDataCharacter(this->m_currOffset);
    } else if (Token::isUnsigned(builtinType)) {
        result = new PatternDataUnsigned(this->m_currOffset, byteCount);
    } else if (Token::isSigned(builtinType)) {
        result = new PatternDataSigned(this->m_currOffset, byteCount);
    } else if (Token::isFloatingPoint(builtinType)) {
        result = new PatternDataFloat(this->m_currOffset, byteCount);
    } else {
        throwEvaluateError("invalid builtin type", node->getLineNumber());
    }

    result->setTypeName(Token::getTypeName(builtinType));
    this->m_currOffset += byteCount;

    return result;
}
// Evaluates all members of a struct declaration in order and wraps them in a
// struct pattern that spans from the starting offset to the offset reached after
// the last member.
//
// Raises an evaluate error if a member is not a variable, array or pointer declaration.
PatternData* Evaluator::evaluateStruct(ASTNodeStruct *node) {
    std::vector<PatternData*> members;

    // Only a struct evaluated while no member list is active installs its own list
    // as m_currMembers; that same struct clears it again on exit. Nested evaluations
    // leave the active list untouched.
    ScopeExit scopeReset([this] { this->m_currMembers = nullptr; });
    if (this->m_currMembers != nullptr)
        scopeReset.release();
    else
        this->m_currMembers = &members;

    const auto structStart = this->m_currOffset;

    for (auto &member : node->getMembers()) {
        PatternData *memberPattern = nullptr;

        if (auto asVariable = dynamic_cast<ASTNodeVariableDecl*>(member))
            memberPattern = this->evaluateVariable(asVariable);
        else if (auto asArray = dynamic_cast<ASTNodeArrayVariableDecl*>(member))
            memberPattern = this->evaluateArray(asArray);
        else if (auto asPointer = dynamic_cast<ASTNodePointerVariableDecl*>(member))
            memberPattern = this->evaluatePointer(asPointer);
        else
            throwEvaluateError("invalid struct member", member->getLineNumber());

        members.push_back(memberPattern);
        this->m_currEndian.reset();
    }

    return new PatternDataStruct(structStart, this->m_currOffset - structStart, members);
}
// Evaluates all members of a union declaration, each starting at the same offset,
// and wraps them in a union pattern whose size is that of its largest member.
// Afterwards the current offset points just past the union.
//
// Raises an evaluate error if a member is not a variable, array or pointer declaration.
PatternData* Evaluator::evaluateUnion(ASTNodeUnion *node) {
    std::vector<PatternData*> memberPatterns;

    // Only a union evaluated while no member list is active installs its own list
    // as m_currMembers; that same union clears it again on exit.
    ScopeExit currMemberReset([this] { this->m_currMembers = nullptr; });
    if (this->m_currMembers == nullptr)
        this->m_currMembers = &memberPatterns;
    else
        currMemberReset.release();

    auto startOffset = this->m_currOffset;
    size_t unionSize = 0;

    for (auto &member : node->getMembers()) {
        if (auto memberVariableNode = dynamic_cast<ASTNodeVariableDecl*>(member); memberVariableNode != nullptr)
            memberPatterns.emplace_back(this->evaluateVariable(memberVariableNode));
        else if (auto memberArrayNode = dynamic_cast<ASTNodeArrayVariableDecl*>(member); memberArrayNode != nullptr)
            memberPatterns.emplace_back(this->evaluateArray(memberArrayNode));
        else if (auto memberPointerNode = dynamic_cast<ASTNodePointerVariableDecl*>(member); memberPointerNode != nullptr)
            memberPatterns.emplace_back(this->evaluatePointer(memberPointerNode));
        else
            throwEvaluateError("invalid union member", member->getLineNumber());

        unionSize = std::max<size_t>(unionSize, memberPatterns.back()->getSize());

        // Every member overlays the same bytes, so rewind before the next one
        this->m_currOffset = startOffset;
        this->m_currEndian.reset();
    }

    // The offset was rewound after each member, so it cannot be used to derive the
    // size; step over the largest member instead.
    this->m_currOffset = startOffset + unionSize;

    return new PatternDataUnion(startOffset, unionSize, memberPatterns);
}
// Evaluates an enum declaration: computes the value of every entry and creates an
// enum pattern at the current offset sized like the enum's builtin underlying type.
// Afterwards the current offset points just past the enum.
//
// Raises an evaluate error if an entry's value is not a numeric expression or the
// underlying type is not a builtin type.
PatternData* Evaluator::evaluateEnum(ASTNodeEnum *node) {
    std::vector<std::pair<u64, std::string>> entryPatterns;

    auto startOffset = this->m_currOffset;
    for (auto &[name, value] : node->getEntries()) {
        auto expression = dynamic_cast<ASTNodeNumericExpression*>(value);
        if (expression == nullptr)
            throwEvaluateError("invalid expression in enum value", value->getLineNumber());

        auto valueNode = evaluateMathematicalExpression(expression);
        SCOPE_EXIT( delete valueNode; );

        entryPatterns.emplace_back( std::get<s128>(valueNode->getValue()), name );
    }

    size_t size;
    if (auto underlyingType = dynamic_cast<const ASTNodeBuiltinType*>(node->getUnderlyingType()); underlyingType != nullptr)
        size = Token::getTypeSize(underlyingType->getType());
    else
        throwEvaluateError("invalid enum underlying type", node->getLineNumber());

    // Consume the enum's bytes so that whatever is evaluated next does not overlap it
    this->m_currOffset += size;

    return new PatternDataEnum(startOffset, size, entryPatterns);
}
// Evaluates a bitfield declaration: computes the bit width of every field and
// creates a bitfield pattern at the current offset that is just large enough to
// hold all fields. Afterwards the current offset points just past the bitfield.
//
// Raises an evaluate error if a field width is not a numeric expression, is
// negative or exceeds 64 bits.
PatternData* Evaluator::evaluateBitfield(ASTNodeBitfield *node) {
    std::vector<std::pair<std::string, size_t>> entryPatterns;

    auto startOffset = this->m_currOffset;
    size_t bits = 0;
    for (auto &[name, value] : node->getEntries()) {
        auto expression = dynamic_cast<ASTNodeNumericExpression*>(value);
        if (expression == nullptr)
            throwEvaluateError("invalid expression in bitfield field size", value->getLineNumber());

        auto valueNode = evaluateMathematicalExpression(expression);
        SCOPE_EXIT( delete valueNode; );

        auto fieldBits = std::get<s128>(valueNode->getValue());
        // A negative width would wrap around when added to the unsigned bit count
        if (fieldBits < 0)
            throwEvaluateError("bitfield entry size must not be negative", value->getLineNumber());
        if (fieldBits > 64)
            throwEvaluateError("bitfield entry must at most occupy 64 bits", value->getLineNumber());

        bits += fieldBits;

        entryPatterns.emplace_back(name, fieldBits);
    }

    // Round up to whole bytes; a bit count that is already a multiple of 8 must not
    // gain an extra byte.
    const size_t size = (bits + 7) / 8;

    // Consume the bitfield's bytes so that whatever is evaluated next does not overlap it
    this->m_currOffset += size;

    return new PatternDataBitfield(startOffset, size, entryPatterns);
}
// Evaluates a type declaration by dispatching on the kind of type it wraps.
//
// If no endianness has been chosen yet, the declaration's endianness becomes the
// current one. A wrapped builtin type is returned exactly as evaluateBuiltinType
// produced it; every other kind additionally receives the declaration's name as
// its type name when that name is not empty.
// Raises an evaluate error if the wrapped type is of an unsupported kind.
PatternData* Evaluator::evaluateType(ASTNodeTypeDecl *node) {
    auto innerType = node->getType();

    if (!this->m_currEndian.has_value())
        this->m_currEndian = node->getEndian();

    if (auto asBuiltin = dynamic_cast<ASTNodeBuiltinType*>(innerType))
        return this->evaluateBuiltinType(asBuiltin);

    PatternData *result = nullptr;

    if (auto asTypeDecl = dynamic_cast<ASTNodeTypeDecl*>(innerType)) {
        result = this->evaluateType(asTypeDecl);
    } else if (auto asStruct = dynamic_cast<ASTNodeStruct*>(innerType)) {
        result = this->evaluateStruct(asStruct);
    } else if (auto asUnion = dynamic_cast<ASTNodeUnion*>(innerType)) {
        result = this->evaluateUnion(asUnion);
    } else if (auto asEnum = dynamic_cast<ASTNodeEnum*>(innerType)) {
        result = this->evaluateEnum(asEnum);
    } else if (auto asBitfield = dynamic_cast<ASTNodeBitfield*>(innerType)) {
        result = this->evaluateBitfield(asBitfield);
    } else {
        throwEvaluateError("type could not be evaluated", node->getLineNumber());
    }

    const bool hasName = !node->getName().empty();
    if (hasName)
        result->setTypeName(node->getName().data());

    return result;
}
// Evaluates a single variable declaration into a pattern.
//
// If the declaration has a placement offset expression, the current offset is set
// to its value first. The resulting pattern receives the variable's name and the
// current endianness, after which the current endianness is cleared.
// Raises an evaluate error if the variable would start at or past the end of the
// provider's data, or if its type node is neither a type declaration nor a builtin type.
PatternData* Evaluator::evaluateVariable(ASTNodeVariableDecl *node) {

    if (auto offset = dynamic_cast<ASTNodeNumericExpression*>(node->getPlacementOffset()); offset != nullptr) {
        auto valueNode = evaluateMathematicalExpression(offset);
        SCOPE_EXIT( delete valueNode; );

        this->m_currOffset = std::get<s128>(valueNode->getValue());
    }

    // This is a plain variable, so the message must not talk about arrays
    if (this->m_currOffset >= this->m_provider->getActualSize())
        throwEvaluateError("variable exceeds size of file", node->getLineNumber());

    PatternData *pattern;
    if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(node->getType()); typeDecl != nullptr)
        pattern = this->evaluateType(typeDecl);
    else if (auto builtinTypeDecl = dynamic_cast<ASTNodeBuiltinType*>(node->getType()); builtinTypeDecl != nullptr)
        pattern = this->evaluateBuiltinType(builtinTypeDecl);
    else
        // Report the declaration's real line rather than a fixed line 1
        throwEvaluateError("ASTNodeVariableDecl had an invalid type. This is a bug!", node->getLineNumber());

    pattern->setVariableName(node->getName().data());
    pattern->setEndian(this->getCurrentEndian());
    this->m_currEndian.reset();

    return pattern;
}
// Evaluates an array declaration into a pattern.
//
// If the declaration has a placement offset expression, the current offset is set
// to its value first. The element count comes from the declaration's size expression.
//  - An array of the builtin Padding type yields a padding pattern and skips the bytes.
//  - An array whose first element is a character pattern yields a string pattern.
//  - Anything else yields an array pattern holding one pattern per element.
// All elements share the colour of the first element.
// Raises an evaluate error if the size is not a numeric expression, is not greater
// than zero (negative for padding), if the array reaches past the end of the
// provider's data, or if the element type node is of an unsupported kind.
PatternData* Evaluator::evaluateArray(ASTNodeArrayVariableDecl *node) {

    if (auto offset = dynamic_cast<ASTNodeNumericExpression*>(node->getPlacementOffset()); offset != nullptr) {
        auto valueNode = evaluateMathematicalExpression(offset);
        SCOPE_EXIT( delete valueNode; );

        this->m_currOffset = std::get<s128>(valueNode->getValue());
    }

    auto startOffset = this->m_currOffset;

    auto sizeNode = dynamic_cast<ASTNodeNumericExpression*>(node->getSize());
    if (sizeNode == nullptr)
        throwEvaluateError("array size not a numeric expression", node->getLineNumber());

    auto valueNode = evaluateMathematicalExpression(sizeNode);
    SCOPE_EXIT( delete valueNode; );

    auto arraySize = std::get<s128>(valueNode->getValue());

    if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(node->getType()); typeDecl != nullptr) {
        if (auto builtinType = dynamic_cast<ASTNodeBuiltinType*>(typeDecl->getType()); builtinType != nullptr) {
            if (builtinType->getType() == Token::ValueType::Padding) {
                // A negative amount of padding would move the offset backwards
                if (arraySize < 0)
                    throwEvaluateError("array size must not be negative", node->getLineNumber());

                this->m_currOffset += arraySize;
                return new PatternDataPadding(startOffset, arraySize);
            }
        }
    }

    std::vector<PatternData*> entries;

    // The element patterns belong to this function until they are handed to a
    // PatternDataArray. On every other way out (an error, or the string case which
    // does not keep them) they are freed here.
    ScopeExit entryCleanup([&entries] {
        for (auto *entry : entries)
            delete entry;
    });

    std::optional<u32> color;
    for (s128 i = 0; i < arraySize; i++) {
        PatternData *entry;
        if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(node->getType()); typeDecl != nullptr)
            entry = this->evaluateType(typeDecl);
        else if (auto builtinTypeDecl = dynamic_cast<ASTNodeBuiltinType*>(node->getType()); builtinTypeDecl != nullptr) {
            entry = this->evaluateBuiltinType(builtinTypeDecl);
        }
        else
            throwEvaluateError("ASTNodeArrayVariableDecl had an invalid type. This is a bug!", node->getLineNumber());

        entry->setVariableName(hex::format("[%llu]", (u64)i));
        entry->setEndian(this->getCurrentEndian());

        // The first element decides the colour used for the whole array
        if (!color.has_value())
            color = entry->getColor();
        entry->setColor(color.value_or(0));

        entries.push_back(entry);

        // m_currOffset now points one past the element that was just evaluated, so
        // an array ending exactly at the end of the data is still valid.
        if (this->m_currOffset > this->m_provider->getActualSize())
            throwEvaluateError("array exceeds size of file", node->getLineNumber());
    }

    this->m_currEndian.reset();

    if (entries.empty())
        throwEvaluateError("array size must be greater than zero", node->getLineNumber());

    PatternData *pattern;
    if (dynamic_cast<PatternDataCharacter*>(entries[0])) {
        // The string pattern does not take the per-character patterns; the cleanup
        // guard above releases them.
        pattern = new PatternDataString(startOffset, (this->m_currOffset - startOffset), color.value_or(0));
    } else {
        pattern = new PatternDataArray(startOffset, (this->m_currOffset - startOffset), entries, color.value_or(0));
        entryCleanup.release();
    }

    pattern->setVariableName(node->getName().data());

    return pattern;
}
// Evaluates a pointer declaration into a pointer pattern.
//
// The pointer value is read from the provider at the pointer's own offset (the
// placement offset if one is given, otherwise the current offset), using the size of
// the declared builtin size type. The pointed-at type is then evaluated at that
// value, and finally the current offset is set to just past the pointer itself.
// Raises an evaluate error if the size type is not a builtin type or the pointed-at
// type is not a type declaration.
// NOTE(review): unlike evaluateVariable/evaluateArray, no variable name or endianness
// is set on the returned pattern here - confirm whether a caller does that.
PatternData* Evaluator::evaluatePointer(ASTNodePointerVariableDecl *node) {
    s128 pointerOffset;
    if (auto offset = dynamic_cast<ASTNodeNumericExpression*>(node->getPlacementOffset()); offset != nullptr) {
        auto valueNode = evaluateMathematicalExpression(offset);
        SCOPE_EXIT( delete valueNode; );

        pointerOffset = std::get<s128>(valueNode->getValue());
        this->m_currOffset = pointerOffset;
    } else {
        pointerOffset = this->m_currOffset;
    }

    // The size type is only evaluated to learn how many bytes the pointer occupies;
    // the temporary pattern is discarded right away.
    PatternData *sizeType;
    if (auto builtinTypeNode = dynamic_cast<ASTNodeBuiltinType*>(node->getSizeType()); builtinTypeNode != nullptr) {
        sizeType = evaluateBuiltinType(builtinTypeNode);
    } else
        throwEvaluateError("Pointer size is not a builtin type", node->getLineNumber());

    size_t pointerSize = sizeType->getSize();
    delete sizeType;

    // evaluateType dereferences its argument, so a failed cast must be caught here
    auto pointedAtType = dynamic_cast<ASTNodeTypeDecl*>(node->getType());
    if (pointedAtType == nullptr)
        throwEvaluateError("pointer does not point at a valid type", node->getLineNumber());

    u128 pointedAtOffset = 0;
    this->m_provider->read(pointerOffset, &pointedAtOffset, pointerSize);

    // Evaluate the target at the address the pointer holds ...
    this->m_currOffset = pointedAtOffset;
    auto pointedAt = evaluateType(pointedAtType);

    // ... then continue right after the pointer itself
    this->m_currOffset = pointerOffset + pointerSize;

    return new PatternDataPointer(pointerOffset, pointerSize, pointedAt);
}
// Evaluates all top-level variable, array and pointer declarations of an AST.
//
// ast:     top-level nodes; nodes of any other kind are skipped.
// returns: the patterns created for the declarations, in AST order, or an empty
//          optional if an EvaluateError was raised. In that case the error is
//          stored in m_error and the patterns created so far are freed.
std::optional<std::vector<PatternData*>> Evaluator::evaluate(const std::vector<ASTNode *> &ast) {

    std::vector<PatternData*> patterns;

    try {
        for (const auto& node : ast) {
            // Each top-level declaration starts without an explicit endianness
            this->m_currEndian.reset();

            if (auto variableDeclNode = dynamic_cast<ASTNodeVariableDecl*>(node); variableDeclNode != nullptr) {
                patterns.push_back(this->evaluateVariable(variableDeclNode));
            } else if (auto arrayDeclNode = dynamic_cast<ASTNodeArrayVariableDecl*>(node); arrayDeclNode != nullptr) {
                patterns.push_back(this->evaluateArray(arrayDeclNode));
            } else if (auto pointerDeclNode = dynamic_cast<ASTNodePointerVariableDecl*>(node); pointerDeclNode != nullptr) {
                patterns.push_back(this->evaluatePointer(pointerDeclNode));
            }

        }
    } catch (const EvaluateError &e) {
        this->m_error = e;

        // Nothing is returned to the caller on failure, so the patterns built up to
        // this point would otherwise be leaked.
        for (auto *pattern : patterns)
            delete pattern;

        return { };
    }

    return patterns;
}
}