Added support for more literal types and scope resolution operator parsing
This commit is contained in:
parent
f137d759c8
commit
ceee311efa
@ -26,10 +26,8 @@ namespace hex::lang {
|
||||
|
||||
class ASTNodeIntegerLiteral : public ASTNode {
|
||||
public:
|
||||
ASTNodeIntegerLiteral(std::variant<u128, s128> value, Token::ValueType type)
|
||||
: ASTNode(), m_value(value), m_type(type) {
|
||||
ASTNodeIntegerLiteral(Token::IntegerLiteral literal) : ASTNode(), m_literal(literal) { }
|
||||
|
||||
}
|
||||
ASTNodeIntegerLiteral(const ASTNodeIntegerLiteral&) = default;
|
||||
|
||||
ASTNode* clone() override {
|
||||
@ -37,16 +35,15 @@ namespace hex::lang {
|
||||
}
|
||||
|
||||
[[nodiscard]] const auto& getValue() const {
|
||||
return this->m_value;
|
||||
return this->m_literal.second;
|
||||
}
|
||||
|
||||
[[nodiscard]] Token::ValueType getType() const {
|
||||
return this->m_type;
|
||||
return this->m_literal.first;
|
||||
}
|
||||
|
||||
private:
|
||||
std::variant<u128, s128> m_value;
|
||||
Token::ValueType m_type;
|
||||
Token::IntegerLiteral m_literal;
|
||||
};
|
||||
|
||||
class ASTNodeNumericExpression : public ASTNode {
|
||||
@ -351,4 +348,22 @@ namespace hex::lang {
|
||||
std::vector<std::string> m_path;
|
||||
};
|
||||
|
||||
class ASTNodeScopeResolution : public ASTNode {
|
||||
public:
|
||||
explicit ASTNodeScopeResolution(std::vector<std::string> path) : ASTNode(), m_path(std::move(path)) { }
|
||||
|
||||
ASTNodeScopeResolution(const ASTNodeScopeResolution&) = default;
|
||||
|
||||
ASTNode* clone() override {
|
||||
return new ASTNodeScopeResolution(*this);
|
||||
}
|
||||
|
||||
const std::vector<std::string>& getPath() {
|
||||
return this->m_path;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::string> m_path;
|
||||
};
|
||||
|
||||
}
|
@ -53,6 +53,7 @@ namespace hex::lang {
|
||||
return this->m_curr[index].type;
|
||||
}
|
||||
|
||||
ASTNode* parseScopeResolution(std::vector<std::string> &path);
|
||||
ASTNode* parseRValue(std::vector<std::string> &path);
|
||||
ASTNode* parseFactor();
|
||||
ASTNode* parseMultiplicativeExpression();
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "providers/provider.hpp"
|
||||
#include "helpers/utils.hpp"
|
||||
#include "lang/token.hpp"
|
||||
|
||||
#include <cstring>
|
||||
#include <random>
|
||||
@ -638,7 +639,7 @@ namespace hex::lang {
|
||||
|
||||
class PatternDataEnum : public PatternData {
|
||||
public:
|
||||
PatternDataEnum(u64 offset, size_t size, std::vector<std::pair<u64, std::string>> enumValues, u32 color = 0)
|
||||
PatternDataEnum(u64 offset, size_t size, std::vector<std::pair<Token::IntegerLiteral, std::string>> enumValues, u32 color = 0)
|
||||
: PatternData(offset, size, color), m_enumValues(std::move(enumValues)) { }
|
||||
|
||||
PatternData* clone() override {
|
||||
@ -653,12 +654,18 @@ namespace hex::lang {
|
||||
std::string valueString = PatternData::getTypeName() + "::";
|
||||
|
||||
bool foundValue = false;
|
||||
for (auto &[entryValue, entryName] : this->m_enumValues) {
|
||||
if (value == entryValue) {
|
||||
valueString += entryName;
|
||||
foundValue = true;
|
||||
for (auto &[entryValueLiteral, entryName] : this->m_enumValues) {
|
||||
bool matches = std::visit([&, name = entryName](auto &&entryValue) {
|
||||
if (value == entryValue) {
|
||||
valueString += name;
|
||||
foundValue = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}, entryValueLiteral.second);
|
||||
if (matches)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundValue)
|
||||
@ -694,7 +701,7 @@ namespace hex::lang {
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::pair<u64, std::string>> m_enumValues;
|
||||
std::vector<std::pair<Token::IntegerLiteral, std::string>> m_enumValues;
|
||||
};
|
||||
|
||||
class PatternDataBitfield : public PatternData {
|
||||
|
@ -78,11 +78,13 @@ namespace hex::lang {
|
||||
SquareBracketClose,
|
||||
Comma,
|
||||
Dot,
|
||||
ScopeResolution,
|
||||
EndOfExpression,
|
||||
EndOfProgram
|
||||
};
|
||||
|
||||
using ValueTypes = std::variant<Keyword, std::string, Operator, s128, ValueType, Separator>;
|
||||
using IntegerLiteral = std::pair<ValueType, std::variant<u8, s8, u16, s16, u32, s32, u64, s64, u128, s128, float, double>>;
|
||||
using ValueTypes = std::variant<Keyword, std::string, Operator, IntegerLiteral, ValueType, Separator>;
|
||||
|
||||
Token(Type type, auto value, u32 lineNumber) : type(type), value(value), lineNumber(lineNumber) {
|
||||
|
||||
@ -174,7 +176,7 @@ namespace hex::lang {
|
||||
#define KEYWORD_LE COMPONENT(Keyword, LittleEndian)
|
||||
#define KEYWORD_BE COMPONENT(Keyword, BigEndian)
|
||||
|
||||
#define INTEGER hex::lang::Token::Type::Integer, 0xFFFF'FFFF'FFFF'FFFF
|
||||
#define INTEGER hex::lang::Token::Type::Integer, hex::lang::Token::IntegerLiteral({ hex::lang::Token::ValueType::Any, 0xFFFF'FFFF'FFFF'FFFF })
|
||||
#define IDENTIFIER hex::lang::Token::Type::Identifier, ""
|
||||
|
||||
#define OPERATOR_AT COMPONENT(Operator, AtDeclaration)
|
||||
@ -206,5 +208,6 @@ namespace hex::lang {
|
||||
#define SEPARATOR_SQUAREBRACKETCLOSE COMPONENT(Separator, SquareBracketClose)
|
||||
#define SEPARATOR_COMMA COMPONENT(Separator, Comma)
|
||||
#define SEPARATOR_DOT COMPONENT(Separator, Dot)
|
||||
#define SEPARATOR_SCOPE_RESOLUTION COMPONENT(Separator, ScopeResolution)
|
||||
#define SEPARATOR_ENDOFEXPRESSION COMPONENT(Separator, EndOfExpression)
|
||||
#define SEPARATOR_ENDOFPROGRAM COMPONENT(Separator, EndOfProgram)
|
@ -38,67 +38,116 @@ namespace hex::lang {
|
||||
}
|
||||
|
||||
// Read the bytes covered by the resolved integer pattern from the provider and
// wrap them in an integer literal whose type matches the pattern's signedness and size.
if (auto unsignedPattern = dynamic_cast<PatternDataUnsigned*>(currPattern); unsignedPattern != nullptr) {
    const auto size = unsignedPattern->getSize();

    // Fixed-size buffer aligned for the widest supported integer, so the typed
    // reads below are suitably aligned and no variable-length array is needed.
    alignas(u128) u8 value[sizeof(u128)] = { };

    // Validate before reading so an oversized pattern can never overflow the buffer
    if (size > sizeof(value))
        throwEvaluateError("invalid rvalue size", node->getLineNumber());

    this->m_provider->read(unsignedPattern->getOffset(), value, size);

    switch (size) {
        case 1:  return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned8Bit,   *reinterpret_cast<u8*>(value) });
        case 2:  return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned16Bit,  *reinterpret_cast<u16*>(value) });
        case 4:  return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned32Bit,  *reinterpret_cast<u32*>(value) });
        case 8:  return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned64Bit,  *reinterpret_cast<u64*>(value) });
        case 16: return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned128Bit, *reinterpret_cast<u128*>(value) });
        default: throwEvaluateError("invalid rvalue size", node->getLineNumber());
    }
} else if (auto signedPattern = dynamic_cast<PatternDataSigned*>(currPattern); signedPattern != nullptr) {
    // Use signedPattern for the size here: unsignedPattern is null in this branch,
    // so querying it (as the previous code did) dereferenced a null pointer.
    const auto size = signedPattern->getSize();

    alignas(s128) u8 value[sizeof(s128)] = { };

    if (size > sizeof(value))
        throwEvaluateError("invalid rvalue size", node->getLineNumber());

    this->m_provider->read(signedPattern->getOffset(), value, size);

    switch (size) {
        case 1:  return new ASTNodeIntegerLiteral({ Token::ValueType::Signed8Bit,   *reinterpret_cast<s8*>(value) });
        case 2:  return new ASTNodeIntegerLiteral({ Token::ValueType::Signed16Bit,  *reinterpret_cast<s16*>(value) });
        case 4:  return new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit,  *reinterpret_cast<s32*>(value) });
        case 8:  return new ASTNodeIntegerLiteral({ Token::ValueType::Signed64Bit,  *reinterpret_cast<s64*>(value) });
        case 16: return new ASTNodeIntegerLiteral({ Token::ValueType::Signed128Bit, *reinterpret_cast<s128*>(value) });
        default: throwEvaluateError("invalid rvalue size", node->getLineNumber());
    }
} else
    throwEvaluateError("tried to use non-integer value in numeric expression", node->getLineNumber());
|
||||
}
|
||||
|
||||
// Declares an overload set called `name` for a binary bit operation.
// The first three overloads are selected when at least one operand satisfies
// std::floating_point and throw std::runtime_error("") instead of computing a result;
// the `return 0;` after the throw is presumably only there so the `auto` return type can be deduced.
// The macro ends with the signature of the overload taking two std::integral operands;
// the user of the macro supplies that overload's body, using the parameters `left` and `right`.
#define FLOAT_BIT_OPERATION(name) \
|
||||
auto name(std::floating_point auto left, auto right) { throw std::runtime_error(""); return 0; } \
|
||||
auto name(auto left, std::floating_point auto right) { throw std::runtime_error(""); return 0; } \
|
||||
auto name(std::floating_point auto left, std::floating_point auto right) { throw std::runtime_error(""); return 0; } \
|
||||
auto name(std::integral auto left, std::integral auto right)
|
||||
|
||||
// File-local helpers: each definition below provides the integral body of one bit operation.
namespace {
|
||||
|
||||
// left << right for integral operands
FLOAT_BIT_OPERATION(shiftLeft) {
|
||||
return left << right;
|
||||
}
|
||||
|
||||
// left >> right for integral operands
FLOAT_BIT_OPERATION(shiftRight) {
|
||||
return left >> right;
|
||||
}
|
||||
|
||||
// left & right for integral operands
FLOAT_BIT_OPERATION(bitAnd) {
|
||||
return left & right;
|
||||
}
|
||||
|
||||
// left | right for integral operands
FLOAT_BIT_OPERATION(bitOr) {
|
||||
return left | right;
|
||||
}
|
||||
|
||||
// left ^ right for integral operands
FLOAT_BIT_OPERATION(bitXor) {
|
||||
return left ^ right;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ASTNodeIntegerLiteral* Evaluator::evaluateOperator(ASTNodeIntegerLiteral *left, ASTNodeIntegerLiteral *right, Token::Operator op) {
|
||||
return std::visit([&](auto &&leftValue, auto &&rightValue) -> ASTNodeIntegerLiteral* {
|
||||
auto newType = [&] {
|
||||
#define CHECK_TYPE(type) if (left->getType() == (type) || right->getType() == (type)) return (type)
|
||||
#define DEFAULT_TYPE(type) return (type)
|
||||
|
||||
auto newType = [&] {
|
||||
#define CHECK_TYPE(type) if (left->getType() == (type) || right->getType() == (type)) return (type)
|
||||
#define DEFAULT_TYPE(type) return (type)
|
||||
CHECK_TYPE(Token::ValueType::Double);
|
||||
CHECK_TYPE(Token::ValueType::Float);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned128Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed128Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned64Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed64Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned32Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed32Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned16Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed16Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned8Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed8Bit);
|
||||
CHECK_TYPE(Token::ValueType::Character);
|
||||
DEFAULT_TYPE(Token::ValueType::Signed32Bit);
|
||||
|
||||
CHECK_TYPE(Token::ValueType::Double);
|
||||
CHECK_TYPE(Token::ValueType::Float);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned128Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed128Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned64Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed64Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned32Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed32Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned16Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed16Bit);
|
||||
CHECK_TYPE(Token::ValueType::Unsigned8Bit);
|
||||
CHECK_TYPE(Token::ValueType::Signed8Bit);
|
||||
CHECK_TYPE(Token::ValueType::Character);
|
||||
DEFAULT_TYPE(Token::ValueType::Signed32Bit);
|
||||
#undef CHECK_TYPE
|
||||
#undef DEFAULT_TYPE
|
||||
}();
|
||||
|
||||
#undef CHECK_TYPE
|
||||
#undef DEFAULT_TYPE
|
||||
}();
|
||||
try {
|
||||
return std::visit([&](auto &&leftValue, auto &&rightValue) -> ASTNodeIntegerLiteral * {
|
||||
switch (op) {
|
||||
case Token::Operator::Plus:
|
||||
return new ASTNodeIntegerLiteral({ newType, leftValue + rightValue });
|
||||
case Token::Operator::Minus:
|
||||
return new ASTNodeIntegerLiteral({ newType, leftValue - rightValue });
|
||||
case Token::Operator::Star:
|
||||
return new ASTNodeIntegerLiteral({ newType, leftValue * rightValue });
|
||||
case Token::Operator::Slash:
|
||||
return new ASTNodeIntegerLiteral({ newType, leftValue / rightValue });
|
||||
case Token::Operator::ShiftLeft:
|
||||
return new ASTNodeIntegerLiteral({ newType, shiftLeft(leftValue, rightValue) });
|
||||
case Token::Operator::ShiftRight:
|
||||
return new ASTNodeIntegerLiteral({ newType, shiftRight(leftValue, rightValue) });
|
||||
case Token::Operator::BitAnd:
|
||||
return new ASTNodeIntegerLiteral({ newType, bitAnd(leftValue, rightValue) });
|
||||
case Token::Operator::BitXor:
|
||||
return new ASTNodeIntegerLiteral({ newType, bitXor(leftValue, rightValue) });
|
||||
case Token::Operator::BitOr:
|
||||
return new ASTNodeIntegerLiteral({ newType, bitOr(leftValue, rightValue) });
|
||||
default:
|
||||
throwEvaluateError("invalid operator used in mathematical expression", left->getLineNumber());
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case Token::Operator::Plus:
|
||||
return new ASTNodeIntegerLiteral(leftValue + rightValue, newType);
|
||||
case Token::Operator::Minus:
|
||||
return new ASTNodeIntegerLiteral(leftValue - rightValue, newType);
|
||||
case Token::Operator::Star:
|
||||
return new ASTNodeIntegerLiteral(leftValue * rightValue, newType);
|
||||
case Token::Operator::Slash:
|
||||
return new ASTNodeIntegerLiteral(leftValue / rightValue, newType);
|
||||
case Token::Operator::ShiftLeft:
|
||||
return new ASTNodeIntegerLiteral(leftValue << rightValue, newType);
|
||||
case Token::Operator::ShiftRight:
|
||||
return new ASTNodeIntegerLiteral(leftValue >> rightValue, newType);
|
||||
case Token::Operator::BitAnd:
|
||||
return new ASTNodeIntegerLiteral(leftValue & rightValue, newType);
|
||||
case Token::Operator::BitXor:
|
||||
return new ASTNodeIntegerLiteral(leftValue ^ rightValue, newType);
|
||||
case Token::Operator::BitOr:
|
||||
return new ASTNodeIntegerLiteral(leftValue | rightValue, newType);
|
||||
default: throwEvaluateError("invalid operator used in mathematical expression", left->getLineNumber());
|
||||
|
||||
}
|
||||
|
||||
}, left->getValue(), right->getValue());
|
||||
}, left->getValue(), right->getValue());
|
||||
} catch (std::runtime_error &e) {
|
||||
throwEvaluateError("bitwise operations on floating point numbers are forbidden", left->getLineNumber());
|
||||
}
|
||||
}
|
||||
|
||||
ASTNodeIntegerLiteral* Evaluator::evaluateMathematicalExpression(ASTNodeNumericExpression *node) {
|
||||
@ -197,7 +246,7 @@ namespace hex::lang {
|
||||
}
|
||||
|
||||
PatternData* Evaluator::evaluateEnum(ASTNodeEnum *node) {
|
||||
std::vector<std::pair<u64, std::string>> entryPatterns;
|
||||
std::vector<std::pair<Token::IntegerLiteral, std::string>> entryPatterns;
|
||||
|
||||
auto startOffset = this->m_currOffset;
|
||||
for (auto &[name, value] : node->getEntries()) {
|
||||
@ -208,7 +257,7 @@ namespace hex::lang {
|
||||
auto valueNode = evaluateMathematicalExpression(expression);
|
||||
SCOPE_EXIT( delete valueNode; );
|
||||
|
||||
entryPatterns.emplace_back( std::get<s128>(valueNode->getValue()), name );
|
||||
entryPatterns.push_back({{ valueNode->getType(), valueNode->getValue() }, name });
|
||||
}
|
||||
|
||||
size_t size;
|
||||
@ -233,9 +282,14 @@ namespace hex::lang {
|
||||
auto valueNode = evaluateMathematicalExpression(expression);
|
||||
SCOPE_EXIT( delete valueNode; );
|
||||
|
||||
auto fieldBits = std::get<s128>(valueNode->getValue());
|
||||
if (fieldBits > 64)
|
||||
throwEvaluateError("bitfield entry must at most occupy 64 bits", value->getLineNumber());
|
||||
auto fieldBits = std::visit([node, type = valueNode->getType()] (auto &&value) {
|
||||
if (Token::isFloatingPoint(type))
|
||||
throwEvaluateError("bitfield entry size must be an integer value", node->getLineNumber());
|
||||
return static_cast<s128>(value);
|
||||
}, valueNode->getValue());
|
||||
|
||||
if (fieldBits > 64 || fieldBits <= 0)
|
||||
throwEvaluateError("bitfield entry must occupy between 1 and 64 bits", value->getLineNumber());
|
||||
|
||||
bits += fieldBits;
|
||||
|
||||
@ -280,7 +334,11 @@ namespace hex::lang {
|
||||
auto valueNode = evaluateMathematicalExpression(offset);
|
||||
SCOPE_EXIT( delete valueNode; );
|
||||
|
||||
this->m_currOffset = std::get<s128>(valueNode->getValue());
|
||||
this->m_currOffset = std::visit([node, type = valueNode->getType()] (auto &&value) {
|
||||
if (Token::isFloatingPoint(type))
|
||||
throwEvaluateError("placement offset must be an integer value", node->getLineNumber());
|
||||
return static_cast<u64>(value);
|
||||
}, valueNode->getValue());
|
||||
}
|
||||
if (this->m_currOffset >= this->m_provider->getActualSize())
|
||||
throwEvaluateError("array exceeds size of file", node->getLineNumber());
|
||||
@ -306,7 +364,11 @@ namespace hex::lang {
|
||||
auto valueNode = evaluateMathematicalExpression(offset);
|
||||
SCOPE_EXIT( delete valueNode; );
|
||||
|
||||
this->m_currOffset = std::get<s128>(valueNode->getValue());
|
||||
this->m_currOffset = std::visit([node, type = valueNode->getType()] (auto &&value) {
|
||||
if (Token::isFloatingPoint(type))
|
||||
throwEvaluateError("placement offset must be an integer value", node->getLineNumber());
|
||||
return static_cast<u64>(value);
|
||||
}, valueNode->getValue());
|
||||
}
|
||||
|
||||
auto startOffset = this->m_currOffset;
|
||||
@ -320,7 +382,11 @@ namespace hex::lang {
|
||||
|
||||
SCOPE_EXIT( delete valueNode; );
|
||||
|
||||
auto arraySize = std::get<s128>(valueNode->getValue());
|
||||
auto arraySize = std::visit([node, type = valueNode->getType()] (auto &&value) {
|
||||
if (Token::isFloatingPoint(type))
|
||||
throwEvaluateError("array size must be an integer value", node->getLineNumber());
|
||||
return static_cast<u64>(value);
|
||||
}, valueNode->getValue());
|
||||
|
||||
if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(node->getType()); typeDecl != nullptr) {
|
||||
if (auto builtinType = dynamic_cast<ASTNodeBuiltinType*>(typeDecl->getType()); builtinType != nullptr) {
|
||||
@ -377,7 +443,11 @@ namespace hex::lang {
|
||||
auto valueNode = evaluateMathematicalExpression(offset);
|
||||
SCOPE_EXIT( delete valueNode; );
|
||||
|
||||
pointerOffset = std::get<s128>(valueNode->getValue());
|
||||
pointerOffset = std::visit([node, type = valueNode->getType()] (auto &&value) {
|
||||
if (Token::isFloatingPoint(type))
|
||||
throwEvaluateError("pointer offset must be an integer value", node->getLineNumber());
|
||||
return static_cast<s128>(value);
|
||||
}, valueNode->getValue());
|
||||
this->m_currOffset = pointerOffset;
|
||||
} else {
|
||||
pointerOffset = this->m_currOffset;
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include "lang/lexer.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
namespace hex::lang {
|
||||
|
||||
@ -25,48 +26,119 @@ namespace hex::lang {
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::optional<u64> parseInt(std::string_view string) {
|
||||
u64 integer = 0;
|
||||
/**
 * Returns the number of leading characters of `string` that may belong to a
 * numeric literal (decimal/hex digits, '.', the 'x' of a "0x" prefix and the
 * 'U'/'L' suffix letters).
 *
 * When every character of `string` qualifies, the full length is returned
 * rather than std::string_view::npos, so the result can always be added to an
 * offset or used as a substring length.
 */
size_t getIntegerLiteralLength(std::string_view string) {
    const auto literalEnd = string.find_first_not_of("0123456789ABCDEFabcdef.xUL");

    return literalEnd == std::string_view::npos ? string.length() : literalEnd;
}
|
||||
|
||||
std::optional<Token::IntegerLiteral> parseIntegerLiteral(std::string_view string) {
|
||||
Token::ValueType type = Token::ValueType::Any;
|
||||
Token::IntegerLiteral result;
|
||||
|
||||
u8 base;
|
||||
|
||||
std::string_view numberData;
|
||||
auto endPos = getIntegerLiteralLength(string);
|
||||
std::string_view numberData = string.substr(0, endPos);
|
||||
|
||||
if (string.starts_with("0x")) {
|
||||
numberData = string.substr(2);
|
||||
if (numberData.ends_with('U')) {
|
||||
type = Token::ValueType::Unsigned32Bit;
|
||||
numberData.remove_suffix(1);
|
||||
} else if (numberData.ends_with("UL")) {
|
||||
type = Token::ValueType::Unsigned64Bit;
|
||||
numberData.remove_suffix(2);
|
||||
} else if (numberData.ends_with("ULL")) {
|
||||
type = Token::ValueType::Unsigned128Bit;
|
||||
numberData.remove_suffix(3);
|
||||
} else if (numberData.ends_with("L")) {
|
||||
type = Token::ValueType::Signed64Bit;
|
||||
numberData.remove_suffix(1);
|
||||
} else if (numberData.ends_with("LL")) {
|
||||
type = Token::ValueType::Signed128Bit;
|
||||
numberData.remove_suffix(2);
|
||||
} else if (numberData.ends_with('F')) {
|
||||
type = Token::ValueType::Float;
|
||||
numberData.remove_suffix(1);
|
||||
} else if (numberData.ends_with('D')) {
|
||||
type = Token::ValueType::Double;
|
||||
numberData.remove_suffix(1);
|
||||
}
|
||||
|
||||
if (numberData.starts_with("0x")) {
|
||||
numberData = numberData.substr(2);
|
||||
base = 16;
|
||||
|
||||
if (Token::isFloatingPoint(type))
|
||||
return { };
|
||||
|
||||
if (numberData.find_first_not_of("0123456789ABCDEFabcdef") != std::string_view::npos)
|
||||
return { };
|
||||
} else if (string.starts_with("0b")) {
|
||||
numberData = string.substr(2);
|
||||
} else if (numberData.starts_with("0b")) {
|
||||
numberData = numberData.substr(2);
|
||||
base = 2;
|
||||
|
||||
if (Token::isFloatingPoint(type))
|
||||
return { };
|
||||
|
||||
if (numberData.find_first_not_of("01") != std::string_view::npos)
|
||||
return { };
|
||||
} else if (isdigit(string[0])) {
|
||||
numberData = string;
|
||||
} else if (numberData.find('.') != std::string_view::npos || Token::isFloatingPoint(type)) {
|
||||
base = 10;
|
||||
if (type == Token::ValueType::Any)
|
||||
type = Token::ValueType::Double;
|
||||
|
||||
if (std::count(numberData.begin(), numberData.end(), '.') > 1 || numberData.find_first_not_of("0123456789.") != std::string_view::npos)
|
||||
return { };
|
||||
|
||||
if (numberData.ends_with('.'))
|
||||
return { };
|
||||
} else if (isdigit(numberData[0])) {
|
||||
base = 10;
|
||||
|
||||
if (numberData.find_first_not_of("0123456789") != std::string_view::npos)
|
||||
return { };
|
||||
} else return { };
|
||||
|
||||
if (type == Token::ValueType::Any)
|
||||
type = Token::ValueType::Signed32Bit;
|
||||
|
||||
|
||||
if (numberData.length() == 0)
|
||||
return { };
|
||||
|
||||
for (const char& c : numberData) {
|
||||
if (Token::isUnsigned(type) || Token::isSigned(type)) {
|
||||
u128 integer = 0;
|
||||
for (const char& c : numberData) {
|
||||
integer *= base;
|
||||
|
||||
if (isdigit(c))
|
||||
integer += (c - '0');
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
integer += 10 + (c - 'A');
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
integer += 10 + (c - 'a');
|
||||
else return { };
|
||||
if (isdigit(c))
|
||||
integer += (c - '0');
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
integer += 10 + (c - 'A');
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
integer += 10 + (c - 'a');
|
||||
else return { };
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case Token::ValueType::Unsigned32Bit: return {{ type, u32(integer) }};
|
||||
case Token::ValueType::Signed32Bit: return {{ type, s32(integer) }};
|
||||
case Token::ValueType::Unsigned64Bit: return {{ type, u64(integer) }};
|
||||
case Token::ValueType::Signed64Bit: return {{ type, s64(integer) }};
|
||||
case Token::ValueType::Unsigned128Bit: return {{ type, u128(integer) }};
|
||||
case Token::ValueType::Signed128Bit: return {{ type, s128(integer) }};
|
||||
default: return { };
|
||||
}
|
||||
} else if (Token::isFloatingPoint(type)) {
|
||||
double floatingPoint = strtod(numberData.data(), nullptr);
|
||||
|
||||
switch (type) {
|
||||
case Token::ValueType::Float: return {{ type, float(floatingPoint) }};
|
||||
case Token::ValueType::Double: return {{ type, double(floatingPoint) }};
|
||||
default: return { };
|
||||
}
|
||||
}
|
||||
|
||||
return integer;
|
||||
|
||||
return { };
|
||||
}
|
||||
|
||||
std::optional<std::vector<Token>> Lexer::lex(const std::string& code) {
|
||||
@ -119,6 +191,9 @@ namespace hex::lang {
|
||||
} else if (c == '=') {
|
||||
tokens.emplace_back(TOKEN(Operator, Assignment));
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 2) == "::") {
|
||||
tokens.emplace_back(TOKEN(Separator, ScopeResolution));
|
||||
offset += 2;
|
||||
} else if (c == ':') {
|
||||
tokens.emplace_back(TOKEN(Operator, Inherit));
|
||||
offset += 1;
|
||||
@ -134,10 +209,10 @@ namespace hex::lang {
|
||||
} else if (c == '/') {
|
||||
tokens.emplace_back(TOKEN(Operator, Slash));
|
||||
offset += 1;
|
||||
} else if (offset + 1 <= code.length() && code[offset] == '<' && code[offset + 1] == '<') {
|
||||
} else if (code.substr(offset, 2) == "<<") {
|
||||
tokens.emplace_back(TOKEN(Operator, ShiftLeft));
|
||||
offset += 2;
|
||||
} else if (offset + 1 <= code.length() && code[offset] == '>' && code[offset + 1] == '>') {
|
||||
} else if (code.substr(offset, 2) == ">>") {
|
||||
tokens.emplace_back(TOKEN(Operator, ShiftRight));
|
||||
offset += 2;
|
||||
} else if (c == '|') {
|
||||
@ -179,7 +254,7 @@ namespace hex::lang {
|
||||
if (offset >= code.length() || code[offset] != '\'')
|
||||
throwLexerError("missing terminating ' after character literal", lineNumber);
|
||||
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, character));
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, Token::IntegerLiteral({ Token::ValueType::Character, character }) ));
|
||||
offset += 1;
|
||||
|
||||
} else if (std::isalpha(c)) {
|
||||
@ -239,17 +314,14 @@ namespace hex::lang {
|
||||
|
||||
offset += identifier.length();
|
||||
} else if (std::isdigit(c)) {
|
||||
char *end = nullptr;
|
||||
std::strtoull(&code[offset], &end, 0);
|
||||
|
||||
auto integer = parseInt(std::string_view(&code[offset], end - &code[offset]));
|
||||
auto integer = parseIntegerLiteral(&code[offset]);
|
||||
|
||||
if (!integer.has_value())
|
||||
throwLexerError("invalid integer literal", lineNumber);
|
||||
|
||||
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, integer.value()));
|
||||
offset += (end - &code[offset]);
|
||||
offset += getIntegerLiteralLength(&code[offset]);
|
||||
} else
|
||||
throwLexerError("unknown token", lineNumber);
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
#define MATCHES(x) (begin() && x)
|
||||
|
||||
#define TO_NUMERIC_EXPRESSION(node) new ASTNodeNumericExpression((node), new ASTNodeIntegerLiteral(0, Token::ValueType::Signed128Bit), Token::Operator::Plus)
|
||||
#define TO_NUMERIC_EXPRESSION(node) new ASTNodeNumericExpression((node), new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, s32(0) }), Token::Operator::Plus)
|
||||
|
||||
// Definition syntax:
|
||||
// [A] : Either A or no token
|
||||
@ -18,6 +18,20 @@ namespace hex::lang {
|
||||
|
||||
/* Mathematical expressions */
|
||||
|
||||
// Identifier::<Identifier[::]...>
|
||||
/**
 * Collects the identifiers of an `A::B::...` path into `path` and, once no
 * further `::` follows, returns a numeric expression wrapping an
 * ASTNodeScopeResolution built from a copy of `path`.
 *
 * A `::` that is not followed by an identifier is reported through
 * throwParseError("expected member name", -1), which is assumed not to return.
 */
ASTNode* Parser::parseScopeResolution(std::vector<std::string> &path) {
    for (;;) {
        // The identifier at index -1 becomes the next path segment
        if (peek(IDENTIFIER, -1))
            path.push_back(getValue<std::string>(-1));

        // No "::" ahead: the path is complete
        if (!MATCHES(sequence(SEPARATOR_SCOPE_RESOLUTION)))
            return TO_NUMERIC_EXPRESSION(new ASTNodeScopeResolution(path));

        // Every "::" has to be followed by another identifier
        if (!MATCHES(sequence(IDENTIFIER)))
            throwParseError("expected member name", -1);
    }
}
|
||||
|
||||
// <Identifier[.]...>
|
||||
ASTNode* Parser::parseRValue(std::vector<std::string> &path) {
|
||||
if (peek(IDENTIFIER, -1))
|
||||
@ -35,13 +49,17 @@ namespace hex::lang {
|
||||
// <Integer|((parseMathematicalExpression))>
|
||||
ASTNode* Parser::parseFactor() {
|
||||
if (MATCHES(sequence(INTEGER)))
|
||||
return TO_NUMERIC_EXPRESSION(new ASTNodeIntegerLiteral(getValue<s128>(-1), Token::ValueType::Signed128Bit));
|
||||
return TO_NUMERIC_EXPRESSION(new ASTNodeIntegerLiteral(getValue<Token::IntegerLiteral>(-1)));
|
||||
else if (MATCHES(sequence(SEPARATOR_ROUNDBRACKETOPEN))) {
|
||||
auto node = this->parseMathematicalExpression();
|
||||
if (!MATCHES(sequence(SEPARATOR_ROUNDBRACKETCLOSE)))
|
||||
throwParseError("expected closing parenthesis");
|
||||
return node;
|
||||
} else if (MATCHES(sequence(IDENTIFIER))) {
|
||||
} else if (MATCHES(sequence(IDENTIFIER) && peek(SEPARATOR_SCOPE_RESOLUTION))) {
|
||||
std::vector<std::string> path;
|
||||
return this->parseScopeResolution(path);
|
||||
}
|
||||
else if (MATCHES(sequence(IDENTIFIER))) {
|
||||
std::vector<std::string> path;
|
||||
return this->parseRValue(path);
|
||||
}
|
||||
@ -297,10 +315,46 @@ namespace hex::lang {
|
||||
else if (MATCHES(sequence(IDENTIFIER))) {
|
||||
ASTNode *valueExpr;
|
||||
auto name = getValue<std::string>(-1);
|
||||
if (enumNode->getEntries().empty())
|
||||
valueExpr = new ASTNodeIntegerLiteral(0, underlyingType->getType());
|
||||
if (enumNode->getEntries().empty()) {
|
||||
auto type = underlyingType->getType();
|
||||
|
||||
switch (type) {
|
||||
case Token::ValueType::Signed8Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, s8(0) });
|
||||
break;
|
||||
case Token::ValueType::Unsigned8Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, u8(0) });
|
||||
break;
|
||||
case Token::ValueType::Signed16Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, s16(0) });
|
||||
break;
|
||||
case Token::ValueType::Unsigned16Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, u16(0) });
|
||||
break;
|
||||
case Token::ValueType::Signed32Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, s32(0) });
|
||||
break;
|
||||
case Token::ValueType::Unsigned32Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, u32(0) });
|
||||
break;
|
||||
case Token::ValueType::Signed64Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, s64(0) });
|
||||
break;
|
||||
case Token::ValueType::Unsigned64Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, u64(0) });
|
||||
break;
|
||||
case Token::ValueType::Signed128Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, s128(0) });
|
||||
break;
|
||||
case Token::ValueType::Unsigned128Bit:
|
||||
valueExpr = new ASTNodeIntegerLiteral({ type, u128(0) });
|
||||
break;
|
||||
default:
|
||||
throwParseError("invalid enum underlying type", -1);
|
||||
}
|
||||
}
|
||||
else
|
||||
valueExpr = new ASTNodeNumericExpression(enumNode->getEntries().back().second, new ASTNodeIntegerLiteral(1, Token::ValueType::Signed128Bit), Token::Operator::Plus);
|
||||
valueExpr = new ASTNodeNumericExpression(enumNode->getEntries().back().second, new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, s32(1) }), Token::Operator::Plus);
|
||||
|
||||
enumNode->addEntry(name, valueExpr);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user