From ceee311efa8baf4dbf88e2c8955386ee33908f8e Mon Sep 17 00:00:00 2001 From: WerWolv Date: Tue, 5 Jan 2021 14:42:08 +0100 Subject: [PATCH] Added support for more literal types and scope resolution operator parsing --- include/lang/ast_node.hpp | 29 ++++-- include/lang/parser.hpp | 1 + include/lang/pattern_data.hpp | 21 ++-- include/lang/token.hpp | 7 +- source/lang/evaluator.cpp | 190 +++++++++++++++++++++++----------- source/lang/lexer.cpp | 126 +++++++++++++++++----- source/lang/parser.cpp | 66 ++++++++++-- 7 files changed, 331 insertions(+), 109 deletions(-) diff --git a/include/lang/ast_node.hpp b/include/lang/ast_node.hpp index 24538d1ce..cedc58e99 100644 --- a/include/lang/ast_node.hpp +++ b/include/lang/ast_node.hpp @@ -26,10 +26,8 @@ namespace hex::lang { class ASTNodeIntegerLiteral : public ASTNode { public: - ASTNodeIntegerLiteral(std::variant value, Token::ValueType type) - : ASTNode(), m_value(value), m_type(type) { + ASTNodeIntegerLiteral(Token::IntegerLiteral literal) : ASTNode(), m_literal(literal) { } - } ASTNodeIntegerLiteral(const ASTNodeIntegerLiteral&) = default; ASTNode* clone() override { @@ -37,16 +35,15 @@ namespace hex::lang { } [[nodiscard]] const auto& getValue() const { - return this->m_value; + return this->m_literal.second; } [[nodiscard]] Token::ValueType getType() const { - return this->m_type; + return this->m_literal.first; } private: - std::variant m_value; - Token::ValueType m_type; + Token::IntegerLiteral m_literal; }; class ASTNodeNumericExpression : public ASTNode { @@ -351,4 +348,22 @@ namespace hex::lang { std::vector m_path; }; + class ASTNodeScopeResolution : public ASTNode { + public: + explicit ASTNodeScopeResolution(std::vector path) : ASTNode(), m_path(std::move(path)) { } + + ASTNodeScopeResolution(const ASTNodeScopeResolution&) = default; + + ASTNode* clone() override { + return new ASTNodeScopeResolution(*this); + } + + const std::vector& getPath() { + return this->m_path; + } + + private: + std::vector m_path; + }; + } \ No newline at end of file diff --git a/include/lang/parser.hpp b/include/lang/parser.hpp index 63b5d9177..4b16ba820 100644 --- a/include/lang/parser.hpp +++ b/include/lang/parser.hpp @@ -53,6 +53,7 @@ namespace hex::lang { return this->m_curr[index].type; } + ASTNode* parseScopeResolution(std::vector &path); ASTNode* parseRValue(std::vector &path); ASTNode* parseFactor(); ASTNode* parseMultiplicativeExpression(); diff --git a/include/lang/pattern_data.hpp b/include/lang/pattern_data.hpp index 791ac025c..fd6b3a88d 100644 --- a/include/lang/pattern_data.hpp +++ b/include/lang/pattern_data.hpp @@ -7,6 +7,7 @@ #include "providers/provider.hpp" #include "helpers/utils.hpp" +#include "lang/token.hpp" #include #include @@ -638,7 +639,7 @@ namespace hex::lang { class PatternDataEnum : public PatternData { public: - PatternDataEnum(u64 offset, size_t size, std::vector> enumValues, u32 color = 0) + PatternDataEnum(u64 offset, size_t size, std::vector> enumValues, u32 color = 0) : PatternData(offset, size, color), m_enumValues(std::move(enumValues)) { } PatternData* clone() override { @@ -653,12 +654,18 @@ namespace hex::lang { std::string valueString = PatternData::getTypeName() + "::"; bool foundValue = false; - for (auto &[entryValue, entryName] : this->m_enumValues) { - if (value == entryValue) { - valueString += entryName; - foundValue = true; + for (auto &[entryValueLiteral, entryName] : this->m_enumValues) { + bool matches = std::visit([&, name = entryName](auto &&entryValue) { + if (value == entryValue) { + valueString += name; + foundValue = true; + return true; + } + + return false; + }, entryValueLiteral.second); + if (matches) break; - } } if (!foundValue) @@ -694,7 +701,7 @@ namespace hex::lang { } private: - std::vector> m_enumValues; + std::vector> m_enumValues; }; class PatternDataBitfield : public PatternData { diff --git a/include/lang/token.hpp b/include/lang/token.hpp index 8ccff6b64..103a21ff6 100644 --- a/include/lang/token.hpp +++ b/include/lang/token.hpp @@ -78,11 +78,13 @@ namespace hex::lang { SquareBracketClose, Comma, Dot, + ScopeResolution, EndOfExpression, EndOfProgram }; - using ValueTypes = std::variant; + using IntegerLiteral = std::pair>; + using ValueTypes = std::variant; Token(Type type, auto value, u32 lineNumber) : type(type), value(value), lineNumber(lineNumber) { @@ -174,7 +176,7 @@ namespace hex::lang { #define KEYWORD_LE COMPONENT(Keyword, LittleEndian) #define KEYWORD_BE COMPONENT(Keyword, BigEndian) -#define INTEGER hex::lang::Token::Type::Integer, 0xFFFF'FFFF'FFFF'FFFF +#define INTEGER hex::lang::Token::Type::Integer, hex::lang::Token::IntegerLiteral({ hex::lang::Token::ValueType::Any, 0xFFFF'FFFF'FFFF'FFFF }) #define IDENTIFIER hex::lang::Token::Type::Identifier, "" #define OPERATOR_AT COMPONENT(Operator, AtDeclaration) @@ -206,5 +208,6 @@ namespace hex::lang { #define SEPARATOR_SQUAREBRACKETCLOSE COMPONENT(Separator, SquareBracketClose) #define SEPARATOR_COMMA COMPONENT(Separator, Comma) #define SEPARATOR_DOT COMPONENT(Separator, Dot) +#define SEPARATOR_SCOPE_RESOLUTION COMPONENT(Separator, ScopeResolution) #define SEPARATOR_ENDOFEXPRESSION COMPONENT(Separator, EndOfExpression) #define SEPARATOR_ENDOFPROGRAM COMPONENT(Separator, EndOfProgram) \ No newline at end of file diff --git a/source/lang/evaluator.cpp b/source/lang/evaluator.cpp index 579894557..ce2a3327e 100644 --- a/source/lang/evaluator.cpp +++ b/source/lang/evaluator.cpp @@ -38,67 +38,116 @@ namespace hex::lang { } if (auto unsignedPattern = dynamic_cast(currPattern); unsignedPattern != nullptr) { - s128 value = 0; - this->m_provider->read(unsignedPattern->getOffset(), &value, unsignedPattern->getSize()); - return new ASTNodeIntegerLiteral(value, Token::ValueType::Signed128Bit); + u8 value[unsignedPattern->getSize()]; + this->m_provider->read(unsignedPattern->getOffset(), value, unsignedPattern->getSize()); + + switch (unsignedPattern->getSize()) { + case 1: return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned8Bit, *reinterpret_cast(value) }); + case 2: return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned16Bit, *reinterpret_cast(value) }); + case 4: return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned32Bit, *reinterpret_cast(value) }); + case 8: return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned64Bit, *reinterpret_cast(value) }); + case 16: return new ASTNodeIntegerLiteral({ Token::ValueType::Unsigned128Bit, *reinterpret_cast(value) }); + default: throwEvaluateError("invalid rvalue size", node->getLineNumber()); + } } else if (auto signedPattern = dynamic_cast(currPattern); signedPattern != nullptr) { - s128 value = 0; - this->m_provider->read(signedPattern->getOffset(), &value, signedPattern->getSize()); - return new ASTNodeIntegerLiteral(signExtend(value, signedPattern->getSize() * 8, 128), Token::ValueType::Signed128Bit); + u8 value[unsignedPattern->getSize()]; + this->m_provider->read(signedPattern->getOffset(), value, signedPattern->getSize()); + + switch (unsignedPattern->getSize()) { + case 1: return new ASTNodeIntegerLiteral({ Token::ValueType::Signed8Bit, *reinterpret_cast(value) }); + case 2: return new ASTNodeIntegerLiteral({ Token::ValueType::Signed16Bit, *reinterpret_cast(value) }); + case 4: return new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, *reinterpret_cast(value) }); + case 8: return new ASTNodeIntegerLiteral({ Token::ValueType::Signed64Bit, *reinterpret_cast(value) }); + case 16: return new ASTNodeIntegerLiteral({ Token::ValueType::Signed128Bit, *reinterpret_cast(value) }); + default: throwEvaluateError("invalid rvalue size", node->getLineNumber()); + } } else throwEvaluateError("tried to use non-integer value in numeric expression", node->getLineNumber()); } +#define FLOAT_BIT_OPERATION(name) \ + auto name(std::floating_point auto left, auto right) { throw std::runtime_error(""); return 0; } \ + auto name(auto left, std::floating_point auto right) { throw std::runtime_error(""); return 0; } \ + auto name(std::floating_point auto left, std::floating_point auto right) { throw std::runtime_error(""); return 0; } \ + auto name(std::integral auto left, std::integral auto right) + + namespace { + + FLOAT_BIT_OPERATION(shiftLeft) { + return left << right; + } + + FLOAT_BIT_OPERATION(shiftRight) { + return left >> right; + } + + FLOAT_BIT_OPERATION(bitAnd) { + return left & right; + } + + FLOAT_BIT_OPERATION(bitOr) { + return left | right; + } + + FLOAT_BIT_OPERATION(bitXor) { + return left ^ right; + } + + } + ASTNodeIntegerLiteral* Evaluator::evaluateOperator(ASTNodeIntegerLiteral *left, ASTNodeIntegerLiteral *right, Token::Operator op) { - return std::visit([&](auto &&leftValue, auto &&rightValue) -> ASTNodeIntegerLiteral* { + auto newType = [&] { + #define CHECK_TYPE(type) if (left->getType() == (type) || right->getType() == (type)) return (type) + #define DEFAULT_TYPE(type) return (type) - auto newType = [&] { - #define CHECK_TYPE(type) if (left->getType() == (type) || right->getType() == (type)) return (type) - #define DEFAULT_TYPE(type) return (type) + CHECK_TYPE(Token::ValueType::Double); + CHECK_TYPE(Token::ValueType::Float); + CHECK_TYPE(Token::ValueType::Unsigned128Bit); + CHECK_TYPE(Token::ValueType::Signed128Bit); + CHECK_TYPE(Token::ValueType::Unsigned64Bit); + CHECK_TYPE(Token::ValueType::Signed64Bit); + CHECK_TYPE(Token::ValueType::Unsigned32Bit); + CHECK_TYPE(Token::ValueType::Signed32Bit); + CHECK_TYPE(Token::ValueType::Unsigned16Bit); + CHECK_TYPE(Token::ValueType::Signed16Bit); + CHECK_TYPE(Token::ValueType::Unsigned8Bit); + CHECK_TYPE(Token::ValueType::Signed8Bit); + CHECK_TYPE(Token::ValueType::Character); + DEFAULT_TYPE(Token::ValueType::Signed32Bit); - CHECK_TYPE(Token::ValueType::Double); - CHECK_TYPE(Token::ValueType::Float); - CHECK_TYPE(Token::ValueType::Unsigned128Bit); - CHECK_TYPE(Token::ValueType::Signed128Bit); - CHECK_TYPE(Token::ValueType::Unsigned64Bit); - CHECK_TYPE(Token::ValueType::Signed64Bit); - CHECK_TYPE(Token::ValueType::Unsigned32Bit); - CHECK_TYPE(Token::ValueType::Signed32Bit); - CHECK_TYPE(Token::ValueType::Unsigned16Bit); - CHECK_TYPE(Token::ValueType::Signed16Bit); - CHECK_TYPE(Token::ValueType::Unsigned8Bit); - CHECK_TYPE(Token::ValueType::Signed8Bit); - CHECK_TYPE(Token::ValueType::Character); - DEFAULT_TYPE(Token::ValueType::Signed32Bit); + #undef CHECK_TYPE + #undef DEFAULT_TYPE + }(); - #undef CHECK_TYPE - #undef DEFAULT_TYPE - }(); + try { + return std::visit([&](auto &&leftValue, auto &&rightValue) -> ASTNodeIntegerLiteral * { + switch (op) { + case Token::Operator::Plus: + return new ASTNodeIntegerLiteral({ newType, leftValue + rightValue }); + case Token::Operator::Minus: + return new ASTNodeIntegerLiteral({ newType, leftValue - rightValue }); + case Token::Operator::Star: + return new ASTNodeIntegerLiteral({ newType, leftValue * rightValue }); + case Token::Operator::Slash: + return new ASTNodeIntegerLiteral({ newType, leftValue / rightValue }); + case Token::Operator::ShiftLeft: + return new ASTNodeIntegerLiteral({ newType, shiftLeft(leftValue, rightValue) }); + case Token::Operator::ShiftRight: + return new ASTNodeIntegerLiteral({ newType, shiftRight(leftValue, rightValue) }); + case Token::Operator::BitAnd: + return new ASTNodeIntegerLiteral({ newType, bitAnd(leftValue, rightValue) }); + case Token::Operator::BitXor: + return new ASTNodeIntegerLiteral({ newType, bitXor(leftValue, rightValue) }); + case Token::Operator::BitOr: + return new ASTNodeIntegerLiteral({ newType, bitOr(leftValue, rightValue) }); + default: + throwEvaluateError("invalid operator used in mathematical expression", left->getLineNumber()); + } - switch (op) { - case Token::Operator::Plus: - return new ASTNodeIntegerLiteral(leftValue + rightValue, newType); - case Token::Operator::Minus: - return new ASTNodeIntegerLiteral(leftValue - rightValue, newType); - case Token::Operator::Star: - return new ASTNodeIntegerLiteral(leftValue * rightValue, newType); - case Token::Operator::Slash: - return new ASTNodeIntegerLiteral(leftValue / rightValue, newType); - case Token::Operator::ShiftLeft: - return new ASTNodeIntegerLiteral(leftValue << rightValue, newType); - case Token::Operator::ShiftRight: - return new ASTNodeIntegerLiteral(leftValue >> rightValue, newType); - case Token::Operator::BitAnd: - return new ASTNodeIntegerLiteral(leftValue & rightValue, newType); - case Token::Operator::BitXor: - return new ASTNodeIntegerLiteral(leftValue ^ rightValue, newType); - case Token::Operator::BitOr: - return new ASTNodeIntegerLiteral(leftValue | rightValue, newType); - default: throwEvaluateError("invalid operator used in mathematical expression", left->getLineNumber()); - - } - - }, left->getValue(), right->getValue()); + }, left->getValue(), right->getValue()); + } catch (std::runtime_error &e) { + throwEvaluateError("bitwise operations on floating point numbers are forbidden", left->getLineNumber()); + } } ASTNodeIntegerLiteral* Evaluator::evaluateMathematicalExpression(ASTNodeNumericExpression *node) { @@ -197,7 +246,7 @@ namespace hex::lang { } PatternData* Evaluator::evaluateEnum(ASTNodeEnum *node) { - std::vector> entryPatterns; + std::vector> entryPatterns; auto startOffset = this->m_currOffset; for (auto &[name, value] : node->getEntries()) { @@ -208,7 +257,7 @@ namespace hex::lang { auto valueNode = evaluateMathematicalExpression(expression); SCOPE_EXIT( delete valueNode; ); - entryPatterns.emplace_back( std::get(valueNode->getValue()), name ); + entryPatterns.push_back({{ valueNode->getType(), valueNode->getValue() }, name }); } size_t size; @@ -233,9 +282,14 @@ namespace hex::lang { auto valueNode = evaluateMathematicalExpression(expression); SCOPE_EXIT( delete valueNode; ); - auto fieldBits = std::get(valueNode->getValue()); - if (fieldBits > 64) - throwEvaluateError("bitfield entry must at most occupy 64 bits", value->getLineNumber()); + auto fieldBits = std::visit([node, type = valueNode->getType()] (auto &&value) { + if (Token::isFloatingPoint(type)) + throwEvaluateError("bitfield entry size must be an integer value", node->getLineNumber()); + return static_cast(value); + }, valueNode->getValue()); + + if (fieldBits > 64 || fieldBits <= 0) + throwEvaluateError("bitfield entry must occupy between 1 and 64 bits", value->getLineNumber()); bits += fieldBits; @@ -280,7 +334,11 @@ namespace hex::lang { auto valueNode = evaluateMathematicalExpression(offset); SCOPE_EXIT( delete valueNode; ); - this->m_currOffset = std::get(valueNode->getValue()); + this->m_currOffset = std::visit([node, type = valueNode->getType()] (auto &&value) { + if (Token::isFloatingPoint(type)) + throwEvaluateError("placement offset must be an integer value", node->getLineNumber()); + return static_cast(value); + }, valueNode->getValue()); } if (this->m_currOffset >= this->m_provider->getActualSize()) throwEvaluateError("array exceeds size of file", node->getLineNumber()); @@ -306,7 +364,11 @@ namespace hex::lang { auto valueNode = evaluateMathematicalExpression(offset); SCOPE_EXIT( delete valueNode; ); - this->m_currOffset = std::get(valueNode->getValue()); + this->m_currOffset = std::visit([node, type = valueNode->getType()] (auto &&value) { + if (Token::isFloatingPoint(type)) + throwEvaluateError("placement offset must be an integer value", node->getLineNumber()); + return static_cast(value); + }, valueNode->getValue()); } auto startOffset = this->m_currOffset; @@ -320,7 +382,11 @@ namespace hex::lang { SCOPE_EXIT( delete valueNode; ); - auto arraySize = std::get(valueNode->getValue()); + auto arraySize = std::visit([node, type = valueNode->getType()] (auto &&value) { + if (Token::isFloatingPoint(type)) + throwEvaluateError("array size must be an integer value", node->getLineNumber()); + return static_cast(value); + }, valueNode->getValue()); if (auto typeDecl = dynamic_cast(node->getType()); typeDecl != nullptr) { if (auto builtinType = dynamic_cast(typeDecl->getType()); builtinType != nullptr) { @@ -377,7 +443,11 @@ namespace hex::lang { auto valueNode = evaluateMathematicalExpression(offset); SCOPE_EXIT( delete valueNode; ); - pointerOffset = std::get(valueNode->getValue()); + pointerOffset = std::visit([node, type = valueNode->getType()] (auto &&value) { + if (Token::isFloatingPoint(type)) + throwEvaluateError("pointer offset must be an integer value", node->getLineNumber()); + return static_cast(value); + }, valueNode->getValue()); this->m_currOffset = pointerOffset; } else { pointerOffset = this->m_currOffset; diff --git a/source/lang/lexer.cpp b/source/lang/lexer.cpp index c05b3a3bb..e4e2817ec 100644 --- a/source/lang/lexer.cpp +++ b/source/lang/lexer.cpp @@ -1,8 +1,9 @@ #include "lang/lexer.hpp" -#include +#include #include #include +#include namespace hex::lang { @@ -25,48 +26,119 @@ namespace hex::lang { return ret; } - std::optional parseInt(std::string_view string) { - u64 integer = 0; + size_t getIntegerLiteralLength(std::string_view string) { + return string.find_first_not_of("0123456789ABCDEFabcdef.xUL"); + } + + std::optional parseIntegerLiteral(std::string_view string) { + Token::ValueType type = Token::ValueType::Any; + Token::IntegerLiteral result; + u8 base; - std::string_view numberData; + auto endPos = getIntegerLiteralLength(string); + std::string_view numberData = string.substr(0, endPos); - if (string.starts_with("0x")) { - numberData = string.substr(2); + if (numberData.ends_with('U')) { + type = Token::ValueType::Unsigned32Bit; + numberData.remove_suffix(1); + } else if (numberData.ends_with("UL")) { + type = Token::ValueType::Unsigned64Bit; + numberData.remove_suffix(2); + } else if (numberData.ends_with("ULL")) { + type = Token::ValueType::Unsigned128Bit; + numberData.remove_suffix(3); + } else if (numberData.ends_with("L")) { + type = Token::ValueType::Signed64Bit; + numberData.remove_suffix(1); + } else if (numberData.ends_with("LL")) { + type = Token::ValueType::Signed128Bit; + numberData.remove_suffix(2); + } else if (numberData.ends_with('F')) { + type = Token::ValueType::Float; + numberData.remove_suffix(1); + } else if (numberData.ends_with('D')) { + type = Token::ValueType::Double; + numberData.remove_suffix(1); + } + + if (numberData.starts_with("0x")) { + numberData = numberData.substr(2); base = 16; + if (Token::isFloatingPoint(type)) + return { }; + if (numberData.find_first_not_of("0123456789ABCDEFabcdef") != std::string_view::npos) return { }; - } else if (string.starts_with("0b")) { - numberData = string.substr(2); + } else if (numberData.starts_with("0b")) { + numberData = numberData.substr(2); base = 2; + if (Token::isFloatingPoint(type)) + return { }; + if (numberData.find_first_not_of("01") != std::string_view::npos) return { }; - } else if (isdigit(string[0])) { - numberData = string; + } else if (numberData.find('.') != std::string_view::npos || Token::isFloatingPoint(type)) { + base = 10; + if (type == Token::ValueType::Any) + type = Token::ValueType::Double; + + if (std::count(numberData.begin(), numberData.end(), '.') > 1 || numberData.find_first_not_of("0123456789.") != std::string_view::npos) + return { }; + + if (numberData.ends_with('.')) + return { }; + } else if (isdigit(numberData[0])) { base = 10; if (numberData.find_first_not_of("0123456789") != std::string_view::npos) return { }; } else return { }; + if (type == Token::ValueType::Any) + type = Token::ValueType::Signed32Bit; + + if (numberData.length() == 0) return { }; - for (const char& c : numberData) { + if (Token::isUnsigned(type) || Token::isSigned(type)) { + u128 integer = 0; + for (const char& c : numberData) { integer *= base; - if (isdigit(c)) - integer += (c - '0'); - else if (c >= 'A' && c <= 'F') - integer += 10 + (c - 'A'); - else if (c >= 'a' && c <= 'f') - integer += 10 + (c - 'a'); - else return { }; + if (isdigit(c)) + integer += (c - '0'); + else if (c >= 'A' && c <= 'F') + integer += 10 + (c - 'A'); + else if (c >= 'a' && c <= 'f') + integer += 10 + (c - 'a'); + else return { }; + } + + switch (type) { + case Token::ValueType::Unsigned32Bit: return {{ type, u32(integer) }}; + case Token::ValueType::Signed32Bit: return {{ type, s32(integer) }}; + case Token::ValueType::Unsigned64Bit: return {{ type, u64(integer) }}; + case Token::ValueType::Signed64Bit: return {{ type, s64(integer) }}; + case Token::ValueType::Unsigned128Bit: return {{ type, u128(integer) }}; + case Token::ValueType::Signed128Bit: return {{ type, s128(integer) }}; + default: return { }; + } + } else if (Token::isFloatingPoint(type)) { + double floatingPoint = strtod(numberData.data(), nullptr); + + switch (type) { + case Token::ValueType::Float: return {{ type, float(floatingPoint) }}; + case Token::ValueType::Double: return {{ type, double(floatingPoint) }}; + default: return { }; + } } - return integer; + + return { }; } std::optional> Lexer::lex(const std::string& code) { @@ -119,6 +191,9 @@ namespace hex::lang { } else if (c == '=') { tokens.emplace_back(TOKEN(Operator, Assignment)); offset += 1; + } else if (code.substr(offset, 2) == "::") { + tokens.emplace_back(TOKEN(Separator, ScopeResolution)); + offset += 2; } else if (c == ':') { tokens.emplace_back(TOKEN(Operator, Inherit)); offset += 1; @@ -134,10 +209,10 @@ namespace hex::lang { } else if (c == '/') { tokens.emplace_back(TOKEN(Operator, Slash)); offset += 1; - } else if (offset + 1 <= code.length() && code[offset] == '<' && code[offset + 1] == '<') { + } else if (code.substr(offset, 2) == "<<") { tokens.emplace_back(TOKEN(Operator, ShiftLeft)); offset += 2; - } else if (offset + 1 <= code.length() && code[offset] == '>' && code[offset + 1] == '>') { + } else if (code.substr(offset, 2) == ">>") { tokens.emplace_back(TOKEN(Operator, ShiftRight)); offset += 2; } else if (c == '|') { @@ -179,7 +254,7 @@ namespace hex::lang { if (offset >= code.length() || code[offset] != '\'') throwLexerError("missing terminating ' after character literal", lineNumber); - tokens.emplace_back(VALUE_TOKEN(Integer, character)); + tokens.emplace_back(VALUE_TOKEN(Integer, Token::IntegerLiteral({ Token::ValueType::Character, character }) )); offset += 1; } else if (std::isalpha(c)) { @@ -239,17 +314,14 @@ namespace hex::lang { offset += identifier.length(); } else if (std::isdigit(c)) { - char *end = nullptr; - std::strtoull(&code[offset], &end, 0); - - auto integer = parseInt(std::string_view(&code[offset], end - &code[offset])); + auto integer = parseIntegerLiteral(&code[offset]); if (!integer.has_value()) throwLexerError("invalid integer literal", lineNumber); tokens.emplace_back(VALUE_TOKEN(Integer, integer.value())); - offset += (end - &code[offset]); + offset += getIntegerLiteralLength(&code[offset]); } else throwLexerError("unknown token", lineNumber); diff --git a/source/lang/parser.cpp b/source/lang/parser.cpp index 2f31a65b6..a18c57c08 100644 --- a/source/lang/parser.cpp +++ b/source/lang/parser.cpp @@ -5,7 +5,7 @@ #define MATCHES(x) (begin() && x) -#define TO_NUMERIC_EXPRESSION(node) new ASTNodeNumericExpression((node), new ASTNodeIntegerLiteral(0, Token::ValueType::Signed128Bit), Token::Operator::Plus) +#define TO_NUMERIC_EXPRESSION(node) new ASTNodeNumericExpression((node), new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, s32(0) }), Token::Operator::Plus) // Definition syntax: // [A] : Either A or no token @@ -18,6 +18,20 @@ namespace hex::lang { /* Mathematical expressions */ + // Identifier:: + ASTNode* Parser::parseScopeResolution(std::vector &path) { + if (peek(IDENTIFIER, -1)) + path.push_back(getValue(-1)); + + if (MATCHES(sequence(SEPARATOR_SCOPE_RESOLUTION))) { + if (MATCHES(sequence(IDENTIFIER))) + return this->parseScopeResolution(path); + else + throwParseError("expected member name", -1); + } else + return TO_NUMERIC_EXPRESSION(new ASTNodeScopeResolution(path)); + } + // ASTNode* Parser::parseRValue(std::vector &path) { if (peek(IDENTIFIER, -1)) @@ -35,13 +49,17 @@ namespace hex::lang { // ASTNode* Parser::parseFactor() { if (MATCHES(sequence(INTEGER))) - return TO_NUMERIC_EXPRESSION(new ASTNodeIntegerLiteral(getValue(-1), Token::ValueType::Signed128Bit)); + return TO_NUMERIC_EXPRESSION(new ASTNodeIntegerLiteral(getValue(-1))); else if (MATCHES(sequence(SEPARATOR_ROUNDBRACKETOPEN))) { auto node = this->parseMathematicalExpression(); if (!MATCHES(sequence(SEPARATOR_ROUNDBRACKETCLOSE))) throwParseError("expected closing parenthesis"); return node; - } else if (MATCHES(sequence(IDENTIFIER))) { + } else if (MATCHES(sequence(IDENTIFIER) && peek(SEPARATOR_SCOPE_RESOLUTION))) { + std::vector path; + return this->parseScopeResolution(path); + } + else if (MATCHES(sequence(IDENTIFIER))) { std::vector path; return this->parseRValue(path); } @@ -297,10 +315,46 @@ namespace hex::lang { else if (MATCHES(sequence(IDENTIFIER))) { ASTNode *valueExpr; auto name = getValue(-1); - if (enumNode->getEntries().empty()) - valueExpr = new ASTNodeIntegerLiteral(0, underlyingType->getType()); + if (enumNode->getEntries().empty()) { + auto type = underlyingType->getType(); + + switch (type) { + case Token::ValueType::Signed8Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, s8(0) }); + break; + case Token::ValueType::Unsigned8Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, u8(0) }); + break; + case Token::ValueType::Signed16Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, s16(0) }); + break; + case Token::ValueType::Unsigned16Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, u16(0) }); + break; + case Token::ValueType::Signed32Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, s32(0) }); + break; + case Token::ValueType::Unsigned32Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, u32(0) }); + break; + case Token::ValueType::Signed64Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, s64(0) }); + break; + case Token::ValueType::Unsigned64Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, u64(0) }); + break; + case Token::ValueType::Signed128Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, s128(0) }); + break; + case Token::ValueType::Unsigned128Bit: + valueExpr = new ASTNodeIntegerLiteral({ type, u128(0) }); + break; + default: + throwParseError("invalid enum underlying type", -1); + } + } else - valueExpr = new ASTNodeNumericExpression(enumNode->getEntries().back().second, new ASTNodeIntegerLiteral(1, Token::ValueType::Signed128Bit), Token::Operator::Plus); + valueExpr = new ASTNodeNumericExpression(enumNode->getEntries().back().second, new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, s32(1) }), Token::Operator::Plus); enumNode->addEntry(name, valueExpr); }