2020-11-10 15:26:38 +01:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <hex.hpp>
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
|
2020-11-10 15:26:38 +01:00
|
|
|
#include "token.hpp"
|
|
|
|
#include "ast_node.hpp"
|
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
#include "helpers/utils.hpp"
|
|
|
|
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <stdexcept>
|
|
|
|
#include <utility>
|
2020-11-10 15:26:38 +01:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace hex::lang {
|
|
|
|
|
|
|
|
class Parser {
|
|
|
|
public:
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
// Defaulted constructor: all members are default-constructed.
Parser() = default;
|
|
|
|
// Defaulted destructor.
// NOTE(review): m_types stores raw ASTNode* values; whether those nodes are owned
// and released somewhere else is not visible in this header — confirm.
~Parser() = default;
|
2020-11-10 15:26:38 +01:00
|
|
|
|
2020-11-27 21:20:23 +01:00
|
|
|
// Read-only iterator into a token vector; the type of the parser's cursor members.
using TokenIter = std::vector<Token>::const_iterator;
|
|
|
|
|
2020-12-22 18:10:01 +01:00
|
|
|
// Parses `tokens` and returns the resulting AST nodes. Defined outside this header.
// NOTE(review): an empty optional presumably signals a parse failure whose details
// are then available through getError() — confirm against the implementation.
std::optional<std::vector<ASTNode*>> parse(const std::vector<Token> &tokens);
|
2020-11-10 15:26:38 +01:00
|
|
|
|
2020-11-27 21:20:23 +01:00
|
|
|
const std::pair<u32, std::string>& getError() { return this->m_error; }
|
|
|
|
|
|
|
|
private:
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
// Error record: (line number, message). This is the type thrown by throwParseError().
using ParseError = std::pair<u32, std::string>;
|
|
|
|
|
|
|
|
// Error handed out by getError().
// NOTE(review): the assignment is not visible in this header — presumably parse()
// stores a caught ParseError here; confirm.
ParseError m_error;
|
|
|
|
// Cursor into the token stream; the accessor helpers index tokens relative to it.
TokenIter m_curr;
|
|
|
|
// Cursor position saved by begin(); sequence() and variant() rewind m_curr to it
// when a match fails.
TokenIter m_originalPosition;
|
|
|
|
|
|
|
|
// String-keyed table of AST nodes.
// NOTE(review): not referenced anywhere in this header — presumably the registry of
// declared type names; usage and node ownership should be confirmed in the .cpp.
std::unordered_map<std::string, ASTNode*> m_types;
|
|
|
|
// Positions of the tokens consumed by optional() since the last begin();
// peekOptional() reads them back by index.
std::vector<TokenIter> m_matchedOptionals;
|
|
|
|
|
|
|
|
// Line number recorded on the token `index` positions away from the cursor
// (negative offsets address tokens before the cursor).
u32 getLineNumber(s32 index) const {
    const Token &token = this->m_curr[index];
    return token.lineNumber;
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
const T& getValue(s32 index) const {
|
|
|
|
auto value = std::get_if<T>(&this->m_curr[index].value);
|
|
|
|
|
|
|
|
if (value == nullptr)
|
|
|
|
throwParseError("failed to decode token. Invalid type.", getLineNumber(index));
|
|
|
|
|
|
|
|
return *value;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Type tag of the token `index` positions away from the cursor.
Token::Type getType(s32 index) const {
    const auto position = this->m_curr + index;
    return position->type;
}
|
|
|
|
|
|
|
|
// Expression parsing. Only the declarations are visible in this header; the
// definitions live elsewhere.
// NOTE(review): the names suggest one function per operator precedence level
// (factor, multiplicative, additive, shift, binary and / xor / or) with
// parseMathematicalExpression() as the entry point — confirm the call order
// against the implementation.
ASTNode* parseRValue(std::vector<std::string> &path);
|
|
|
|
ASTNode* parseFactor();
|
|
|
|
ASTNode* parseMultiplicativeExpression();
|
|
|
|
ASTNode* parseAdditiveExpression();
|
|
|
|
ASTNode* parseShiftExpression();
|
|
|
|
ASTNode* parseBinaryAndExpression();
|
|
|
|
ASTNode* parseBinaryXorExpression();
|
|
|
|
ASTNode* parseBinaryOrExpression();
|
|
|
|
ASTNode* parseMathematicalExpression();
|
|
|
|
|
|
|
|
// Type, declaration and statement parsing. Only the declarations are visible in
// this header; the definitions live elsewhere.
// parseStatement() is the function parseTillToken() calls repeatedly to collect nodes.
// NOTE(review): each function presumably returns a newly allocated node owned by
// the caller (parseTillToken() deletes the nodes it collected when parsing is
// aborted) — confirm against the implementation.
ASTNode* parseType(s32 startIndex);
|
|
|
|
ASTNode* parseUsingDeclaration();
|
|
|
|
ASTNode* parsePadding();
|
|
|
|
ASTNode* parseMemberVariable();
|
|
|
|
ASTNode* parseMemberArrayVariable();
|
|
|
|
ASTNode* parseMemberPointerVariable();
|
|
|
|
ASTNode* parseStruct();
|
|
|
|
ASTNode* parseUnion();
|
|
|
|
ASTNode* parseEnum();
|
|
|
|
ASTNode* parseBitfield();
|
|
|
|
ASTNode* parseVariablePlacement();
|
|
|
|
ASTNode* parseArrayVariablePlacement();
|
|
|
|
ASTNode* parsePointerVariablePlacement();
|
|
|
|
ASTNode* parseStatement();
|
|
|
|
|
|
|
|
// Collects statements until the cursor sits on the token identified by
// (`endTokenType`, `value`), then steps past that token and returns the nodes.
//
// If parseStatement() throws, the nodes gathered so far are deleted by the scope
// guard before the exception leaves this function.
// NOTE(review): nothing here checks for the end of the token vector; the loop relies
// on the terminating token being present — confirm the lexer guarantees it.
std::vector<ASTNode*> parseTillToken(Token::Type endTokenType, const auto value) {
    std::vector<ASTNode*> statements;
    ScopeExit cleanup([&]{ for (auto &node : statements) delete node; });

    // True once the cursor is on the terminating token.
    const auto reachedEnd = [&]() -> bool {
        if (this->m_curr->type != endTokenType)
            return false;

        return !((*this->m_curr) != value);
    };

    while (!reachedEnd())
        statements.push_back(parseStatement());

    // Consume the terminating token itself.
    ++this->m_curr;

    // Success: the caller takes over the nodes, so disarm the cleanup.
    cleanup.release();

    return statements;
}
|
|
|
|
|
|
|
|
[[noreturn]] void throwParseError(std::string_view error, s32 token = -1) const {
|
|
|
|
throw ParseError(this->m_curr[token].lineNumber, "Parser: " + std::string(error));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Token consuming */
|
|
|
|
|
|
|
|
// Starts a new match attempt: forgets the optionals recorded by the previous attempt
// and remembers the current cursor so a failed match can rewind to it.
// Always returns true.
bool begin() {
    this->m_matchedOptionals.clear();
    this->m_originalPosition = this->m_curr;

    return true;
}
|
|
|
|
|
|
|
|
bool sequence() {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool sequence(Token::Type type, auto value, auto ... args) {
|
|
|
|
if (!peek(type, value)) {
|
|
|
|
this->m_curr = this->m_originalPosition;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
this->m_curr++;
|
|
|
|
|
|
|
|
if (!sequence(args...)) {
|
|
|
|
this->m_curr = this->m_originalPosition;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Matches a single token that is either (type1, value1) or (type2, value2).
// On a match the token is consumed and true is returned; otherwise the cursor is
// rewound to the position saved by begin() and false is returned.
bool variant(Token::Type type1, auto value1, Token::Type type2, auto value2) {
    const bool matched = peek(type1, value1) || peek(type2, value2);

    if (!matched) {
        this->m_curr = this->m_originalPosition;
        return false;
    }

    ++this->m_curr;
    return true;
}
|
|
|
|
|
|
|
|
// Consumes the current token if it is (type, value) and records its position in
// m_matchedOptionals; leaves the cursor untouched otherwise.
// Always returns true, whether or not the token was present.
bool optional(Token::Type type, auto value) {
    if (!peek(type, value))
        return true;

    this->m_matchedOptionals.push_back(this->m_curr);
    ++this->m_curr;

    return true;
}
|
|
|
|
|
|
|
|
// Checks, without consuming anything, whether the token at offset `index` from the
// cursor has the given type and compares equal to `value`.
bool peek(Token::Type type, auto value, s32 index = 0) {
    const auto &token = this->m_curr[index];

    if (token.type != type)
        return false;

    return token == value;
}
|
|
|
|
|
|
|
|
// Checks whether the `index`-th token recorded by optional() is (type, value).
// Returns false when fewer than `index + 1` optionals have been recorded.
bool peekOptional(Token::Type type, auto value, u32 index = 0) {
    if (index < this->m_matchedOptionals.size()) {
        // peek() addresses tokens relative to the cursor, so turn the stored
        // position into an offset from m_curr.
        const auto offset = std::distance(this->m_curr, this->m_matchedOptionals[index]);
        return peek(type, value, offset);
    }

    return false;
}
|
|
|
|
|
2020-11-10 15:26:38 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|