1
0
mirror of synced 2024-11-18 12:57:12 +01:00
ImHex/include/lang/parser.hpp
WerWolv 78ef07cf0f
Pattern Language rewrite (#111)
* Initial parser rewrite effort

Lexer and Token cleanup, Parser started over

* Greatly improved parser syntax

* Reimplemented using declarations and variable placement parsing

* Added back unions and structs

* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)

* Code style improvement

* Implemented arrays and fixed memory issues

* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns

* Fixed builtin types, arrays and reimplemented strings

* Improved error messages

* Made character a distinct type, used for chars and strings

* Implemented padding, fixed arrays

* Added bitfields

* Added rvalue parsing, no evaluating yet

* Added .idea folder to gitignore

* Fixed build on MacOS

* Added custom implementation of integral concept if not available

* Rebased onto master

* Fixed array variable decl crash

* Added rvalues and dot syntax

* Lower case all pattern language error messages

* Fixed typo in variable name

* Fixed bug where preprocessor would not ignore commented out directives

* Reimplemented pointers

* Fixed rebase issues
2021-01-02 20:27:11 +01:00

163 lines
4.6 KiB
C++

#pragma once
#include <hex.hpp>
#include "token.hpp"
#include "ast_node.hpp"
#include "helpers/utils.hpp"
#include <unordered_map>
#include <stdexcept>
#include <utility>
#include <vector>
namespace hex::lang {
class Parser {
public:
Parser() = default;
~Parser() = default;
using TokenIter = std::vector<Token>::const_iterator;
std::optional<std::vector<ASTNode*>> parse(const std::vector<Token> &tokens);
const std::pair<u32, std::string>& getError() { return this->m_error; }
private:
using ParseError = std::pair<u32, std::string>;
ParseError m_error;
TokenIter m_curr;
TokenIter m_originalPosition;
std::unordered_map<std::string, ASTNode*> m_types;
std::vector<TokenIter> m_matchedOptionals;
u32 getLineNumber(s32 index) const {
return this->m_curr[index].lineNumber;
}
template<typename T>
const T& getValue(s32 index) const {
auto value = std::get_if<T>(&this->m_curr[index].value);
if (value == nullptr)
throwParseError("failed to decode token. Invalid type.", getLineNumber(index));
return *value;
}
Token::Type getType(s32 index) const {
return this->m_curr[index].type;
}
ASTNode* parseRValue(std::vector<std::string> &path);
ASTNode* parseFactor();
ASTNode* parseMultiplicativeExpression();
ASTNode* parseAdditiveExpression();
ASTNode* parseShiftExpression();
ASTNode* parseBinaryAndExpression();
ASTNode* parseBinaryXorExpression();
ASTNode* parseBinaryOrExpression();
ASTNode* parseMathematicalExpression();
ASTNode* parseType(s32 startIndex);
ASTNode* parseUsingDeclaration();
ASTNode* parsePadding();
ASTNode* parseMemberVariable();
ASTNode* parseMemberArrayVariable();
ASTNode* parseMemberPointerVariable();
ASTNode* parseStruct();
ASTNode* parseUnion();
ASTNode* parseEnum();
ASTNode* parseBitfield();
ASTNode* parseVariablePlacement();
ASTNode* parseArrayVariablePlacement();
ASTNode* parsePointerVariablePlacement();
ASTNode* parseStatement();
std::vector<ASTNode*> parseTillToken(Token::Type endTokenType, const auto value) {
std::vector<ASTNode*> program;
ScopeExit guard([&]{ for (auto &node : program) delete node; });
while (this->m_curr->type != endTokenType || (*this->m_curr) != value) {
program.push_back(parseStatement());
}
this->m_curr++;
guard.release();
return program;
}
[[noreturn]] void throwParseError(std::string_view error, s32 token = -1) const {
throw ParseError(this->m_curr[token].lineNumber, "Parser: " + std::string(error));
}
/* Token consuming */
bool begin() {
this->m_originalPosition = this->m_curr;
this->m_matchedOptionals.clear();
return true;
}
bool sequence() {
return true;
}
bool sequence(Token::Type type, auto value, auto ... args) {
if (!peek(type, value)) {
this->m_curr = this->m_originalPosition;
return false;
}
this->m_curr++;
if (!sequence(args...)) {
this->m_curr = this->m_originalPosition;
return false;
}
return true;
}
bool variant(Token::Type type1, auto value1, Token::Type type2, auto value2) {
if (!peek(type1, value1)) {
if (!peek(type2, value2)) {
this->m_curr = this->m_originalPosition;
return false;
}
}
this->m_curr++;
return true;
}
bool optional(Token::Type type, auto value) {
if (peek(type, value)) {
this->m_matchedOptionals.push_back(this->m_curr);
this->m_curr++;
}
return true;
}
bool peek(Token::Type type, auto value, s32 index = 0) {
return this->m_curr[index].type == type && this->m_curr[index] == value;
}
bool peekOptional(Token::Type type, auto value, u32 index = 0) {
if (index >= this->m_matchedOptionals.size())
return false;
return peek(type, value, std::distance(this->m_curr, this->m_matchedOptionals[index]));
}
};
}