2020-11-17 02:31:51 +01:00
|
|
|
#include "lang/lexer.hpp"
|
2020-11-10 15:26:38 +01:00
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include <functional>
|
2020-12-18 21:44:13 +01:00
|
|
|
#include <optional>
|
2020-11-10 15:26:38 +01:00
|
|
|
|
|
|
|
namespace hex::lang {
|
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
// Expands to a three-element argument list: the token category Token::Type::<type>, the
// enumerator Token::<type>::<value> and `lineNumber`. It is used as the argument list of
// tokens.emplace_back(...) and requires a variable named `lineNumber` at the expansion site.
#define TOKEN(type, value) Token::Type::type, Token::type::value, lineNumber
|
|
|
|
// Same as TOKEN, but `value` is forwarded unchanged as the token's payload (for example an
// identifier string or an integer) instead of being looked up as an enumerator.
// Also requires a variable named `lineNumber` at the expansion site.
#define VALUE_TOKEN(type, value) Token::Type::type, value, lineNumber
|
|
|
|
|
2020-11-10 15:26:38 +01:00
|
|
|
// Default construction performs no work of its own.
Lexer::Lexer() = default;
|
|
|
|
|
|
|
|
/**
 * Collects a run of characters from the start of a null-terminated string.
 *
 * The first character is always taken without consulting `predicate` (unless the string is
 * empty). Every following character is taken as long as `predicate` accepts it. Note that
 * `predicate` is also invoked on the terminating null character when the run reaches it.
 *
 * @param characters null-terminated input to read from
 * @param predicate  decides whether the character following the current run belongs to it
 * @return the collected run; empty if `characters` points at the terminator
 */
std::string matchTillInvalid(const char* characters, std::function<bool(char)> predicate) {
    std::string matched;

    if (*characters == 0x00)
        return matched;

    do {
        matched.push_back(*characters);
        ++characters;
    } while (predicate(*characters) && *characters != 0x00);

    return matched;
}
|
|
|
|
|
|
|
|
std::optional<u64> parseInt(std::string_view string) {
|
|
|
|
u64 integer = 0;
|
2020-11-11 10:47:02 +01:00
|
|
|
u8 base;
|
2020-11-10 15:26:38 +01:00
|
|
|
|
|
|
|
std::string_view numberData;
|
|
|
|
|
|
|
|
if (string.starts_with("0x")) {
|
|
|
|
numberData = string.substr(2);
|
|
|
|
base = 16;
|
|
|
|
|
|
|
|
if (numberData.find_first_not_of("0123456789ABCDEFabcdef") != std::string_view::npos)
|
|
|
|
return { };
|
|
|
|
} else if (string.starts_with("0b")) {
|
|
|
|
numberData = string.substr(2);
|
|
|
|
base = 2;
|
|
|
|
|
|
|
|
if (numberData.find_first_not_of("01") != std::string_view::npos)
|
|
|
|
return { };
|
|
|
|
} else if (isdigit(string[0])) {
|
|
|
|
numberData = string;
|
|
|
|
base = 10;
|
|
|
|
|
|
|
|
if (numberData.find_first_not_of("0123456789") != std::string_view::npos)
|
|
|
|
return { };
|
|
|
|
} else return { };
|
|
|
|
|
|
|
|
if (numberData.length() == 0)
|
|
|
|
return { };
|
|
|
|
|
|
|
|
for (const char& c : numberData) {
|
|
|
|
integer *= base;
|
|
|
|
|
|
|
|
if (isdigit(c))
|
|
|
|
integer += (c - '0');
|
|
|
|
else if (c >= 'A' && c <= 'F')
|
2020-11-10 16:24:48 +01:00
|
|
|
integer += 10 + (c - 'A');
|
2020-11-10 15:26:38 +01:00
|
|
|
else if (c >= 'a' && c <= 'f')
|
2020-11-10 16:24:48 +01:00
|
|
|
integer += 10 + (c - 'a');
|
2020-11-10 15:26:38 +01:00
|
|
|
else return { };
|
|
|
|
}
|
|
|
|
|
|
|
|
return integer;
|
|
|
|
}
|
|
|
|
|
2020-12-22 18:10:01 +01:00
|
|
|
std::optional<std::vector<Token>> Lexer::lex(const std::string& code) {
|
2020-11-10 15:26:38 +01:00
|
|
|
std::vector<Token> tokens;
|
|
|
|
u32 offset = 0;
|
|
|
|
|
2020-11-27 21:20:23 +01:00
|
|
|
u32 lineNumber = 1;
|
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
try {
|
2020-11-14 14:40:21 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
while (offset < code.length()) {
|
|
|
|
const char& c = code[offset];
|
2020-11-13 14:35:52 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
if (c == 0x00)
|
|
|
|
break;
|
2020-11-13 14:35:52 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
if (std::isblank(c) || std::isspace(c)) {
|
|
|
|
if (code[offset] == '\n') lineNumber++;
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == ';') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, EndOfExpression));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '(') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, RoundBracketOpen));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == ')') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, RoundBracketClose));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '{') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, CurlyBracketOpen));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '}') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, CurlyBracketClose));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '[') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, SquareBracketOpen));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == ']') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, SquareBracketClose));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == ',') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, Comma));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '.') {
|
|
|
|
tokens.emplace_back(TOKEN(Separator, Dot));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '@') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, AtDeclaration));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '=') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, Assignment));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == ':') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, Inherit));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '+') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, Plus));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '-') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, Minus));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '*') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, Star));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '/') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, Slash));
|
|
|
|
offset += 1;
|
|
|
|
} else if (offset + 1 <= code.length() && code[offset] == '<' && code[offset + 1] == '<') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, ShiftLeft));
|
2020-11-13 14:35:52 +01:00
|
|
|
offset += 2;
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
} else if (offset + 1 <= code.length() && code[offset] == '>' && code[offset + 1] == '>') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, ShiftRight));
|
|
|
|
offset += 2;
|
|
|
|
} else if (c == '|') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, BitOr));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '&') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, BitAnd));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '^') {
|
|
|
|
tokens.emplace_back(TOKEN(Operator, BitXor));
|
|
|
|
offset += 1;
|
|
|
|
} else if (c == '\'') {
|
|
|
|
offset += 1;
|
2020-11-13 14:35:52 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
if (offset >= code.length())
|
|
|
|
throwLexerError("invalid character literal", lineNumber);
|
2020-11-10 15:26:38 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
char character = code[offset];
|
2020-11-16 22:54:54 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
if (character == '\\') {
|
|
|
|
offset += 1;
|
2020-11-27 14:18:28 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
if (offset >= code.length())
|
|
|
|
throwLexerError("invalid character literal", lineNumber);
|
2020-11-27 14:18:28 +01:00
|
|
|
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
if (code[offset] != '\\' && code[offset] != '\'')
|
|
|
|
throwLexerError("invalid escape sequence", lineNumber);
|
|
|
|
|
|
|
|
|
|
|
|
character = code[offset];
|
|
|
|
} else {
|
|
|
|
if (code[offset] == '\\' || code[offset] == '\'' || character == '\n' || character == '\r')
|
|
|
|
throwLexerError("invalid character literal", lineNumber);
|
2020-11-27 14:18:28 +01:00
|
|
|
|
2020-11-27 21:20:23 +01:00
|
|
|
}
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
|
|
|
|
offset += 1;
|
|
|
|
|
|
|
|
if (offset >= code.length() || code[offset] != '\'')
|
|
|
|
throwLexerError("missing terminating ' after character literal", lineNumber);
|
|
|
|
|
|
|
|
tokens.emplace_back(VALUE_TOKEN(Integer, character));
|
|
|
|
offset += 1;
|
|
|
|
|
|
|
|
} else if (std::isalpha(c)) {
|
|
|
|
std::string identifier = matchTillInvalid(&code[offset], [](char c) -> bool { return std::isalnum(c) || c == '_'; });
|
|
|
|
|
|
|
|
// Check for reserved keywords
|
|
|
|
|
|
|
|
if (identifier == "struct")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, Struct));
|
|
|
|
else if (identifier == "union")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, Union));
|
|
|
|
else if (identifier == "using")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, Using));
|
|
|
|
else if (identifier == "enum")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, Enum));
|
|
|
|
else if (identifier == "bitfield")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, Bitfield));
|
|
|
|
else if (identifier == "be")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, BigEndian));
|
|
|
|
else if (identifier == "le")
|
|
|
|
tokens.emplace_back(TOKEN(Keyword, LittleEndian));
|
|
|
|
|
|
|
|
// Check for built-in types
|
|
|
|
else if (identifier == "u8")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Unsigned8Bit));
|
|
|
|
else if (identifier == "s8")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Signed8Bit));
|
|
|
|
else if (identifier == "u16")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Unsigned16Bit));
|
|
|
|
else if (identifier == "s16")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Signed16Bit));
|
|
|
|
else if (identifier == "u32")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Unsigned32Bit));
|
|
|
|
else if (identifier == "s32")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Signed32Bit));
|
|
|
|
else if (identifier == "u64")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Unsigned64Bit));
|
|
|
|
else if (identifier == "s64")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Signed64Bit));
|
|
|
|
else if (identifier == "u128")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Unsigned128Bit));
|
|
|
|
else if (identifier == "s128")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Signed128Bit));
|
|
|
|
else if (identifier == "float")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Float));
|
|
|
|
else if (identifier == "double")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Double));
|
|
|
|
else if (identifier == "char")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Character));
|
|
|
|
else if (identifier == "padding")
|
|
|
|
tokens.emplace_back(TOKEN(ValueType, Padding));
|
|
|
|
|
|
|
|
// If it's not a keyword and a builtin type, it has to be an identifier
|
|
|
|
|
|
|
|
else
|
|
|
|
tokens.emplace_back(VALUE_TOKEN(Identifier, identifier));
|
|
|
|
|
|
|
|
offset += identifier.length();
|
|
|
|
} else if (std::isdigit(c)) {
|
|
|
|
char *end = nullptr;
|
|
|
|
std::strtoull(&code[offset], &end, 0);
|
|
|
|
|
|
|
|
auto integer = parseInt(std::string_view(&code[offset], end - &code[offset]));
|
|
|
|
|
|
|
|
if (!integer.has_value())
|
|
|
|
throwLexerError("invalid integer literal", lineNumber);
|
|
|
|
|
|
|
|
|
|
|
|
tokens.emplace_back(VALUE_TOKEN(Integer, integer.value()));
|
|
|
|
offset += (end - &code[offset]);
|
|
|
|
} else
|
|
|
|
throwLexerError("unknown token", lineNumber);
|
|
|
|
|
2020-11-27 21:20:23 +01:00
|
|
|
}
|
Pattern Language rewrite (#111)
* Initial parser rewrite effort
Lexer and Token cleanup, Parser started over
* Greatly improved parser syntax
* Reimplemented using declarations and variable placement parsing
* Added back unions and structs
* Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^)
* Code style improvement
* Implemented arrays and fixed memory issues
* Fixed more memory issues in parser, reimplemented validator, evaluator and patterns
* Fixed builtin types, arrays and reimplemented strings
* Improved error messages
* Made character a distinct type, used for chars and strings
* Implemented padding, fixed arrays
* Added bitfields
* Added rvalue parsing, no evaluating yet
* Added .idea folder to gitignore
* Fixed build on MacOS
* Added custom implementation of integral concept if not available
* Rebased onto master
* Fixed array variable decl crash
* Added rvalues and dot syntax
* Lower case all pattern language error messages
* Fixed typo in variable name
* Fixed bug where preprocessor would not ignore commented out directives
* Reimplemented pointers
* Fixed rebase issues
2021-01-02 20:27:11 +01:00
|
|
|
|
|
|
|
tokens.emplace_back(TOKEN(Separator, EndOfProgram));
|
|
|
|
} catch (LexerError &e) {
|
|
|
|
this->m_error = e;
|
|
|
|
return { };
|
2020-11-10 15:26:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-12-22 18:10:01 +01:00
|
|
|
return tokens;
|
2020-11-10 15:26:38 +01:00
|
|
|
}
|
|
|
|
}
|