/* zongors-reality-engine/src/parser.c -- scanner: keyword table and tokenizer. */
#include "parser.h"
#include "hashmap.h"
/* Generate the TokenMap hashmap type (150 buckets) and its helpers
 * (new_/put_/hash_TokenMap) -- see hashmap.h for the macro. */
DEFINE_HASHMAP(TokenMap, 150);
/* Global keyword -> TokenType table; populated by initTokenMap(),
 * queried by getToken(). */
TokenMap **tokenMap;
/*
 * Populate the global keyword table used by the scanner.
 *
 * Must be called once before nextToken(); getToken() consults this table
 * to distinguish reserved words from plain identifiers.
 *
 * Fix: the original inserted "ge" -> TOKEN_GE twice (once among the
 * comparison keywords and again after "gt"); the redundant second
 * insertion has been removed.
 */
void initTokenMap() {
  tokenMap = new_TokenMap();
  put_TokenMap(tokenMap, "fn", TOKEN_FN);
  put_TokenMap(tokenMap, "to", TOKEN_TO);
  put_TokenMap(tokenMap, "in", TOKEN_IN);
  put_TokenMap(tokenMap, "is", TOKEN_IS);
  put_TokenMap(tokenMap, "as", TOKEN_AS);
  put_TokenMap(tokenMap, "use", TOKEN_USE);
  put_TokenMap(tokenMap, "if", TOKEN_IF);
  put_TokenMap(tokenMap, "else", TOKEN_ELSE);
  put_TokenMap(tokenMap, "default", TOKEN_DEFAULT);
  put_TokenMap(tokenMap, "for", TOKEN_FOR);
  put_TokenMap(tokenMap, "try", TOKEN_TRY);
  put_TokenMap(tokenMap, "catch", TOKEN_CATCH);
  put_TokenMap(tokenMap, "while", TOKEN_WHILE);
  put_TokenMap(tokenMap, "do", TOKEN_DO);
  put_TokenMap(tokenMap, "exit", TOKEN_EXIT);
  put_TokenMap(tokenMap, "switch", TOKEN_SWITCH);
  put_TokenMap(tokenMap, "return", TOKEN_RETURN);
  put_TokenMap(tokenMap, "const", TOKEN_CONST);
  put_TokenMap(tokenMap, "type", TOKEN_TYPE);
  put_TokenMap(tokenMap, "this", TOKEN_THIS);
  put_TokenMap(tokenMap, "yield", TOKEN_YIELD);
  put_TokenMap(tokenMap, "case", TOKEN_CASE);
  put_TokenMap(tokenMap, "assert", TOKEN_ASSERT);
  put_TokenMap(tokenMap, "break", TOKEN_BREAK);
  put_TokenMap(tokenMap, "let", TOKEN_LET);
  put_TokenMap(tokenMap, "print", TOKEN_PRINT);
  put_TokenMap(tokenMap, "nil", TOKEN_NULL); /* "nil" spells the null literal */
  put_TokenMap(tokenMap, "and", TOKEN_AND);
  put_TokenMap(tokenMap, "or", TOKEN_OR);
  put_TokenMap(tokenMap, "xor", TOKEN_XOR);
  put_TokenMap(tokenMap, "mod", TOKEN_MOD);
  put_TokenMap(tokenMap, "eq", TOKEN_EQ);
  put_TokenMap(tokenMap, "ge", TOKEN_GE);
  put_TokenMap(tokenMap, "lt", TOKEN_LT);
  put_TokenMap(tokenMap, "le", TOKEN_LE);
  put_TokenMap(tokenMap, "ne", TOKEN_NE);
  put_TokenMap(tokenMap, "gt", TOKEN_GT);
  put_TokenMap(tokenMap, "srl", TOKEN_SHIFTRIGHT);
  put_TokenMap(tokenMap, "sll", TOKEN_SHIFTLEFT);
}
/* Scanner state: [start, current) spans the lexeme being scanned. */
typedef struct Tokenizer Tokenizer;
struct Tokenizer {
char *start;   /* first character of the current lexeme */
char *current; /* next character to be consumed */
int32_t line;  /* 1-based line number, for diagnostics */
};
/* Single global scanner instance; set up by initTokenizer(). */
Tokenizer tokenizer;
void initTokenizer(char *src) {
tokenizer.start = src;
tokenizer.current = src;
tokenizer.line = 1;
}
/* Identifier characters: ASCII letters, '_', plus the language's extra
 * identifier marks '\'' and '?'. */
static bool isAlpha(char c) {
  if (c == '_' || c == '\'' || c == '?')
    return true;
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}
static bool isDigit(char c) { return (c >= '0' && c <= '9') || c == '-'; }
static bool isAtEnd() { return *tokenizer.current == '\0'; }
/* Build a token of the given type spanning [tokenizer.start, tokenizer.current). */
static Token makeToken(TokenType type) {
  Token token = {
      .type = type,
      .start = tokenizer.start,
      .length = (int32_t)(tokenizer.current - tokenizer.start),
      .line = tokenizer.line,
  };
  return token;
}
static Token errorToken(char *msg) {
Token token;
token.type = TOKEN_ERROR;
token.start = msg;
token.length = (int32_t)strlen(msg);
token.line = tokenizer.line;
return token;
}
/* Consume the current character and return it. */
static char advance() {
  char consumed = *tokenizer.current;
  tokenizer.current++;
  return consumed;
}
static char peek() { return *tokenizer.current; }
/* One character of lookahead past the current one; '\0' at end of input. */
static char peekNext() { return isAtEnd() ? '\0' : tokenizer.current[1]; }
/* Skip spaces, tabs, carriage returns, newlines (counting lines), and
 * '!'-to-end-of-line comments. Stops at the first significant character. */
static void skipWhitespace() {
  while (true) {
    char c = peek();
    if (c == ' ' || c == '\r' || c == '\t') {
      advance();
    } else if (c == '\n') {
      tokenizer.line++;
      advance();
    } else if (c == '!') {
      /* '!' starts a line comment; leave the '\n' for the next pass
       * so the line counter above sees it. */
      while (peek() != '\n' && !isAtEnd())
        advance();
    } else {
      return;
    }
  }
}
/*
 * Copy the lexeme spanned by [tokenizer.start, tokenizer.current) into a
 * freshly allocated NUL-terminated string. Caller owns the result and
 * must free() it.
 *
 * Fix: the original allocated malloc(sizeof(size)) -- i.e. 4 bytes, the
 * size of the int32_t variable -- instead of size + 1 bytes, so any
 * lexeme longer than 3 characters overflowed the heap buffer.
 */
static char *currentTokenToS() {
  int32_t size = tokenizer.current - tokenizer.start;
  char *str = malloc((size_t)size + 1); /* +1 for the terminating NUL */
  /* NOTE(review): allocation result is unchecked, as in the rest of this
   * file -- callers would need a NULL policy before adding a check here. */
  strncpy(str, tokenizer.start, size);
  str[size] = '\0';
  return str;
}
/* Look up s in the keyword table; returns its keyword TokenType, or
 * TOKEN_IDENTIFIER when s is not a reserved word. */
TokenType getToken(char *s) {
  TokenMap *entry = tokenMap[hash_TokenMap(s)];
  while (entry != NULL) {
    if (strcmp(entry->keyword, s) == 0)
      return entry->token;
    entry = entry->next; /* walk the bucket's collision chain */
  }
  return TOKEN_IDENTIFIER;
}
/* Classify the current lexeme as a keyword or TOKEN_IDENTIFIER. */
static TokenType identifierType() {
  char *lexeme = currentTokenToS();
  TokenType type = getToken(lexeme);
  free(lexeme); /* currentTokenToS() allocates */
  return type;
}
/* Consume the rest of an identifier/keyword and emit its token. */
static Token identifier() {
  for (;;) {
    char c = peek();
    if (!isAlpha(c) && !isDigit(c))
      break;
    advance();
  }
  return makeToken(identifierType());
}
/* Consume a numeric literal: TOKEN_FLOAT when a '.' followed by a digit
 * appears, TOKEN_INT otherwise. */
static Token number() {
  while (isDigit(peek()))
    advance();
  /* A '.' only makes a float when a digit follows it. */
  if (peek() == '.' && isDigit(peekNext())) {
    advance(); /* consume the '.' */
    while (isDigit(peek()))
      advance();
    return makeToken(TOKEN_FLOAT);
  }
  return makeToken(TOKEN_INT);
}
/* Consume a double-quoted string (the opening '"' is already consumed).
 * Multi-line strings are allowed; the line counter tracks embedded '\n'. */
static Token string() {
  for (;;) {
    if (isAtEnd())
      return errorToken("Unterminated string.");
    char c = peek();
    if (c == '"')
      break;
    if (c == '\n')
      tokenizer.line++;
    advance();
  }
  advance(); /* the closing quote */
  return makeToken(TOKEN_STRING);
}
/*
 * Scan and return the next token from the global tokenizer.
 * Identifier and number classification run before the single-character
 * operator cases, so any character isDigit() accepts starts a number.
 */
Token nextToken() {
  skipWhitespace();
  tokenizer.start = tokenizer.current;
  if (isAtEnd())
    return makeToken(TOKEN_EOF);

  char c = advance();
  if (isAlpha(c))
    return identifier();
  if (isDigit(c))
    return number();

  switch (c) {
  case '"':
    return string();
  case '(':
    return makeToken(TOKEN_LEFT_PAREN);
  case ')':
    return makeToken(TOKEN_RIGHT_PAREN);
  case '{':
    return makeToken(TOKEN_LEFT_BRACE);
  case '}':
    return makeToken(TOKEN_RIGHT_BRACE);
  case ';':
    return makeToken(TOKEN_SEMICOLON);
  case '+':
    return makeToken(TOKEN_ADD);
  case '-':
    return makeToken(TOKEN_SUB);
  case '*':
    return makeToken(TOKEN_MUL);
  case '/':
    return makeToken(TOKEN_DIV);
  default:
    return errorToken("Unexpected character.");
  }
}
/*
 * Debug helper: print a human-readable form of token t via the
 * PRINT_TOKEN_CASE macro (defined elsewhere, presumably in parser.h,
 * and assumed to emit the token name and/or `str`).
 *
 * NOTE(review): the lexeme text comes from currentTokenToS(), i.e. from
 * the tokenizer's *current* start/current span, not from t.start/t.length
 * -- it only matches t when called immediately after nextToken() returned
 * t. Confirm this is the intended usage.
 */
void debug_printToken(Token t) {
char *str = currentTokenToS();
switch (t.type) {
PRINT_TOKEN_CASE(TOKEN_LEFT_PAREN)
PRINT_TOKEN_CASE(TOKEN_RIGHT_PAREN)
PRINT_TOKEN_CASE(TOKEN_LEFT_BRACE)
PRINT_TOKEN_CASE(TOKEN_RIGHT_BRACE)
PRINT_TOKEN_CASE(TOKEN_SEMICOLON)
PRINT_TOKEN_CASE(TOKEN_IDENTIFIER)
PRINT_TOKEN_CASE(TOKEN_STRING)
PRINT_TOKEN_CASE(TOKEN_FLOAT)
PRINT_TOKEN_CASE(TOKEN_U8)
PRINT_TOKEN_CASE(TOKEN_I8)
PRINT_TOKEN_CASE(TOKEN_U16)
PRINT_TOKEN_CASE(TOKEN_I16)
PRINT_TOKEN_CASE(TOKEN_U64)
PRINT_TOKEN_CASE(TOKEN_I64)
PRINT_TOKEN_CASE(TOKEN_INT)
PRINT_TOKEN_CASE(TOKEN_UINT)
/* PRINT_TOKEN_CASE(TOKEN_ARRAY) */
/* PRINT_TOKEN_CASE(TOKEN_MAP) */
PRINT_TOKEN_CASE(TOKEN_FALSE)
PRINT_TOKEN_CASE(TOKEN_TRUE)
PRINT_TOKEN_CASE(TOKEN_NULL)
PRINT_TOKEN_CASE(TOKEN_EOF)
PRINT_TOKEN_CASE(TOKEN_ERROR)
PRINT_TOKEN_CASE(TOKEN_ADD)
PRINT_TOKEN_CASE(TOKEN_SUB)
PRINT_TOKEN_CASE(TOKEN_MUL)
PRINT_TOKEN_CASE(TOKEN_DIV)
PRINT_TOKEN_CASE(TOKEN_MOD)
PRINT_TOKEN_CASE(TOKEN_GT)
PRINT_TOKEN_CASE(TOKEN_LT)
PRINT_TOKEN_CASE(TOKEN_EQ)
PRINT_TOKEN_CASE(TOKEN_GE)
PRINT_TOKEN_CASE(TOKEN_LE)
PRINT_TOKEN_CASE(TOKEN_NE)
PRINT_TOKEN_CASE(TOKEN_AND)
PRINT_TOKEN_CASE(TOKEN_OR)
PRINT_TOKEN_CASE(TOKEN_XOR)
PRINT_TOKEN_CASE(TOKEN_SHIFTRIGHT)
PRINT_TOKEN_CASE(TOKEN_SHIFTLEFT)
PRINT_TOKEN_CASE(TOKEN_FN)
PRINT_TOKEN_CASE(TOKEN_TO)
PRINT_TOKEN_CASE(TOKEN_IN)
PRINT_TOKEN_CASE(TOKEN_IS)
PRINT_TOKEN_CASE(TOKEN_AS)
PRINT_TOKEN_CASE(TOKEN_USE)
PRINT_TOKEN_CASE(TOKEN_IF)
PRINT_TOKEN_CASE(TOKEN_ELSE)
PRINT_TOKEN_CASE(TOKEN_DEFAULT)
PRINT_TOKEN_CASE(TOKEN_FOR)
PRINT_TOKEN_CASE(TOKEN_TRY)
PRINT_TOKEN_CASE(TOKEN_CATCH)
PRINT_TOKEN_CASE(TOKEN_WHILE)
PRINT_TOKEN_CASE(TOKEN_DO)
PRINT_TOKEN_CASE(TOKEN_EXIT)
PRINT_TOKEN_CASE(TOKEN_SWITCH)
PRINT_TOKEN_CASE(TOKEN_RETURN)
PRINT_TOKEN_CASE(TOKEN_CONST)
PRINT_TOKEN_CASE(TOKEN_TYPE)
PRINT_TOKEN_CASE(TOKEN_THIS)
PRINT_TOKEN_CASE(TOKEN_YIELD)
PRINT_TOKEN_CASE(TOKEN_CASE)
PRINT_TOKEN_CASE(TOKEN_ASSERT)
PRINT_TOKEN_CASE(TOKEN_BREAK)
PRINT_TOKEN_CASE(TOKEN_LET)
PRINT_TOKEN_CASE(TOKEN_PRINT)
/* NOTE(review): no default: case -- token types not listed above
 * (and the commented-out ARRAY/MAP) print nothing. */
}
free(str); /* currentTokenToS() allocates; release it */
}