304 lines
7.7 KiB
C
304 lines
7.7 KiB
C
#include "parser.h"
|
|
#include "hashmap.h"
|
|
|
|
/* Instantiates the TokenMap hash map through the DEFINE_HASHMAP macro.
 * NOTE(review): the macro is defined outside this file (presumably in
 * hashmap.h) and 150 is presumably the bucket count -- confirm there. */
DEFINE_HASHMAP(TokenMap, 150);
|
|
|
|
/* Keyword table: assigned in initTokenMap() and searched by getToken(). */
TokenMap **tokenMap;
|
|
|
|
void initTokenMap() {
|
|
tokenMap = new_TokenMap();
|
|
|
|
put_TokenMap(tokenMap, "fn", TOKEN_FN);
|
|
put_TokenMap(tokenMap, "to", TOKEN_TO);
|
|
put_TokenMap(tokenMap, "in", TOKEN_IN);
|
|
put_TokenMap(tokenMap, "is", TOKEN_IS);
|
|
put_TokenMap(tokenMap, "as", TOKEN_AS);
|
|
put_TokenMap(tokenMap, "use", TOKEN_USE);
|
|
put_TokenMap(tokenMap, "if", TOKEN_IF);
|
|
put_TokenMap(tokenMap, "else", TOKEN_ELSE);
|
|
put_TokenMap(tokenMap, "default", TOKEN_DEFAULT);
|
|
put_TokenMap(tokenMap, "for", TOKEN_FOR);
|
|
put_TokenMap(tokenMap, "try", TOKEN_TRY);
|
|
put_TokenMap(tokenMap, "catch", TOKEN_CATCH);
|
|
put_TokenMap(tokenMap, "while", TOKEN_WHILE);
|
|
put_TokenMap(tokenMap, "do", TOKEN_DO);
|
|
put_TokenMap(tokenMap, "exit", TOKEN_EXIT);
|
|
put_TokenMap(tokenMap, "switch", TOKEN_SWITCH);
|
|
put_TokenMap(tokenMap, "return", TOKEN_RETURN);
|
|
put_TokenMap(tokenMap, "const", TOKEN_CONST);
|
|
put_TokenMap(tokenMap, "type", TOKEN_TYPE);
|
|
put_TokenMap(tokenMap, "this", TOKEN_THIS);
|
|
put_TokenMap(tokenMap, "yield", TOKEN_YIELD);
|
|
put_TokenMap(tokenMap, "case", TOKEN_CASE);
|
|
put_TokenMap(tokenMap, "assert", TOKEN_ASSERT);
|
|
put_TokenMap(tokenMap, "break", TOKEN_BREAK);
|
|
put_TokenMap(tokenMap, "let", TOKEN_LET);
|
|
put_TokenMap(tokenMap, "print", TOKEN_PRINT);
|
|
put_TokenMap(tokenMap, "nil", TOKEN_NULL);
|
|
put_TokenMap(tokenMap, "and", TOKEN_AND);
|
|
put_TokenMap(tokenMap, "or", TOKEN_OR);
|
|
put_TokenMap(tokenMap, "xor", TOKEN_XOR);
|
|
put_TokenMap(tokenMap, "mod", TOKEN_MOD);
|
|
put_TokenMap(tokenMap, "eq", TOKEN_EQ);
|
|
put_TokenMap(tokenMap, "ge", TOKEN_GE);
|
|
put_TokenMap(tokenMap, "lt", TOKEN_LT);
|
|
put_TokenMap(tokenMap, "le", TOKEN_LE);
|
|
put_TokenMap(tokenMap, "ne", TOKEN_NE);
|
|
put_TokenMap(tokenMap, "gt", TOKEN_GT);
|
|
put_TokenMap(tokenMap, "ge", TOKEN_GE);
|
|
put_TokenMap(tokenMap, "srl", TOKEN_SHIFTRIGHT);
|
|
put_TokenMap(tokenMap, "sll", TOKEN_SHIFTLEFT);
|
|
}
|
|
|
|
/* Scanner state over a NUL-terminated source buffer. */
typedef struct Tokenizer {
    char *start;   /* first character of the token currently being scanned */
    char *current; /* next character to be consumed */
    int32_t line;  /* current line number; initTokenizer() starts it at 1 */
} Tokenizer;
|
|
|
|
/* Global scanner state read and updated by the tokenizer functions in this file. */
Tokenizer tokenizer;
|
|
|
|
void initTokenizer(char *src) {
|
|
tokenizer.start = src;
|
|
tokenizer.current = src;
|
|
tokenizer.line = 1;
|
|
}
|
|
|
|
/*
 * Returns true when `c` may appear in an identifier: an ASCII letter, an
 * underscore, an apostrophe or a question mark.
 */
static bool isAlpha(char c) {
    switch (c) {
    case '_':
    case '\'':
    case '?':
        return true;
    default:
        break;
    }

    if ('a' <= c && c <= 'z')
        return true;
    return 'A' <= c && c <= 'Z';
}
|
|
|
|
/*
 * Returns true for the decimal digits '0'..'9' and also for '-', so callers
 * treat a minus sign the same way as a digit.
 */
static bool isDigit(char c) {
    if (c == '-')
        return true;
    return '0' <= c && c <= '9';
}
|
|
|
|
/* True once the scan position sits on the terminating NUL of the source. */
static bool isAtEnd() {
    return tokenizer.current[0] == '\0';
}
|
|
|
|
/*
 * Builds a token of the given type whose text is the span from
 * tokenizer.start up to (not including) tokenizer.current, stamped with the
 * current line number. The token points into the source buffer; nothing is
 * copied.
 */
static Token makeToken(TokenType type) {
    Token result;

    result.line = tokenizer.line;
    result.start = tokenizer.start;
    result.length = (int32_t)(tokenizer.current - tokenizer.start);
    result.type = type;
    return result;
}
|
|
|
|
static Token errorToken(char *msg) {
|
|
Token token;
|
|
token.type = TOKEN_ERROR;
|
|
token.start = msg;
|
|
token.length = (int32_t)strlen(msg);
|
|
token.line = tokenizer.line;
|
|
return token;
|
|
}
|
|
|
|
static char advance() {
|
|
tokenizer.current++;
|
|
return tokenizer.current[-1];
|
|
}
|
|
|
|
/* Returns the character at the scan position without consuming it. */
static char peek() {
    return tokenizer.current[0];
}
|
|
|
|
static char peekNext() {
|
|
if (isAtEnd())
|
|
return '\0';
|
|
return tokenizer.current[1];
|
|
}
|
|
|
|
static void skipWhitespace() {
|
|
for (;;) {
|
|
char c = peek();
|
|
switch (c) {
|
|
case ' ':
|
|
case '\r':
|
|
case '\t':
|
|
advance();
|
|
break;
|
|
case '\n':
|
|
tokenizer.line++;
|
|
advance();
|
|
break;
|
|
case '!':
|
|
while (peek() != '\n' && !isAtEnd())
|
|
advance();
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static char *currentTokenToS() {
|
|
int32_t size = tokenizer.current - tokenizer.start;
|
|
char *str = (char *)malloc(sizeof(size));
|
|
strncpy(str, tokenizer.start, size);
|
|
str[size] = '\0';
|
|
return str;
|
|
}
|
|
|
|
TokenType getToken(char *s) {
|
|
TokenMap *np;
|
|
for (np = tokenMap[hash_TokenMap(s)]; np != NULL; np = np->next)
|
|
if (strcmp(s, np->keyword) == 0)
|
|
return np->token;
|
|
return TOKEN_IDENTIFIER;
|
|
}
|
|
|
|
/*
 * Classifies the text currently being scanned: the token type of the
 * matching keyword, or TOKEN_IDENTIFIER when it is not a keyword. The
 * temporary copy of the text is released before returning.
 */
static TokenType identifierType() {
    char *lexeme = currentTokenToS();
    TokenType kind = getToken(lexeme);

    free(lexeme);
    return kind;
}
|
|
|
|
/*
 * Scans the rest of an identifier or keyword whose first character has
 * already been consumed, taking every following character accepted by
 * isAlpha() or isDigit(), and returns the resulting token.
 */
static Token identifier() {
    for (;;) {
        char c = peek();

        if (!isAlpha(c) && !isDigit(c))
            break;
        advance();
    }
    return makeToken(identifierType());
}
|
|
|
|
static Token number() {
|
|
bool is_float = false;
|
|
while (isDigit(peek()))
|
|
advance();
|
|
|
|
/* Look for a fractional part. */
|
|
if (peek() == '.' && isDigit(peekNext())) {
|
|
is_float = true;
|
|
/* Consume the ".". */
|
|
advance();
|
|
|
|
while (isDigit(peek()))
|
|
advance();
|
|
}
|
|
|
|
return makeToken((is_float)
|
|
? TOKEN_FLOAT
|
|
: TOKEN_INT); /* or measure if ends in postscript */
|
|
}
|
|
|
|
static Token string() {
|
|
while (peek() != '"' && !isAtEnd()) {
|
|
if (peek() == '\n')
|
|
tokenizer.line++;
|
|
advance();
|
|
}
|
|
|
|
if (isAtEnd())
|
|
return errorToken("Unterminated string.");
|
|
|
|
/* The closing quote. */
|
|
advance();
|
|
return makeToken(TOKEN_STRING);
|
|
}
|
|
|
|
Token nextToken() {
|
|
skipWhitespace();
|
|
tokenizer.start = tokenizer.current;
|
|
if (isAtEnd())
|
|
return makeToken(TOKEN_EOF);
|
|
|
|
char c = advance();
|
|
if (isAlpha(c))
|
|
return identifier();
|
|
if (isDigit(c))
|
|
return number();
|
|
switch (c) {
|
|
case '(':
|
|
return makeToken(TOKEN_LEFT_PAREN);
|
|
case ')':
|
|
return makeToken(TOKEN_RIGHT_PAREN);
|
|
case '{':
|
|
return makeToken(TOKEN_LEFT_BRACE);
|
|
case '}':
|
|
return makeToken(TOKEN_RIGHT_BRACE);
|
|
case '+':
|
|
return makeToken(TOKEN_ADD);
|
|
case '/':
|
|
return makeToken(TOKEN_DIV);
|
|
case '-':
|
|
return makeToken(TOKEN_SUB);
|
|
case '*':
|
|
return makeToken(TOKEN_MUL);
|
|
case ';':
|
|
return makeToken(TOKEN_SEMICOLON);
|
|
case '"':
|
|
return string();
|
|
}
|
|
|
|
return errorToken("Unexpected character.");
|
|
}
|
|
|
|
/*
 * Debug helper: prints token `t` through the PRINT_TOKEN_CASE entry that
 * matches its type. Token types without an entry below print nothing.
 *
 * The text handed to the macro is copied from the token itself
 * (t.start / t.length) rather than from the global tokenizer position, so
 * the output describes `t` even when the tokenizer has moved on, and error
 * tokens show their message. Does nothing if the temporary copy cannot be
 * allocated.
 *
 * NOTE(review): PRINT_TOKEN_CASE is defined outside this file; it
 * presumably expands to a `case` that prints the type name together with
 * the local `str` -- keep that variable name unless the macro is changed.
 */
void debug_printToken(Token t) {
    size_t len = t.length > 0 ? (size_t)t.length : 0;
    char *str = malloc(len + 1);

    if (str == NULL)
        return;
    memcpy(str, t.start, len);
    str[len] = '\0';

    switch (t.type) {
    PRINT_TOKEN_CASE(TOKEN_LEFT_PAREN)
    PRINT_TOKEN_CASE(TOKEN_RIGHT_PAREN)
    PRINT_TOKEN_CASE(TOKEN_LEFT_BRACE)
    PRINT_TOKEN_CASE(TOKEN_RIGHT_BRACE)
    PRINT_TOKEN_CASE(TOKEN_SEMICOLON)
    PRINT_TOKEN_CASE(TOKEN_IDENTIFIER)
    PRINT_TOKEN_CASE(TOKEN_STRING)
    PRINT_TOKEN_CASE(TOKEN_FLOAT)
    PRINT_TOKEN_CASE(TOKEN_U8)
    PRINT_TOKEN_CASE(TOKEN_I8)
    PRINT_TOKEN_CASE(TOKEN_U16)
    PRINT_TOKEN_CASE(TOKEN_I16)
    PRINT_TOKEN_CASE(TOKEN_U64)
    PRINT_TOKEN_CASE(TOKEN_I64)
    PRINT_TOKEN_CASE(TOKEN_INT)
    PRINT_TOKEN_CASE(TOKEN_UINT)
    /* PRINT_TOKEN_CASE(TOKEN_ARRAY) */
    /* PRINT_TOKEN_CASE(TOKEN_MAP) */
    PRINT_TOKEN_CASE(TOKEN_FALSE)
    PRINT_TOKEN_CASE(TOKEN_TRUE)
    PRINT_TOKEN_CASE(TOKEN_NULL)
    PRINT_TOKEN_CASE(TOKEN_EOF)
    PRINT_TOKEN_CASE(TOKEN_ERROR)
    PRINT_TOKEN_CASE(TOKEN_ADD)
    PRINT_TOKEN_CASE(TOKEN_SUB)
    PRINT_TOKEN_CASE(TOKEN_MUL)
    PRINT_TOKEN_CASE(TOKEN_DIV)
    PRINT_TOKEN_CASE(TOKEN_MOD)
    PRINT_TOKEN_CASE(TOKEN_GT)
    PRINT_TOKEN_CASE(TOKEN_LT)
    PRINT_TOKEN_CASE(TOKEN_EQ)
    PRINT_TOKEN_CASE(TOKEN_GE)
    PRINT_TOKEN_CASE(TOKEN_LE)
    PRINT_TOKEN_CASE(TOKEN_NE)
    PRINT_TOKEN_CASE(TOKEN_AND)
    PRINT_TOKEN_CASE(TOKEN_OR)
    PRINT_TOKEN_CASE(TOKEN_XOR)
    PRINT_TOKEN_CASE(TOKEN_SHIFTRIGHT)
    PRINT_TOKEN_CASE(TOKEN_SHIFTLEFT)
    PRINT_TOKEN_CASE(TOKEN_FN)
    PRINT_TOKEN_CASE(TOKEN_TO)
    PRINT_TOKEN_CASE(TOKEN_IN)
    PRINT_TOKEN_CASE(TOKEN_IS)
    PRINT_TOKEN_CASE(TOKEN_AS)
    PRINT_TOKEN_CASE(TOKEN_USE)
    PRINT_TOKEN_CASE(TOKEN_IF)
    PRINT_TOKEN_CASE(TOKEN_ELSE)
    PRINT_TOKEN_CASE(TOKEN_DEFAULT)
    PRINT_TOKEN_CASE(TOKEN_FOR)
    PRINT_TOKEN_CASE(TOKEN_TRY)
    PRINT_TOKEN_CASE(TOKEN_CATCH)
    PRINT_TOKEN_CASE(TOKEN_WHILE)
    PRINT_TOKEN_CASE(TOKEN_DO)
    PRINT_TOKEN_CASE(TOKEN_EXIT)
    PRINT_TOKEN_CASE(TOKEN_SWITCH)
    PRINT_TOKEN_CASE(TOKEN_RETURN)
    PRINT_TOKEN_CASE(TOKEN_CONST)
    PRINT_TOKEN_CASE(TOKEN_TYPE)
    PRINT_TOKEN_CASE(TOKEN_THIS)
    PRINT_TOKEN_CASE(TOKEN_YIELD)
    PRINT_TOKEN_CASE(TOKEN_CASE)
    PRINT_TOKEN_CASE(TOKEN_ASSERT)
    PRINT_TOKEN_CASE(TOKEN_BREAK)
    PRINT_TOKEN_CASE(TOKEN_LET)
    PRINT_TOKEN_CASE(TOKEN_PRINT)
    }

    free(str);
}
|