#include "parser.h"
#include "hashmap.h"

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

DEFINE_HASHMAP(TokenMap, 150);

/* Global keyword lookup map; populated once by initTokenMap(). */
TokenMap **tokenMap;

/*
 * Keyword spellings and their token types, as one data table.
 * (The original issued one put_TokenMap() call per keyword and registered
 * "ge" twice; the duplicate has been dropped.)  The strings are plain
 * char * string literals to match put_TokenMap()'s parameter type.
 */
static const struct {
    char *keyword;
    TokenType token;
} keywords[] = {
    {"fn", TOKEN_FN},         {"to", TOKEN_TO},
    {"in", TOKEN_IN},         {"is", TOKEN_IS},
    {"as", TOKEN_AS},         {"use", TOKEN_USE},
    {"if", TOKEN_IF},         {"else", TOKEN_ELSE},
    {"default", TOKEN_DEFAULT}, {"for", TOKEN_FOR},
    {"try", TOKEN_TRY},       {"catch", TOKEN_CATCH},
    {"while", TOKEN_WHILE},   {"do", TOKEN_DO},
    {"exit", TOKEN_EXIT},     {"switch", TOKEN_SWITCH},
    {"return", TOKEN_RETURN}, {"const", TOKEN_CONST},
    {"type", TOKEN_TYPE},     {"this", TOKEN_THIS},
    {"yield", TOKEN_YIELD},   {"case", TOKEN_CASE},
    {"assert", TOKEN_ASSERT}, {"break", TOKEN_BREAK},
    {"let", TOKEN_LET},       {"print", TOKEN_PRINT},
    {"nil", TOKEN_NULL},      {"and", TOKEN_AND},
    {"or", TOKEN_OR},         {"xor", TOKEN_XOR},
    {"mod", TOKEN_MOD},       {"eq", TOKEN_EQ},
    {"ge", TOKEN_GE},         {"lt", TOKEN_LT},
    {"le", TOKEN_LE},         {"ne", TOKEN_NE},
    {"gt", TOKEN_GT},         {"srl", TOKEN_SHIFTRIGHT},
    {"sll", TOKEN_SHIFTLEFT},
};

/* Build the keyword map.  Must be called once before the first nextToken(). */
void initTokenMap(void) {
    tokenMap = new_TokenMap();
    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++)
        put_TokenMap(tokenMap, keywords[i].keyword, keywords[i].token);
}

/* Scanner state for the single source buffer being tokenized. */
typedef struct Tokenizer Tokenizer;
struct Tokenizer {
    char *start;   /* first character of the lexeme currently being scanned */
    char *current; /* next character to consume */
    int32_t line;  /* 1-based line counter, for diagnostics */
};

Tokenizer tokenizer;

/*
 * Point the tokenizer at the NUL-terminated buffer `src`.
 * The buffer is not copied: tokens returned later alias into it, so it
 * must outlive every Token produced from it.
 */
void initTokenizer(char *src) {
    tokenizer.start = src;
    tokenizer.current = src;
    tokenizer.line = 1;
}

/* Identifier characters: ASCII letters, '_', and this language's extra
 * identifier characters '\'' and '?'. */
static bool isAlpha(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
           c == '_' || c == '\'' || c == '?';
}

/*
 * Decimal digits only.
 * BUG FIX: the original also classified '-' as a digit, which (a) made the
 * `case '-': TOKEN_SUB` branch in nextToken() unreachable, (b) glued '-'
 * into identifiers ("a-b" lexed as ONE identifier), and (c) merged "1-2"
 * into a single number token.  '-' is now handled explicitly in nextToken().
 */
static bool isDigit(char c) {
    return c >= '0' && c <= '9';
}

static bool isAtEnd(void) {
    return *tokenizer.current == '\0';
}

/* Build a token of `type` spanning [tokenizer.start, tokenizer.current). */
static Token makeToken(TokenType type) {
    Token token;
    token.type = type;
    token.start = tokenizer.start;
    token.length = (int32_t)(tokenizer.current - tokenizer.start);
    token.line = tokenizer.line;
    return token;
}

/* Build a TOKEN_ERROR whose text is the (static) message itself. */
static Token errorToken(char *msg) {
    Token token;
    token.type = TOKEN_ERROR;
    token.start = msg;
    token.length = (int32_t)strlen(msg);
    token.line = tokenizer.line;
    return token;
}

/* Consume and return the current character. */
static char advance(void) {
    tokenizer.current++;
    return tokenizer.current[-1];
}

static char peek(void) {
    return *tokenizer.current;
}

static char peekNext(void) {
    if (isAtEnd()) return '\0';
    return tokenizer.current[1];
}

/*
 * Skip spaces, tabs, carriage returns, newlines (counting lines), and
 * '!'-to-end-of-line comments.  Stops at the first significant character.
 */
static void skipWhitespace(void) {
    for (;;) {
        char c = peek();
        switch (c) {
        case ' ':
        case '\r':
        case '\t':
            advance();
            break;
        case '\n':
            tokenizer.line++;
            advance();
            break;
        case '!':
            /* '!' introduces a line comment in this language. */
            while (peek() != '\n' && !isAtEnd())
                advance();
            break;
        default:
            return;
        }
    }
}

/*
 * Heap-copy the current lexeme as a NUL-terminated string; caller frees.
 * Returns NULL on allocation failure.
 * BUG FIX: the original allocated malloc(sizeof(size)) — i.e. 4 bytes,
 * the size of the int32_t counter — instead of size + 1 bytes, so any
 * lexeme longer than 3 characters overflowed the heap block (and the
 * terminator write str[size] was out of bounds even for short ones).
 */
static char *currentTokenToS(void) {
    size_t size = (size_t)(tokenizer.current - tokenizer.start);
    char *str = malloc(size + 1);
    if (str == NULL) return NULL;
    memcpy(str, tokenizer.start, size);
    str[size] = '\0';
    return str;
}

/* Look `s` up in the keyword map; unknown words are identifiers. */
TokenType getToken(char *s) {
    TokenMap *np;
    for (np = tokenMap[hash_TokenMap(s)]; np != NULL; np = np->next)
        if (strcmp(s, np->keyword) == 0)
            return np->token;
    return TOKEN_IDENTIFIER;
}

/* Classify the current lexeme as a keyword token or TOKEN_IDENTIFIER. */
static TokenType identifierType(void) {
    char *check = currentTokenToS();
    if (check == NULL) return TOKEN_IDENTIFIER; /* OOM: degrade gracefully */
    TokenType t = getToken(check);
    free(check);
    return t;
}

/* Scan the remainder of an identifier/keyword (first char already consumed). */
static Token identifier(void) {
    while (isAlpha(peek()) || isDigit(peek()))
        advance();
    return makeToken(identifierType());
}

/*
 * Scan a numeric literal (first char — a digit, or a leading '-' from
 * nextToken() — already consumed).  A '.' followed by a digit makes it
 * a float; otherwise it is an int.
 */
static Token number(void) {
    bool is_float = false;
    while (isDigit(peek()))
        advance();
    /* Look for a fractional part. */
    if (peek() == '.' && isDigit(peekNext())) {
        is_float = true;
        /* Consume the ".". */
        advance();
        while (isDigit(peek()))
            advance();
    }
    return makeToken(is_float ? TOKEN_FLOAT : TOKEN_INT);
}

/* Scan a double-quoted string (opening quote already consumed).
 * Newlines are allowed inside strings and bump the line counter. */
static Token string(void) {
    while (peek() != '"' && !isAtEnd()) {
        if (peek() == '\n')
            tokenizer.line++;
        advance();
    }
    if (isAtEnd())
        return errorToken("Unterminated string.");
    /* The closing quote. */
    advance();
    return makeToken(TOKEN_STRING);
}

/* Produce the next token from the source buffer. */
Token nextToken(void) {
    skipWhitespace();
    tokenizer.start = tokenizer.current;
    if (isAtEnd())
        return makeToken(TOKEN_EOF);

    char c = advance();
    if (isAlpha(c))
        return identifier();
    if (isDigit(c))
        return number();

    switch (c) {
    case '(': return makeToken(TOKEN_LEFT_PAREN);
    case ')': return makeToken(TOKEN_RIGHT_PAREN);
    case '{': return makeToken(TOKEN_LEFT_BRACE);
    case '}': return makeToken(TOKEN_RIGHT_BRACE);
    case '+': return makeToken(TOKEN_ADD);
    case '/': return makeToken(TOKEN_DIV);
    case '-':
        /* A '-' immediately followed by a digit starts a negative literal
         * (preserving the original's "-5" support); otherwise it is
         * subtraction, which the original could never emit because
         * isDigit('-') was true. */
        if (isDigit(peek()))
            return number();
        return makeToken(TOKEN_SUB);
    case '*': return makeToken(TOKEN_MUL);
    case ';': return makeToken(TOKEN_SEMICOLON);
    case '"': return string();
    default:  break;
    }
    return errorToken("Unexpected character.");
}

/*
 * Debug-print a token.
 * NOTE(review): this ignores t.start/t.length and re-reads the global
 * tokenizer state via currentTokenToS(), so it only shows the right text
 * when called immediately after nextToken() — confirm callers rely on
 * exactly that.
 */
void debug_printToken(Token t) {
    char *str = currentTokenToS();
    if (str == NULL)
        return; /* OOM: nothing useful to print */
    switch (t.type) {
        PRINT_TOKEN_CASE(TOKEN_LEFT_PAREN)
        PRINT_TOKEN_CASE(TOKEN_RIGHT_PAREN)
        PRINT_TOKEN_CASE(TOKEN_LEFT_BRACE)
        PRINT_TOKEN_CASE(TOKEN_RIGHT_BRACE)
        PRINT_TOKEN_CASE(TOKEN_SEMICOLON)
        PRINT_TOKEN_CASE(TOKEN_IDENTIFIER)
        PRINT_TOKEN_CASE(TOKEN_STRING)
        PRINT_TOKEN_CASE(TOKEN_FLOAT)
        PRINT_TOKEN_CASE(TOKEN_U8)
        PRINT_TOKEN_CASE(TOKEN_I8)
        PRINT_TOKEN_CASE(TOKEN_U16)
        PRINT_TOKEN_CASE(TOKEN_I16)
        PRINT_TOKEN_CASE(TOKEN_U64)
        PRINT_TOKEN_CASE(TOKEN_I64)
        PRINT_TOKEN_CASE(TOKEN_INT)
        PRINT_TOKEN_CASE(TOKEN_UINT)
        /* PRINT_TOKEN_CASE(TOKEN_ARRAY) */
        /* PRINT_TOKEN_CASE(TOKEN_MAP) */
        PRINT_TOKEN_CASE(TOKEN_FALSE)
        PRINT_TOKEN_CASE(TOKEN_TRUE)
        PRINT_TOKEN_CASE(TOKEN_NULL)
        PRINT_TOKEN_CASE(TOKEN_EOF)
        PRINT_TOKEN_CASE(TOKEN_ERROR)
        PRINT_TOKEN_CASE(TOKEN_ADD)
        PRINT_TOKEN_CASE(TOKEN_SUB)
        PRINT_TOKEN_CASE(TOKEN_MUL)
        PRINT_TOKEN_CASE(TOKEN_DIV)
        PRINT_TOKEN_CASE(TOKEN_MOD)
        PRINT_TOKEN_CASE(TOKEN_GT)
        PRINT_TOKEN_CASE(TOKEN_LT)
        PRINT_TOKEN_CASE(TOKEN_EQ)
        PRINT_TOKEN_CASE(TOKEN_GE)
        PRINT_TOKEN_CASE(TOKEN_LE)
        PRINT_TOKEN_CASE(TOKEN_NE)
        PRINT_TOKEN_CASE(TOKEN_AND)
        PRINT_TOKEN_CASE(TOKEN_OR)
        PRINT_TOKEN_CASE(TOKEN_XOR)
        PRINT_TOKEN_CASE(TOKEN_SHIFTRIGHT)
        PRINT_TOKEN_CASE(TOKEN_SHIFTLEFT)
        PRINT_TOKEN_CASE(TOKEN_FN)
        PRINT_TOKEN_CASE(TOKEN_TO)
        PRINT_TOKEN_CASE(TOKEN_IN)
        PRINT_TOKEN_CASE(TOKEN_IS)
        PRINT_TOKEN_CASE(TOKEN_AS)
        PRINT_TOKEN_CASE(TOKEN_USE)
        PRINT_TOKEN_CASE(TOKEN_IF)
        PRINT_TOKEN_CASE(TOKEN_ELSE)
        PRINT_TOKEN_CASE(TOKEN_DEFAULT)
        PRINT_TOKEN_CASE(TOKEN_FOR)
        PRINT_TOKEN_CASE(TOKEN_TRY)
        PRINT_TOKEN_CASE(TOKEN_CATCH)
        PRINT_TOKEN_CASE(TOKEN_WHILE)
        PRINT_TOKEN_CASE(TOKEN_DO)
        PRINT_TOKEN_CASE(TOKEN_EXIT)
        PRINT_TOKEN_CASE(TOKEN_SWITCH)
        PRINT_TOKEN_CASE(TOKEN_RETURN)
        PRINT_TOKEN_CASE(TOKEN_CONST)
        PRINT_TOKEN_CASE(TOKEN_TYPE)
        PRINT_TOKEN_CASE(TOKEN_THIS)
        PRINT_TOKEN_CASE(TOKEN_YIELD)
        PRINT_TOKEN_CASE(TOKEN_CASE)
        PRINT_TOKEN_CASE(TOKEN_ASSERT)
        PRINT_TOKEN_CASE(TOKEN_BREAK)
        PRINT_TOKEN_CASE(TOKEN_LET)
        PRINT_TOKEN_CASE(TOKEN_PRINT)
    default:
        break;
    }
    free(str);
}