#include #include "../../vm/libc.h" #include "lexer.h" typedef struct { const char *start; const char *current; i32 line; } Lexer; Lexer lexer; void init_lexer(const char *source) { lexer.start = source; lexer.current = source; lexer.line = 1; } static bool is_alpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; } static bool is_digit(char c) { return c >= '0' && c <= '9'; } static bool is_at_end() { return *lexer.current == '\0'; } static char advance() { lexer.current++; return lexer.current[-1]; } static char peek() { return *lexer.current; } static char peek_next() { if (is_at_end()) return '\0'; return lexer.current[1]; } static bool match(char expected) { if (is_at_end()) return false; if (*lexer.current != expected) return false; lexer.current++; return true; } static Token make_token(TokenType type) { Token token; token.type = type; token.start = lexer.start; token.length = (i32)(lexer.current - lexer.start); token.line = lexer.line; return token; } static Token error_token(const char *message) { Token token; token.type = TOKEN_ERROR; token.start = message; token.length = (i32)strlen(message); token.line = lexer.line; return token; } static void skip_whitespace() { for (;;) { char c = peek(); switch (c) { case ' ': case '\r': case '\t': advance(); break; case '\n': lexer.line++; advance(); break; case '/': if (peek_next() == '/') { // Single-line comment: skip until newline or end of file advance(); while (peek() != '\n' && !is_at_end()) advance(); } else if (peek_next() == '*') { // Multi-line comment: skip until '*/' or end of file advance(); advance(); while (!is_at_end()) { if (peek() == '\n') lexer.line++; if (peek() == '*' && peek_next() == '/') { advance(); advance(); break; // Exit loop, comment ended } advance(); } } else { return; // Not a comment, let tokenization handle it } break; default: return; } } } static TokenType check_keyword(i32 start, i32 length, const char *rest, TokenType type) { if (lexer.current - lexer.start == start + length && memcmp(lexer.start + start, rest, length) == 0) { return type; } return TOKEN_IDENTIFIER; } static TokenType identifierType() { switch (lexer.start[0]) { case 'a': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'n': return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND); case 's': return check_keyword(2, 0, "", TOKEN_KEYWORD_AS); } } break; case 'c': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'l': return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); case 'o': return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST); } } break; case 'e': return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); case 'f': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'a': return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); case 'o': return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR); case '3': return check_keyword(1, 1, "2", TOKEN_TYPE_REAL); } return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN); } break; case 'i': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'f': return check_keyword(2, 0, "", TOKEN_KEYWORD_IF); case 's': return check_keyword(2, 0, "", TOKEN_KEYWORD_IS); case '8': return check_keyword(2, 0, "", TOKEN_TYPE_I8); case '1': return check_keyword(2, 1, "6", TOKEN_TYPE_I16); case '3': return check_keyword(2, 1, "2", TOKEN_TYPE_INT); case 'n': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { case 'i': return check_keyword(3, 2, "t", TOKEN_KEYWORD_INIT); case 't': return check_keyword(3, 0, "", TOKEN_TYPE_INT); } } break; } } break; case 'n': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'a': return check_keyword(2, 1, "t", TOKEN_TYPE_NAT); case 'i': return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL); } } break; case 'o': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'p': return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN); case 'r': return check_keyword(2, 0, "", TOKEN_OPERATOR_OR); } } break; case 'p': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'l': return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); } } break; case 'r': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'e': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { case 'f': return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); case 't': return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); case 'a': if (lexer.current - lexer.start > 3) { switch(lexer.start[3]) { case 'd': return check_keyword(4, 0, "", TOKEN_KEYWORD_READ); case 'l': return check_keyword(4, 0, "", TOKEN_TYPE_REAL); } } } } break; } } break; case 's': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 't': return check_keyword(2, 1, "r", TOKEN_TYPE_STR); } } break; case 't': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'h': return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS); case 'r': return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); } } break; case 'u': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 's': return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE); case '8': return check_keyword(2, 0, "", TOKEN_TYPE_U8); case '1': return check_keyword(2, 1, "6", TOKEN_TYPE_U16); case '3': return check_keyword(2, 1, "2", TOKEN_TYPE_NAT); } } break; case 'w': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'h': return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); case 'r': return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); } } break; case 'b': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'y': return check_keyword(2, 2, "te", TOKEN_TYPE_U8); case 'o': return check_keyword(2, 2, "ol", TOKEN_TYPE_U8); } } break; case 'g': return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); case 'l': return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP); case 'd': return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); case 'v': return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID); } return TOKEN_IDENTIFIER; } static Token identifier() { while (is_alpha(peek()) || is_digit(peek())) advance(); return make_token(identifierType()); } static Token number() { while (is_digit(peek())) advance(); /* Look for a fractional part. */ if (peek() == '.' && is_digit(peek_next())) { /* Consume the ".". */ advance(); while (is_digit(peek())) advance(); return make_token(TOKEN_LITERAL_REAL); } return make_token(TOKEN_LITERAL_INT); } static Token string() { while (peek() != '"' && !is_at_end()) { if (peek() == '\n') lexer.line++; advance(); } if (is_at_end()) return error_token("Unterminated string."); /* The closing quote. */ advance(); return make_token(TOKEN_LITERAL_STR); } Token next_token() { skip_whitespace(); lexer.start = lexer.current; if (is_at_end()) return make_token(TOKEN_EOF); char c = advance(); if (is_alpha(c)) return identifier(); char next = peek(); if ((c == '-' && is_digit(next)) || is_digit(c)) return number(); switch (c) { case '(': return make_token(TOKEN_LPAREN); case ')': return make_token(TOKEN_RPAREN); case '{': return make_token(TOKEN_LBRACE); case '}': return make_token(TOKEN_RBRACE); case '[': return make_token(TOKEN_LBRACKET); case ']': return make_token(TOKEN_RBRACKET); case ';': return make_token(TOKEN_SEMICOLON); case ',': return make_token(TOKEN_COMMA); case '.': return make_token(TOKEN_DOT); case '-': return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS); case '+': return make_token(TOKEN_PLUS); case '/': return make_token(TOKEN_SLASH); case '&': return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND); case '#': return make_token(TOKEN_MESH); case '$': return make_token(TOKEN_BIG_MONEY); case '*': return make_token(TOKEN_STAR); case '!': return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); case '=': return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); case '<': return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); case '>': return make_token(match('=') ? TOKEN_GTE : TOKEN_GT); case '"': return string(); } return error_token("Unexpected character."); }