#include "lexer.h" Lexer lexer; void init_lexer(const char *source) { lexer.start = source; lexer.current = source; lexer.line = 1; } int is_at_end() { return *lexer.current == '\0'; } char advance() { return *lexer.current++; } char peek() { return *lexer.current; } char peek_next() { if (is_at_end()) return '\0'; return lexer.current[1]; } int match(char expected) { if (*lexer.current != expected) return 0; lexer.current++; return 1; } void skip_whitespace() { for (;;) { char c = peek(); switch (c) { case ' ': case '\r': case '\t': advance(); break; case '\n': lexer.line++; advance(); break; case '!': if (peek_next() == '!') { while (peek() != '\n' && !is_at_end()) advance(); } else { while (peek() != '\n' && !is_at_end()) advance(); } break; default: return; } } } Token make_token(TokenType type) { Token token; token.type = type; token.start = lexer.start; token.length = (int)(lexer.current - lexer.start); token.line = lexer.line; return token; } Token error_token(const char *message) { Token token; token.type = TOKEN_ERROR; token.start = message; token.length = (int)strlen(message); token.line = lexer.line; return token; } int is_alpha(char c) { return isalpha(c) || c == '_'; } int is_digit(char c) { return isdigit(c); } Token number() { while (is_digit(peek())) advance(); if (peek() == '.' && is_digit(peek_next())) { advance(); while (is_digit(peek())) advance(); return make_token(TOKEN_FLOAT_LITERAL); } return make_token(TOKEN_INT_LITERAL); } Token string() { while (peek() != '"' && !is_at_end()) { if (peek() == '\n') lexer.line++; advance(); } if (is_at_end()) return error_token("Unterminated string."); advance(); return make_token(TOKEN_STRING_LITERAL); } Token identifier() { while (is_alpha(peek()) || is_digit(peek())) advance(); int length = (int)(lexer.current - lexer.start); const char *text = lexer.start; if (length == 4 && strncmp(text, "init", 4) == 0) return make_token(TOKEN_KEYWORD_INIT); if (length == 4 && strncmp(text, "this", 4) == 0) return make_token(TOKEN_KEYWORD_THIS); if (length == 4 && strncmp(text, "type", 4) == 0) return make_token(TOKEN_KEYWORD_TYPE); if (length == 2 && strncmp(text, "fn", 2) == 0) return make_token(TOKEN_KEYWORD_FN); if (length == 3 && strncmp(text, "let", 3) == 0) return make_token(TOKEN_KEYWORD_LET); if (length == 5 && strncmp(text, "const", 5) == 0) return make_token(TOKEN_KEYWORD_CONST); if (length == 2 && strncmp(text, "if", 2) == 0) return make_token(TOKEN_KEYWORD_IF); if (length == 4 && strncmp(text, "else", 4) == 0) return make_token(TOKEN_KEYWORD_ELSE); if (length == 5 && strncmp(text, "while", 5) == 0) return make_token(TOKEN_KEYWORD_WHILE); if (length == 3 && strncmp(text, "for", 3) == 0) return make_token(TOKEN_KEYWORD_FOR); if (length == 6 && strncmp(text, "return", 6) == 0) return make_token(TOKEN_KEYWORD_RETURN); if (length == 3 && strncmp(text, "use", 3) == 0) return make_token(TOKEN_KEYWORD_USE); if (length == 2 && strncmp(text, "is", 2) == 0) return make_token(TOKEN_OPERATOR_IS); if (length == 3 && strncmp(text, "int", 3) == 0) return make_token(TOKEN_TYPE_INT); if (length == 3 && strncmp(text, "nat", 3) == 0) return make_token(TOKEN_TYPE_NAT); if (length == 3 && strncmp(text, "str", 3) == 0) return make_token(TOKEN_TYPE_STR); if (length == 3 && strncmp(text, "real", 4) == 0) return make_token(TOKEN_TYPE_REAL); return make_token(TOKEN_IDENTIFIER); } Token next_token() { skip_whitespace(); lexer.start = lexer.current; if (is_at_end()) return make_token(TOKEN_EOF); char c = advance(); if (is_alpha(c)) return identifier(); if 
(is_digit(c)) return number(); switch (c) { case '(': return make_token(TOKEN_LPAREN); case ')': return make_token(TOKEN_RPAREN); case '{': return make_token(TOKEN_LBRACE); case '}': return make_token(TOKEN_RBRACE); case '[': return make_token(TOKEN_LBRACKET); case ']': return make_token(TOKEN_RBRACKET); case ',': return make_token(TOKEN_COMMA); case '.': return make_token(TOKEN_DOT); case ':': return make_token(TOKEN_COLON); case ';': return make_token(TOKEN_SEMICOLON); case '+': return make_token(TOKEN_PLUS); case '-': return make_token(TOKEN_MINUS); case '*': return make_token(TOKEN_STAR); case '/': return make_token(TOKEN_SLASH); case '!': return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); case '=': return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); case '<': return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); case '>': return make_token(match('=') ? TOKEN_GTE : TOKEN_GT); case '"': return string(); } return error_token("Unexpected character."); } const char *token_type_name(TokenType type) { switch (type) { case TOKEN_IDENTIFIER: return "identifier"; case TOKEN_INT_LITERAL: return "int literal"; case TOKEN_FLOAT_LITERAL: return "real literal"; case TOKEN_STRING_LITERAL: return "string literal"; case TOKEN_TYPE_INT: return "int"; case TOKEN_TYPE_REAL: return "real"; case TOKEN_TYPE_STR: return "str"; case TOKEN_TYPE_NAT: return "nat"; case TOKEN_KEYWORD_THIS: return "this"; case TOKEN_KEYWORD_TYPE: return "type"; case TOKEN_KEYWORD_FN: return "fn"; case TOKEN_KEYWORD_LET: return "let"; case TOKEN_KEYWORD_CONST: return "const"; case TOKEN_KEYWORD_IF: return "if"; case TOKEN_KEYWORD_ELSE: return "else"; case TOKEN_KEYWORD_WHILE: return "while"; case TOKEN_KEYWORD_FOR: return "for"; case TOKEN_KEYWORD_RETURN: return "return"; case TOKEN_KEYWORD_INIT: return "init"; case TOKEN_KEYWORD_USE: return "use"; case TOKEN_OPERATOR_IS: return "is"; case TOKEN_BANG: return "!"; case TOKEN_EQ: return "="; case TOKEN_DOT: return "."; case TOKEN_COMMA: return ","; case TOKEN_COLON: return ":"; case TOKEN_SEMICOLON: return ";"; case TOKEN_PLUS: return "+"; case TOKEN_MINUS: return "-"; case TOKEN_STAR: return "*"; case TOKEN_SLASH: return "/"; case TOKEN_LPAREN: return "("; case TOKEN_RPAREN: return ")"; case TOKEN_LBRACE: return "{"; case TOKEN_RBRACE: return "}"; case TOKEN_LBRACKET: return "["; case TOKEN_RBRACKET: return "]"; case TOKEN_EOF: return "eof"; case TOKEN_ERROR: return "error"; default: return "unknown"; } }
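/*
 * Usage sketch (not part of lexer.c): a minimal driver showing how init_lexer,
 * next_token, and token_type_name are meant to be called together. It assumes
 * lexer.h declares TokenType, Token (with type, start, length, line), and the
 * functions above, as their uses in this file suggest; the file name
 * lexer_demo.c and the sample source string are illustrative only, and the
 * "!!" line comment in the sample follows the convention implied by
 * skip_whitespace.
 */

/* lexer_demo.c */
#include <stdio.h>

#include "lexer.h"

int main(void) {
    /* Hypothetical program in the language being lexed. */
    const char *source =
        "fn add(a: int, b: int): int {\n"
        "    return a + b; !! a line comment\n"
        "}\n";

    init_lexer(source);

    /* Pull tokens until EOF (or an error), printing line, kind, and lexeme. */
    for (;;) {
        Token token = next_token();
        printf("%4d %-14s '%.*s'\n",
               token.line,
               token_type_name(token.type),
               token.length,
               token.start);
        if (token.type == TOKEN_EOF || token.type == TOKEN_ERROR) break;
    }

    return 0;
}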