From 7e6a063bdb3ccf5b63e37f8bc46f1b9b8416f016 Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 27 Jul 2025 21:07:39 -0400 Subject: [PATCH] fix lexer --- src/Makefile | 24 +-- src/arch/linux/main.c | 34 ++++- src/keywords.h | 38 ----- src/lexer.c | 310 ++++++++++++++++++++++++++------------- src/lexer.h | 20 ++- src/tools/gen_keywords | Bin 15848 -> 0 bytes src/tools/gen_keywords.c | 90 ------------ 7 files changed, 260 insertions(+), 256 deletions(-) delete mode 100644 src/keywords.h delete mode 100755 src/tools/gen_keywords delete mode 100644 src/tools/gen_keywords.c diff --git a/src/Makefile b/src/Makefile index c55e539..954b848 100644 --- a/src/Makefile +++ b/src/Makefile @@ -12,21 +12,6 @@ CFLAGS_WASM = -g -std=c89 -Wall -Wextra -Werror -Wno-unused-parameter -I. LDFLAGS_WASM = -s WASM=1 -g -s USE_SDL=2 LDLIBS_WASM = -TOOLS_DIR := tools -GENERATOR := $(TOOLS_DIR)/gen_keywords -GENERATOR_SRC := $(GENERATOR).c -KEYWORDS_H := keywords.h - -# Rule to generate keywords.h -$(KEYWORDS_H): $(GENERATOR) $(GENERATOR_SRC) - @echo "Generating keywords.h..." - @$(GENERATOR) > $(KEYWORDS_H) - -# Rule to build the generator -$(GENERATOR): $(GENERATOR_SRC) - @echo "Compiling keyword generator..." 
- @$(CC) -o $@ $< - # Source and build configuration # ---------------------------- COMMON_SRC = $(wildcard *.c) @@ -45,7 +30,7 @@ OBJ_NATIVE = $(addprefix $(OBJ_DIR_NATIVE)/,$(notdir $(COMMON_SRC:.c=.o))) OBJ_WASM = $(addprefix $(OBJ_DIR_WASM)/,$(notdir $(COMMON_SRC:.c=.o))) # Phony targets -.PHONY: all clean clean_generated install wasm native emscripten linux macos +.PHONY: all clean install wasm native emscripten linux macos # Default target builds the native version all: native @@ -80,14 +65,9 @@ $(OBJ_DIR_WASM)/%.o: %.c # Clean build artifacts # --------------------- -clean: clean-generated +clean: rm -rf $(OBJ_DIR_NATIVE) $(OBJ_DIR_WASM) $(EXEC_NATIVE) $(EXEC_WASM) -# Clean rule for deleting generated keyword binary and header -clean-generated: - @echo "Removing generated files..." - @rm -f $(KEYWORDS_H) $(GENERATOR) - # Install target (example) # ------------------------ install: native diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 49084b9..f727605 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,6 +1,7 @@ +#include "../../vm.h" #include "../../debug.h" #include "../../test.h" -#include "../../vm.h" +#include "../../lexer.h" #include int main(int argc, char **argv) { @@ -10,6 +11,37 @@ int main(int argc, char **argv) { vm.stack_size = STACK_SIZE; vm.memory_size = MEMORY_SIZE; + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + FILE *f = fopen(argv[1], "rb"); + if (!f) { + perror("fopen"); + return 1; + } + + fseek(f, 0, SEEK_END); + long len = ftell(f); + fseek(f, 0, SEEK_SET); + char *source = (char *)malloc(len + 1); + fread(source, 1, len, f); + source[len] = '\0'; + fclose(f); + + init_lexer(source); + + for (;;) { + Token token = next_token(); + printf("[%d] %-18s: '%.*s'\n", token.line, token_type_name(token.type), token.length, token.start); + if (token.type == TOKEN_EOF) break; + } + + free(source); + return 0; + + test_hello_world_compile(&vm); /* test_add_compile(&vm); */ /* 
test_add_function_compile(&vm); */ diff --git a/src/keywords.h b/src/keywords.h deleted file mode 100644 index 688dcb9..0000000 --- a/src/keywords.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef KEYWORDS_H -#define KEYWORDS_H - -#include "lexer.h" - -static TokenType check_keyword(int start, int length, const char *rest, TokenType type) { - if ((lexer.current - lexer.start) == start + length && - memcmp(lexer.start + start, rest, length) == 0) return type; - return TOKEN_IDENTIFIER; -} - -static TokenType identifier_type(void) { - switch (lexer.start[0]) { - case 'c': - return check_keyword(1, 4, "onst", TOKEN_KEYWORD_CONST); - case 'e': - return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); - case 'f': - return check_keyword(1, 1, "n", TOKEN_KEYWORD_FN); - return check_keyword(1, 2, "or", TOKEN_KEYWORD_FOR); - case 'i': - return check_keyword(1, 1, "f", TOKEN_KEYWORD_IF); - return check_keyword(1, 1, "s", TOKEN_OPERATOR_IS); - case 'l': - return check_keyword(1, 2, "et", TOKEN_KEYWORD_LET); - case 'r': - return check_keyword(1, 5, "eturn", TOKEN_KEYWORD_RETURN); - case 't': - return check_keyword(1, 3, "ype", TOKEN_KEYWORD_TYPE); - case 'u': - return check_keyword(1, 2, "se", TOKEN_KEYWORD_USE); - case 'w': - return check_keyword(1, 4, "hile", TOKEN_KEYWORD_WHILE); - } - return TOKEN_IDENTIFIER; -} - -#endif // KEYWORDS_H diff --git a/src/lexer.c b/src/lexer.c index d858912..bf0a340 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,4 +1,6 @@ -#include "keywords.h" +#include "lexer.h" + +Lexer lexer; void init_lexer(const char *source) { lexer.start = source; @@ -6,25 +8,21 @@ void init_lexer(const char *source) { lexer.line = 1; } -int is_at_end() { - return *lexer.current == '\0'; -} +int is_at_end() { return *lexer.current == '\0'; } -char advance() { - return *lexer.current++; -} +char advance() { return *lexer.current++; } -char peek() { - return *lexer.current; -} +char peek() { return *lexer.current; } char peek_next() { - if (is_at_end()) return '\0'; + if 
(is_at_end()) + return '\0'; return lexer.current[1]; } int match(char expected) { - if (*lexer.current != expected) return 0; + if (*lexer.current != expected) + return 0; lexer.current++; return 1; } @@ -33,24 +31,26 @@ void skip_whitespace() { for (;;) { char c = peek(); switch (c) { - case ' ': - case '\r': - case '\t': - advance(); - break; - case '\n': - lexer.line++; - advance(); - break; - case '!': - if (peek_next() == '!') { - while (peek() != '\n' && !is_at_end()) advance(); - } else { - while (peek() != '\n' && !is_at_end()) advance(); - } - break; - default: - return; + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + lexer.line++; + advance(); + break; + case '!': + if (peek_next() == '!') { + while (peek() != '\n' && !is_at_end()) + advance(); + } else { + while (peek() != '\n' && !is_at_end()) + advance(); + } + break; + default: + return; } } } @@ -73,20 +73,18 @@ Token error_token(const char *message) { return token; } -int is_alpha(char c) { - return isalpha(c) || c == '_'; -} +int is_alpha(char c) { return isalpha(c) || c == '_'; } -int is_digit(char c) { - return isdigit(c); -} +int is_digit(char c) { return isdigit(c); } Token number() { - while (is_digit(peek())) advance(); + while (is_digit(peek())) + advance(); if (peek() == '.' 
&& is_digit(peek_next())) { advance(); - while (is_digit(peek())) advance(); + while (is_digit(peek())) + advance(); return make_token(TOKEN_FLOAT_LITERAL); } @@ -95,47 +93,116 @@ Token number() { Token string() { while (peek() != '"' && !is_at_end()) { - if (peek() == '\n') lexer.line++; + if (peek() == '\n') + lexer.line++; advance(); } - if (is_at_end()) return error_token("Unterminated string."); + if (is_at_end()) + return error_token("Unterminated string."); - advance(); // Consume closing quote + advance(); return make_token(TOKEN_STRING_LITERAL); } +Token identifier() { + while (is_alpha(peek()) || is_digit(peek())) + advance(); + + int length = (int)(lexer.current - lexer.start); + const char *text = lexer.start; + + if (length == 4 && strncmp(text, "init", 4) == 0) + return make_token(TOKEN_KEYWORD_INIT); + if (length == 4 && strncmp(text, "this", 4) == 0) + return make_token(TOKEN_KEYWORD_THIS); + if (length == 4 && strncmp(text, "type", 4) == 0) + return make_token(TOKEN_KEYWORD_TYPE); + if (length == 2 && strncmp(text, "fn", 2) == 0) + return make_token(TOKEN_KEYWORD_FN); + if (length == 3 && strncmp(text, "let", 3) == 0) + return make_token(TOKEN_KEYWORD_LET); + if (length == 5 && strncmp(text, "const", 5) == 0) + return make_token(TOKEN_KEYWORD_CONST); + if (length == 2 && strncmp(text, "if", 2) == 0) + return make_token(TOKEN_KEYWORD_IF); + if (length == 4 && strncmp(text, "else", 4) == 0) + return make_token(TOKEN_KEYWORD_ELSE); + if (length == 5 && strncmp(text, "while", 5) == 0) + return make_token(TOKEN_KEYWORD_WHILE); + if (length == 3 && strncmp(text, "for", 3) == 0) + return make_token(TOKEN_KEYWORD_FOR); + if (length == 6 && strncmp(text, "return", 6) == 0) + return make_token(TOKEN_KEYWORD_RETURN); + if (length == 3 && strncmp(text, "use", 3) == 0) + return make_token(TOKEN_KEYWORD_USE); + if (length == 2 && strncmp(text, "is", 2) == 0) + return make_token(TOKEN_OPERATOR_IS); + if (length == 3 && strncmp(text, "int", 3) == 0) + return 
make_token(TOKEN_TYPE_INT); + if (length == 3 && strncmp(text, "nat", 3) == 0) + return make_token(TOKEN_TYPE_NAT); + if (length == 3 && strncmp(text, "str", 3) == 0) + return make_token(TOKEN_TYPE_STR); + if (length == 4 && strncmp(text, "real", 4) == 0) + return make_token(TOKEN_TYPE_REAL); + + return make_token(TOKEN_IDENTIFIER); +} + Token next_token() { skip_whitespace(); lexer.start = lexer.current; - if (is_at_end()) return make_token(TOKEN_EOF); + if (is_at_end()) + return make_token(TOKEN_EOF); char c = advance(); - if (is_alpha(c)) return identifier(); - if (is_digit(c)) return number(); + if (is_alpha(c)) + return identifier(); + if (is_digit(c)) + return number(); switch (c) { - case '(': return make_token(TOKEN_LPAREN); - case ')': return make_token(TOKEN_RPAREN); - case '{': return make_token(TOKEN_LBRACE); - case '}': return make_token(TOKEN_RBRACE); - case '[': return make_token(TOKEN_LBRACKET); - case ']': return make_token(TOKEN_RBRACKET); - case ',': return make_token(TOKEN_COMMA); - case '.': return make_token(TOKEN_DOT); - case ':': return make_token(TOKEN_COLON); - case ';': return make_token(TOKEN_SEMICOLON); - case '+': return make_token(TOKEN_PLUS); - case '-': return make_token(TOKEN_MINUS); - case '*': return make_token(TOKEN_STAR); - case '/': return make_token(TOKEN_SLASH); - case '=': return make_token(TOKEN_EQ); - case '"': return string(); - case '!': - if (match('!')) return make_token(TOKEN_DOUBLE_BANG); - return make_token(TOKEN_BANG); + case '(': + return make_token(TOKEN_LPAREN); + case ')': + return make_token(TOKEN_RPAREN); + case '{': + return make_token(TOKEN_LBRACE); + case '}': + return make_token(TOKEN_RBRACE); + case '[': + return make_token(TOKEN_LBRACKET); + case ']': + return make_token(TOKEN_RBRACKET); + case ',': + return make_token(TOKEN_COMMA); + case '.': + return make_token(TOKEN_DOT); + case ':': + return make_token(TOKEN_COLON); + case ';': + return make_token(TOKEN_SEMICOLON); + case '+': + return 
make_token(TOKEN_PLUS); + case '-': + return make_token(TOKEN_MINUS); + case '*': + return make_token(TOKEN_STAR); + case '/': + return make_token(TOKEN_SLASH); + case '!': + return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + case '=': + return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + case '<': + return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); + case '>': + return make_token(match('=') ? TOKEN_GTE : TOKEN_GT); + case '"': + return string(); } return error_token("Unexpected character."); @@ -143,42 +210,85 @@ Token next_token() { const char *token_type_name(TokenType type) { switch (type) { - case TOKEN_IDENTIFIER: return "identifier"; - case TOKEN_INT_LITERAL: return "int"; - case TOKEN_FLOAT_LITERAL: return "float"; - case TOKEN_STRING_LITERAL: return "string"; - case TOKEN_KEYWORD_TYPE: return "type"; - case TOKEN_KEYWORD_FN: return "fn"; - case TOKEN_KEYWORD_LET: return "let"; - case TOKEN_KEYWORD_CONST: return "const"; - case TOKEN_KEYWORD_IF: return "if"; - case TOKEN_KEYWORD_ELSE: return "else"; - case TOKEN_KEYWORD_WHILE: return "while"; - case TOKEN_KEYWORD_FOR: return "for"; - case TOKEN_KEYWORD_RETURN: return "return"; - case TOKEN_KEYWORD_USE: return "use"; - case TOKEN_OPERATOR_IS: return "is"; - case TOKEN_BANG: return "!"; - case TOKEN_DOUBLE_BANG: return "!!"; - case TOKEN_EQ: return "="; - case TOKEN_DOT: return "."; - case TOKEN_COMMA: return ","; - case TOKEN_COLON: return ":"; - case TOKEN_SEMICOLON: return ";"; - case TOKEN_PLUS: return "+"; - case TOKEN_MINUS: return "-"; - case TOKEN_STAR: return "*"; - case TOKEN_SLASH: return "/"; - case TOKEN_LPAREN: return "("; - case TOKEN_RPAREN: return ")"; - case TOKEN_LBRACE: return "{"; - case TOKEN_RBRACE: return "}"; - case TOKEN_LBRACKET: return "["; - case TOKEN_RBRACKET: return "]"; - case TOKEN_EOF: return "eof"; - case TOKEN_ERROR: return "error"; - default: return "unknown"; + case TOKEN_IDENTIFIER: + return "identifier"; + case TOKEN_INT_LITERAL: + return "int 
literal"; + case TOKEN_FLOAT_LITERAL: + return "real literal"; + case TOKEN_STRING_LITERAL: + return "string literal"; + case TOKEN_TYPE_INT: + return "int"; + case TOKEN_TYPE_REAL: + return "real"; + case TOKEN_TYPE_STR: + return "str"; + case TOKEN_TYPE_NAT: + return "nat"; + case TOKEN_KEYWORD_THIS: + return "this"; + case TOKEN_KEYWORD_TYPE: + return "type"; + case TOKEN_KEYWORD_FN: + return "fn"; + case TOKEN_KEYWORD_LET: + return "let"; + case TOKEN_KEYWORD_CONST: + return "const"; + case TOKEN_KEYWORD_IF: + return "if"; + case TOKEN_KEYWORD_ELSE: + return "else"; + case TOKEN_KEYWORD_WHILE: + return "while"; + case TOKEN_KEYWORD_FOR: + return "for"; + case TOKEN_KEYWORD_RETURN: + return "return"; + case TOKEN_KEYWORD_INIT: + return "init"; + case TOKEN_KEYWORD_USE: + return "use"; + case TOKEN_OPERATOR_IS: + return "is"; + case TOKEN_BANG: + return "!"; + case TOKEN_EQ: + return "="; + case TOKEN_DOT: + return "."; + case TOKEN_COMMA: + return ","; + case TOKEN_COLON: + return ":"; + case TOKEN_SEMICOLON: + return ";"; + case TOKEN_PLUS: + return "+"; + case TOKEN_MINUS: + return "-"; + case TOKEN_STAR: + return "*"; + case TOKEN_SLASH: + return "/"; + case TOKEN_LPAREN: + return "("; + case TOKEN_RPAREN: + return ")"; + case TOKEN_LBRACE: + return "{"; + case TOKEN_RBRACE: + return "}"; + case TOKEN_LBRACKET: + return "["; + case TOKEN_RBRACKET: + return "]"; + case TOKEN_EOF: + return "eof"; + case TOKEN_ERROR: + return "error"; + default: + return "unknown"; } } - - diff --git a/src/lexer.h b/src/lexer.h index 5be3edd..55c2c02 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -1,5 +1,5 @@ -#ifndef ZRL_VM_H -#define ZRL_VM_H +#ifndef ZRL_LEXER_H +#define ZRL_LEXER_H #include #include @@ -11,6 +11,10 @@ typedef enum { TOKEN_INT_LITERAL, TOKEN_FLOAT_LITERAL, TOKEN_STRING_LITERAL, + TOKEN_TYPE_INT, + TOKEN_TYPE_NAT, + TOKEN_TYPE_REAL, + TOKEN_TYPE_STR, TOKEN_KEYWORD_TYPE, TOKEN_KEYWORD_FN, TOKEN_KEYWORD_LET, @@ -21,10 +25,17 @@ typedef enum { TOKEN_KEYWORD_FOR, 
TOKEN_KEYWORD_RETURN, TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_INIT, + TOKEN_KEYWORD_THIS, TOKEN_OPERATOR_IS, - TOKEN_DOUBLE_BANG, TOKEN_BANG, + TOKEN_BANG_EQ, TOKEN_EQ, + TOKEN_EQ_EQ, + TOKEN_GT, + TOKEN_LT, + TOKEN_GTE, + TOKEN_LTE, TOKEN_DOT, TOKEN_COMMA, TOKEN_COLON, @@ -60,9 +71,8 @@ typedef struct { int line; } Lexer; -Lexer lexer; - void init_lexer(const char *source); +const char *token_type_name(TokenType type); Token next_token(); #endif diff --git a/src/tools/gen_keywords b/src/tools/gen_keywords deleted file mode 100755 index 97880c72010df83bee20a4d9a261ae4b48c5273e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15848 zcmeHOeQaCR6~A`+kp>dGr5&YR+qd+C1QI7{o35n_I8B{CL)wJY85GvNy7qJPMq)?y z3vGdlPDvRiD=PaEXq(3VK!Q!1sKmO$G?Y+Q3ffrE{^>sx(SR*oS_rJ00@Lc9^X@s% z&P~cT?T=}iuk_wI=XXBtdFNg~-{*JzaHy@->+uL~CE`|r(muaLf~4r)L=2Fi*eojG zyi%+X^FW^CU{W8H1hlgGOQzFO!VyNl7f7$t&ypQe&JZd3UA1usOzAq%Z!S8KtkR!4 zN+FoC-tA9AXK0#pcIma04AVkCtj%bxv>$DnPBpsjdZO&-1vB&;Bfl~7b15c-dZK)p zm}U7Z^;r8iZLI5w*Y;S+ z{zJ8gHgBrkv@wuL1=b5pYXKbCpL^XNzk>lJs#koCKT;^pXg)v|%|PLP0GKulj{VGL z|GBH+4*{;hp|;Hc%ob+|a7{G^xf+hjS#X>$3&jm$GANZie?-Pk_n1i_@l?Mp3#J6F%y`oE+3%=j*^>-|kit}_oVH}F(w5p(_jI$h6@KR0M zbtrf_;rkTaPk5h#FDCq`f-fWdgo3Xi{AmTRB>Z^=uOfU{!D|Q~Rqzdjk16=)2_MGd zL&MJ$&UtR<3UDujKt~I3`f72s=L_)p1@>bFIFA9UCxg-mW59stTTrqX1Md~wN`sO^ zobQ2%_=9*B0&(}g;2_0Qu>!>k6f01yK(PYF3jDuS;LpC5pXdV@OZB0-BXvUP$A)ch zZd4z5we*Zrid^Fx0O!{J3eLV&LEupCM%{(69JIBspo~kR3!{$w63Vy~x-bkf#&5V3 z&c5{}J^Ooo;N8iNNT^}B;dT9ZQw6MPa!a;?`Q=2ww`ve$3c&Zk^OJsjZYPp0Q}z-_ zZ6z3CY725>zEwxj?hH||uXgMkE0I)R(X*5K$oseHBU2^1=dAwL6?-uRxD=29N^@fg z->U6-f6VV_(nQ+2#6B(>%a)OHKnc9)pWHJNH!pJbTaN^F5Qu z^H?8@ci{loc0mj%Y`va6a$X+_orgr zoT2YCDz-|iXex;*eFkAA%IRR=tB zKAas1XU}EddI<_F#_C1q2}Fifg4Z|M__56IXeHJeRnhXF+-5^)t>rV`%Q(P&1sFFG4&g zwY_JY`-*d)cJIvKTx`V(6f01yK(PYF3KT0)tU$2>#R~kJ6~OmO*zCi7@Ukl@dQ4ly zQpt=htb{OoGNw4#ZNWRNL@F)Prrn=TihcmCj96_Yl5sPk-4WVzS9|C7-G*K!Rs&!q z%|Z-7D;ewQkDFR$k9o*U2f8cE%HTa*)V5+;Bz2#ej6mO7tlNy;XWVBVKA1|!tKe0f 
z=DeJ%(U65VcL(h58qLWJOrvS-rnH%{YYGD36;x{vmdOHWnw8M1s$`F`{&d=eFtrc} zGjJkTYfVkE5A9~^KwGzt+=b?}Ub8pW+ec9xFR-P&WdAZVCsuv6CM%?2DqG7S2Fa23 zJ3_mR@b=KINVqi|>IB^pr~x(zHoXp395S^MmYFs%Ox67pV-5lt`(LV(6FFwrWmH~D3 zU8t1}-LO-9R-jmcVg-s7C|00Yfno*zH!HyV{CHm01T<1xWA1D3CB*VMN|7zp?<7yI{Xdbi@{Q^gBPs~s&Zw`f)H^i!=hGzJ<14chwp^^Nr#*Efn!ZFqfi#D}@rjAp~Jj2^K>sCAd0 zFTddZ>l_cCF+JjX!SkB&ZNMyIp@15|YXvg14+1mW{^#cb*Thnx1=&*4$AS0(=Y6KE z{T48bycL4)3ygmUaTW^bPk_fbYW$x&_T^#^?K5_sT><-zgTqN_?|`f+7>XakvCz9z zsPhm9WTEi6PCTzFV8Hx%H)}@ke0u@dMR3>KNAuiy766&ezs0K9S*{40Pj^1^JU{9Tg+W|DtB%LHP= z*uH01^UiP!Y(9s{ZiIBBwX=C=$k4ZU3S&oG`?lsbqrJ6tcPL^+nzyxuU|YJnB|gYo z-T%qp8%;)U63*_^u3ZAM`iJ@3X>oThK1 zH{zL;(H%|3af^Mp9iqjpqyfK9#-aZtoQ(aM4E5n`@uwdbGqkB+2@YHKjnJ;`#;0x2 zpZ@27=>gzUZ6N}g!@YKNKTta@X*YAplx+qMB>Mw>=~SPYwhxPd3+?Z>;O`t(Tm<0f zPBYyn9K-HtrdtH!hm#OgQadd*_nYaAl}b(%4A7;`o+vsH*VkiXL`W=Pn}^`+3`iiI za)u{hb{mOwwAVDcVohd&Du)Ui9Gvv?p7aCa-T}ktqP`7?8 z{}>p|pZ$6L!xYy)g-z`r*Aa#O9jm1^)9)5K1JC2f@GpTW{QRr`Th!Q)sM`N;0k43A zFO&aY#s3df`(v5u+u#eU6xW}hpU&3;2#xKel$RNw00quH?w{9{?*BxwIM8^0vLDZb z%b-IO_UHdI4cDQU5LJ@6XZC0MF=z_?d7U~={`Ib>WY~|X7n}?IwV<@q$Z=Hh*A%%F zoG}*X&+Dj3^7pIuid;_plew#9fA#Z=xne7%Ja?r=p8XAyo+c$e591i{b@TlfpHDb< f=^@YOHc0wa?ue9%=seEP=HE`wEuT>w6jA&GW9CM- diff --git a/src/tools/gen_keywords.c b/src/tools/gen_keywords.c deleted file mode 100644 index 14f26fa..0000000 --- a/src/tools/gen_keywords.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include - -typedef enum { - TOKEN_IDENTIFIER, - TOKEN_KEYWORD_TYPE, - TOKEN_KEYWORD_FN, - TOKEN_KEYWORD_LET, - TOKEN_KEYWORD_CONST, - TOKEN_KEYWORD_IF, - TOKEN_KEYWORD_ELSE, - TOKEN_KEYWORD_WHILE, - TOKEN_KEYWORD_FOR, - TOKEN_KEYWORD_RETURN, - TOKEN_KEYWORD_USE, - TOKEN_OPERATOR_IS -} TokenType; - -typedef struct { - const char *keyword; - TokenType token; -} Keyword; - -Keyword keywords[] = { - {"type", TOKEN_KEYWORD_TYPE}, - {"fn", TOKEN_KEYWORD_FN}, - {"let", TOKEN_KEYWORD_LET}, - {"const", TOKEN_KEYWORD_CONST}, - {"if", TOKEN_KEYWORD_IF}, - {"else", TOKEN_KEYWORD_ELSE}, - {"while", 
TOKEN_KEYWORD_WHILE}, - {"for", TOKEN_KEYWORD_FOR}, - {"return", TOKEN_KEYWORD_RETURN}, - {"use", TOKEN_KEYWORD_USE}, - {"is", TOKEN_OPERATOR_IS}, -}; - -void emit_keyword_header(FILE *out) { - fprintf(out, "#ifndef KEYWORDS_H\n"); - fprintf(out, "#define KEYWORDS_H\n\n"); - fprintf(out, "#include \"lexer.h\"\n\n"); - - fprintf(out, "static TokenType check_keyword(int start, int length, const char *rest, TokenType type) {\n"); - fprintf(out, " if ((lexer.current - lexer.start) == start + length &&\n"); - fprintf(out, " memcmp(lexer.start + start, rest, length) == 0) return type;\n"); - fprintf(out, " return TOKEN_IDENTIFIER;\n"); - fprintf(out, "}\n\n"); - - fprintf(out, "static TokenType identifier_type(void) {\n"); - fprintf(out, " switch (lexer.start[0]) {\n"); - - for (char ch = 'a'; ch <= 'z'; ++ch) { - int printed = 0; - for (int i = 0; i < sizeof(keywords) / sizeof(Keyword); ++i) { - const char *kw = keywords[i].keyword; - if (kw[0] == ch) { - if (!printed) { - fprintf(out, " case '%c':\n", ch); - printed = 1; - } - - int len = (int)strlen(kw); - fprintf(out, " return check_keyword(%d, %d, \"%s\", %s);\n", - 1, len - 1, kw + 1, - (keywords[i].token == TOKEN_IDENTIFIER ? "TOKEN_IDENTIFIER" : - keywords[i].token == TOKEN_OPERATOR_IS ? "TOKEN_OPERATOR_IS" : - keywords[i].token == TOKEN_KEYWORD_RETURN ? "TOKEN_KEYWORD_RETURN" : - keywords[i].token == TOKEN_KEYWORD_WHILE ? "TOKEN_KEYWORD_WHILE" : - keywords[i].token == TOKEN_KEYWORD_CONST ? "TOKEN_KEYWORD_CONST" : - keywords[i].token == TOKEN_KEYWORD_TYPE ? "TOKEN_KEYWORD_TYPE" : - keywords[i].token == TOKEN_KEYWORD_FN ? "TOKEN_KEYWORD_FN" : - keywords[i].token == TOKEN_KEYWORD_IF ? "TOKEN_KEYWORD_IF" : - keywords[i].token == TOKEN_KEYWORD_FOR ? "TOKEN_KEYWORD_FOR" : - keywords[i].token == TOKEN_KEYWORD_LET ? "TOKEN_KEYWORD_LET" : - keywords[i].token == TOKEN_KEYWORD_ELSE ? "TOKEN_KEYWORD_ELSE" : - keywords[i].token == TOKEN_KEYWORD_USE ? 
"TOKEN_KEYWORD_USE" : "TOKEN_IDENTIFIER")); - } - } - } - - fprintf(out, " }\n return TOKEN_IDENTIFIER;\n"); - fprintf(out, "}\n\n"); - - fprintf(out, "#endif // KEYWORDS_H\n"); -} - -int main(void) { - emit_keyword_header(stdout); - return 0; -}