From 88dfbb098d84949a7b208b31b0271b8c088bf0a8 Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 27 Jul 2025 13:11:44 -0400 Subject: [PATCH] start working on lexer --- README.org | 12 +- docs/SPECIFICATION.org | 105 +++++----- .../{client.zre => client.zrl} | 0 .../{common.zre => common.zrl} | 2 +- .../{server.zre => server.zrl} | 0 src/Makefile | 24 ++- src/common.h | 4 +- src/debug.h | 4 +- src/keywords.h | 38 ++++ src/lexer.c | 184 ++++++++++++++++++ src/lexer.h | 68 +++++++ src/opcodes.h | 9 +- src/test.c | 8 +- src/test.h | 4 +- src/tools/gen_keywords | Bin 0 -> 15848 bytes src/tools/gen_keywords.c | 90 +++++++++ src/vm.c | 10 +- src/vm.h | 6 +- 18 files changed, 477 insertions(+), 91 deletions(-) rename docs/project-syntax-example/{client.zre => client.zrl} (100%) rename docs/project-syntax-example/{common.zre => common.zrl} (92%) rename docs/project-syntax-example/{server.zre => server.zrl} (100%) create mode 100644 src/keywords.h create mode 100644 src/lexer.c create mode 100644 src/lexer.h create mode 100755 src/tools/gen_keywords create mode 100644 src/tools/gen_keywords.c diff --git a/README.org b/README.org index 5edf8ec..e4fc833 100644 --- a/README.org +++ b/README.org @@ -6,23 +6,23 @@ :END: * Overview - - ZRE is a lightweight, portable programming language for permacomputing, game preservation, and indie game development. + - Reality Engine is a lightweight, portable programming language for permacomputing, game preservation, and indie game development inspired by [[https://wiki.xxiivv.com/site/uxn.html][uxn]], [[http://duskos.org/][Dusk OS]], and [[https://doc.cat-v.org/inferno/4th_edition/dis_VM_specification][Dis VM]]. - Built in **C89** for cross-platform compatibility (desktop, microcontrollers, and web via Emscripten). - Designed for simplicity, performance, and creative exploration. 
- - It is inspired by [[https://wiki.xxiivv.com/site/uxn.html][uxn]], [[http://duskos.org/][Dusk OS]], [[https://doc.cat-v.org/inferno/4th_edition/dis_VM_specification][Dis VM]], [[https://www.craftinginterpreters.com/the-lox-language.html][Lox]], [[https://lua.org][Lua]], [[https://en.wikipedia.org/wiki/Lisp_(programming_language)][Lisp]], [[https://en.wikipedia.org/wiki/C_(programming_language)][C]], and [[https://ziglang.org/][Zig]]. + - Reality Engine Language (ZRL) is a C-like, imperative, data-oriented language inspired by [[https://www.craftinginterpreters.com/the-lox-language.html][Lox]], [[https://lua.org][Lua]], [[https://en.wikipedia.org/wiki/Lisp_(programming_language)][Lisp]], [[https://en.wikipedia.org/wiki/C_(programming_language)][C]], [[https://fortran-lang.org/][Fortran]], and [[https://ziglang.org/][Zig]]. * Key Features ** Core Philosophy - - Simple, portable, lightweight - - Targets permacomputing, game world preservation, rapid prototyping, and indie games. + - Simple, portable, lightweight, permacomputing-oriented + - Targets retro hardware, game world preservation, rapid prototyping, and indie games. - No macros or object hierarchies—prioritizes clarity and explicit behavior. - C/Zig like syntax. Lisp/Lua like development workflow. ** Engine & Tooling - Integrated 2D/3D rendering system: - Immediate-mode canvas-based 3D rendering with low-poly 5th-6th gen console aesthetics. - - 2D overlays styled after ImGui. + - 2D canvas styled after ImGui and Raylib. - [[https://www.libsdl.org/][SDL2]] backend for input, audio, and cross-platform compatibility. - Tree-walk interpreter: - Compile bytecode to files for performance. @@ -85,7 +85,7 @@ * Motivation -ZRE bridges retro-inspired creativity with modern portability for: +ZRL bridges retro-inspired creativity with modern portability for: - Game jams (rapid prototyping + 3D engine). - Indie games (5th/6th-gen aesthetics). - Permacomputing (low-resource, sustainable code).
diff --git a/docs/SPECIFICATION.org b/docs/SPECIFICATION.org index cf3524f..61115a5 100644 --- a/docs/SPECIFICATION.org +++ b/docs/SPECIFICATION.org @@ -1,18 +1,18 @@ -* /ZRE/ (Zongors Reality Engine) Design parameters +* /ZRL/ (Reality Engine Language) Design parameters :PROPERTIES: -:CUSTOM_ID: zre-zongors-transpiler-language-design-parameters +:CUSTOM_ID: zrl-zongors-reality-engine-language-design-parameters :END: -** What is /zre/? +** What is /zrl/? :PROPERTIES: -:CUSTOM_ID: what-is-zre +:CUSTOM_ID: what-is-zrl :END: -/zre/ is an domain specific language for 3d games with C/Lua style syntax. +/zrl/ is a domain-specific language for 3D games with C/Lua style syntax. The compiler is written in C which should make it easy to port to other systems. -* /ZRE/ Grammar and Specification +* /ZRL/ Grammar and Specification :PROPERTIES: -:CUSTOM_ID: zre-grammar-and-specification +:CUSTOM_ID: zrl-grammar-and-specification :END: ** Types :PROPERTIES: @@ -26,7 +26,7 @@ systems. - Note that these look like classes but act like structs the methods actually have a implied struct as their first argument -#+begin_src zre +#+begin_src zrl type «token» { init() { // values @@ -41,7 +41,7 @@ type Vec3 { this.y = z; } } -#+end_src zre +#+end_src zrl * Basic Types :PROPERTIES: @@ -99,20 +99,9 @@ string interpolation Array of a specific type -#+begin_src zre -let «variable» = [val1, val2, ...]; -#+end_src zre - -*** Map -:PROPERTIES: -:CUSTOM_ID: map -:END: - -Hashmap - -#+begin_src zre -let «variable» = {key1: val1, key2: val2, ...}; -#+end_src zre +#+begin_src zrl +«type»[«length»] «variable» = [val1, val2, ...]; +#+end_src zrl *** Tunnel :PROPERTIES: @@ -202,19 +191,19 @@ The following is a list of global operators and their effect: let operator -#+begin_src zre +#+begin_src zrl let «token» = true; -#+end_src zre +#+end_src zrl =is= checks if a object is of that type -#+begin_src zre +#+begin_src zrl if («token» is real) { print("hello yes self is a real?"); } -#+end_src zre
+#+end_src zrl also used for letting constants @@ -222,26 +211,26 @@ also used for letting constants coerces a type as another type if possible -#+begin_src zre +#+begin_src zrl let «token» = 0; ! default is int some_functon(«token» as real); ! needs a real -#+end_src zre +#+end_src zrl =in= checks if a object's type, or a type implements a contract -#+begin_src zre +#+begin_src zrl if («token» in Tunnel, Drawable) { print("im tunnel-able and draw-able"); } -#+end_src zre +#+end_src zrl also used inside of the for loops -#+begin_src zre +#+begin_src zrl for («token» in «collection») { «body» } -#+end_src zre +#+end_src zrl ** Object :PROPERTIES: @@ -249,9 +238,9 @@ for («token» in «collection») { «body» } :END: An object is an invoked type. -#+begin_src zre +#+begin_src zrl let «variable» = «type»(«fields», …); -#+end_src zre +#+end_src zrl ** Tunnel :PROPERTIES: @@ -305,7 +294,7 @@ connected tunnel =success? : tunnel_object.walk(path_or_endpoint)= -> moves around the filesystem or through the graph -#+begin_src zre +#+begin_src zrl ! client let endpoint = Client("tcp://path/to/source"); let tunnel = endpoint.attach(user, auth); @@ -320,7 +309,7 @@ s.bind("/some/resource", fn () str { return "hello world"; }) server.start(); -#+end_src zre +#+end_src zrl ** Functions :PROPERTIES: @@ -331,11 +320,11 @@ always have a "default type" for all constant values or a developer can use the =as= keyword we do not have to define all values like in C, while keeping the same type safety as a more strongly typed language. -#+begin_src zre +#+begin_src zrl fn «token» («parameter» «type», ...) «return_type» { «body» } -#+end_src zre +#+end_src zrl - Built in transtypes - sort @@ -352,21 +341,21 @@ fn «token» («parameter» «type», ...) 
«return_type» { :PROPERTIES: :CUSTOM_ID: loops :END: -#+begin_src zre +#+begin_src zrl for («variable» in «collection») { «body» } -#+end_src zre +#+end_src zrl iterates through each object in the collection setting it to variable -#+begin_src zre +#+begin_src zrl while («boolean expression») { «body» } -#+end_src zre +#+end_src zrl loops until the expression is false -#+begin_src zre +#+begin_src zrl do («variable» = initial_value, end_value, increment) { «body» } -#+end_src zre +#+end_src zrl loops from initial value to end value by increment value (like a for loop in other languages) @@ -374,7 +363,7 @@ loops from initial value to end value by increment value (like a for loop in oth :PROPERTIES: :CUSTOM_ID: branching :END: -#+begin_src zre +#+begin_src zrl if («boolean expression») { } else if («boolean expression») { @@ -382,16 +371,16 @@ if («boolean expression») { } else { } -#+end_src zre +#+end_src zrl -#+begin_src zre +#+begin_src zrl switch (value) { case A: case B: case C: default: } -#+end_src zre +#+end_src zrl ** Error handling :PROPERTIES: @@ -400,7 +389,7 @@ switch (value) { Error handling is much like in C/C++ where a try catch can be used. -#+begin_src zre +#+begin_src zrl let rr = nil; let var = rr ?? 0; ! value is 0 try { @@ -409,7 +398,7 @@ try { } catch (e) { print("Caught error ${e}"); } -#+end_src zre +#+end_src zrl ** Localization :PROPERTIES: @@ -417,9 +406,9 @@ try { :END: will look up the text of «token» in the linked localization.json file -#+begin_src zre +#+begin_src zrl #«token» -#+end_src zre +#+end_src zrl #+begin_src json { @@ -440,9 +429,9 @@ will look up the text of «token» in the linked localization.json file In most languages the include or use statements get libraries which link to other files and so on. 
-#+begin_src zre -use "./some_local_file.zre" -#+end_src zre +#+begin_src zrl +use "./some_local_file.zrl" +#+end_src zrl ** Testing :PROPERTIES: @@ -452,9 +441,9 @@ use "./some_local_file.zre" :PROPERTIES: :CUSTOM_ID: assertion :END: -#+begin_src zre +#+begin_src zrl assert(«expression», «expected output») ! returns «error or none» -#+end_src zre +#+end_src zrl ** Measurements :PROPERTIES: diff --git a/docs/project-syntax-example/client.zre b/docs/project-syntax-example/client.zrl similarity index 100% rename from docs/project-syntax-example/client.zre rename to docs/project-syntax-example/client.zrl diff --git a/docs/project-syntax-example/common.zre b/docs/project-syntax-example/common.zrl similarity index 92% rename from docs/project-syntax-example/common.zre rename to docs/project-syntax-example/common.zrl index 1f69339..92a7a9e 100644 --- a/docs/project-syntax-example/common.zre +++ b/docs/project-syntax-example/common.zrl @@ -29,7 +29,7 @@ type Player { Camera([this.pos.x + 10.0, this.pos.y + 10.0, this.pos.z], this.pos); } - login(str password) Player[] { + login(str password) Player[] { ! looks like a method but really it just has an implied "Player this" as the first argument this.server.attach(this.username, password); this.players = server.open("players"); return players.read(); diff --git a/docs/project-syntax-example/server.zre b/docs/project-syntax-example/server.zrl similarity index 100% rename from docs/project-syntax-example/server.zre rename to docs/project-syntax-example/server.zrl diff --git a/src/Makefile b/src/Makefile index 954b848..c55e539 100644 --- a/src/Makefile +++ b/src/Makefile @@ -12,6 +12,21 @@ CFLAGS_WASM = -g -std=c89 -Wall -Wextra -Werror -Wno-unused-parameter -I. 
LDFLAGS_WASM = -s WASM=1 -g -s USE_SDL=2 LDLIBS_WASM = +TOOLS_DIR := tools +GENERATOR := $(TOOLS_DIR)/gen_keywords +GENERATOR_SRC := $(GENERATOR).c +KEYWORDS_H := keywords.h + +# Rule to generate keywords.h +$(KEYWORDS_H): $(GENERATOR) $(GENERATOR_SRC) + @echo "Generating keywords.h..." + @$(GENERATOR) > $(KEYWORDS_H) + +# Rule to build the generator +$(GENERATOR): $(GENERATOR_SRC) + @echo "Compiling keyword generator..." + @$(CC) -o $@ $< + # Source and build configuration # ---------------------------- COMMON_SRC = $(wildcard *.c) @@ -30,7 +45,7 @@ OBJ_NATIVE = $(addprefix $(OBJ_DIR_NATIVE)/,$(notdir $(COMMON_SRC:.c=.o))) OBJ_WASM = $(addprefix $(OBJ_DIR_WASM)/,$(notdir $(COMMON_SRC:.c=.o))) # Phony targets -.PHONY: all clean install wasm native emscripten linux macos +.PHONY: all clean clean-generated install wasm native emscripten linux macos # Default target builds the native version all: native @@ -65,9 +80,14 @@ $(OBJ_DIR_WASM)/%.o: %.c # Clean build artifacts # --------------------- -clean: +clean: clean-generated rm -rf $(OBJ_DIR_NATIVE) $(OBJ_DIR_WASM) $(EXEC_NATIVE) $(EXEC_WASM) +# Clean rule for deleting generated keyword binary and header +clean-generated: + @echo "Removing generated files..."
+ @rm -f $(KEYWORDS_H) $(GENERATOR) + # Install target (example) # ------------------------ install: native diff --git a/src/common.h b/src/common.h index b644b5f..eb5de65 100644 --- a/src/common.h +++ b/src/common.h @@ -1,5 +1,5 @@ -#ifndef ZRE_COMMON_H -#define ZRE_COMMON_H +#ifndef ZRL_COMMON_H +#define ZRL_COMMON_H #include #include diff --git a/src/debug.h b/src/debug.h index 01ec1c8..b20e2f2 100644 --- a/src/debug.h +++ b/src/debug.h @@ -1,5 +1,5 @@ -#ifndef ZRE_DEBUG_H -#define ZRE_DEBUG_H +#ifndef ZRL_DEBUG_H +#define ZRL_DEBUG_H #include "vm.h" #include "opcodes.h" diff --git a/src/keywords.h b/src/keywords.h new file mode 100644 index 0000000..688dcb9 --- /dev/null +++ b/src/keywords.h @@ -0,0 +1,38 @@ +#ifndef KEYWORDS_H +#define KEYWORDS_H + +#include "lexer.h" + +static TokenType check_keyword(int start, int length, const char *rest, TokenType type) { /* returns type when the current lexeme is exactly start + length chars long and its tail equals rest; otherwise TOKEN_IDENTIFIER */ + if ((lexer.current - lexer.start) == start + length && + memcmp(lexer.start + start, rest, length) == 0) return type; + return TOKEN_IDENTIFIER; +} + +static TokenType identifier_type(void) { /* classifies the lexeme [lexer.start, lexer.current) as a keyword token or TOKEN_IDENTIFIER, dispatching on its first character */ + switch (lexer.start[0]) { + case 'c': + return check_keyword(1, 4, "onst", TOKEN_KEYWORD_CONST); + case 'e': + return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); + case 'f': /* "fn" or "for": only return early on a match so the second candidate is still tried */ + if (check_keyword(1, 1, "n", TOKEN_KEYWORD_FN) == TOKEN_KEYWORD_FN) return TOKEN_KEYWORD_FN; + return check_keyword(1, 2, "or", TOKEN_KEYWORD_FOR); + case 'i': /* "if" or "is": same two-candidate handling as 'f' */ + if (check_keyword(1, 1, "f", TOKEN_KEYWORD_IF) == TOKEN_KEYWORD_IF) return TOKEN_KEYWORD_IF; + return check_keyword(1, 1, "s", TOKEN_OPERATOR_IS); + case 'l': + return check_keyword(1, 2, "et", TOKEN_KEYWORD_LET); + case 'r': + return check_keyword(1, 5, "eturn", TOKEN_KEYWORD_RETURN); + case 't': + return check_keyword(1, 3, "ype", TOKEN_KEYWORD_TYPE); + case 'u': + return check_keyword(1, 2, "se", TOKEN_KEYWORD_USE); + case 'w': + return check_keyword(1, 4, "hile", TOKEN_KEYWORD_WHILE); + } + return TOKEN_IDENTIFIER; +} + +#endif /* KEYWORDS_H */ diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..d858912 --- /dev/null +++ b/src/lexer.c
@@ -0,0 +1,184 @@ +#include "keywords.h" + +void init_lexer(const char *source) { + lexer.start = source; + lexer.current = source; + lexer.line = 1; +} + +int is_at_end() { + return *lexer.current == '\0'; +} + +char advance() { + return *lexer.current++; +} + +char peek() { + return *lexer.current; +} + +char peek_next() { + if (is_at_end()) return '\0'; + return lexer.current[1]; +} + +int match(char expected) { + if (*lexer.current != expected) return 0; + lexer.current++; + return 1; +} + +void skip_whitespace() { + for (;;) { + char c = peek(); + switch (c) { + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + lexer.line++; + advance(); + break; + case '!': + if (peek_next() == '!') { + while (peek() != '\n' && !is_at_end()) advance(); + } else { + while (peek() != '\n' && !is_at_end()) advance(); + } + break; + default: + return; + } + } +} + +Token make_token(TokenType type) { + Token token; + token.type = type; + token.start = lexer.start; + token.length = (int)(lexer.current - lexer.start); + token.line = lexer.line; + return token; +} + +Token error_token(const char *message) { + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (int)strlen(message); + token.line = lexer.line; + return token; +} + +int is_alpha(char c) { /* nonzero when c is a letter or '_' */ + return isalpha((unsigned char)c) || c == '_'; /* cast: <ctype.h> functions are undefined for negative char values (e.g. UTF-8 bytes) */ +} + +int is_digit(char c) { /* nonzero when c is a decimal digit */ + return isdigit((unsigned char)c); /* cast for the same reason as in is_alpha */ +} + +Token number() { + while (is_digit(peek())) advance(); + + if (peek() == '.'
&& is_digit(peek_next())) { + advance(); + while (is_digit(peek())) advance(); + return make_token(TOKEN_FLOAT_LITERAL); + } + + return make_token(TOKEN_INT_LITERAL); +} + +Token string() { + while (peek() != '"' && !is_at_end()) { + if (peek() == '\n') lexer.line++; + advance(); + } + + if (is_at_end()) return error_token("Unterminated string."); + + advance(); // Consume closing quote + return make_token(TOKEN_STRING_LITERAL); +} + +Token next_token() { + skip_whitespace(); + lexer.start = lexer.current; + + if (is_at_end()) return make_token(TOKEN_EOF); + + char c = advance(); + + if (is_alpha(c)) return identifier(); + if (is_digit(c)) return number(); + + switch (c) { + case '(': return make_token(TOKEN_LPAREN); + case ')': return make_token(TOKEN_RPAREN); + case '{': return make_token(TOKEN_LBRACE); + case '}': return make_token(TOKEN_RBRACE); + case '[': return make_token(TOKEN_LBRACKET); + case ']': return make_token(TOKEN_RBRACKET); + case ',': return make_token(TOKEN_COMMA); + case '.': return make_token(TOKEN_DOT); + case ':': return make_token(TOKEN_COLON); + case ';': return make_token(TOKEN_SEMICOLON); + case '+': return make_token(TOKEN_PLUS); + case '-': return make_token(TOKEN_MINUS); + case '*': return make_token(TOKEN_STAR); + case '/': return make_token(TOKEN_SLASH); + case '=': return make_token(TOKEN_EQ); + case '"': return string(); + case '!': + if (match('!')) return make_token(TOKEN_DOUBLE_BANG); + return make_token(TOKEN_BANG); + } + + return error_token("Unexpected character."); +} + +const char *token_type_name(TokenType type) { + switch (type) { + case TOKEN_IDENTIFIER: return "identifier"; + case TOKEN_INT_LITERAL: return "int"; + case TOKEN_FLOAT_LITERAL: return "float"; + case TOKEN_STRING_LITERAL: return "string"; + case TOKEN_KEYWORD_TYPE: return "type"; + case TOKEN_KEYWORD_FN: return "fn"; + case TOKEN_KEYWORD_LET: return "let"; + case TOKEN_KEYWORD_CONST: return "const"; + case TOKEN_KEYWORD_IF: return "if"; + case 
TOKEN_KEYWORD_ELSE: return "else"; + case TOKEN_KEYWORD_WHILE: return "while"; + case TOKEN_KEYWORD_FOR: return "for"; + case TOKEN_KEYWORD_RETURN: return "return"; + case TOKEN_KEYWORD_USE: return "use"; + case TOKEN_OPERATOR_IS: return "is"; + case TOKEN_BANG: return "!"; + case TOKEN_DOUBLE_BANG: return "!!"; + case TOKEN_EQ: return "="; + case TOKEN_DOT: return "."; + case TOKEN_COMMA: return ","; + case TOKEN_COLON: return ":"; + case TOKEN_SEMICOLON: return ";"; + case TOKEN_PLUS: return "+"; + case TOKEN_MINUS: return "-"; + case TOKEN_STAR: return "*"; + case TOKEN_SLASH: return "/"; + case TOKEN_LPAREN: return "("; + case TOKEN_RPAREN: return ")"; + case TOKEN_LBRACE: return "{"; + case TOKEN_RBRACE: return "}"; + case TOKEN_LBRACKET: return "["; + case TOKEN_RBRACKET: return "]"; + case TOKEN_EOF: return "eof"; + case TOKEN_ERROR: return "error"; + default: return "unknown"; + } +} + + diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..5be3edd --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,68 @@ +#ifndef ZRL_LEXER_H /* must not collide with the ZRL_VM_H guard used by vm.h */ +#define ZRL_LEXER_H + +#include +#include +#include + +typedef enum { + TOKEN_EOF, + TOKEN_IDENTIFIER, + TOKEN_INT_LITERAL, + TOKEN_FLOAT_LITERAL, + TOKEN_STRING_LITERAL, + TOKEN_KEYWORD_TYPE, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_LET, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_OPERATOR_IS, + TOKEN_DOUBLE_BANG, + TOKEN_BANG, + TOKEN_EQ, + TOKEN_DOT, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_ERROR +} TokenType; + +typedef struct { + TokenType type; + const char *start; + int length; + int line; +} Token; + +typedef struct { + const char *keyword; + TokenType token; +} Keyword; + +typedef struct { + const char *start; + const char *current; + int
line; +} Lexer; + +Lexer lexer; + +void init_lexer(const char *source); +Token next_token(); + +#endif diff --git a/src/opcodes.h b/src/opcodes.h index 770d129..223699c 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -1,5 +1,5 @@ -#ifndef ZRE_OPCODES_H -#define ZRE_OPCODES_H +#ifndef ZRL_OPCODES_H +#define ZRL_OPCODES_H #include "common.h" @@ -15,11 +15,6 @@ typedef struct slice_s { uint32_t end; } Slice; -typedef struct cell_s { - uint32_t car; - uint32_t cdr; -} Cell; - #define MAX_REGS 32 typedef struct frame_s { Value registers[MAX_REGS]; /* R0-R31 */ diff --git a/src/test.c b/src/test.c index d71a35c..ac46b69 100644 --- a/src/test.c +++ b/src/test.c @@ -2,9 +2,9 @@ #include "vm.h" bool test_hello_world_compile(VM *vm) { - str(vm, "nuqneH 'u'?", 0); + uint32_t hello = str_alloc(vm, "nuqneH 'u'?", 0); vm->code[vm->cp++].u = OP(OP_LOADU, 1, 0, 0); - vm->code[vm->cp++].u = 0; + vm->code[vm->cp++].u = hello; vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print("nuqneH 'u'?"); */ vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0); /* explicit halt */ return true; @@ -40,9 +40,9 @@ bool test_loop_compile(VM *vm) { vm->code[vm->cp++].u = OP(OP_ADD_INT, 1, 1, 3); /* (implied by loop) i = i + (-1) */ vm->code[vm->cp++].u = OP(OP_JGE_INT, 4, 1, 2); /* } */ vm->code[vm->cp++].u = OP(OP_REAL_TO_UINT, 1, 0, 0); /* let b = a as nat; */ - str(vm, "Enter a string:", 0); + uint32_t prompt_addr = str_alloc(vm, "Enter a string:", 0); vm->code[vm->cp++].u = OP(OP_LOADU, 5, 0, 0); - vm->code[vm->cp++].u = 0; + vm->code[vm->cp++].u = prompt_addr; vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 5, 0); /* print("Enter a string: "); */ vm->code[vm->cp++].u = OP(OP_READ_STRING, 2, 0, 0); /* let user_string = gets(); */ vm->code[vm->cp++].u = OP(OP_UINT_TO_STRING, 3, 1, 0); diff --git a/src/test.h b/src/test.h index bee509d..f51ee21 100644 --- a/src/test.h +++ b/src/test.h @@ -1,5 +1,5 @@ -#ifndef ZRE_TEST_H -#define ZRE_TEST_H +#ifndef ZRL_TEST_H +#define ZRL_TEST_H #include 
"opcodes.h" diff --git a/src/tools/gen_keywords b/src/tools/gen_keywords new file mode 100755 index 0000000000000000000000000000000000000000..97880c72010df83bee20a4d9a261ae4b48c5273e GIT binary patch literal 15848 zcmeHOeQaCR6~A`+kp>dGr5&YR+qd+C1QI7{o35n_I8B{CL)wJY85GvNy7qJPMq)?y z3vGdlPDvRiD=PaEXq(3VK!Q!1sKmO$G?Y+Q3ffrE{^>sx(SR*oS_rJ00@Lc9^X@s% z&P~cT?T=}iuk_wI=XXBtdFNg~-{*JzaHy@->+uL~CE`|r(muaLf~4r)L=2Fi*eojG zyi%+X^FW^CU{W8H1hlgGOQzFO!VyNl7f7$t&ypQe&JZd3UA1usOzAq%Z!S8KtkR!4 zN+FoC-tA9AXK0#pcIma04AVkCtj%bxv>$DnPBpsjdZO&-1vB&;Bfl~7b15c-dZK)p zm}U7Z^;r8iZLI5w*Y;S+ z{zJ8gHgBrkv@wuL1=b5pYXKbCpL^XNzk>lJs#koCKT;^pXg)v|%|PLP0GKulj{VGL z|GBH+4*{;hp|;Hc%ob+|a7{G^xf+hjS#X>$3&jm$GANZie?-Pk_n1i_@l?Mp3#J6F%y`oE+3%=j*^>-|kit}_oVH}F(w5p(_jI$h6@KR0M zbtrf_;rkTaPk5h#FDCq`f-fWdgo3Xi{AmTRB>Z^=uOfU{!D|Q~Rqzdjk16=)2_MGd zL&MJ$&UtR<3UDujKt~I3`f72s=L_)p1@>bFIFA9UCxg-mW59stTTrqX1Md~wN`sO^ zobQ2%_=9*B0&(}g;2_0Qu>!>k6f01yK(PYF3jDuS;LpC5pXdV@OZB0-BXvUP$A)ch zZd4z5we*Zrid^Fx0O!{J3eLV&LEupCM%{(69JIBspo~kR3!{$w63Vy~x-bkf#&5V3 z&c5{}J^Ooo;N8iNNT^}B;dT9ZQw6MPa!a;?`Q=2ww`ve$3c&Zk^OJsjZYPp0Q}z-_ zZ6z3CY725>zEwxj?hH||uXgMkE0I)R(X*5K$oseHBU2^1=dAwL6?-uRxD=29N^@fg z->U6-f6VV_(nQ+2#6B(>%a)OHKnc9)pWHJNH!pJbTaN^F5Qu z^H?8@ci{loc0mj%Y`va6a$X+_orgr zoT2YCDz-|iXex;*eFkAA%IRR=tB zKAas1XU}EddI<_F#_C1q2}Fifg4Z|M__56IXeHJeRnhXF+-5^)t>rV`%Q(P&1sFFG4&g zwY_JY`-*d)cJIvKTx`V(6f01yK(PYF3KT0)tU$2>#R~kJ6~OmO*zCi7@Ukl@dQ4ly zQpt=htb{OoGNw4#ZNWRNL@F)Prrn=TihcmCj96_Yl5sPk-4WVzS9|C7-G*K!Rs&!q z%|Z-7D;ewQkDFR$k9o*U2f8cE%HTa*)V5+;Bz2#ej6mO7tlNy;XWVBVKA1|!tKe0f z=DeJ%(U65VcL(h58qLWJOrvS-rnH%{YYGD36;x{vmdOHWnw8M1s$`F`{&d=eFtrc} zGjJkTYfVkE5A9~^KwGzt+=b?}Ub8pW+ec9xFR-P&WdAZVCsuv6CM%?2DqG7S2Fa23 zJ3_mR@b=KINVqi|>IB^pr~x(zHoXp395S^MmYFs%Ox67pV-5lt`(LV(6FFwrWmH~D3 zU8t1}-LO-9R-jmcVg-s7C|00Yfno*zH!HyV{CHm01T<1xWA1D3CB*VMN|7zp?<7yI{Xdbi@{Q^gBPs~s&Zw`f)H^i!=hGzJ<14chwp^^Nr#*Efn!ZFqfi#D}@rjAp~Jj2^K>sCAd0 zFTddZ>l_cCF+JjX!SkB&ZNMyIp@15|YXvg14+1mW{^#cb*Thnx1=&*4$AS0(=Y6KE 
z{T48bycL4)3ygmUaTW^bPk_fbYW$x&_T^#^?K5_sT><-zgTqN_?|`f+7>XakvCz9z zsPhm9WTEi6PCTzFV8Hx%H)}@ke0u@dMR3>KNAuiy766&ezs0K9S*{40Pj^1^JU{9Tg+W|DtB%LHP= z*uH01^UiP!Y(9s{ZiIBBwX=C=$k4ZU3S&oG`?lsbqrJ6tcPL^+nzyxuU|YJnB|gYo z-T%qp8%;)U63*_^u3ZAM`iJ@3X>oThK1 zH{zL;(H%|3af^Mp9iqjpqyfK9#-aZtoQ(aM4E5n`@uwdbGqkB+2@YHKjnJ;`#;0x2 zpZ@27=>gzUZ6N}g!@YKNKTta@X*YAplx+qMB>Mw>=~SPYwhxPd3+?Z>;O`t(Tm<0f zPBYyn9K-HtrdtH!hm#OgQadd*_nYaAl}b(%4A7;`o+vsH*VkiXL`W=Pn}^`+3`iiI za)u{hb{mOwwAVDcVohd&Du)Ui9Gvv?p7aCa-T}ktqP`7?8 z{}>p|pZ$6L!xYy)g-z`r*Aa#O9jm1^)9)5K1JC2f@GpTW{QRr`Th!Q)sM`N;0k43A zFO&aY#s3df`(v5u+u#eU6xW}hpU&3;2#xKel$RNw00quH?w{9{?*BxwIM8^0vLDZb z%b-IO_UHdI4cDQU5LJ@6XZC0MF=z_?d7U~={`Ib>WY~|X7n}?IwV<@q$Z=Hh*A%%F zoG}*X&+Dj3^7pIuid;_plew#9fA#Z=xne7%Ja?r=p8XAyo+c$e591i{b@TlfpHDb< f=^@YOHc0wa?ue9%=seEP=HE`wEuT>w6jA&GW9CM- literal 0 HcmV?d00001 diff --git a/src/tools/gen_keywords.c b/src/tools/gen_keywords.c new file mode 100644 index 0000000..14f26fa --- /dev/null +++ b/src/tools/gen_keywords.c @@ -0,0 +1,90 @@ +#include +#include + +typedef enum { + TOKEN_IDENTIFIER, + TOKEN_KEYWORD_TYPE, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_LET, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_OPERATOR_IS +} TokenType; + +typedef struct { + const char *keyword; + TokenType token; +} Keyword; + +Keyword keywords[] = { + {"type", TOKEN_KEYWORD_TYPE}, + {"fn", TOKEN_KEYWORD_FN}, + {"let", TOKEN_KEYWORD_LET}, + {"const", TOKEN_KEYWORD_CONST}, + {"if", TOKEN_KEYWORD_IF}, + {"else", TOKEN_KEYWORD_ELSE}, + {"while", TOKEN_KEYWORD_WHILE}, + {"for", TOKEN_KEYWORD_FOR}, + {"return", TOKEN_KEYWORD_RETURN}, + {"use", TOKEN_KEYWORD_USE}, + {"is", TOKEN_OPERATOR_IS}, +}; + +void emit_keyword_header(FILE *out) { + fprintf(out, "#ifndef KEYWORDS_H\n"); + fprintf(out, "#define KEYWORDS_H\n\n"); + fprintf(out, "#include \"lexer.h\"\n\n"); + + fprintf(out, "static TokenType check_keyword(int start, int length, const 
char *rest, TokenType type) {\n"); + fprintf(out, " if ((lexer.current - lexer.start) == start + length &&\n"); + fprintf(out, " memcmp(lexer.start + start, rest, length) == 0) return type;\n"); + fprintf(out, " return TOKEN_IDENTIFIER;\n"); + fprintf(out, "}\n\n"); + + fprintf(out, "static TokenType identifier_type(void) {\n"); + fprintf(out, " switch (lexer.start[0]) {\n"); + + for (char ch = 'a'; ch <= 'z'; ++ch) { + int printed = 0; + for (int i = 0; i < sizeof(keywords) / sizeof(Keyword); ++i) { + const char *kw = keywords[i].keyword; + if (kw[0] == ch) { + if (!printed) { + fprintf(out, " case '%c':\n", ch); + printed = 1; + } + + int len = (int)strlen(kw); + fprintf(out, " return check_keyword(%d, %d, \"%s\", %s);\n", + 1, len - 1, kw + 1, + (keywords[i].token == TOKEN_IDENTIFIER ? "TOKEN_IDENTIFIER" : + keywords[i].token == TOKEN_OPERATOR_IS ? "TOKEN_OPERATOR_IS" : + keywords[i].token == TOKEN_KEYWORD_RETURN ? "TOKEN_KEYWORD_RETURN" : + keywords[i].token == TOKEN_KEYWORD_WHILE ? "TOKEN_KEYWORD_WHILE" : + keywords[i].token == TOKEN_KEYWORD_CONST ? "TOKEN_KEYWORD_CONST" : + keywords[i].token == TOKEN_KEYWORD_TYPE ? "TOKEN_KEYWORD_TYPE" : + keywords[i].token == TOKEN_KEYWORD_FN ? "TOKEN_KEYWORD_FN" : + keywords[i].token == TOKEN_KEYWORD_IF ? "TOKEN_KEYWORD_IF" : + keywords[i].token == TOKEN_KEYWORD_FOR ? "TOKEN_KEYWORD_FOR" : + keywords[i].token == TOKEN_KEYWORD_LET ? "TOKEN_KEYWORD_LET" : + keywords[i].token == TOKEN_KEYWORD_ELSE ? "TOKEN_KEYWORD_ELSE" : + keywords[i].token == TOKEN_KEYWORD_USE ? 
"TOKEN_KEYWORD_USE" : "TOKEN_IDENTIFIER")); + } + } + } + + fprintf(out, " }\n return TOKEN_IDENTIFIER;\n"); + fprintf(out, "}\n\n"); + + fprintf(out, "#endif // KEYWORDS_H\n"); +} + +int main(void) { + emit_keyword_header(stdout); + return 0; +} diff --git a/src/vm.c b/src/vm.c index 1f209e5..aea8fa0 100644 --- a/src/vm.c +++ b/src/vm.c @@ -26,8 +26,9 @@ /** * Embeds a string into the VM */ -void str(VM *vm, const char *str, uint32_t length) { +uint32_t str_alloc(VM *vm, const char *str, uint32_t length) { if (!length) length = strlen(str); + uint32_t str_addr = vm->mp; vm->memory[vm->mp++].u = length; uint32_t i, j = 0; for (i = 0; i < length; i++) { @@ -38,6 +39,7 @@ void str(VM *vm, const char *str, uint32_t length) { } } vm->frames[vm->fp].allocated.end += length / 4; + return str_addr; } /** @@ -197,21 +199,21 @@ bool step_vm(VM *vm) { int32_t a = (int32_t)vm->frames[vm->fp].registers[src1].i; /* get value */ char buffer[32]; int len = sprintf(buffer, "%d", a); - str(vm, buffer, len); /* copy buffer to dest */ + str_alloc(vm, buffer, len); /* copy buffer to dest */ return true; } case OP_UINT_TO_STRING: { uint32_t a = (uint32_t)vm->frames[vm->fp].registers[src1].u; /* get value */ char buffer[32]; int len = sprintf(buffer, "%d", a); - str(vm, buffer, len); /* copy buffer to dest */ + str_alloc(vm, buffer, len); /* copy buffer to dest */ return true; } case OP_REAL_TO_STRING: { float a = (float)vm->frames[vm->fp].registers[src1].f; /* get value */ char buffer[32]; int len = sprintf(buffer, "%f", a); - str(vm, buffer, len); /* copy buffer to dest */ + str_alloc(vm, buffer, len); /* copy buffer to dest */ return true; } case OP_READ_STRING: { diff --git a/src/vm.h b/src/vm.h index ee540bc..d48fff2 100644 --- a/src/vm.h +++ b/src/vm.h @@ -1,10 +1,10 @@ -#ifndef ZRE_VM_H -#define ZRE_VM_H +#ifndef ZRL_VM_H +#define ZRL_VM_H #include "opcodes.h" VM* init_vm(); bool step_vm(VM *vm); -void str(VM *vm, const char *str, uint32_t length); +uint32_t str_alloc(VM *vm, 
const char *str, uint32_t length); #endif