From 88dfbb098d84949a7b208b31b0271b8c088bf0a8 Mon Sep 17 00:00:00 2001
From: zongor <admin@alfrescocavern.com>
Date: Sun, 27 Jul 2025 13:11:44 -0400
Subject: [PATCH] start working on lexer

---
 README.org                                    |  12 +-
 docs/SPECIFICATION.org                        | 105 +++++-----
 .../{client.zre => client.zrl}                |   0
 .../{common.zre => common.zrl}                |   2 +-
 .../{server.zre => server.zrl}                |   0
 src/Makefile                                  |  24 ++-
 src/common.h                                  |   4 +-
 src/debug.h                                   |   4 +-
 src/keywords.h                                |  38 ++++
 src/lexer.c                                   | 184 ++++++++++++++++++
 src/lexer.h                                   |  68 +++++++
 src/opcodes.h                                 |   9 +-
 src/test.c                                    |   8 +-
 src/test.h                                    |   4 +-
 src/tools/gen_keywords                        | Bin 0 -> 15848 bytes
 src/tools/gen_keywords.c                      |  90 +++++++++
 src/vm.c                                      |  10 +-
 src/vm.h                                      |   6 +-
 18 files changed, 477 insertions(+), 91 deletions(-)
 rename docs/project-syntax-example/{client.zre => client.zrl} (100%)
 rename docs/project-syntax-example/{common.zre => common.zrl} (92%)
 rename docs/project-syntax-example/{server.zre => server.zrl} (100%)
 create mode 100644 src/keywords.h
 create mode 100644 src/lexer.c
 create mode 100644 src/lexer.h
 create mode 100755 src/tools/gen_keywords
 create mode 100644 src/tools/gen_keywords.c

diff --git a/README.org b/README.org
index 5edf8ec..e4fc833 100644
--- a/README.org
+++ b/README.org
@@ -6,23 +6,23 @@
 :END:
 
 * Overview
-  - ZRE is a lightweight, portable programming language for permacomputing, game preservation, and indie game development.
+  - Reality Engine is a lightweight, portable programming language for permacomputing, game preservation, and indie game development inspired by [[https://wiki.xxiivv.com/site/uxn.html][uxn]], [[http://duskos.org/][Dusk OS]], and [[https://doc.cat-v.org/inferno/4th_edition/dis_VM_specification][Dis VM]].
   - Built in **C89**  for cross-platform compatibility (desktop, microcontrollers, and web via Emscripten).
   - Designed for simplicity, performance, and creative exploration.
-  - It is inspired by [[https://wiki.xxiivv.com/site/uxn.html][uxn]], [[http://duskos.org/][Dusk OS]], [[https://doc.cat-v.org/inferno/4th_edition/dis_VM_specification][Dis VM]], [[https://www.craftinginterpreters.com/the-lox-language.html][Lox]], [[https://lua.org][Lua]], [[https://en.wikipedia.org/wiki/Lisp_(programming_language)][Lisp]], [[https://en.wikipedia.org/wiki/C_(programming_language)][C]], and [[https://ziglang.org/][Zig]].
+  - Reality Engine Language (ZRL) is a C-like, imperative, data-oriented language inspired by [[https://www.craftinginterpreters.com/the-lox-language.html][Lox]], [[https://lua.org][Lua]], [[https://en.wikipedia.org/wiki/Lisp_(programming_language)][Lisp]], [[https://en.wikipedia.org/wiki/C_(programming_language)][C]], [[https://fortran-lang.org/][Fortran]], and [[https://ziglang.org/][Zig]].
 
 * Key Features
 
 ** Core Philosophy
-  - Simple, portable, lightweight
-  - Targets permacomputing, game world preservation, rapid prototyping, and indie games.
+  - Simple, portable, lightweight, permacomputing oriented
+  - Targets retro hardware, game world preservation, rapid prototyping, and indie games.
   - No macros or object hierarchies—prioritizes clarity and explicit behavior.
   - C/Zig like syntax. Lisp/Lua like development workflow.
 
 ** Engine & Tooling
   - Integrated 2D/3D rendering system:
     - Immediate-mode canvas-based 3D rendering with low-poly 5th-6th gen console aesthetics.
-    - 2D overlays styled after ImGui.
+    - 2D canvas styled after ImGui and Raylib.
     - [[https://www.libsdl.org/][SDL2]] backend for input, audio, and cross-platform compatibility.
   - Tree-walk interpreter:
     - Compile bytecode to files for performance.
@@ -85,7 +85,7 @@
 
 * Motivation
 
-ZRE bridges retro-inspired creativity with modern portability for:
+ZRL bridges retro-inspired creativity with modern portability for:
 - Game jams (rapid prototyping + 3D engine).
 - Indie games (5th/6th-gen aesthetics).
 - Permacomputing (low-resource, sustainable code).
diff --git a/docs/SPECIFICATION.org b/docs/SPECIFICATION.org
index cf3524f..61115a5 100644
--- a/docs/SPECIFICATION.org
+++ b/docs/SPECIFICATION.org
@@ -1,18 +1,18 @@
-* /ZRE/ (Zongors Reality Engine) Design parameters
+* /ZRL/ (Reality Engine Language) Design parameters
 :PROPERTIES:
-:CUSTOM_ID: zre-zongors-transpiler-language-design-parameters
+:CUSTOM_ID: zrl-zongors-reality-engine-language-design-parameters
 :END:
-** What is /zre/?
+** What is /zrl/?
 :PROPERTIES:
-:CUSTOM_ID: what-is-zre
+:CUSTOM_ID: what-is-zrl
 :END:
-/zre/ is an domain specific language for 3d games with C/Lua style syntax.
+/zrl/ is a domain-specific language for 3D games with C/Lua style syntax.
 The compiler is written in C which should make it easy to port to other
 systems. 
 
-* /ZRE/ Grammar and Specification
+* /ZRL/ Grammar and Specification
 :PROPERTIES:
-:CUSTOM_ID: zre-grammar-and-specification
+:CUSTOM_ID: zrl-grammar-and-specification
 :END:
 ** Types
 :PROPERTIES:
@@ -26,7 +26,7 @@ systems.
 - Note that these look like classes but act like structs
   the methods actually have a implied struct as their first argument
 
-#+begin_src zre
+#+begin_src zrl
 type «token» {
   init() {
     // values
@@ -41,7 +41,7 @@ type Vec3 {
      this.y = z;
   }
 }
-#+end_src zre
+#+end_src zrl
 
 * Basic Types
 :PROPERTIES:
@@ -99,20 +99,9 @@ string interpolation
 
 Array of a specific type
 
-#+begin_src zre
-let «variable» = [val1, val2, ...];
-#+end_src zre
-
-*** Map
-:PROPERTIES:
-:CUSTOM_ID: map
-:END:
-
-Hashmap
-
-#+begin_src zre
-let «variable» = {key1: val1, key2: val2, ...};
-#+end_src zre
+#+begin_src zrl
+«type»[«length»] «variable» = [val1, val2, ...];
+#+end_src zrl
 
 *** Tunnel
 :PROPERTIES:
@@ -202,19 +191,19 @@ The following is a list of global operators and their effect:
 
 let operator
 
-#+begin_src zre
+#+begin_src zrl
 let «token» = true;
-#+end_src zre
+#+end_src zrl
 
 =is=
 
 checks if a object is of that type
 
-#+begin_src zre
+#+begin_src zrl
 if («token» is real) {
   print("hello yes self is a real?");
 }
-#+end_src zre
+#+end_src zrl
 
 also used for letting constants
 
@@ -222,26 +211,26 @@ also used for letting constants
 
 coerces a type as another type if possible
 
-#+begin_src zre
+#+begin_src zrl
 let «token» = 0; ! default is int
 some_functon(«token» as real); ! needs a real
-#+end_src zre
+#+end_src zrl
 
 =in=
 
 checks if a object's type, or a type implements a contract
 
-#+begin_src zre
+#+begin_src zrl
 if («token» in Tunnel, Drawable) {
   print("im tunnel-able and draw-able");
 }
-#+end_src zre
+#+end_src zrl
 
 also used inside of the for loops
 
-#+begin_src zre
+#+begin_src zrl
 for («token» in «collection») { «body» }
-#+end_src zre
+#+end_src zrl
 
 ** Object
 :PROPERTIES:
@@ -249,9 +238,9 @@ for («token» in «collection») { «body» }
 :END:
 An object is an invoked type.
 
-#+begin_src zre
+#+begin_src zrl
 let «variable» = «type»(«fields», …);
-#+end_src zre
+#+end_src zrl
 
 ** Tunnel
 :PROPERTIES:
@@ -305,7 +294,7 @@ connected tunnel
 =success? : tunnel_object.walk(path_or_endpoint)= -> moves around the
 filesystem or through the graph
 
-#+begin_src zre
+#+begin_src zrl
 ! client 
 let endpoint = Client("tcp://path/to/source");
 let tunnel = endpoint.attach(user, auth);
@@ -320,7 +309,7 @@ s.bind("/some/resource", fn () str {
    return "hello world";
 })
 server.start();
-#+end_src zre
+#+end_src zrl
 
 ** Functions
 :PROPERTIES:
@@ -331,11 +320,11 @@ always have a "default type" for all constant values or a developer can
 use the =as= keyword we do not have to define all values like in C,
 while keeping the same type safety as a more strongly typed language.
 
-#+begin_src zre
+#+begin_src zrl
 fn «token» («parameter» «type», ...) «return_type» {
   «body»
 }
-#+end_src zre
+#+end_src zrl
 
 - Built in transtypes
   - sort
@@ -352,21 +341,21 @@ fn «token» («parameter» «type», ...) «return_type» {
 :PROPERTIES:
 :CUSTOM_ID: loops
 :END:
-#+begin_src zre
+#+begin_src zrl
 for («variable» in «collection») { «body» }
-#+end_src zre
+#+end_src zrl
 
 iterates through each object in the collection setting it to variable
 
-#+begin_src zre
+#+begin_src zrl
 while («boolean expression») { «body» }
-#+end_src zre
+#+end_src zrl
 
 loops until the expression is false
 
-#+begin_src zre
+#+begin_src zrl
 do («variable» = initial_value, end_value, increment) { «body» }
-#+end_src zre
+#+end_src zrl
 
 loops from initial value to end value by increment value (like a for loop in other languages)
 
@@ -374,7 +363,7 @@ loops from initial value to end value by increment value (like a for loop in oth
 :PROPERTIES:
 :CUSTOM_ID: branching
 :END:
-#+begin_src zre
+#+begin_src zrl
 if («boolean expression») {
 
 } else if («boolean expression») {
@@ -382,16 +371,16 @@ if («boolean expression») {
 } else {
 
 }
-#+end_src zre
+#+end_src zrl
 
-#+begin_src zre
+#+begin_src zrl
 switch (value) {
   case A:
   case B:
   case C:
   default:
 }
-#+end_src zre
+#+end_src zrl
 
 ** Error handling
 :PROPERTIES:
@@ -400,7 +389,7 @@ switch (value) {
 
 Error handling is much like in C/C++ where a try catch can be used.
 
-#+begin_src zre
+#+begin_src zrl
 let rr = nil;
 let var = rr ?? 0; ! value is 0
 try {
@@ -409,7 +398,7 @@ try {
 } catch (e) {
    print("Caught error ${e}");
 }
-#+end_src zre
+#+end_src zrl
 
 ** Localization
 :PROPERTIES:
@@ -417,9 +406,9 @@ try {
 :END:
 will look up the text of «token» in the linked localization.json file
 
-#+begin_src zre
+#+begin_src zrl
 #«token»
-#+end_src zre
+#+end_src zrl
 
 #+begin_src json
 {
@@ -440,9 +429,9 @@ will look up the text of «token» in the linked localization.json file
 In most languages the include or use statements get libraries which link
 to other files and so on. 
 
-#+begin_src zre
-use "./some_local_file.zre"
-#+end_src zre
+#+begin_src zrl
+use "./some_local_file.zrl"
+#+end_src zrl
 
 ** Testing
 :PROPERTIES:
@@ -452,9 +441,9 @@ use "./some_local_file.zre"
 :PROPERTIES:
 :CUSTOM_ID: assertion
 :END:
-#+begin_src zre
+#+begin_src zrl
 assert(«expression», «expected output») ! returns «error or none»
-#+end_src zre
+#+end_src zrl
 
 ** Measurements
 :PROPERTIES:
diff --git a/docs/project-syntax-example/client.zre b/docs/project-syntax-example/client.zrl
similarity index 100%
rename from docs/project-syntax-example/client.zre
rename to docs/project-syntax-example/client.zrl
diff --git a/docs/project-syntax-example/common.zre b/docs/project-syntax-example/common.zrl
similarity index 92%
rename from docs/project-syntax-example/common.zre
rename to docs/project-syntax-example/common.zrl
index 1f69339..92a7a9e 100644
--- a/docs/project-syntax-example/common.zre
+++ b/docs/project-syntax-example/common.zrl
@@ -29,7 +29,7 @@ type Player {
         Camera([this.pos.x + 10.0, this.pos.y + 10.0, this.pos.z], this.pos);
   }
 
-  login(str password) Player[] {
+  login(str password) Player[] { ! looks like a method but really it just has an implied "Player this" as the first argument
     this.server.attach(this.username, password);
     this.players = server.open("players");
     return players.read();
diff --git a/docs/project-syntax-example/server.zre b/docs/project-syntax-example/server.zrl
similarity index 100%
rename from docs/project-syntax-example/server.zre
rename to docs/project-syntax-example/server.zrl
diff --git a/src/Makefile b/src/Makefile
index 954b848..c55e539 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -12,6 +12,21 @@ CFLAGS_WASM = -g -std=c89 -Wall -Wextra -Werror -Wno-unused-parameter -I.
 LDFLAGS_WASM = -s WASM=1 -g -s USE_SDL=2
 LDLIBS_WASM =
 
+TOOLS_DIR := tools
+GENERATOR := $(TOOLS_DIR)/gen_keywords
+GENERATOR_SRC := $(GENERATOR).c
+KEYWORDS_H := keywords.h
+
+# Regenerate keywords.h: run the generator and capture its stdout in the header
+$(KEYWORDS_H): $(GENERATOR) $(GENERATOR_SRC)
+	@echo "Generating keywords.h..."
+	@$(GENERATOR) > $(KEYWORDS_H)
+
+# Build the generator from its single C source ($<); no CFLAGS are passed here
+$(GENERATOR): $(GENERATOR_SRC)
+	@echo "Compiling keyword generator..."
+	@$(CC) -o $@ $<
+
 # Source and build configuration
 # ----------------------------
 COMMON_SRC = $(wildcard *.c)
@@ -30,7 +45,7 @@ OBJ_NATIVE = $(addprefix $(OBJ_DIR_NATIVE)/,$(notdir $(COMMON_SRC:.c=.o)))
 OBJ_WASM = $(addprefix $(OBJ_DIR_WASM)/,$(notdir $(COMMON_SRC:.c=.o)))
 
 # Phony targets
-.PHONY: all clean install wasm native emscripten linux macos
+.PHONY: all clean clean-generated install wasm native emscripten linux macos
 
 # Default target builds the native version
 all: native
@@ -65,9 +80,14 @@ $(OBJ_DIR_WASM)/%.o: %.c
 
 # Clean build artifacts
 # ---------------------
-clean:
+clean: clean-generated
 	rm -rf $(OBJ_DIR_NATIVE) $(OBJ_DIR_WASM) $(EXEC_NATIVE) $(EXEC_WASM)
 
+# Remove the generated header and tool. NOTE(review): no visible rule depends on keywords.h, so it may not be rebuilt automatically afterwards - confirm
+clean-generated:
+	@echo "Removing generated files..."
+	@rm -f $(KEYWORDS_H) $(GENERATOR)
+
 # Install target (example)
 # ------------------------
 install: native
diff --git a/src/common.h b/src/common.h
index b644b5f..eb5de65 100644
--- a/src/common.h
+++ b/src/common.h
@@ -1,5 +1,5 @@
-#ifndef ZRE_COMMON_H
-#define ZRE_COMMON_H
+#ifndef ZRL_COMMON_H
+#define ZRL_COMMON_H
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/src/debug.h b/src/debug.h
index 01ec1c8..b20e2f2 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -1,5 +1,5 @@
-#ifndef ZRE_DEBUG_H
-#define ZRE_DEBUG_H
+#ifndef ZRL_DEBUG_H
+#define ZRL_DEBUG_H
 
 #include "vm.h"
 #include "opcodes.h"
diff --git a/src/keywords.h b/src/keywords.h
new file mode 100644
index 0000000..688dcb9
--- /dev/null
+++ b/src/keywords.h
@@ -0,0 +1,38 @@
+#ifndef KEYWORDS_H
+#define KEYWORDS_H
+
+#include "lexer.h"
+
+static TokenType check_keyword(int start, int length, const char *rest, TokenType type) {
+  /* Keyword iff the lexeme is start+length chars long and its tail equals rest. */
+  int same_size = (lexer.current - lexer.start) == start + length;
+  return (same_size && memcmp(lexer.start + start, rest, length) == 0) ? type : TOKEN_IDENTIFIER;
+}
+
+/*
+ * Classify the lexeme spanning lexer.start..lexer.current as a keyword token
+ * or TOKEN_IDENTIFIER. Dispatches on the first character; letters shared by
+ * two keywords (f: fn/for, i: if/is) also look at the second character so the
+ * second keyword is reachable. Inside a case lexer.start[0] is a letter, so
+ * lexer.start[1] is at worst the NUL that terminates the source buffer.
+ */
+static TokenType identifier_type(void) {
+  switch (lexer.start[0]) {
+    case 'c': return check_keyword(1, 4, "onst", TOKEN_KEYWORD_CONST);
+    case 'e': return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
+    case 'f':
+      if (lexer.start[1] == 'n') return check_keyword(1, 1, "n", TOKEN_KEYWORD_FN);
+      return check_keyword(1, 2, "or", TOKEN_KEYWORD_FOR);
+    case 'i':
+      if (lexer.start[1] == 'f') return check_keyword(1, 1, "f", TOKEN_KEYWORD_IF);
+      return check_keyword(1, 1, "s", TOKEN_OPERATOR_IS);
+    case 'l': return check_keyword(1, 2, "et", TOKEN_KEYWORD_LET);
+    case 'r': return check_keyword(1, 5, "eturn", TOKEN_KEYWORD_RETURN);
+    case 't': return check_keyword(1, 3, "ype", TOKEN_KEYWORD_TYPE);
+    case 'u': return check_keyword(1, 2, "se", TOKEN_KEYWORD_USE);
+    case 'w': return check_keyword(1, 4, "hile", TOKEN_KEYWORD_WHILE);
+  }
+  return TOKEN_IDENTIFIER;
+}
+
+#endif // KEYWORDS_H
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..d858912
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,184 @@
+#include "keywords.h"
+
+/* Reset the global lexer so scanning begins at `source`, on line 1. */
+void init_lexer(const char *source) {
+  lexer.line = 1;
+  lexer.start = lexer.current = source;
+}
+
+/* Nonzero once the cursor rests on the source's terminating NUL;
+ * zero while unread characters remain. */
+int is_at_end() { return lexer.current[0] == '\0'; }
+
+/* Consume one character: return the char under the cursor and step the
+ * cursor past it. No end-of-input check is made here. */
+char advance() { char taken = *lexer.current; lexer.current++; return taken; }
+
+/* Look at the character under the cursor without consuming it
+ * (this is the NUL terminator once the input is exhausted). */
+char peek() { return lexer.current[0]; }
+
+/* Look one character past the cursor without consuming anything.
+ * Yields '\0' when the cursor is already on the terminator, so the
+ * read never goes beyond the end of the buffer. */
+char peek_next() { return is_at_end() ? '\0' : lexer.current[1]; }
+
+int match(char expected) { /* consume the current char iff it equals expected */
+  int hit = (lexer.current[0] == expected);
+  if (hit) lexer.current++;
+  return hit; /* 1 when consumed, 0 when the cursor was left untouched */
+}
+
+/*
+ * Skip blanks, newlines and '!' line comments, stopping at the first
+ * character that can start a token (or at the NUL terminator). Every '\n'
+ * consumed bumps lexer.line. NOTE(review): '!' and '!!' are skipped the same
+ * way, which appears to leave the '!' case in next_token unreachable.
+ */
+void skip_whitespace() {
+  for (;;) {
+    switch (peek()) {
+      case '\n':
+        lexer.line++;
+        /* fallthrough */
+      case ' ':
+      case '\r':
+      case '\t':
+        advance();
+        break;
+      case '!': /* comment runs up to, but not including, the newline */
+        while (!is_at_end() && peek() != '\n') advance();
+        break;
+      default:
+        return;
+    }
+  }
+}
+
+Token make_token(TokenType type) { /* token for the lexeme start..current */
+  Token result;
+  result.line = lexer.line;
+  result.start = lexer.start;
+  result.length = (int)(lexer.current - lexer.start);
+  result.type = type;
+  return result;
+}
+
+Token error_token(const char *message) { /* TOKEN_ERROR whose text is message */
+  Token failure;
+  failure.line = lexer.line;
+  failure.type = TOKEN_ERROR;
+  failure.length = (int)strlen(message);
+  failure.start = message; /* points at the message, not into the source */
+  return failure;
+}
+
+/* Nonzero when `c` may appear in an identifier: a letter or '_'. The cast
+ * keeps negative char values from reaching isalpha, which is undefined. */
+int is_alpha(char c) { return c == '_' || isalpha((unsigned char)c); }
+
+/* Nonzero when `c` is a decimal digit. The cast keeps negative char values
+ * from reaching isdigit, for which the behavior is undefined. */
+int is_digit(char c) { return isdigit((unsigned char)c); }
+
+/* Finish a numeric literal (first digit already consumed). A '.' belongs to
+ * the number only when a digit follows it, yielding TOKEN_FLOAT_LITERAL. */
+Token number() {
+  TokenType kind = TOKEN_INT_LITERAL;
+  while (is_digit(peek())) advance();
+  if (peek() == '.' && is_digit(peek_next())) {
+    kind = TOKEN_FLOAT_LITERAL;
+    do { advance(); } while (is_digit(peek()));
+  }
+  return make_token(kind);
+}
+
+/* Scan a string literal whose opening '"' is already consumed. Newlines
+ * inside the literal are allowed and counted in lexer.line; reaching the end
+ * of input before the closing quote yields an error token. */
+Token string() {
+  for (; !is_at_end() && peek() != '"'; advance()) {
+    if (peek() == '\n') lexer.line++;
+  }
+  if (is_at_end()) return error_token("Unterminated string.");
+  advance(); /* step over the closing quote */
+  return make_token(TOKEN_STRING_LITERAL);
+}
+
+/* Consume the rest of an identifier or keyword (first character already read). */
+static Token identifier(void) {
+  while (is_alpha(peek()) || is_digit(peek())) advance();
+  return make_token(identifier_type());
+}
+
+/* Skip whitespace/comments, then scan and return the next token. Yields
+ * TOKEN_EOF at end of input and a TOKEN_ERROR token for unknown characters. */
+Token next_token() {
+  char c; /* declared first: the project builds with -std=c89 */
+  skip_whitespace();
+  lexer.start = lexer.current;
+  if (is_at_end()) return make_token(TOKEN_EOF);
+
+  c = advance();
+  if (is_alpha(c)) return identifier();
+  if (is_digit(c)) return number();
+  switch (c) {
+    case '(': return make_token(TOKEN_LPAREN);   case ')': return make_token(TOKEN_RPAREN);
+    case '{': return make_token(TOKEN_LBRACE);   case '}': return make_token(TOKEN_RBRACE);
+    case '[': return make_token(TOKEN_LBRACKET); case ']': return make_token(TOKEN_RBRACKET);
+    case ',': return make_token(TOKEN_COMMA);
+    case '.': return make_token(TOKEN_DOT);
+    case ':': return make_token(TOKEN_COLON);
+    case ';': return make_token(TOKEN_SEMICOLON);
+    case '+': return make_token(TOKEN_PLUS);
+    case '-': return make_token(TOKEN_MINUS);
+    case '*': return make_token(TOKEN_STAR);
+    case '/': return make_token(TOKEN_SLASH);
+    case '=': return make_token(TOKEN_EQ);
+    case '"': return string();
+    case '!': return make_token(match('!') ? TOKEN_DOUBLE_BANG : TOKEN_BANG);
+  }
+  return error_token("Unexpected character.");
+}
+
+const char *token_type_name(TokenType type) { /* printable name for a token type */
+  switch (type) {
+    case TOKEN_IDENTIFIER: return "identifier";
+    case TOKEN_INT_LITERAL: return "int";
+    case TOKEN_FLOAT_LITERAL: return "float";
+    case TOKEN_STRING_LITERAL: return "string";
+    case TOKEN_KEYWORD_TYPE: return "type";
+    case TOKEN_KEYWORD_FN: return "fn";
+    case TOKEN_KEYWORD_LET: return "let";
+    case TOKEN_KEYWORD_CONST: return "const";
+    case TOKEN_KEYWORD_IF: return "if";
+    case TOKEN_KEYWORD_ELSE: return "else";
+    case TOKEN_KEYWORD_WHILE: return "while";
+    case TOKEN_KEYWORD_FOR: return "for";
+    case TOKEN_KEYWORD_RETURN: return "return";
+    case TOKEN_KEYWORD_USE: return "use";
+    case TOKEN_OPERATOR_IS: return "is";
+    case TOKEN_BANG: return "!";
+    case TOKEN_DOUBLE_BANG: return "!!";
+    case TOKEN_EQ: return "=";
+    case TOKEN_DOT: return ".";
+    case TOKEN_COMMA: return ",";
+    case TOKEN_COLON: return ":";
+    case TOKEN_SEMICOLON: return ";";
+    case TOKEN_PLUS: return "+";
+    case TOKEN_MINUS: return "-";
+    case TOKEN_STAR: return "*";
+    case TOKEN_SLASH: return "/";
+    case TOKEN_LPAREN: return "(";
+    case TOKEN_RPAREN: return ")";
+    case TOKEN_LBRACE: return "{";
+    case TOKEN_RBRACE: return "}";
+    case TOKEN_LBRACKET: return "[";
+    case TOKEN_RBRACKET: return "]";
+    case TOKEN_EOF: return "eof";
+    case TOKEN_ERROR: return "error";
+    default: return "unknown"; /* only for values outside the TokenType enumerators */
+  }
+}
+
+
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..5be3edd
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,68 @@
+#ifndef ZRL_LEXER_H /* own guard: ZRL_VM_H is already the guard of vm.h */
+#define ZRL_LEXER_H
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+typedef enum { /* every kind of token the lexer can produce */
+  TOKEN_EOF, /* end of input reached */
+  TOKEN_IDENTIFIER, /* a name that matched no keyword */
+  TOKEN_INT_LITERAL,
+  TOKEN_FLOAT_LITERAL, /* digits '.' digits */
+  TOKEN_STRING_LITERAL, /* lexeme includes both double quotes */
+  TOKEN_KEYWORD_TYPE,
+  TOKEN_KEYWORD_FN,
+  TOKEN_KEYWORD_LET,
+  TOKEN_KEYWORD_CONST,
+  TOKEN_KEYWORD_IF,
+  TOKEN_KEYWORD_ELSE,
+  TOKEN_KEYWORD_WHILE,
+  TOKEN_KEYWORD_FOR,
+  TOKEN_KEYWORD_RETURN,
+  TOKEN_KEYWORD_USE,
+  TOKEN_OPERATOR_IS,
+  TOKEN_DOUBLE_BANG, /* "!!" */
+  TOKEN_BANG, /* "!" */
+  TOKEN_EQ,
+  TOKEN_DOT,
+  TOKEN_COMMA,
+  TOKEN_COLON,
+  TOKEN_SEMICOLON,
+  TOKEN_PLUS,
+  TOKEN_MINUS,
+  TOKEN_STAR,
+  TOKEN_SLASH,
+  TOKEN_LPAREN,
+  TOKEN_RPAREN,
+  TOKEN_LBRACE,
+  TOKEN_RBRACE,
+  TOKEN_LBRACKET,
+  TOKEN_RBRACKET,
+  TOKEN_ERROR /* scanning failed; the token text is the error message */
+} TokenType;
+
+typedef struct { /* one scanned token */
+  TokenType type;
+  const char *start; /* first char of the lexeme, or of the message for TOKEN_ERROR */
+  int length; /* number of chars belonging to the token, counted from start */
+  int line; /* value of lexer.line when the token was made */
+} Token;
+
+typedef struct { /* pairs a keyword spelling with the token type it maps to */
+  const char *keyword;
+  TokenType token;
+} Keyword;
+
+typedef struct { /* scanner state over a NUL-terminated source buffer */
+  const char *start; /* first char of the token being scanned */
+  const char *current; /* next char to be consumed */
+  int line; /* current line number; init_lexer sets it to 1 */
+} Lexer;
+/* NOTE(review): defined (not just declared) in a header, so each includer gets a definition - consider extern; confirm. */
+Lexer lexer;
+
+void init_lexer(const char *source);
+Token next_token();
+
+#endif
diff --git a/src/opcodes.h b/src/opcodes.h
index 770d129..223699c 100644
--- a/src/opcodes.h
+++ b/src/opcodes.h
@@ -1,5 +1,5 @@
-#ifndef ZRE_OPCODES_H
-#define ZRE_OPCODES_H
+#ifndef ZRL_OPCODES_H
+#define ZRL_OPCODES_H
 
 #include "common.h"
 
@@ -15,11 +15,6 @@ typedef struct slice_s {
   uint32_t end;
 } Slice;
 
-typedef struct cell_s {
-  uint32_t car;
-  uint32_t cdr;
-} Cell;
-
 #define MAX_REGS 32
 typedef struct frame_s {
   Value registers[MAX_REGS]; /* R0-R31 */
diff --git a/src/test.c b/src/test.c
index d71a35c..ac46b69 100644
--- a/src/test.c
+++ b/src/test.c
@@ -2,9 +2,9 @@
 #include "vm.h"
 
 bool test_hello_world_compile(VM *vm) {
-  str(vm, "nuqneH 'u'?", 0);
+  uint32_t hello = str_alloc(vm, "nuqneH 'u'?", 0);
   vm->code[vm->cp++].u = OP(OP_LOADU, 1, 0, 0);
-  vm->code[vm->cp++].u = 0;
+  vm->code[vm->cp++].u = hello;
   vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print("nuqneH 'u'?"); */
   vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0); /* explicit halt */
   return true;
@@ -40,9 +40,9 @@ bool test_loop_compile(VM *vm) {
   vm->code[vm->cp++].u = OP(OP_ADD_INT, 1, 1, 3);  /* (implied by loop) i = i + (-1) */
   vm->code[vm->cp++].u = OP(OP_JGE_INT, 4, 1, 2);  /* } */
   vm->code[vm->cp++].u = OP(OP_REAL_TO_UINT, 1, 0, 0); /* let b = a as nat; */
-  str(vm, "Enter a string:", 0);
+  uint32_t prompt_addr = str_alloc(vm, "Enter a string:", 0);
   vm->code[vm->cp++].u = OP(OP_LOADU, 5, 0, 0);
-  vm->code[vm->cp++].u = 0;
+  vm->code[vm->cp++].u = prompt_addr;
   vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 5, 0); /* print("Enter a string: "); */
   vm->code[vm->cp++].u = OP(OP_READ_STRING, 2, 0, 0);  /* let user_string = gets(); */
   vm->code[vm->cp++].u = OP(OP_UINT_TO_STRING, 3, 1, 0);
diff --git a/src/test.h b/src/test.h
index bee509d..f51ee21 100644
--- a/src/test.h
+++ b/src/test.h
@@ -1,5 +1,5 @@
-#ifndef ZRE_TEST_H
-#define ZRE_TEST_H
+#ifndef ZRL_TEST_H
+#define ZRL_TEST_H
 
 #include "opcodes.h"
 
diff --git a/src/tools/gen_keywords b/src/tools/gen_keywords
new file mode 100755
index 0000000000000000000000000000000000000000..97880c72010df83bee20a4d9a261ae4b48c5273e
GIT binary patch
literal 15848
zcmeHOeQaCR6~A`+kp>dGr5&YR+qd+C1QI7{o35n_I8B{CL)wJY85GvNy7qJPMq)?y
z3vGdlPDvRiD=PaEXq(3VK!Q!1sKmO$G?Y+Q3ffrE{^>sx(SR*oS_rJ00@Lc9^X@s%
z&P~cT?T=}iuk_wI=XXBtdFNg~-{*JzaHy@->+uL~CE`|r(muaLf~4r)L=2Fi*eojG
zyi%+X^FW^CU{W8H1hlgGOQzFO!VyNl7f7$t&ypQe&JZd3UA1usOzAq%Z!S8KtkR!4
zN+FoC-tA9AXK0#pcIma04AVkCtj%bxv>$DnPBpsjdZO&-1vB&;Bfl~7b15c-dZK)p
zm<Apl6i=l=&|wQYXCYL-6Xd7THDu2e`@n`2kBh5Onn^VWb<5<A+3xr3IQpsiu^c2n
zrkrO7`DxUjN^8iEDd%}7_+dT&)yC7Kllr^b_|@Y;L1Rk8;aq1u;5eOj?CKKUqhc6y
zVZBFqrpaDKM%%@3;BcUSA~5*DANN-OZqwU~p8MXZb5E~*>}U7Z^;r8iZLI5w*Y;S+
z{zJ8gHgBrkv@wuL1=b5pYXKbCpL^XNzk>lJs#koCKT;^pXg)v|%|PLP0GKulj{VGL
z|GBH+4*{;hp|;Hc%ob+|a7{G^xf+hjS#X>$3&jm$GANZie?-Pk_n1i_@l?Mp3<GKy
zGctBGZ5zE&D=89vX)9?bKodI@H4;`b+G9Onip0URWt#%6vA~<ej<)c&7Gr&2eW20h
zH*6F}II`1-n`!fam9foqWM@lHDrrWd`+H0Xd7w9yq|OX!I;$b14D$iTzQj38PF{xx
zG`9iE^Rs`M1;bn-!i2r>#J6F%y`oE+3%=j*^>-|kit}_oVH}F(w5p(_jI$h6@KR0M
zbtrf_;rkTaPk5h#FDCq`f-fWdgo3Xi{AmTRB>Z^=uOfU{!D|Q~Rqzdjk16=)2_MGd
zL&MJ$&UtR<3UDujKt~I3`f72s=L_)p1@>bFIFA9UCxg-mW59stTTrqX1Md~wN`sO^
zobQ2%_=9*B0&(}g;2_0Qu>!>k6f01yK(PYF3jDuS;LpC5pXdV@OZB0-BXvUP$A)ch
zZd4z5we*Zrid^Fx0O!{J3eLV&LEupCM%{(69JIBspo~kR3!{$w63Vy~x-bkf#&5V3
z&c5{}J^Ooo;N8iNNT^}B;dT9ZQw6MPa!a;?`Q=2ww`ve$3c&Zk^OJsjZYPp0Q}z-_
zZ6z3CY725>zEwxj?hH||uXgMkE0I)R(X*5K$oseHBU2^1=dAwL6?-uRxD=29N^@fg
z->U6-f6VV_(<O*6`Zsjx15Iy$rO>nQ+2#6B(>%a)OHKnc9)pWHJNH!pJbTaN^F5Qu
z^H?8@ci{loc0mj%Y`va6a$X+_orgr<sKCf0FI@*cyo?nafAdN%N6HaW;=`X*p3Ey>
zoT2<?UU`y~_qd)1^UBMl+~O+l$t$n-$%I;5Wm8^xD=C+|N-eK!A|)<8Wws@GWfdu_
zUF9GC?p|jjDe);(dcF?Iv!RLTg{NTFycW6$3Luk6F7yF36q?kZ)JMkV=-F3bUOWcB
zjOasiym~e?1+|;<of4Zv6TV~3U^;N*qUc}ed%0^uKVCkLmyVUszNu$lJB{(icS2v#
z1r{HD0Uy5!pls-}(n}5W0-YQi_6?o{YeicjwF6Vv_y)5x)gQpG5a2v8wZJzRov9vW
z^?cvpmYM34te)c=TsTwxbyj<PgBRdBl+-e;_WA}#W~w8sp6eTYYNoo8)und75_lP_
zO6)roRqkfG-g$PdqI#EAW&I0a@Z#k{One!bUSy)c^doaOFb^Zs4$Py-v;y-4GEKld
zh0I1^et^u)!2AfAN?=YQa|19V$oPTzH8S&oDFa3y>YCDz-|iXex;*eFkAA%IRR=tB
zKAas1XU}EddI<_F<lJcZTP`;cn)JXG3`~{z9vOs*;X4(Y7#Q|E8k#tIWa0te<9h+4
z59|*y5`3ChK>#_C1q2}Fifg4Z|M__56IXeHJeRnhXF+-5^)t>rV`%Q(P&1sFFG4&g
zwY_JY`-*d)cJIvKTx`V(6f01yK(PYF3KT0)tU$2>#R~kJ6~OmO*zCi7@Ukl@dQ4ly
zQpt=htb{OoGNw4#ZNWRNL@F)Prrn=TihcmCj96_Yl5sPk-4WVzS9|C7-G*K!Rs&!q
z%|Z-7D;ewQkDFR$k9o*U2f8cE%HTa*)V5+;Bz2#ej6mO7tlNy;XWVBVKA1|!tKe0f
z=DeJ%(U65VcL(h58qLWJOrvS-rnH%{YYGD36;x{vmdOHWnw8M1s$`F`{&d=eFtrc}
zGjJkTYfVkE5A9~^KwGzt+=b?}Ub8pW+ec9xFR-P&WdAZVCsuv6CM%?2DqG7S2Fa23
zJ3_mR@b=KINVqi|>IB^pr~x(zHoXp395S^MmYFs%Ox67<EACvDrezLVcC1?~Oy}PE
zy`<E%STtj5H?ED{cuSd(J<`w^UAses<a1C=LT#=Md$olkicxEOr-FoA6`*yO0z~$7
zaOWJjrG3|KHS}G2xJ|X|3`M#+IbM55sIxiJ-f4t)W8LKVP<^M*owe~A4UfvTnM&X?
z)lhD*KT`H-<FVRI#$m?P)fLWN=*P3XWGg)Q%6Dm(a=CrL2Z8E9x4fUrJqon<qg?J;
zpxE%|8*H8jI)&$u-?RMM`K2e|8542*$z$?jF1HR1w$Jw?Sp&yLIGzMN2vk(~TPv2{
z?z`q-=}~dpvaL65SW}5G`hF3P=b-P6_+qcQ!vBc3rF>pV-5lt`(LV(6FFwrWmH~D3
zU8t1}-LO-9<i))O*bu{`*oqY>R-jmcVg-s7C|00Yfno*zH!HyV{CHm<uJy3d_A&wQ
zC1i6}pF+T^2&X-ZFC(DcAc?zw5a0e`<29tSwgj2?5#qWO8|(jaC6_{Z6yJP6<GpX=
zw8-SWXH%r-=dEX?xwwYtQlgX=N%_5Q<8(pt=@s6WkOi;5d2ikIgr6pW@$-}z=RHP+
z;yrfUKfHo*t_Q+6rtp^2k)I`hc+>01T<1xWA1D3CB*VMN|7zp?<7yI{Xdbi@{Q^<d
zt}3V^QZCgb<?Amena73qf6{P^#iZwF4&Luc<q|7M&-0{`WFEIFl6l-}Nak_leVjb6
zc>gBPs~s&Zw`f)H^i!=hGzJ<14chwp^^Nr#*Efn!ZFqfi#D}@rjAp~Jj2^K>sCAd0
zFTddZ>l_cCF+JjX!SkB&ZNMyIp@15|YXvg14+1mW{^#cb*Thnx1=&*4$AS0(=Y6KE
z{T48bycL4)3ygmUaTW^bPk_fbYW$x&_T^#^?K5_sT><-zgTqN_?|`f+7>XakvCz9z
zsPhm9WTEi6PCTzFV8Hx%H)}@ke0u@dMR3>KNAuiy76<G)vRCV{3UIVPPB$XH|E&Rh
zAzW`TC}U%NvDG^Ek}o_5(H`<w<F^5>6&ezs0K9S*{40Pj^1^JU{9Tg+W|DtB%LHP=
z*uH01^UiP!Y(9s{ZiIBBwX=C=$k4ZU3S&oG`?lsbqrJ6tcPL^+nzyxuU|YJnB|gYo
z-T%q<cDbeAwZ@I{h8d6AQGwg&E%=EGcivCe8>p8%;)U63*_^u3ZAM`iJ@3X>oThK1
zH{zL;(H%|3af^Mp9iqjpqyfK9#-aZtoQ(aM4E5n`@uwdbGqkB+2@YHKjnJ;`#;0x2
zpZ@27=>gzUZ6N}g!@YKNKTta@X*YAplx+qMB>Mw>=~SPYwhxPd3+?Z>;O`t(Tm<0f
zPBYyn9K-HtrdtH!hm#OgQadd*_nYaAl}b(%4A7;`o+vsH*VkiXL`W=Pn}^`+3`iiI
za)u{hb{mOwwAVDc<KW8@rPaJGTx&-UNPi=mPDdeO^k*KriS}AC$RP!h;S_MXlcAyX
zA`naU_QJ23?i~FOnC0M$-vM5V!T%Rx8YG+ve?PDs0H)CY1brbfUBiY%`98vMGcfSc
zbNj#A|JB$YZ3|l%gulXCfb$=v{+V+B{5`1Vk3V|icNqKg`ibczb%x)Y*f;z0I_6%$
zuuJym^%&D<Kv&q*{9~ZRwGR99`i3d5yEuQgW7-G)xOQTh*H29AsdF`d&go%ba7@^r
z*KbV6C}Hkjz5XouZ=-;`9%QN$j{dl2RQ<mJ7<R$y68HW~_hFo$g-xA5_&r<buaO_q
zV4*Ycq;@Ip)hhJgM+QuH@P$(x_UEwQSN!>Vohd&Du)Ui9Gvv?p7aCa-T}ktqP`7?8
z{}>p|pZ$6L!xYy)g-z`r*Aa#O9jm1^)9)5K1JC2f@GpTW{QRr`Th!Q)sM`N;0k43A
zFO&aY#s3df`(v5u+u#eU6xW}hpU&3;2#xKel$RNw00quH?w{9{?*BxwIM8^0vLDZb
z%b-IO_UHdI4cDQU5LJ@6XZC0MF=z_?d7U~={`Ib>WY~|X7n}?IwV<@q$Z=Hh*A%%F
zoG}*X&+Dj3^7pIuid;_plew#9fA#Z=xne7%Ja?r=p8XAyo+c$e591i{b@TlfpHDb<
f=^@YOHc0wa?ue9%=seEP=HE`wEuT>w6jA&GW9CM-

literal 0
HcmV?d00001

diff --git a/src/tools/gen_keywords.c b/src/tools/gen_keywords.c
new file mode 100644
index 0000000..14f26fa
--- /dev/null
+++ b/src/tools/gen_keywords.c
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <string.h>
+
+typedef enum { /* generator-local list; only enumerator names are printed, never their values */
+  TOKEN_IDENTIFIER,
+  TOKEN_KEYWORD_TYPE,
+  TOKEN_KEYWORD_FN,
+  TOKEN_KEYWORD_LET,
+  TOKEN_KEYWORD_CONST,
+  TOKEN_KEYWORD_IF,
+  TOKEN_KEYWORD_ELSE,
+  TOKEN_KEYWORD_WHILE,
+  TOKEN_KEYWORD_FOR,
+  TOKEN_KEYWORD_RETURN,
+  TOKEN_KEYWORD_USE,
+  TOKEN_OPERATOR_IS
+} TokenType;
+
+typedef struct { /* one table row: keyword spelling and the token it produces */
+  const char *keyword;
+  TokenType token;
+} Keyword;
+
+Keyword keywords[] = { /* source of truth for the generated identifier_type() switch */
+  {"type",   TOKEN_KEYWORD_TYPE},
+  {"fn",     TOKEN_KEYWORD_FN},
+  {"let",    TOKEN_KEYWORD_LET},
+  {"const",  TOKEN_KEYWORD_CONST},
+  {"if",     TOKEN_KEYWORD_IF},
+  {"else",   TOKEN_KEYWORD_ELSE},
+  {"while",  TOKEN_KEYWORD_WHILE},
+  {"for",    TOKEN_KEYWORD_FOR},
+  {"return", TOKEN_KEYWORD_RETURN},
+  {"use",    TOKEN_KEYWORD_USE},
+  {"is",     TOKEN_OPERATOR_IS},
+};
+
+/* Write keywords.h to `out`: check_keyword() plus an identifier_type() that
+ * switches on the first letter and then tries every keyword starting with it.
+ * Each candidate is emitted as "if (...) return", so keywords that share a
+ * first letter (fn/for, if/is) are all reachable, not just the first listed. */
+void emit_keyword_header(FILE *out) {
+  size_t i;
+  char ch;
+
+  fprintf(out, "#ifndef KEYWORDS_H\n");
+  fprintf(out, "#define KEYWORDS_H\n\n");
+  fprintf(out, "#include \"lexer.h\"\n\n");
+
+  fprintf(out, "static TokenType check_keyword(int start, int length, const char *rest, TokenType type) {\n");
+  fprintf(out, "  if ((lexer.current - lexer.start) == start + length &&\n");
+  fprintf(out, "      memcmp(lexer.start + start, rest, length) == 0) return type;\n");
+  fprintf(out, "  return TOKEN_IDENTIFIER;\n");
+  fprintf(out, "}\n\n");
+
+  fprintf(out, "static TokenType identifier_type(void) {\n");
+  fprintf(out, "  switch (lexer.start[0]) {\n");
+  for (ch = 'a'; ch <= 'z'; ++ch) {
+    int printed = 0; /* becomes 1 once this letter's case label was written */
+    for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); ++i) {
+      const char *kw = keywords[i].keyword;
+      const char *name =
+          keywords[i].token == TOKEN_OPERATOR_IS ? "TOKEN_OPERATOR_IS" :
+          keywords[i].token == TOKEN_KEYWORD_RETURN ? "TOKEN_KEYWORD_RETURN" :
+          keywords[i].token == TOKEN_KEYWORD_WHILE ? "TOKEN_KEYWORD_WHILE" :
+          keywords[i].token == TOKEN_KEYWORD_CONST ? "TOKEN_KEYWORD_CONST" :
+          keywords[i].token == TOKEN_KEYWORD_TYPE ? "TOKEN_KEYWORD_TYPE" :
+          keywords[i].token == TOKEN_KEYWORD_FN ? "TOKEN_KEYWORD_FN" :
+          keywords[i].token == TOKEN_KEYWORD_IF ? "TOKEN_KEYWORD_IF" :
+          keywords[i].token == TOKEN_KEYWORD_FOR ? "TOKEN_KEYWORD_FOR" :
+          keywords[i].token == TOKEN_KEYWORD_LET ? "TOKEN_KEYWORD_LET" :
+          keywords[i].token == TOKEN_KEYWORD_ELSE ? "TOKEN_KEYWORD_ELSE" :
+          keywords[i].token == TOKEN_KEYWORD_USE ? "TOKEN_KEYWORD_USE" : "TOKEN_IDENTIFIER";
+      if (kw[0] != ch) continue;
+      if (!printed) fprintf(out, "    case '%c':\n", ch);
+      printed = 1;
+      fprintf(out, "      if (check_keyword(1, %d, \"%s\", %s) == %s) return %s;\n",
+              (int)strlen(kw) - 1, kw + 1, name, name, name);
+    }
+    if (printed) fprintf(out, "      break;\n"); /* no match: fall out to TOKEN_IDENTIFIER */
+  }
+  fprintf(out, "  }\n  return TOKEN_IDENTIFIER;\n");
+  fprintf(out, "}\n\n");
+  fprintf(out, "#endif /* KEYWORDS_H */\n");
+}
+
+int main(void) { /* print the generated header text on stdout */
+  emit_keyword_header(stdout);
+  return 0;
+}
diff --git a/src/vm.c b/src/vm.c
index 1f209e5..aea8fa0 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -26,8 +26,9 @@
 /**
  * Embeds a string into the VM
  */
-void str(VM *vm, const char *str, uint32_t length) {
+uint32_t str_alloc(VM *vm, const char *str, uint32_t length) {
   if (!length) length = strlen(str);
+  uint32_t str_addr = vm->mp;
   vm->memory[vm->mp++].u = length;
   uint32_t i, j = 0;
   for (i = 0; i < length; i++) {
@@ -38,6 +39,7 @@ void str(VM *vm, const char *str, uint32_t length) {
     }
   }
   vm->frames[vm->fp].allocated.end += length / 4;
+  return str_addr;
 }
 
 /**
@@ -197,21 +199,21 @@ bool step_vm(VM *vm) {
     int32_t a = (int32_t)vm->frames[vm->fp].registers[src1].i; /* get value */
     char buffer[32];
     int len = sprintf(buffer, "%d", a);
-    str(vm, buffer, len); /* copy buffer to dest */
+    str_alloc(vm, buffer, len); /* copy buffer to dest */
     return true;
   }
   case OP_UINT_TO_STRING: {
     uint32_t a = (uint32_t)vm->frames[vm->fp].registers[src1].u; /* get value */
     char buffer[32];
     int len = sprintf(buffer, "%d", a);
-    str(vm, buffer, len); /* copy buffer to dest */
+    str_alloc(vm, buffer, len); /* copy buffer to dest */
     return true;
   }
   case OP_REAL_TO_STRING: {
     float a = (float)vm->frames[vm->fp].registers[src1].f; /* get value */
     char buffer[32];
     int len = sprintf(buffer, "%f", a);
-    str(vm, buffer, len); /* copy buffer to dest */
+    str_alloc(vm, buffer, len); /* copy buffer to dest */
     return true;
   }
   case OP_READ_STRING: {
diff --git a/src/vm.h b/src/vm.h
index ee540bc..d48fff2 100644
--- a/src/vm.h
+++ b/src/vm.h
@@ -1,10 +1,10 @@
-#ifndef ZRE_VM_H
-#define ZRE_VM_H
+#ifndef ZRL_VM_H
+#define ZRL_VM_H
 
 #include "opcodes.h"
 
 VM* init_vm();
 bool step_vm(VM *vm);
-void str(VM *vm, const char *str, uint32_t length);
+uint32_t str_alloc(VM *vm, const char *str, uint32_t length);
 
 #endif