start working on lexer

This commit is contained in:
zongor 2025-07-27 13:11:44 -04:00
parent c33265913e
commit 88dfbb098d
18 changed files with 477 additions and 91 deletions

View File

@ -6,23 +6,23 @@
:END: :END:
* Overview * Overview
- ZRE is a lightweight, portable programming language for permacomputing, game preservation, and indie game development. - Reality Engine is a lightweight, portable programming language for permacomputing, game preservation, and indie game development inspired by [[https://wiki.xxiivv.com/site/uxn.html][uxn]], [[http://duskos.org/][Dusk OS]], and [[https://doc.cat-v.org/inferno/4th_edition/dis_VM_specification][Dis VM]].
- Built in **C89** for cross-platform compatibility (desktop, microcontrollers, and web via Emscripten). - Built in **C89** for cross-platform compatibility (desktop, microcontrollers, and web via Emscripten).
- Designed for simplicity, performance, and creative exploration. - Designed for simplicity, performance, and creative exploration.
- It is inspired by [[https://wiki.xxiivv.com/site/uxn.html][uxn]], [[http://duskos.org/][Dusk OS]], [[https://doc.cat-v.org/inferno/4th_edition/dis_VM_specification][Dis VM]], [[https://www.craftinginterpreters.com/the-lox-language.html][Lox]], [[https://lua.org][Lua]], [[https://en.wikipedia.org/wiki/Lisp_(programming_language)][Lisp]], [[https://en.wikipedia.org/wiki/C_(programming_language)][C]], and [[https://ziglang.org/][Zig]]. - Reality Engine Language (ZRL) is a C-like, imperative, data-oriented language inspired by [[https://www.craftinginterpreters.com/the-lox-language.html][Lox]], [[https://lua.org][Lua]], [[https://en.wikipedia.org/wiki/Lisp_(programming_language)][Lisp]], [[https://en.wikipedia.org/wiki/C_(programming_language)][C]], [[https://fortran-lang.org/][Fortran]], and [[https://ziglang.org/][Zig]].
* Key Features * Key Features
** Core Philosophy ** Core Philosophy
- Simple, portable, lightweight - Simple, portable, lightweight, permacomputing oriented
- Targets permacomputing, game world preservation, rapid prototyping, and indie games. - Targets retro hardware, game world preservation, rapid prototyping, and indie games.
- No macros or object hierarchies—prioritizes clarity and explicit behavior. - No macros or object hierarchies—prioritizes clarity and explicit behavior.
- C/Zig like syntax. Lisp/Lua like development workflow. - C/Zig like syntax. Lisp/Lua like development workflow.
** Engine & Tooling ** Engine & Tooling
- Integrated 2D/3D rendering system: - Integrated 2D/3D rendering system:
- Immediate-mode canvas-based 3D rendering with low-poly 5th-6th gen console aesthetics. - Immediate-mode canvas-based 3D rendering with low-poly 5th-6th gen console aesthetics.
- 2D overlays styled after ImGui. - 2D canvas styled after ImGui and Raylib.
- [[https://www.libsdl.org/][SDL2]] backend for input, audio, and cross-platform compatibility. - [[https://www.libsdl.org/][SDL2]] backend for input, audio, and cross-platform compatibility.
- Tree-walk interpreter: - Tree-walk interpreter:
- Compile bytecode to files for performance. - Compile bytecode to files for performance.
@ -85,7 +85,7 @@
* Motivation * Motivation
ZRE bridges retro-inspired creativity with modern portability for: ZRL bridges retro-inspired creativity with modern portability for:
- Game jams (rapid prototyping + 3D engine). - Game jams (rapid prototyping + 3D engine).
- Indie games (5th/6th-gen aesthetics). - Indie games (5th/6th-gen aesthetics).
- Permacomputing (low-resource, sustainable code). - Permacomputing (low-resource, sustainable code).

View File

@ -1,18 +1,18 @@
* /ZRE/ (Zongors Reality Engine) Design parameters * /ZRL/ (Reality Engine Language) Design parameters
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: zre-zongors-transpiler-language-design-parameters :CUSTOM_ID: zrl-zongors-reality-engine-language-design-parameters
:END: :END:
** What is /zre/? ** What is /zrl/?
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: what-is-zre :CUSTOM_ID: what-is-zrl
:END: :END:
/zre/ is an domain specific language for 3d games with C/Lua style syntax. /zrl/ is a domain-specific language for 3D games with C/Lua style syntax.
The compiler is written in C which should make it easy to port to other The compiler is written in C which should make it easy to port to other
systems. systems.
* /ZRE/ Grammar and Specification * /ZRL/ Grammar and Specification
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: zre-grammar-and-specification :CUSTOM_ID: zrl-grammar-and-specification
:END: :END:
** Types ** Types
:PROPERTIES: :PROPERTIES:
@ -26,7 +26,7 @@ systems.
- Note that these look like classes but act like structs - Note that these look like classes but act like structs
the methods actually have an implied struct as their first argument the methods actually have an implied struct as their first argument
#+begin_src zre #+begin_src zrl
type «token» { type «token» {
init() { init() {
// values // values
@ -41,7 +41,7 @@ type Vec3 {
this.y = z; this.y = z;
} }
} }
#+end_src zre #+end_src zrl
* Basic Types * Basic Types
:PROPERTIES: :PROPERTIES:
@ -99,20 +99,9 @@ string interpolation
Array of a specific type Array of a specific type
#+begin_src zre #+begin_src zrl
let «variable» = [val1, val2, ...]; «type»[«length»] «variable» = [val1, val2, ...];
#+end_src zre #+end_src zrl
*** Map
:PROPERTIES:
:CUSTOM_ID: map
:END:
Hashmap
#+begin_src zre
let «variable» = {key1: val1, key2: val2, ...};
#+end_src zre
*** Tunnel *** Tunnel
:PROPERTIES: :PROPERTIES:
@ -202,19 +191,19 @@ The following is a list of global operators and their effect:
let operator let operator
#+begin_src zre #+begin_src zrl
let «token» = true; let «token» = true;
#+end_src zre #+end_src zrl
=is= =is=
checks if an object is of that type checks if an object is of that type
#+begin_src zre #+begin_src zrl
if («token» is real) { if («token» is real) {
print("hello yes self is a real?"); print("hello yes self is a real?");
} }
#+end_src zre #+end_src zrl
also used for letting constants also used for letting constants
@ -222,26 +211,26 @@ also used for letting constants
coerces a type as another type if possible coerces a type as another type if possible
#+begin_src zre #+begin_src zrl
let «token» = 0; ! default is int let «token» = 0; ! default is int
some_functon(«token» as real); ! needs a real some_functon(«token» as real); ! needs a real
#+end_src zre #+end_src zrl
=in= =in=
checks if a object's type, or a type implements a contract checks if a object's type, or a type implements a contract
#+begin_src zre #+begin_src zrl
if («token» in Tunnel, Drawable) { if («token» in Tunnel, Drawable) {
print("im tunnel-able and draw-able"); print("im tunnel-able and draw-able");
} }
#+end_src zre #+end_src zrl
also used inside of the for loops also used inside of the for loops
#+begin_src zre #+begin_src zrl
for («token» in «collection») { «body» } for («token» in «collection») { «body» }
#+end_src zre #+end_src zrl
** Object ** Object
:PROPERTIES: :PROPERTIES:
@ -249,9 +238,9 @@ for («token» in «collection») { «body» }
:END: :END:
An object is an invoked type. An object is an invoked type.
#+begin_src zre #+begin_src zrl
let «variable» = «type»(«fields», …); let «variable» = «type»(«fields», …);
#+end_src zre #+end_src zrl
** Tunnel ** Tunnel
:PROPERTIES: :PROPERTIES:
@ -305,7 +294,7 @@ connected tunnel
=success? : tunnel_object.walk(path_or_endpoint)= -> moves around the =success? : tunnel_object.walk(path_or_endpoint)= -> moves around the
filesystem or through the graph filesystem or through the graph
#+begin_src zre #+begin_src zrl
! client ! client
let endpoint = Client("tcp://path/to/source"); let endpoint = Client("tcp://path/to/source");
let tunnel = endpoint.attach(user, auth); let tunnel = endpoint.attach(user, auth);
@ -320,7 +309,7 @@ s.bind("/some/resource", fn () str {
return "hello world"; return "hello world";
}) })
server.start(); server.start();
#+end_src zre #+end_src zrl
** Functions ** Functions
:PROPERTIES: :PROPERTIES:
@ -331,11 +320,11 @@ always have a "default type" for all constant values or a developer can
use the =as= keyword we do not have to define all values like in C, use the =as= keyword we do not have to define all values like in C,
while keeping the same type safety as a more strongly typed language. while keeping the same type safety as a more strongly typed language.
#+begin_src zre #+begin_src zrl
fn «token» («parameter» «type», ...) «return_type» { fn «token» («parameter» «type», ...) «return_type» {
«body» «body»
} }
#+end_src zre #+end_src zrl
- Built in transtypes - Built in transtypes
- sort - sort
@ -352,21 +341,21 @@ fn «token» («parameter» «type», ...) «return_type» {
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: loops :CUSTOM_ID: loops
:END: :END:
#+begin_src zre #+begin_src zrl
for («variable» in «collection») { «body» } for («variable» in «collection») { «body» }
#+end_src zre #+end_src zrl
iterates through each object in the collection setting it to variable iterates through each object in the collection setting it to variable
#+begin_src zre #+begin_src zrl
while («boolean expression») { «body» } while («boolean expression») { «body» }
#+end_src zre #+end_src zrl
loops until the expression is false loops until the expression is false
#+begin_src zre #+begin_src zrl
do («variable» = initial_value, end_value, increment) { «body» } do («variable» = initial_value, end_value, increment) { «body» }
#+end_src zre #+end_src zrl
loops from initial value to end value by increment value (like a for loop in other languages) loops from initial value to end value by increment value (like a for loop in other languages)
@ -374,7 +363,7 @@ loops from initial value to end value by increment value (like a for loop in oth
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: branching :CUSTOM_ID: branching
:END: :END:
#+begin_src zre #+begin_src zrl
if («boolean expression») { if («boolean expression») {
} else if («boolean expression») { } else if («boolean expression») {
@ -382,16 +371,16 @@ if («boolean expression») {
} else { } else {
} }
#+end_src zre #+end_src zrl
#+begin_src zre #+begin_src zrl
switch (value) { switch (value) {
case A: case A:
case B: case B:
case C: case C:
default: default:
} }
#+end_src zre #+end_src zrl
** Error handling ** Error handling
:PROPERTIES: :PROPERTIES:
@ -400,7 +389,7 @@ switch (value) {
Error handling is much like in C/C++ where a try catch can be used. Error handling is much like in C/C++ where a try catch can be used.
#+begin_src zre #+begin_src zrl
let rr = nil; let rr = nil;
let var = rr ?? 0; ! value is 0 let var = rr ?? 0; ! value is 0
try { try {
@ -409,7 +398,7 @@ try {
} catch (e) { } catch (e) {
print("Caught error ${e}"); print("Caught error ${e}");
} }
#+end_src zre #+end_src zrl
** Localization ** Localization
:PROPERTIES: :PROPERTIES:
@ -417,9 +406,9 @@ try {
:END: :END:
will look up the text of «token» in the linked localization.json file will look up the text of «token» in the linked localization.json file
#+begin_src zre #+begin_src zrl
#«token» #«token»
#+end_src zre #+end_src zrl
#+begin_src json #+begin_src json
{ {
@ -440,9 +429,9 @@ will look up the text of «token» in the linked localization.json file
In most languages the include or use statements get libraries which link In most languages the include or use statements get libraries which link
to other files and so on. to other files and so on.
#+begin_src zre #+begin_src zrl
use "./some_local_file.zre" use "./some_local_file.zrl"
#+end_src zre #+end_src zrl
** Testing ** Testing
:PROPERTIES: :PROPERTIES:
@ -452,9 +441,9 @@ use "./some_local_file.zre"
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: assertion :CUSTOM_ID: assertion
:END: :END:
#+begin_src zre #+begin_src zrl
assert(«expression», «expected output») ! returns «error or none» assert(«expression», «expected output») ! returns «error or none»
#+end_src zre #+end_src zrl
** Measurements ** Measurements
:PROPERTIES: :PROPERTIES:

View File

@ -29,7 +29,7 @@ type Player {
Camera([this.pos.x + 10.0, this.pos.y + 10.0, this.pos.z], this.pos); Camera([this.pos.x + 10.0, this.pos.y + 10.0, this.pos.z], this.pos);
} }
login(str password) Player[] { login(str password) Player[] { ! looks like a method but really it just has an implied "Player this" as the first argument
this.server.attach(this.username, password); this.server.attach(this.username, password);
this.players = server.open("players"); this.players = server.open("players");
return players.read(); return players.read();

View File

@ -12,6 +12,21 @@ CFLAGS_WASM = -g -std=c89 -Wall -Wextra -Werror -Wno-unused-parameter -I.
LDFLAGS_WASM = -s WASM=1 -g -s USE_SDL=2 LDFLAGS_WASM = -s WASM=1 -g -s USE_SDL=2
LDLIBS_WASM = LDLIBS_WASM =
TOOLS_DIR := tools
GENERATOR := $(TOOLS_DIR)/gen_keywords
GENERATOR_SRC := $(GENERATOR).c
KEYWORDS_H := keywords.h
# Rule to generate keywords.h
$(KEYWORDS_H): $(GENERATOR) $(GENERATOR_SRC)
@echo "Generating keywords.h..."
@$(GENERATOR) > $(KEYWORDS_H)
# Rule to build the generator
$(GENERATOR): $(GENERATOR_SRC)
@echo "Compiling keyword generator..."
@$(CC) -o $@ $<
# Source and build configuration # Source and build configuration
# ---------------------------- # ----------------------------
COMMON_SRC = $(wildcard *.c) COMMON_SRC = $(wildcard *.c)
@ -30,7 +45,7 @@ OBJ_NATIVE = $(addprefix $(OBJ_DIR_NATIVE)/,$(notdir $(COMMON_SRC:.c=.o)))
OBJ_WASM = $(addprefix $(OBJ_DIR_WASM)/,$(notdir $(COMMON_SRC:.c=.o))) OBJ_WASM = $(addprefix $(OBJ_DIR_WASM)/,$(notdir $(COMMON_SRC:.c=.o)))
# Phony targets # Phony targets
.PHONY: all clean install wasm native emscripten linux macos .PHONY: all clean clean-generated install wasm native emscripten linux macos
# Default target builds the native version # Default target builds the native version
all: native all: native
@ -65,9 +80,14 @@ $(OBJ_DIR_WASM)/%.o: %.c
# Clean build artifacts # Clean build artifacts
# --------------------- # ---------------------
clean: clean: clean-generated
rm -rf $(OBJ_DIR_NATIVE) $(OBJ_DIR_WASM) $(EXEC_NATIVE) $(EXEC_WASM) rm -rf $(OBJ_DIR_NATIVE) $(OBJ_DIR_WASM) $(EXEC_NATIVE) $(EXEC_WASM)
# Clean rule for deleting generated keyword binary and header
clean-generated:
@echo "Removing generated files..."
@rm -f $(KEYWORDS_H) $(GENERATOR)
# Install target (example) # Install target (example)
# ------------------------ # ------------------------
install: native install: native

View File

@ -1,5 +1,5 @@
#ifndef ZRE_COMMON_H #ifndef ZRL_COMMON_H
#define ZRE_COMMON_H #define ZRL_COMMON_H
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>

View File

@ -1,5 +1,5 @@
#ifndef ZRE_DEBUG_H #ifndef ZRL_DEBUG_H
#define ZRE_DEBUG_H #define ZRL_DEBUG_H
#include "vm.h" #include "vm.h"
#include "opcodes.h" #include "opcodes.h"

38
src/keywords.h Normal file
View File

@ -0,0 +1,38 @@
#ifndef KEYWORDS_H
#define KEYWORDS_H
#include "lexer.h"
/**
 * Tests whether the current lexeme (lexer.start up to lexer.current) is
 * exactly one specific keyword.
 *
 * @param start  number of leading characters the caller has already matched
 * @param length number of characters in `rest`
 * @param rest   the remaining keyword characters to compare against
 * @param type   token type to report when the lexeme matches
 * @return `type` on an exact match, otherwise TOKEN_IDENTIFIER
 */
static TokenType check_keyword(int start, int length, const char *rest, TokenType type) {
  /* The lexeme must be exactly as long as the keyword: no prefix matches. */
  if ((lexer.current - lexer.start) != start + length) {
    return TOKEN_IDENTIFIER;
  }
  if (memcmp(lexer.start + start, rest, length) != 0) {
    return TOKEN_IDENTIFIER;
  }
  return type;
}
/**
 * Classifies the current lexeme (lexer.start up to lexer.current) as a
 * keyword or a plain identifier.
 *
 * The first character selects a group of candidate keywords; every candidate
 * in the group is tried in turn. Returning unconditionally after the first
 * candidate would make the later ones ("for", "is") unreachable.
 *
 * @return the matching keyword/operator token type, or TOKEN_IDENTIFIER
 */
static TokenType identifier_type(void) {
  TokenType type;

  switch (lexer.start[0]) {
    case 'c':
      type = check_keyword(1, 4, "onst", TOKEN_KEYWORD_CONST);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'e':
      type = check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'f':
      type = check_keyword(1, 1, "n", TOKEN_KEYWORD_FN);
      if (type != TOKEN_IDENTIFIER) return type;
      type = check_keyword(1, 2, "or", TOKEN_KEYWORD_FOR);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'i':
      type = check_keyword(1, 1, "f", TOKEN_KEYWORD_IF);
      if (type != TOKEN_IDENTIFIER) return type;
      type = check_keyword(1, 1, "s", TOKEN_OPERATOR_IS);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'l':
      type = check_keyword(1, 2, "et", TOKEN_KEYWORD_LET);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'r':
      type = check_keyword(1, 5, "eturn", TOKEN_KEYWORD_RETURN);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 't':
      type = check_keyword(1, 3, "ype", TOKEN_KEYWORD_TYPE);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'u':
      type = check_keyword(1, 2, "se", TOKEN_KEYWORD_USE);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    case 'w':
      type = check_keyword(1, 4, "hile", TOKEN_KEYWORD_WHILE);
      if (type != TOKEN_IDENTIFIER) return type;
      break;
    default:
      break;
  }
  return TOKEN_IDENTIFIER;
}
#endif // KEYWORDS_H

184
src/lexer.c Normal file
View File

@ -0,0 +1,184 @@
#include "keywords.h"
/**
 * Points the global lexer at a new source buffer and resets its state.
 *
 * @param source program text; the lexer stores pointers into this buffer
 *               instead of copying it, and scanning stops at its '\0'
 */
void init_lexer(const char *source) {
  lexer.line = 1;
  lexer.current = source;
  lexer.start = lexer.current;
}
/** Returns non-zero when the cursor sits on the terminating '\0' of the source. */
int is_at_end() {
  return lexer.current[0] == '\0';
}
/** Consumes one character: returns the character under the cursor and moves the cursor past it. */
char advance() {
  char consumed = lexer.current[0];
  lexer.current += 1;
  return consumed;
}
/** Returns the character under the cursor without consuming it. */
char peek() {
  return lexer.current[0];
}
/**
 * Returns the character one past the cursor without consuming anything,
 * or '\0' when the cursor is already at the end of the source.
 */
char peek_next() {
  return is_at_end() ? '\0' : lexer.current[1];
}
/**
 * Consumes the character under the cursor only if it equals `expected`.
 *
 * @param expected character to look for
 * @return 1 when the character matched and was consumed, 0 otherwise
 */
int match(char expected) {
  if (lexer.current[0] == expected) {
    lexer.current += 1;
    return 1;
  }
  return 0;
}
/**
 * Advances the cursor past blanks, newlines and '!' comments, counting each
 * newline in lexer.line. Stops at the first character that is none of those.
 *
 * NOTE(review): "!" and "!!" are skipped identically (both run to the end of
 * the line). Because every '!' is swallowed here, the '!' case in
 * next_token() appears unreachable - confirm which behavior is intended.
 */
void skip_whitespace() {
  for (;;) {
    char ch = peek();

    if (ch == ' ' || ch == '\r' || ch == '\t') {
      advance();
      continue;
    }
    if (ch == '\n') {
      lexer.line++;
      advance();
      continue;
    }
    if (ch == '!') {
      /* Comment: discard everything up to, but not including, the newline. */
      while (peek() != '\n' && !is_at_end()) {
        advance();
      }
      continue;
    }
    return;
  }
}
/**
 * Builds a token of the given type covering the current lexeme
 * (lexer.start up to lexer.current) on the current line.
 *
 * @param type token type to assign
 * @return token whose `start` points into the source buffer
 */
Token make_token(TokenType type) {
  Token result;

  result.line = lexer.line;
  result.start = lexer.start;
  result.length = (int)(lexer.current - result.start);
  result.type = type;
  return result;
}
/**
 * Builds a TOKEN_ERROR token whose text is a diagnostic message rather than
 * a slice of the source.
 *
 * @param message NUL-terminated description; the token stores this pointer,
 *                it is not copied
 * @return error token tagged with the current line
 */
Token error_token(const char *message) {
  Token result;

  result.line = lexer.line;
  result.start = message;
  result.length = (int)strlen(result.start);
  result.type = TOKEN_ERROR;
  return result;
}
/**
 * Reports whether `c` may appear in an identifier as a letter.
 *
 * @param c character to classify
 * @return non-zero for an alphabetic character or '_', 0 otherwise
 */
int is_alpha(char c) {
  /* <ctype.h> functions require a value representable as unsigned char (or
     EOF); passing a negative plain char is undefined behavior. */
  return isalpha((unsigned char)c) || c == '_';
}
/**
 * Reports whether `c` is a decimal digit.
 *
 * @param c character to classify
 * @return non-zero for '0'..'9', 0 otherwise
 */
int is_digit(char c) {
  /* <ctype.h> functions require a value representable as unsigned char (or
     EOF); passing a negative plain char is undefined behavior. */
  return isdigit((unsigned char)c);
}
Token number() {
while (is_digit(peek())) advance();
if (peek() == '.' && is_digit(peek_next())) {
advance();
while (is_digit(peek())) advance();
return make_token(TOKEN_FLOAT_LITERAL);
}
return make_token(TOKEN_INT_LITERAL);
}
/**
 * Scans the rest of a string literal up to and including the closing '"'.
 * Newlines inside the literal are allowed and are counted in lexer.line.
 *
 * @return TOKEN_STRING_LITERAL covering the lexeme, or an error token with
 *         the message "Unterminated string." if the source ends first
 */
Token string() {
  for (;;) {
    if (is_at_end()) {
      return error_token("Unterminated string.");
    }
    if (peek() == '"') {
      break;
    }
    if (peek() == '\n') {
      lexer.line++;
    }
    advance();
  }

  advance(); /* consume the closing quote */
  return make_token(TOKEN_STRING_LITERAL);
}
/**
 * Scans the rest of an identifier or keyword whose first character has
 * already been consumed: takes every following letter, '_' or digit, then
 * lets identifier_type() decide between keyword and plain identifier.
 */
static Token identifier(void) {
  while (is_alpha(peek()) || is_digit(peek())) {
    advance();
  }
  return make_token(identifier_type());
}

/**
 * Scans and returns the next token from the source given to init_lexer().
 *
 * Skips leading whitespace and comments, marks the start of the new lexeme,
 * then dispatches on its first character.
 *
 * @return the scanned token; TOKEN_EOF at the end of the source, or a
 *         TOKEN_ERROR token carrying a message for unrecognized input
 */
Token next_token() {
  char c; /* declared up front: the project is built as C89 */

  skip_whitespace();
  lexer.start = lexer.current;

  if (is_at_end()) {
    return make_token(TOKEN_EOF);
  }

  c = advance();
  if (is_alpha(c)) {
    return identifier();
  }
  if (is_digit(c)) {
    return number();
  }

  switch (c) {
    case '(': return make_token(TOKEN_LPAREN);
    case ')': return make_token(TOKEN_RPAREN);
    case '{': return make_token(TOKEN_LBRACE);
    case '}': return make_token(TOKEN_RBRACE);
    case '[': return make_token(TOKEN_LBRACKET);
    case ']': return make_token(TOKEN_RBRACKET);
    case ',': return make_token(TOKEN_COMMA);
    case '.': return make_token(TOKEN_DOT);
    case ':': return make_token(TOKEN_COLON);
    case ';': return make_token(TOKEN_SEMICOLON);
    case '+': return make_token(TOKEN_PLUS);
    case '-': return make_token(TOKEN_MINUS);
    case '*': return make_token(TOKEN_STAR);
    case '/': return make_token(TOKEN_SLASH);
    case '=': return make_token(TOKEN_EQ);
    case '"': return string();
    case '!':
      /* NOTE(review): skip_whitespace() already consumes every '!' through
         the end of the line as a comment, so this case looks unreachable. */
      if (match('!')) {
        return make_token(TOKEN_DOUBLE_BANG);
      }
      return make_token(TOKEN_BANG);
    default:
      break;
  }

  return error_token("Unexpected character.");
}
/**
 * Maps a token type to a short printable name.
 *
 * @param type token type to describe
 * @return a string literal: the keyword/operator spelling for keywords and
 *         punctuation, a category word for literals, identifiers, EOF and
 *         errors, or "unknown" for any value not listed
 */
const char *token_type_name(TokenType type) {
  const char *name = "unknown";

  switch (type) {
    /* identifiers and literals */
    case TOKEN_IDENTIFIER:     name = "identifier"; break;
    case TOKEN_INT_LITERAL:    name = "int";        break;
    case TOKEN_FLOAT_LITERAL:  name = "float";      break;
    case TOKEN_STRING_LITERAL: name = "string";     break;

    /* keywords */
    case TOKEN_KEYWORD_TYPE:   name = "type";   break;
    case TOKEN_KEYWORD_FN:     name = "fn";     break;
    case TOKEN_KEYWORD_LET:    name = "let";    break;
    case TOKEN_KEYWORD_CONST:  name = "const";  break;
    case TOKEN_KEYWORD_IF:     name = "if";     break;
    case TOKEN_KEYWORD_ELSE:   name = "else";   break;
    case TOKEN_KEYWORD_WHILE:  name = "while";  break;
    case TOKEN_KEYWORD_FOR:    name = "for";    break;
    case TOKEN_KEYWORD_RETURN: name = "return"; break;
    case TOKEN_KEYWORD_USE:    name = "use";    break;
    case TOKEN_OPERATOR_IS:    name = "is";     break;

    /* operators and punctuation */
    case TOKEN_BANG:        name = "!";  break;
    case TOKEN_DOUBLE_BANG: name = "!!"; break;
    case TOKEN_EQ:          name = "=";  break;
    case TOKEN_DOT:         name = ".";  break;
    case TOKEN_COMMA:       name = ",";  break;
    case TOKEN_COLON:       name = ":";  break;
    case TOKEN_SEMICOLON:   name = ";";  break;
    case TOKEN_PLUS:        name = "+";  break;
    case TOKEN_MINUS:       name = "-";  break;
    case TOKEN_STAR:        name = "*";  break;
    case TOKEN_SLASH:       name = "/";  break;
    case TOKEN_LPAREN:      name = "(";  break;
    case TOKEN_RPAREN:      name = ")";  break;
    case TOKEN_LBRACE:      name = "{";  break;
    case TOKEN_RBRACE:      name = "}";  break;
    case TOKEN_LBRACKET:    name = "[";  break;
    case TOKEN_RBRACKET:    name = "]";  break;

    /* sentinels */
    case TOKEN_EOF:   name = "eof";   break;
    case TOKEN_ERROR: name = "error"; break;

    default: break;
  }

  return name;
}

68
src/lexer.h Normal file
View File

@ -0,0 +1,68 @@
/*
 * Lexer interface: token types, the Token record, and the scanner entry
 * points.
 *
 * The guard is named after this header; sharing a guard macro with vm.h
 * would make whichever header is included second expand to nothing.
 */
#ifndef ZRL_LEXER_H
#define ZRL_LEXER_H

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Every kind of token the lexer can produce. */
typedef enum {
  TOKEN_EOF,
  /* identifiers and literals */
  TOKEN_IDENTIFIER,
  TOKEN_INT_LITERAL,
  TOKEN_FLOAT_LITERAL,
  TOKEN_STRING_LITERAL,
  /* keywords */
  TOKEN_KEYWORD_TYPE,
  TOKEN_KEYWORD_FN,
  TOKEN_KEYWORD_LET,
  TOKEN_KEYWORD_CONST,
  TOKEN_KEYWORD_IF,
  TOKEN_KEYWORD_ELSE,
  TOKEN_KEYWORD_WHILE,
  TOKEN_KEYWORD_FOR,
  TOKEN_KEYWORD_RETURN,
  TOKEN_KEYWORD_USE,
  TOKEN_OPERATOR_IS,
  /* operators and punctuation */
  TOKEN_DOUBLE_BANG,
  TOKEN_BANG,
  TOKEN_EQ,
  TOKEN_DOT,
  TOKEN_COMMA,
  TOKEN_COLON,
  TOKEN_SEMICOLON,
  TOKEN_PLUS,
  TOKEN_MINUS,
  TOKEN_STAR,
  TOKEN_SLASH,
  TOKEN_LPAREN,
  TOKEN_RPAREN,
  TOKEN_LBRACE,
  TOKEN_RBRACE,
  TOKEN_LBRACKET,
  TOKEN_RBRACKET,
  /* produced for input the lexer cannot tokenize */
  TOKEN_ERROR
} TokenType;

/* One scanned token. The text is referenced, not copied. */
typedef struct {
  TokenType type;
  const char *start; /* first character of the lexeme (or of an error message) */
  int length;        /* number of characters starting at `start` */
  int line;          /* line number recorded when the token was created */
} Token;

/* Pairs a keyword's spelling with its token type. */
typedef struct {
  const char *keyword;
  TokenType token;
} Keyword;

/* Scanner state over a single source buffer. */
typedef struct {
  const char *start;   /* first character of the lexeme being scanned */
  const char *current; /* next character to be consumed */
  int line;            /* current line number, starting at 1 */
} Lexer;

/*
 * The single global lexer instance.
 * NOTE(review): this is a definition in a header, so every translation unit
 * including lexer.h defines `lexer`; that fails to link when the compiler
 * defaults to -fno-common. Consider `extern Lexer lexer;` here with one
 * definition in lexer.c.
 */
Lexer lexer;

/* Resets the global lexer to scan `source` from its first character. */
void init_lexer(const char *source);

/* Scans and returns the next token from the current source. */
Token next_token();

#endif /* ZRL_LEXER_H */

View File

@ -1,5 +1,5 @@
#ifndef ZRE_OPCODES_H #ifndef ZRL_OPCODES_H
#define ZRE_OPCODES_H #define ZRL_OPCODES_H
#include "common.h" #include "common.h"
@ -15,11 +15,6 @@ typedef struct slice_s {
uint32_t end; uint32_t end;
} Slice; } Slice;
typedef struct cell_s {
uint32_t car;
uint32_t cdr;
} Cell;
#define MAX_REGS 32 #define MAX_REGS 32
typedef struct frame_s { typedef struct frame_s {
Value registers[MAX_REGS]; /* R0-R31 */ Value registers[MAX_REGS]; /* R0-R31 */

View File

@ -2,9 +2,9 @@
#include "vm.h" #include "vm.h"
bool test_hello_world_compile(VM *vm) { bool test_hello_world_compile(VM *vm) {
str(vm, "nuqneH 'u'?", 0); uint32_t hello = str_alloc(vm, "nuqneH 'u'?", 0);
vm->code[vm->cp++].u = OP(OP_LOADU, 1, 0, 0); vm->code[vm->cp++].u = OP(OP_LOADU, 1, 0, 0);
vm->code[vm->cp++].u = 0; vm->code[vm->cp++].u = hello;
vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print("nuqneH 'u'?"); */ vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print("nuqneH 'u'?"); */
vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0); /* explicit halt */ vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0); /* explicit halt */
return true; return true;
@ -40,9 +40,9 @@ bool test_loop_compile(VM *vm) {
vm->code[vm->cp++].u = OP(OP_ADD_INT, 1, 1, 3); /* (implied by loop) i = i + (-1) */ vm->code[vm->cp++].u = OP(OP_ADD_INT, 1, 1, 3); /* (implied by loop) i = i + (-1) */
vm->code[vm->cp++].u = OP(OP_JGE_INT, 4, 1, 2); /* } */ vm->code[vm->cp++].u = OP(OP_JGE_INT, 4, 1, 2); /* } */
vm->code[vm->cp++].u = OP(OP_REAL_TO_UINT, 1, 0, 0); /* let b = a as nat; */ vm->code[vm->cp++].u = OP(OP_REAL_TO_UINT, 1, 0, 0); /* let b = a as nat; */
str(vm, "Enter a string:", 0); uint32_t prompt_addr = str_alloc(vm, "Enter a string:", 0);
vm->code[vm->cp++].u = OP(OP_LOADU, 5, 0, 0); vm->code[vm->cp++].u = OP(OP_LOADU, 5, 0, 0);
vm->code[vm->cp++].u = 0; vm->code[vm->cp++].u = prompt_addr;
vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 5, 0); /* print("Enter a string: "); */ vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 5, 0); /* print("Enter a string: "); */
vm->code[vm->cp++].u = OP(OP_READ_STRING, 2, 0, 0); /* let user_string = gets(); */ vm->code[vm->cp++].u = OP(OP_READ_STRING, 2, 0, 0); /* let user_string = gets(); */
vm->code[vm->cp++].u = OP(OP_UINT_TO_STRING, 3, 1, 0); vm->code[vm->cp++].u = OP(OP_UINT_TO_STRING, 3, 1, 0);

View File

@ -1,5 +1,5 @@
#ifndef ZRE_TEST_H #ifndef ZRL_TEST_H
#define ZRE_TEST_H #define ZRL_TEST_H
#include "opcodes.h" #include "opcodes.h"

BIN
src/tools/gen_keywords Executable file

Binary file not shown.

90
src/tools/gen_keywords.c Normal file
View File

@ -0,0 +1,90 @@
#include <stdio.h>
#include <string.h>
/*
 * Token kinds this generator can name. Only the enumerator *names* matter
 * here: they are written into the generated header as text, where they
 * resolve against the TokenType declared in lexer.h. The numeric values in
 * this file are never emitted.
 */
typedef enum {
  TOKEN_IDENTIFIER,
  TOKEN_KEYWORD_TYPE,
  TOKEN_KEYWORD_FN,
  TOKEN_KEYWORD_LET,
  TOKEN_KEYWORD_CONST,
  TOKEN_KEYWORD_IF,
  TOKEN_KEYWORD_ELSE,
  TOKEN_KEYWORD_WHILE,
  TOKEN_KEYWORD_FOR,
  TOKEN_KEYWORD_RETURN,
  TOKEN_KEYWORD_USE,
  TOKEN_OPERATOR_IS
} TokenType;

/* One reserved word and the token it lexes to. */
typedef struct {
  const char *keyword; /* spelling in source text */
  TokenType token;     /* token kind */
  const char *name;    /* enumerator spelled as text, for emission */
} Keyword;

/* Builds a table entry; #tok keeps the emitted name in sync with the enum. */
#define KW(text, tok) { text, tok, #tok }

/* All reserved words. Adding a keyword only requires a new line here. */
Keyword keywords[] = {
  KW("type", TOKEN_KEYWORD_TYPE),
  KW("fn", TOKEN_KEYWORD_FN),
  KW("let", TOKEN_KEYWORD_LET),
  KW("const", TOKEN_KEYWORD_CONST),
  KW("if", TOKEN_KEYWORD_IF),
  KW("else", TOKEN_KEYWORD_ELSE),
  KW("while", TOKEN_KEYWORD_WHILE),
  KW("for", TOKEN_KEYWORD_FOR),
  KW("return", TOKEN_KEYWORD_RETURN),
  KW("use", TOKEN_KEYWORD_USE),
  KW("is", TOKEN_OPERATOR_IS),
};

/**
 * Writes the text of keywords.h to `out`.
 *
 * The generated header defines check_keyword() and identifier_type(), which
 * switches on the first character of the lexeme and then tries every keyword
 * starting with that character. Each candidate is emitted as a test that only
 * returns on a match, so keywords sharing a first letter ("fn"/"for",
 * "if"/"is") are all reachable.
 *
 * @param out stream to write the header to; must be open for writing
 */
void emit_keyword_header(FILE *out) {
  const size_t keyword_count = sizeof(keywords) / sizeof(keywords[0]);
  size_t i;
  int ch;

  fputs("#ifndef KEYWORDS_H\n", out);
  fputs("#define KEYWORDS_H\n\n", out);
  fputs("#include \"lexer.h\"\n\n", out);

  fputs("static TokenType check_keyword(int start, int length, const char *rest, TokenType type) {\n", out);
  fputs("  if ((lexer.current - lexer.start) == start + length &&\n", out);
  fputs("      memcmp(lexer.start + start, rest, length) == 0) return type;\n", out);
  fputs("  return TOKEN_IDENTIFIER;\n", out);
  fputs("}\n\n", out);

  fputs("static TokenType identifier_type(void) {\n", out);
  fputs("  TokenType type;\n\n", out);
  fputs("  switch (lexer.start[0]) {\n", out);

  /* One case per first letter, in alphabetical order. */
  for (ch = 'a'; ch <= 'z'; ++ch) {
    int case_open = 0;

    for (i = 0; i < keyword_count; ++i) {
      const char *kw = keywords[i].keyword;

      if (kw[0] != ch) {
        continue;
      }
      if (!case_open) {
        fprintf(out, "    case '%c':\n", ch);
        case_open = 1;
      }
      /* The first letter is matched by the switch; compare only the rest. */
      fprintf(out, "      type = check_keyword(1, %d, \"%s\", %s);\n",
              (int)strlen(kw) - 1, kw + 1, keywords[i].name);
      fputs("      if (type != TOKEN_IDENTIFIER) return type;\n", out);
    }
    if (case_open) {
      fputs("      break;\n", out);
    }
  }

  fputs("    default:\n", out);
  fputs("      break;\n", out);
  fputs("  }\n", out);
  fputs("  return TOKEN_IDENTIFIER;\n", out);
  fputs("}\n\n", out);
  /* Block comment: the generated header is compiled as C89. */
  fputs("#endif /* KEYWORDS_H */\n", out);
}
/*
 * Entry point of the keyword generator: writes the generated header text to
 * standard output (the Makefile redirects it into keywords.h) and exits 0.
 */
int main(void) {
emit_keyword_header(stdout);
return 0;
}

View File

@ -26,8 +26,9 @@
/** /**
* Embeds a string into the VM * Embeds a string into the VM
*/ */
void str(VM *vm, const char *str, uint32_t length) { uint32_t str_alloc(VM *vm, const char *str, uint32_t length) {
if (!length) length = strlen(str); if (!length) length = strlen(str);
uint32_t str_addr = vm->mp;
vm->memory[vm->mp++].u = length; vm->memory[vm->mp++].u = length;
uint32_t i, j = 0; uint32_t i, j = 0;
for (i = 0; i < length; i++) { for (i = 0; i < length; i++) {
@ -38,6 +39,7 @@ void str(VM *vm, const char *str, uint32_t length) {
} }
} }
vm->frames[vm->fp].allocated.end += length / 4; vm->frames[vm->fp].allocated.end += length / 4;
return str_addr;
} }
/** /**
@ -197,21 +199,21 @@ bool step_vm(VM *vm) {
int32_t a = (int32_t)vm->frames[vm->fp].registers[src1].i; /* get value */ int32_t a = (int32_t)vm->frames[vm->fp].registers[src1].i; /* get value */
char buffer[32]; char buffer[32];
int len = sprintf(buffer, "%d", a); int len = sprintf(buffer, "%d", a);
str(vm, buffer, len); /* copy buffer to dest */ str_alloc(vm, buffer, len); /* copy buffer to dest */
return true; return true;
} }
case OP_UINT_TO_STRING: { case OP_UINT_TO_STRING: {
uint32_t a = (uint32_t)vm->frames[vm->fp].registers[src1].u; /* get value */ uint32_t a = (uint32_t)vm->frames[vm->fp].registers[src1].u; /* get value */
char buffer[32]; char buffer[32];
int len = sprintf(buffer, "%d", a); int len = sprintf(buffer, "%d", a);
str(vm, buffer, len); /* copy buffer to dest */ str_alloc(vm, buffer, len); /* copy buffer to dest */
return true; return true;
} }
case OP_REAL_TO_STRING: { case OP_REAL_TO_STRING: {
float a = (float)vm->frames[vm->fp].registers[src1].f; /* get value */ float a = (float)vm->frames[vm->fp].registers[src1].f; /* get value */
char buffer[32]; char buffer[32];
int len = sprintf(buffer, "%f", a); int len = sprintf(buffer, "%f", a);
str(vm, buffer, len); /* copy buffer to dest */ str_alloc(vm, buffer, len); /* copy buffer to dest */
return true; return true;
} }
case OP_READ_STRING: { case OP_READ_STRING: {

View File

@ -1,10 +1,10 @@
#ifndef ZRE_VM_H #ifndef ZRL_VM_H
#define ZRE_VM_H #define ZRL_VM_H
#include "opcodes.h" #include "opcodes.h"
VM* init_vm(); VM* init_vm();
bool step_vm(VM *vm); bool step_vm(VM *vm);
void str(VM *vm, const char *str, uint32_t length); uint32_t str_alloc(VM *vm, const char *str, uint32_t length);
#endif #endif