From 5d2aa1ad222d62ee13803a12f9514739753d5d55 Mon Sep 17 00:00:00 2001 From: zongor Date: Tue, 4 Nov 2025 17:09:55 -0800 Subject: [PATCH 01/27] wip compiler stuff --- Makefile | 9 +- README.org | 41 +++---- bench/fib.lisp | 2 +- bench/fib.lua | 2 +- bench/fib.pl | 2 +- bench/fib.py | 2 +- bench/fib.zl | 2 +- src/arch/emscripten/devices.c | 13 ++- src/arch/emscripten/devices.h | 6 +- src/arch/emscripten/main.c | 6 +- src/arch/linux/main.c | 171 ++++++++++++++--------------- src/tools/compiler.c | 200 ++++++++++++++++++++++++++++++++++ src/tools/compiler.h | 38 +++++++ src/vm/common.h | 1 + src/vm/fixed.c | 181 ++++++++++++++++++++++++++++++ src/vm/fixed.h | 53 +++++++++ src/vm/vm.c | 80 ++++++-------- test/add.rom | Bin 139 -> 143 bytes test/fib.rom | Bin 183 -> 187 bytes test/hello.rom | Bin 131 -> 135 bytes test/loop.rom | Bin 254 -> 258 bytes test/malloc.rom | Bin 163 -> 167 bytes test/paint-bw.rom | Bin 594 -> 574 bytes test/paint.rom | Bin 1394 -> 1266 bytes test/simple.rom | Bin 136 -> 140 bytes test/window.rom | Bin 326 -> 326 bytes 26 files changed, 633 insertions(+), 176 deletions(-) create mode 100644 src/tools/compiler.c create mode 100644 src/tools/compiler.h create mode 100644 src/vm/fixed.c create mode 100644 src/vm/fixed.h diff --git a/Makefile b/Makefile index 716919e..2327006 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,7 @@ endif VM_SOURCES := \ $(SRC_DIR)/vm/vm.c \ $(SRC_DIR)/vm/device.c \ + $(SRC_DIR)/vm/fixed.c \ $(SRC_DIR)/vm/libc.c ifeq ($(BUILD_MODE), release) @@ -87,13 +88,15 @@ ifeq ($(BUILD_MODE), release) $(ARCH_DIR)/devices.c\ $(SRC_DIR)/tools/parser.c \ $(SRC_DIR)/tools/lexer.c \ - $(SRC_DIR)/tools/assembler.c + $(SRC_DIR)/tools/assembler.c\ + $(SRC_DIR)/tools/compiler.c else PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c \ $(SRC_DIR)/tools/parser.c \ $(SRC_DIR)/tools/lexer.c \ - $(SRC_DIR)/tools/assembler.c + $(SRC_DIR)/tools/assembler.c\ + $(SRC_DIR)/tools/compiler.c endif # --- OBJECT FILES --- @@ -184,4 +187,4 @@ 
help: @echo "" @echo "Output:" @echo " Linux: build/linux/undar-linux-" - @echo " Web: build/emscripten/undar.html (+ .js, .wasm)" \ No newline at end of file + @echo " Web: build/emscripten/undar.html (+ .js, .wasm)" diff --git a/README.org b/README.org index c6495c4..0df876a 100644 --- a/README.org +++ b/README.org @@ -58,23 +58,16 @@ The Undâr compiler will be written in Sċieppan, as well as core VM tests. #+BEGIN_SRC lisp ((code (label main - (load-immediate $1 &hello-str) ; load hello string ptr - (call &pln ($1) nil) - (halt)) ; done - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (load-immediate $3 &new-line) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) + (ldi $0 &terminal-namespace) ; get terminal device + (ldi $1 0) + (syscall OPEN $0 $0 $1) + (ldi $1 &hello-str) ; load hello string ptr + (strlen $2 $1) + (syscall WRITE $0 $1 $2) + (halt 0))) (data (label terminal-namespace "/dev/term/0") - (label new-line "\n") - (label hello-str "nuqneH 'u'?"))) + (label hello-str "nuqneH 'u'?\n"))) #+END_SRC #+BEGIN_SRC sh @@ -92,24 +85,24 @@ heap allocations using the internal malloc opcode push pointers within this fram #+BEGIN_SRC lisp ((code (label main - (load-immediate $0 &terminal-namespace) ; get terminal device - (load-immediate $11 0) + (ldi $0 &terminal-namespace) ; get terminal device + (ldi $11 0) (syscall OPEN $0 $0 $11) - (load-immediate $1 &help) ; print help message - (call &pln ($0 $1) nil) + (ldi $1 &help) ; print help message + (fcall &pln ($0 $1) nil) - (load-immediate $1 32) ; read in a string of max 32 char length + (ldi $1 32) ; read in a string of max 32 char length (malloc $4 $1) ; allocate memory for the string (syscall READ $0 $4 $1) ; read the string - (call &pln ($0 $4) nil) ; print the string + (fcall &pln ($0 $4) nil) ; print the string (halt)) (label pln - (load-immediate $3 
&new-line) - (string-length $2 $1) + (ldi $3 &new-line) + (strlen $2 $1) (syscall WRITE $0 $1 $2) - (string-length $4 $3) + (strlen $4 $3) (syscall WRITE $0 $3 $4) (return nil))) (data diff --git a/bench/fib.lisp b/bench/fib.lisp index 19174ae..f73186a 100644 --- a/bench/fib.lisp +++ b/bench/fib.lisp @@ -3,4 +3,4 @@ (return n)) (return (+ (fib (- n 2)) (fib (- n 1))))) -(print (fib 35)) \ No newline at end of file +(print (fib 36)) \ No newline at end of file diff --git a/bench/fib.lua b/bench/fib.lua index d7ce19a..be357e9 100644 --- a/bench/fib.lua +++ b/bench/fib.lua @@ -3,6 +3,6 @@ function fib(n) return fib(n-1) + fib(n-2) end -local result = fib(35) +local result = fib(36) print(result) diff --git a/bench/fib.pl b/bench/fib.pl index e246993..776351e 100644 --- a/bench/fib.pl +++ b/bench/fib.pl @@ -7,6 +7,6 @@ sub fib { return fib($n-1) + fib($n-2); } -my $result = fib(35); +my $result = fib(36); print "$result\n"; diff --git a/bench/fib.py b/bench/fib.py index c4ea615..4ab00e6 100644 --- a/bench/fib.py +++ b/bench/fib.py @@ -3,5 +3,5 @@ def fib(n): return n return fib(n-1) + fib(n-2) -result = fib(35) +result = fib(36) print(result) diff --git a/bench/fib.zl b/bench/fib.zl index 1b95c65..dd93fc8 100644 --- a/bench/fib.zl +++ b/bench/fib.zl @@ -3,5 +3,5 @@ fn fib(n) { return fib(n - 2) + fib(n - 1); } -let result = fib(35); +let result = fib(36); print result; diff --git a/src/arch/emscripten/devices.c b/src/arch/emscripten/devices.c index 2d58703..4d667b1 100644 --- a/src/arch/emscripten/devices.c +++ b/src/arch/emscripten/devices.c @@ -1,8 +1,7 @@ #include "devices.h" #include #include -#include -#include +#include i32 console_open(void *data, u32 mode, u32 handle, u8 *buffer, u32 size) { USED(mode); @@ -141,10 +140,14 @@ i32 mouse_open(void *data, u32 mode, u32 handle, u8 *buffer, u32 size) { } i32 mouse_read(void *data, u8 *buffer, u32 size) { - MouseDeviceData *mouse_data = (MouseDeviceData *)data; + USED(data); + USED(buffer); + USED(size); + return -1; 
+} - if (size < 12) - return -1; +i32 mouse_refresh(void *data, u8 *buffer) { + MouseDeviceData *mouse_data = (MouseDeviceData *)data; u8 *info = (u8 *)buffer; memcpy(&info[4], &mouse_data->x, sizeof(u32)); diff --git a/src/arch/emscripten/devices.h b/src/arch/emscripten/devices.h index b252c03..946ed28 100644 --- a/src/arch/emscripten/devices.h +++ b/src/arch/emscripten/devices.h @@ -26,16 +26,13 @@ typedef struct mouse_device_data_s { u8 btn2; u8 btn3; u8 btn4; - u32 size; } MouseDeviceData; /* Keyboard device data */ typedef struct keyboard_device_data_s { u32 handle; - const u8 *keys; i32 key_count; - u32 pos; - u32 size; + const u8 *keys; } KeyboardDeviceData; /* Console device data */ @@ -52,6 +49,7 @@ i32 screen_ioctl(void *data, u32 cmd, const u8 *buffer); i32 mouse_open(void *data, u32 mode, u32 handle, u8 *buffer, u32 size); i32 mouse_read(void *data, u8 *buffer, u32 size); +i32 mouse_refresh(void *data, u8 *buffer); i32 mouse_write(void *data, const u8 *buffer, u32 size); i32 mouse_close(void *data); diff --git a/src/arch/emscripten/main.c b/src/arch/emscripten/main.c index 8d454c9..1a98d0e 100644 --- a/src/arch/emscripten/main.c +++ b/src/arch/emscripten/main.c @@ -18,7 +18,8 @@ static DeviceOps mouse_ops = {.open = mouse_open, .read = mouse_read, .write = mouse_write, .close = mouse_close, - .ioctl = nil}; + .ioctl = nil, + .refresh = mouse_refresh}; static DeviceOps console_device_ops = { .open = console_open, @@ -177,13 +178,12 @@ int main(int argc, char **argv) { mouse_data.btn2 = 0; mouse_data.btn3 = 0; mouse_data.btn4 = 0; - mouse_data.size = 16; // Register devices vm_register_device(&vm, "/dev/screen/0", "screen", &screen_data, &screen_ops, 16 + screen_data.buffer_size); vm_register_device(&vm, "/dev/mouse/0", "mouse", &mouse_data, &mouse_ops, - mouse_data.size); + 16); vm_register_device(&vm, "/dev/term/0", "terminal", &console_data, &console_device_ops, 4); diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index cad07d3..3626374 
100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,11 +1,12 @@ #include "../../tools/assembler.h" -#include "../../tools/lexer.h" +#include "../../tools/compiler.h" #include "../../tools/parser.h" #include "../../vm/vm.h" #include "devices.h" #include #include +#include #include #include @@ -280,130 +281,130 @@ const char *opcode_to_string(Opcode op) { static const char *names[] = { [OP_HALT] = "halt", [OP_JMP] = "jump", - [OP_JMPF] = "jump-if-flag", - [OP_CALL] = "call", - [OP_RETURN] = "return", + [OP_JMPF] = "jmpf", + [OP_FCALL] = "fcall", + [OP_FRETURN] = "return", /* Immediate loads (only 32-bit variant needed) */ - [OP_LOAD_IMM] = "load-immediate", + [OP_LOAD_IMM] = "ldi", /* Register-indirect loads */ - [OP_LOAD_IND_8] = "load-indirect-8", - [OP_LOAD_IND_16] = "load-indirect-16", - [OP_LOAD_IND_32] = "load-indirect-32", + [OP_LOAD_IND_8] = "ld8", + [OP_LOAD_IND_16] = "ld16", + [OP_LOAD_IND_32] = "ld32", /* Absolute address loads */ - [OP_LOAD_ABS_8] = "load-absolute-8", - [OP_LOAD_ABS_16] = "load-absolute-16", - [OP_LOAD_ABS_32] = "load-absolute-32", + [OP_LOAD_ABS_8] = "lda8", + [OP_LOAD_ABS_16] = "lda16", + [OP_LOAD_ABS_32] = "lda32", /* Base+offset loads */ - [OP_LOAD_OFF_8] = "load-offset-8", - [OP_LOAD_OFF_16] = "load-offset-16", - [OP_LOAD_OFF_32] = "load-offset-32", + [OP_LOAD_OFF_8] = "ldo8", + [OP_LOAD_OFF_16] = "ldo16", + [OP_LOAD_OFF_32] = "ldo32", /* Absolute address stores */ - [OP_STORE_ABS_8] = "store-absolute-8", - [OP_STORE_ABS_16] = "store-absolute-16", - [OP_STORE_ABS_32] = "store-absolute-32", + [OP_STORE_ABS_8] = "sta8", + [OP_STORE_ABS_16] = "sta16", + [OP_STORE_ABS_32] = "sta32", /* Register-indirect stores */ - [OP_STORE_IND_8] = "store-indirect-8", - [OP_STORE_IND_16] = "store-indirect-16", - [OP_STORE_IND_32] = "store-indirect-32", + [OP_STORE_IND_8] = "sti8", + [OP_STORE_IND_16] = "sti16", + [OP_STORE_IND_32] = "sti32", /* Base+offset stores */ - [OP_STORE_OFF_8] = "store-offset-8", - [OP_STORE_OFF_16] = 
"store-offset-16", - [OP_STORE_OFF_32] = "store-offset-32", + [OP_STORE_OFF_8] = "sto8", + [OP_STORE_OFF_16] = "sto16", + [OP_STORE_OFF_32] = "sto32", /* Memory operations */ [OP_MALLOC] = "malloc", - [OP_MEMSET_8] = "memset-8", - [OP_MEMSET_16] = "memset-16", - [OP_MEMSET_32] = "memset-32", + [OP_MEMSET_8] = "set8", + [OP_MEMSET_16] = "set16", + [OP_MEMSET_32] = "set32", /* Register operations */ - [OP_REG_MOV] = "register-move", + [OP_REG_MOV] = "mov", [OP_SYSCALL] = "syscall", /* Bit operations */ - [OP_BIT_SHIFT_LEFT] = "bit-shift-left", - [OP_BIT_SHIFT_RIGHT] = "bit-shift-right", - [OP_BIT_SHIFT_R_EXT] = "bit-shift-r-ext", - [OP_BAND] = "bit-and", - [OP_BOR] = "bit-or", - [OP_BXOR] = "bit-xor", + [OP_BIT_SHIFT_LEFT] = "sll", + [OP_BIT_SHIFT_RIGHT] = "srl", + [OP_BIT_SHIFT_R_EXT] = "sre", + [OP_BAND] = "and", + [OP_BOR] = "or", + [OP_BXOR] = "xor", /* Integer arithmetic */ - [OP_ADD_INT] = "add-int", - [OP_SUB_INT] = "sub-int", - [OP_MUL_INT] = "mul-int", - [OP_DIV_INT] = "div-int", - [OP_ABS_INT] = "abs-int", // ← NEW - [OP_NEG_INT] = "neg-int", // ← NEW + [OP_ADD_INT] = "addi", + [OP_SUB_INT] = "subi", + [OP_MUL_INT] = "muli", + [OP_DIV_INT] = "divi", + [OP_ABS_INT] = "absi", // ← NEW + [OP_NEG_INT] = "negi", // ← NEW /* Natural number arithmetic */ - [OP_ADD_NAT] = "add-nat", - [OP_SUB_NAT] = "sub-nat", - [OP_MUL_NAT] = "mul-nat", - [OP_DIV_NAT] = "div-nat", - [OP_ABS_NAT] = "abs-nat", // ← NEW - [OP_NEG_NAT] = "neg-nat", // ← NEW + [OP_ADD_NAT] = "addn", + [OP_SUB_NAT] = "subn", + [OP_MUL_NAT] = "muln", + [OP_DIV_NAT] = "divn", + [OP_ABS_NAT] = "absn", // ← NEW + [OP_NEG_NAT] = "negn", // ← NEW /* Floating point operations */ - [OP_ADD_REAL] = "add-real", - [OP_SUB_REAL] = "sub-real", - [OP_MUL_REAL] = "mul-real", - [OP_DIV_REAL] = "div-real", - [OP_ABS_REAL] = "abs-real", // ← NEW - [OP_NEG_REAL] = "neg-real", // ← NEW + [OP_ADD_REAL] = "addr", + [OP_SUB_REAL] = "subr", + [OP_MUL_REAL] = "mulr", + [OP_DIV_REAL] = "divr", + [OP_ABS_REAL] = "absr", // ← NEW 
+ [OP_NEG_REAL] = "negr", // ← NEW /* Type conversions */ - [OP_INT_TO_REAL] = "int-to-real", - [OP_NAT_TO_REAL] = "nat-to-real", - [OP_REAL_TO_INT] = "real-to-int", - [OP_REAL_TO_NAT] = "real-to-nat", + [OP_INT_TO_REAL] = "itor", + [OP_NAT_TO_REAL] = "ntor", + [OP_REAL_TO_INT] = "rtoi", + [OP_REAL_TO_NAT] = "rton", /* Integer comparisons */ - [OP_JEQ_INT] = "jump-eq-int", - [OP_JNEQ_INT] = "jump-neq-int", - [OP_JGT_INT] = "jump-gt-int", - [OP_JLT_INT] = "jump-lt-int", - [OP_JLE_INT] = "jump-le-int", - [OP_JGE_INT] = "jump-ge-int", + [OP_JEQ_INT] = "jeqi", + [OP_JNEQ_INT] = "jneqi", + [OP_JGT_INT] = "jgti", + [OP_JLT_INT] = "jlti", + [OP_JLE_INT] = "jlei", + [OP_JGE_INT] = "jgei", /* Natural number comparisons */ - [OP_JEQ_NAT] = "jump-eq-nat", - [OP_JNEQ_NAT] = "jump-neq-nat", - [OP_JGT_NAT] = "jump-gt-nat", - [OP_JLT_NAT] = "jump-lt-nat", - [OP_JLE_NAT] = "jump-le-nat", - [OP_JGE_NAT] = "jump-ge-nat", + [OP_JEQ_NAT] = "jeqn", + [OP_JNEQ_NAT] = "jneqn", + [OP_JGT_NAT] = "jgtn", + [OP_JLT_NAT] = "jltn", + [OP_JLE_NAT] = "jlen", + [OP_JGE_NAT] = "jgen", /* Floating point comparisons */ - [OP_JEQ_REAL] = "jump-eq-real", - [OP_JNEQ_REAL] = "jump-neq-real", - [OP_JGE_REAL] = "jump-ge-real", - [OP_JGT_REAL] = "jump-gt-real", - [OP_JLT_REAL] = "jump-lt-real", - [OP_JLE_REAL] = "jump-le-real", + [OP_JEQ_REAL] = "jeqr", + [OP_JNEQ_REAL] = "jneqr", + [OP_JGE_REAL] = "jger", + [OP_JGT_REAL] = "jgtr", + [OP_JLT_REAL] = "jltr", + [OP_JLE_REAL] = "jler", /* String operations */ - [OP_STRLEN] = "string-length", - [OP_STREQ] = "string-eq", - [OP_STRCAT] = "string-concat", - [OP_STR_GET_CHAR] = "string-get-char", - [OP_STR_FIND_CHAR] = "string-find-char", - [OP_STR_SLICE] = "string-slice", + [OP_STRLEN] = "strlen", + [OP_STREQ] = "streq", + [OP_STRCAT] = "strcat", + [OP_STR_GET_CHAR] = "getch", + [OP_STR_FIND_CHAR] = "findch", + [OP_STR_SLICE] = "strcut", /* String conversions */ - [OP_INT_TO_STRING] = "int-to-string", - [OP_NAT_TO_STRING] = "nat-to-string", - [OP_REAL_TO_STRING] 
= "real-to-string", - [OP_STRING_TO_INT] = "string-to-int", - [OP_STRING_TO_NAT] = "string-to-nat", - [OP_STRING_TO_REAL] = "string-to-real"}; + [OP_INT_TO_STRING] = "itos", + [OP_NAT_TO_STRING] = "ntos", + [OP_REAL_TO_STRING] = "rtos", + [OP_STRING_TO_INT] = "stoi", + [OP_STRING_TO_NAT] = "ston", + [OP_STRING_TO_REAL] = "stor"}; if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { return ""; diff --git a/src/tools/compiler.c b/src/tools/compiler.c new file mode 100644 index 0000000..d531caf --- /dev/null +++ b/src/tools/compiler.c @@ -0,0 +1,200 @@ +#include "compiler.h" +#include "lexer.h" +#include +#include + +//typedef struct { +// Token current; +// Token previous; +// bool hadError; +// bool panicMode; +//} Parser; +// +//typedef enum { +// PREC_NONE, +// PREC_ASSIGNMENT, /* = */ +// PREC_OR, /* or */ +// PREC_AND, /* and */ +// PREC_EQUALITY, /* == != */ +// PREC_COMPARISON, /* < > <= >= */ +// PREC_TERM, /* + - */ +// PREC_FACTOR, /* * / */ +// PREC_UNARY, /* not */ +// PREC_CALL, /* . 
() */ +// PREC_PRIMARY +//} Precedence; +// +//typedef void (*ParseFn)(char *program); +// +//typedef struct { +// ParseFn prefix; +// ParseFn infix; +// Precedence precedence; +//} ParseRule; +// +//typedef struct { +// i8 rp; // Next free register +//} Compiler; +// +//Parser parser; +// +//const char *internalErrorMsg = +// "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes."; +// +//void errorAt(Token *token, const char *message) { +// if (parser.panicMode) +// return; +// parser.panicMode = true; +// fprintf(stderr, "[line %d] Error", token->line); +// +// if (token->type == TOKEN_EOF) { +// fprintf(stderr, " at end"); +// } else if (token->type == TOKEN_ERROR) { +// } else { +// fprintf(stderr, " at '%.*s'", token->length, token->start); +// } +// +// fprintf(stderr, ": %s\n", message); +// parser.hadError = true; +//} +// +//void error(const char *message) { errorAt(&parser.previous, message); } +// +//void errorAtCurrent(const char *message) { errorAt(&parser.current, message); } +// +//void advance() { +// parser.previous = parser.current; +// +// for (;;) { +// parser.current = nextToken(); +// if (parser.current.type != TOKEN_ERROR) +// break; +// +// errorAtCurrent(parser.current.start); +// } +//} +// +//void consume(TokenType type, const char *message) { +// if (parser.current.type == type) { +// advance(); +// return; +// } +// +// errorAtCurrent(message); +//} +// +//static int allocateRegister(Compiler *c) { +// char buffer[38]; +// if (c->rp > 28) { +// sprintf(buffer, "Out of registers (used %d, max 28)", c->rp); +// error(buffer); +// return -1; +// } +// +// return c->rp++; +//} + +//static void freeRegister(Compiler *c, u8 reg) { +// if (reg == c->rp - 1) { +// c->rp--; +// } +//} + +//void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } +// +//void emit_u32(VM *vm, u32 value) { +// write_u32(vm, code, vm->cp, value); +// vm->cp += 4; +//} +// +//void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } +// +//static bool 
check(TokenType type) { return parser.current.type == type; } +// +//static bool match(TokenType type) { +// if (!check(type)) +// return false; +// advance(); +// return true; +//} +// +//static void expression(Compiler *c, VM *vm) { +// USED(c); +// USED(vm); +//} +// +//void number(Compiler *c, VM *vm) { +// emit_opcode(vm, OP_LOAD_IMM); +// int reg = allocateRegister(c); +// if (reg < 0) +// return; +// emit_byte(vm, reg); +// +// switch (parser.previous.type) { +// case TOKEN_INT_LITERAL: { +// char *endptr; +// i32 value = (i32)strtol(parser.previous.start, &endptr, 10); +// emit_u32(vm, value); +// return; +// } +// case TOKEN_UINT_LITERAL: { +// long value = atol(parser.previous.start); +// emit_u32(vm, value); +// return; +// } +// case TOKEN_FLOAT_LITERAL: { +// float value = atof(parser.previous.start); +// fixed_t fvalue = float_to_fixed(value); +// emit_u32(vm, fvalue); +// return; +// } +// default: +// return; // Unreachable. +// } +// +// errorAtCurrent("Invalid number format"); +//} +// +//static void unary(Compiler *c, VM *vm) { +// TokenType operatorType = parser.previous.type; +// +// // Compile the operand. +// expression(c, vm); +// +// // Emit the operator instruction. +// switch (operatorType) { +// default: +// return; // Unreachable. 
+// } +//} +// +//static void emitHalt(Compiler *c, VM *vm) { +// emit_opcode(vm, OP_HALT); +// advance(); +// number(c, vm); +//} +// +//static void endCompiler(Compiler *c, VM *vm) { emitHalt(c, vm); } +// +//static void grouping(Compiler *c, VM *vm) { +// expression(c, vm); +// consume(TOKEN_RPAREN, "Expect ')' after expression."); +//} + +bool compile(const char *source, VM *vm) { + USED(source); + USED(vm); + //initLexer(source); +// + //parser.hadError = false; + //parser.panicMode = false; +// + //Compiler compiler; + //advance(); + //expression(&compiler, vm); + //consume(TOKEN_EOF, "Expect end of expression."); + //endCompiler(&compiler, vm); +// + //return parser.hadError; + return false; +} diff --git a/src/tools/compiler.h b/src/tools/compiler.h new file mode 100644 index 0000000..c657898 --- /dev/null +++ b/src/tools/compiler.h @@ -0,0 +1,38 @@ +#ifndef UNDAR_COMPILER_H +#define UNDAR_COMPILER_H + +#include "../vm/common.h" +#include "../vm/vm.h" +#include "../vm/fixed.h" +#include "lexer.h" + +#include +#include +#include +#include + +typedef struct field_s { + char* name; // "handle" + TokenType type; // TOKEN_TYPE_NAT + u32 offset; // 4 (for first field in heap object) + u32 size; // 4 (bytes for nat) +} Field; + +typedef struct plex_def_s { + char* name; + u32 field_count; + u32 logical_size; // Data size (e.g., 12 for Vec3) + u32 physical_size; // Total allocation (logical + 4) + Field *fields; // All offsets are PHYSICAL +} PlexDef; + +typedef struct array_def_s { + TokenType type; + u32 length; + u32 logical_size; // length * element_size + u32 physical_size; // logical_size + 4 +} ArrayDef; + +bool compile(const char *source, VM *vm); + +#endif diff --git a/src/vm/common.h b/src/vm/common.h index 4691709..18d0d28 100644 --- a/src/vm/common.h +++ b/src/vm/common.h @@ -10,6 +10,7 @@ typedef uint16_t u16; typedef int16_t i16; typedef uint32_t u32; typedef int32_t i32; +typedef float f32; #define true 1 #define false 0 diff --git a/src/vm/fixed.c 
b/src/vm/fixed.c new file mode 100644 index 0000000..bbbdbc2 --- /dev/null +++ b/src/vm/fixed.c @@ -0,0 +1,181 @@ +/* fixed.c - Q16.16 Fixed-Point Math Implementation */ + +#include "fixed.h" + +/* Conversion functions */ +fixed_t int_to_fixed(i32 i) { return i << 16; } + +i32 fixed_to_int(fixed_t f) { return f >> 16; } + +fixed_t float_to_fixed(f32 f) { return (fixed_t)(f * 65536.0f); } + +f32 fixed_to_float(fixed_t f) { return (f32)f / 65536.0f; } + +fixed_t fixed_add(fixed_t a, fixed_t b) { return a + b; } + +fixed_t fixed_sub(fixed_t a, fixed_t b) { return a - b; } + +fixed_t fixed_mul(fixed_t a, fixed_t b) { + /* Extract high and low parts */ + i32 a_hi = a >> 16; + u32 a_lo = (u32)a & 0xFFFFU; + i32 b_hi = b >> 16; + u32 b_lo = (u32)b & 0xFFFFU; + + /* Compute partial products */ + i32 p0 = (i32)(a_lo * b_lo) >> 16; /* Low * Low */ + i32 p1 = a_hi * (i32)b_lo; /* High * Low */ + i32 p2 = (i32)a_lo * b_hi; /* Low * High */ + i32 p3 = (a_hi * b_hi) << 16; /* High * High */ + + /* Combine results */ + return p0 + p1 + p2 + p3; +} + +fixed_t fixed_div(fixed_t a, fixed_t b) { + int negative; + u32 ua, ub, quotient, remainder; + int i; + + if (b == 0) + return 0; /* Handle division by zero */ + + /* Determine sign */ + negative = ((a < 0) ^ (b < 0)); + + /* Work with absolute values */ + ua = (a < 0) ? -a : a; + ub = (b < 0) ? -b : b; + + /* Perform division using long division in base 2^16 */ + quotient = 0; + remainder = 0; + + for (i = 0; i < 32; i++) { + remainder <<= 1; + if (ua & 0x80000000U) { + remainder |= 1; + } + ua <<= 1; + + if (remainder >= ub) { + remainder -= ub; + quotient |= 1; + } + + if (i < 31) { + quotient <<= 1; + } + } + + return negative ? 
-(i32)quotient : (i32)quotient; +} + +int fixed_eq(fixed_t a, fixed_t b) { return a == b; } + +int fixed_ne(fixed_t a, fixed_t b) { return a != b; } + +int fixed_lt(fixed_t a, fixed_t b) { return a < b; } + +int fixed_le(fixed_t a, fixed_t b) { return a <= b; } + +int fixed_gt(fixed_t a, fixed_t b) { return a > b; } + +int fixed_ge(fixed_t a, fixed_t b) { return a >= b; } + +/* Unary operations */ +fixed_t fixed_neg(fixed_t f) { return -f; } + +fixed_t fixed_abs(fixed_t f) { return (f < 0) ? -f : f; } + +/* Square root using Newton-Raphson method */ +fixed_t fixed_sqrt(fixed_t f) { + fixed_t x, prev; + + if (f <= 0) + return 0; + + x = f; + /* Newton-Raphson iteration: x = (x + f/x) / 2 */ + do { + prev = x; + x = fixed_div(fixed_add(x, fixed_div(f, x)), int_to_fixed(2)); + } while ( + fixed_gt(fixed_abs(fixed_sub(x, prev)), 1)); /* Precision to 1/65536 */ + + return x; +} + +/* Sine function using Taylor series */ +fixed_t fixed_sin(fixed_t f) { + fixed_t result, term, f_squared; + int i; + /* Normalize angle to [-π, π] */ + fixed_t pi2 = fixed_mul(FIXED_PI, int_to_fixed(2)); + while (fixed_gt(f, FIXED_PI)) + f = fixed_sub(f, pi2); + while (fixed_lt(f, fixed_neg(FIXED_PI))) + f = fixed_add(f, pi2); + + /* Taylor series: sin(x) = x - x³/3! + x⁵/5! - x⁷/7! + ... */ + result = f; + term = f; + f_squared = fixed_mul(f, f); + + /* Calculate first few terms for reasonable precision */ + for (i = 3; i <= 11; i += 2) { + term = fixed_mul(term, f_squared); + term = fixed_div(term, int_to_fixed(i * (i - 1))); + + if ((i / 2) % 2 == 0) { + result = fixed_add(result, term); + } else { + result = fixed_sub(result, term); + } + } + + return result; +} + +/* Cosine function using Taylor series */ +fixed_t fixed_cos(fixed_t f) { + /* cos(x) = 1 - x²/2! + x⁴/4! - x⁶/6! + ... 
*/ + fixed_t result = FIXED_ONE; + fixed_t term = FIXED_ONE; + fixed_t f_squared = fixed_mul(f, f); + + int i; + for (i = 2; i <= 12; i += 2) { + term = fixed_mul(term, f_squared); + term = fixed_div(term, int_to_fixed(i * (i - 1))); + + if ((i / 2) % 2 == 0) { + result = fixed_add(result, term); + } else { + result = fixed_sub(result, term); + } + } + + return result; +} + +/* Tangent function */ +fixed_t fixed_tan(fixed_t f) { + fixed_t cos_val = fixed_cos(f); + if (cos_val == 0) + return 0; /* Handle undefined case */ + return fixed_div(fixed_sin(f), cos_val); +} + +/* Utility functions */ +fixed_t fixed_min(fixed_t a, fixed_t b) { return (a < b) ? a : b; } + +fixed_t fixed_max(fixed_t a, fixed_t b) { return (a > b) ? a : b; } + +fixed_t fixed_clamp(fixed_t f, fixed_t min_val, fixed_t max_val) { + if (f < min_val) + return min_val; + if (f > max_val) + return max_val; + return f; +} diff --git a/src/vm/fixed.h b/src/vm/fixed.h new file mode 100644 index 0000000..717705c --- /dev/null +++ b/src/vm/fixed.h @@ -0,0 +1,53 @@ +#ifndef FIXED_H +#define FIXED_H + +#include "common.h" + +/* Q16.16 fixed-point type */ +typedef i32 fixed_t; + +/* Constants */ +#define FIXED_ONE 0x00010000L /* 1.0 in Q16.16 */ +#define FIXED_ZERO 0x00000000L /* 0.0 in Q16.16 */ +#define FIXED_HALF 0x00008000L /* 0.5 in Q16.16 */ +#define FIXED_PI 0x0003243FL /* π ≈ 3.14159 */ +#define FIXED_E 0x0002B7E1L /* e ≈ 2.71828 */ +#define FIXED_MAX 0x7FFFFFFFL /* Maximum positive value */ +#define FIXED_MIN 0x80000000L /* Minimum negative value */ + +/* Conversion functions */ +fixed_t int_to_fixed(i32 i); +i32 fixed_to_int(fixed_t f); +fixed_t float_to_fixed(f32 f); +f32 fixed_to_float(fixed_t f); + +/* Basic arithmetic operations */ +fixed_t fixed_add(fixed_t a, fixed_t b); +fixed_t fixed_sub(fixed_t a, fixed_t b); +fixed_t fixed_mul(fixed_t a, fixed_t b); +fixed_t fixed_div(fixed_t a, fixed_t b); + +/* Comparison functions */ +int fixed_eq(fixed_t a, fixed_t b); +int fixed_ne(fixed_t a, fixed_t 
b); +int fixed_lt(fixed_t a, fixed_t b); +int fixed_le(fixed_t a, fixed_t b); +int fixed_gt(fixed_t a, fixed_t b); +int fixed_ge(fixed_t a, fixed_t b); + +/* Unary operations */ +fixed_t fixed_neg(fixed_t f); +fixed_t fixed_abs(fixed_t f); + +/* Advanced math functions */ +fixed_t fixed_sqrt(fixed_t f); +fixed_t fixed_sin(fixed_t f); /* f in radians */ +fixed_t fixed_cos(fixed_t f); /* f in radians */ +fixed_t fixed_tan(fixed_t f); /* f in radians */ + +/* Utility functions */ +fixed_t fixed_min(fixed_t a, fixed_t b); +fixed_t fixed_max(fixed_t a, fixed_t b); +fixed_t fixed_clamp(fixed_t f, fixed_t min, fixed_t max); + +#endif /* FIXED_H */ diff --git a/src/vm/vm.c b/src/vm/vm.c index 84bf11f..ba92c73 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -1,7 +1,8 @@ #include "vm.h" #include "device.h" -#include "opcodes.h" +#include "fixed.h" #include "libc.h" +#include "opcodes.h" #define COMPARE_AND_JUMP(type, op) \ do { \ @@ -101,12 +102,10 @@ bool step_vm(VM *vm) { Frame *child; u32 jmp, heap_mask, i; - /* Read call parameters */ jmp = read_u32(vm, code, vm->pc); vm->pc += 4; N = vm->code[vm->pc++]; - /* Read arguments */ for (i = 0; i < N; i++) { args[i] = vm->code[vm->pc++]; } @@ -114,7 +113,6 @@ bool step_vm(VM *vm) { return_reg = vm->code[vm->pc++]; frame->return_reg = return_reg; - /* Stack and frame checks */ if (vm->sp >= STACK_SIZE) return false; vm->stack[vm->sp++] = vm->pc; @@ -123,14 +121,12 @@ bool step_vm(VM *vm) { return false; vm->fp++; - /* Setup child frame */ child = &vm->frames[vm->fp]; child->start = vm->mp; child->end = vm->mp; child->return_reg = 0; child->heap_mask = 0; - /* Optimized register copy with bitmask for heap status */ heap_mask = 0; for (i = 0; i < N; i++) { src_reg = args[i]; @@ -185,11 +181,9 @@ bool step_vm(VM *vm) { if (parent->end + size + 4 > MEMORY_SIZE) { return false; } - *(u32 *)(vm->memory + new_ptr) = size; memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); parent->end += size + 4; - 
parent->registers[parent->return_reg] = new_ptr; parent->heap_mask |= (1 << parent->return_reg); } else { @@ -198,7 +192,6 @@ bool step_vm(VM *vm) { } } - /* Always handle frame cleanup */ vm->pc = vm->stack[--vm->sp]; vm->mp = child->start; vm->fp--; @@ -603,7 +596,7 @@ bool step_vm(VM *vm) { device_ptr = frame->registers[device_reg]; /* device pointer */ buffer_ptr = frame->registers[buffer_reg]; - size = frame->registers[size_reg]; /* size */ + size = frame->registers[size_reg]; /* size */ handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; @@ -624,7 +617,7 @@ bool step_vm(VM *vm) { vm->pc++; device_ptr = frame->registers[device_reg]; /* device pointer */ - handle = vm->memory[device_ptr + 4]; /* get device handle */ + handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; if (dev && dev->ops->refresh) { vm->flag = dev->ops->refresh(dev->data, &vm->memory[device_ptr + 4]); @@ -635,7 +628,6 @@ bool step_vm(VM *vm) { return true; } - case SYSCALL_DEVICE_WRITE: { Device *dev; u32 handle, buffer_ptr, size, device_ptr; @@ -743,28 +735,28 @@ bool step_vm(VM *vm) { case OP_DIV_INT: MATH_OP(i32, /); case OP_ABS_INT: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; - value = frame->registers[src1]; - if (value < 0) { - value = -value; - } - - frame->registers[dest] = value; - return true; - } + value = frame->registers[src1]; + if (value < 0) { + value = -value; + } + + frame->registers[dest] = value; + return true; + } case OP_NEG_INT: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; - value = frame->registers[src1]; - frame->registers[dest] = -value; - return true; + value = frame->registers[src1]; + 
frame->registers[dest] = -value; + return true; } case OP_ADD_NAT: MATH_OP(u32, +); @@ -782,7 +774,7 @@ bool step_vm(VM *vm) { src2 = read_u8(vm, code, vm->pc); vm->pc++; frame->registers[dest] = - (frame->registers[src1] * frame->registers[src2]) >> 16; + fixed_mul(frame->registers[src1], frame->registers[src2]); return true; } @@ -794,7 +786,7 @@ bool step_vm(VM *vm) { src2 = read_u8(vm, code, vm->pc); vm->pc++; frame->registers[dest] = - (frame->registers[src1] << 16) / frame->registers[src2]; + fixed_div(frame->registers[src1], frame->registers[src2]); return true; } @@ -805,7 +797,8 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = frame->registers[src1] + frame->registers[src2]; + frame->registers[dest] = + fixed_add(frame->registers[src1], frame->registers[src2]); return true; } @@ -816,7 +809,8 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = frame->registers[src1] - frame->registers[src2]; + frame->registers[dest] = + fixed_sub(frame->registers[src1], frame->registers[src2]); return true; } case OP_REAL_TO_INT: { @@ -826,11 +820,7 @@ bool step_vm(VM *vm) { vm->pc++; value = frame->registers[src1]; - if (value >= 0) { - frame->registers[dest] = value >> 16; - } else { - frame->registers[dest] = -((-value) >> 16); - } + frame->registers[dest] = fixed_to_int(value); return true; } @@ -839,7 +829,7 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = (frame->registers[src1] << 16); + frame->registers[dest] = int_to_fixed(frame->registers[src1]); return true; } case OP_REAL_TO_NAT: { @@ -848,11 +838,7 @@ bool step_vm(VM *vm) { src1 = read_u8(vm, code, vm->pc); vm->pc++; value = frame->registers[src1]; - if (value < 0) { - frame->registers[dest] = 0; - } else { - frame->registers[dest] = AS_NAT(value >> 16); - } + frame->registers[dest] = fixed_to_int(value); return true; } case OP_NAT_TO_REAL: { 
@@ -860,7 +846,7 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = AS_INT(frame->registers[src1] << 16); + frame->registers[dest] = int_to_fixed(frame->registers[src1]); return true; } case OP_JEQ_NAT: { diff --git a/test/add.rom b/test/add.rom index 618c517c3d9328a670095dc647f4051d1e5bc7ed..0f1dd4cf7279a0013513ad42e030f59d47f9742f 100644 GIT binary patch delta 93 zcmeBX>}O7BU2DF6Qeee&B**8BsbAU+Qf&6fteX3#K6SDEC6Kq gurNaem|2+qa{~qRQ&P+HOHzw+^$mck7#O%10N>vVwg3PC delta 89 zcmeBY>}F(T0D{PgtSZ8aKn4>7Ba;s^6Qeqi&B*+JqJy*v3$p-_yk#5;IuoJvUiNnfYGGf4s9quSqT z7wUmt_w?}G*=Wq`AW{)adng5@VIJT`97i3M>5IeQxT?8&@$aY!l~@S<5)cb&d$J!8^tx UgFfwbEAReW#(wye8}io}1D!191@#Vn+*#di|Op4dmzj7XVmv%Zvr|3iX=QT2R(% tKdCL*n5Y)+6jldj6L72Wk1Q@X>cSr1aL4Ni6P)%6X47+~xDEdCGd_iw3g!R+ diff --git a/test/loop.rom b/test/loop.rom index 42eabd88f168876c9bcae5b8f1b74a8b51f35411..1ae871122caf2b3379b7de38bba2408806ef034d 100644 GIT binary patch literal 258 zcmYL@u?oUK42F|SN{_omaBva1MGzdStBaFcH+=y?5eGZe;xp(o`Eci-mSP~uclpDA zAtL%B$5@YmuE>Iv-&3$NK_w%!u=xsQ&Mf-{1a=zwg=ke6C4{(T(sBZ>eJNtI(yLE^ zS@dF#A$tCxbIeEO0X3h529yj9-OVnRUKiirZrvLx| literal 254 zcmYL?u?oUK42F|SN{>4S#lc172Aw+8Rd>7TI|!EIV24_K1Hm`)@y`ERih(3w@`pb} zL|^0-#RTYz%t-mO1e+wNM1*E(zeQOy%V7h7jk@8$rz(u%oG+L(pMxu#i=>H1EyVZ!8NT#0L<3pX0$H*2&j7lS)04ysU($uxvy@q+v|BtOe-tWaz?i9$=TN z1rO7qnjO;6UwM=+7uDy)-KlXRIoHxQoXzCAG^)vec#`72kC25=KA(w(^Dc+ N3m@h)vKO*9vQGwOFP{Ja literal 594 zcmah`OHRWu5cT*UC(e{AiKv=Zm0-uB2&y8%k`+tTBS4k~i3*k+f@`qn3LJ!kn6XJD zkPvwB-prdBzp>01V;?}!pUn^u%M{Bzl1M${+*iy}>Kj%NNf;3>Y6fyN9=dRpJ2;Bf zA`VF(5jKRXhQe3D!D*`2zgC4HnbnQZM3`q7EzX|_vfLX??iQ1KS&{m;7aWqWHn#9k z{$u>3b=bl+2jn3^5yC6oW$LOi79gC&l~}8}YK>55*YTBnok*t)M9RQO7w;&YA9#%=3_9aPAiF@c9UN@x+q0vXCl z{I~_;M9$+iK95ETYolg1kt6) zEPVsPm9O9F@ZkSs<>rRmoeDLZYmu zBFCzli4lUL3Fc)jNKH;+6;yswIy%eQ5t~%SGaX4GbWNjb+BrHOvh_c*ZOE>@tU2zz z*z*pvS9{)b1Mj<@cbL7=^X?D4w|d@T_HNI6bKrf_^Tt_;!U$2%q(Sgy;kl$&4)FR# zSy>;do%>Mj@f4|aZRr7SXKX&IX#Af-N77W)BWbDyku=qhNSbO+Bu#ZHlBU`gNmD(H 
zq^X7;lWw}}W~b3RMH(~xLXT`co!OkIyUeW)qcNFotknDOkr&xs2qs*S3|C~8&7Xz_2|P$HR`^;mLjVW!xhcjo)n){JccfTf~EVu{TaK%G`8XTmz4<`al% zn@=GlC>+zeC;-*?QloO?)Z&t>8JkqjD}D+Abb(nusVwgKYVnWiZC|authwktY&_FLpo9Wu=6`GwwO)LJQ8AZip8pVj!4IVX=CeURR>YE>_2El=c@jDFt4kKmx zjFDAAW%Yf!v_7%_l`IzbPY7CDeMl8$_*GHS7O!zYYkgBtItY!a$|Q(O8^%o*6B|WU zP|cmf(z$Tqz^NVn0W6%`QFH%@yH4-U+~pZPsdG3pW`5vEV#qflax*xn kTRyCCTliYYFAm468gM3jDf}tCD|{<_BRm&=7ycIh1uBAfcmMzZ diff --git a/test/simple.rom b/test/simple.rom index 13f146d7d77cfec10f670467ab81563d06737b5d..fd84bf941006f8de59fe213c33f07f662309d1a5 100644 GIT binary patch literal 140 zcmXwyF$#b%3`KwPJK4p_!9g6|>One*N3f%dh}W-)(m-Aw$%g>IX5DIPVza~Gl+n1T uV5iD#F04i%$j9iUyGRV|O|pXY1vCf#%AnPw=_pUS%XOyJ?*7GdfZ_#>D+nL} delta 87 zcmeBS>|kVN00O^>tjhcCAf{%xhXA zq7QbzcD+ECnHs}dXSR)r-%uQPX-)}L8|Qyg(j<_wC&C%PydZAl;tw;842dqY#=uZf zngN=#Bc~Jl6UxY@L#IciiPPL=mE`lP8e$j10s_TjvxS;grVrZaY*@kzbg^o}am-fC z8m@^b{LU0L@|$EMQJ3Odee|#TeziduC#fLqzmJ zhByR(uIEA&OP!l8CVm5PJY)qWKwVt?m5Qcbse+<8hp;G#ySV&|xmbEfn;WG-sI1fo z&6?E8#{P_wnrv+4M3z|bUDiplsOvGZ)U2TZ@kH%_X0-}N8`&kzE2&1XKgDdc+Jdza w`BA2*)xRxPIvQqpYfkRf+>bZsF3p;qUZ_23csga%ZJFCkVQ?EnA( From 1311659bed2ecc225676f8c3faa8eb1edad2333d Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 8 Nov 2025 22:19:18 -0800 Subject: [PATCH 02/27] Add new IR language, update docs --- Makefile | 12 +- README.org | 102 +++++++------ ROADMAP.org | 6 +- src/arch/linux/main.c | 19 +-- src/tools/{ => assembler}/assembler.c | 0 src/tools/{ => assembler}/assembler.h | 4 +- src/tools/{ => assembler}/parser.c | 0 src/tools/{ => assembler}/parser.h | 0 src/tools/compiler.c | 200 -------------------------- src/tools/compiler.h | 38 ----- test/add.ul.ir | 37 +++++ test/fib.rom | Bin 187 -> 0 bytes test/fib.ul.ir | 44 ++++++ test/hello.ul.ir | 23 +++ test/loop.ul | 18 ++- test/loop.ul.ir | 54 +++++++ test/malloc.ul.ir | 36 +++++ test/paint-bw.ul | 2 +- test/paint-bw.ul.ir | 184 ++++++++++++++++++++++++ test/paint.ul.ir | 184 ++++++++++++++++++++++++ test/simple.ul.ir | 31 ++++ 
test/window.ul | 2 +- test/window.ul.ir | 89 ++++++++++++ 23 files changed, 765 insertions(+), 320 deletions(-) rename src/tools/{ => assembler}/assembler.c (100%) rename src/tools/{ => assembler}/assembler.h (86%) rename src/tools/{ => assembler}/parser.c (100%) rename src/tools/{ => assembler}/parser.h (100%) delete mode 100644 src/tools/compiler.c delete mode 100644 src/tools/compiler.h create mode 100644 test/add.ul.ir delete mode 100644 test/fib.rom create mode 100644 test/fib.ul.ir create mode 100644 test/hello.ul.ir create mode 100644 test/loop.ul.ir create mode 100644 test/malloc.ul.ir create mode 100644 test/paint-bw.ul.ir create mode 100644 test/paint.ul.ir create mode 100644 test/simple.ul.ir create mode 100644 test/window.ul.ir diff --git a/Makefile b/Makefile index 2327006..48b58fc 100644 --- a/Makefile +++ b/Makefile @@ -86,17 +86,13 @@ VM_SOURCES := \ ifeq ($(BUILD_MODE), release) PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c\ - $(SRC_DIR)/tools/parser.c \ - $(SRC_DIR)/tools/lexer.c \ - $(SRC_DIR)/tools/assembler.c\ - $(SRC_DIR)/tools/compiler.c + $(SRC_DIR)/tools/assembler/parser.c \ + $(SRC_DIR)/tools/assembler/assembler.c else PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c \ - $(SRC_DIR)/tools/parser.c \ - $(SRC_DIR)/tools/lexer.c \ - $(SRC_DIR)/tools/assembler.c\ - $(SRC_DIR)/tools/compiler.c + $(SRC_DIR)/tools/assembler/parser.c \ + $(SRC_DIR)/tools/assembler/assembler.c endif # --- OBJECT FILES --- diff --git a/README.org b/README.org index 0df876a..6e56f06 100644 --- a/README.org +++ b/README.org @@ -49,25 +49,35 @@ git clone https://git.alfrescocavern.com/zongor/undar-lang.git cd undar-lang && make #+END_SRC -=Sċieppan= is a minimal assembler that uses s-expressions. -You can view some examples in the =.asm.lisp= files in =/test= -The Undâr compiler will be written in Sċieppan, as well as core VM tests. +=Sċieppan= is an intermediate representation.
+You can view some examples in the =.ul.ir= files in =/test= -**Sample Program: =hello.asm.lisp=** +**Sample Program: =hello.ul.ir=** -#+BEGIN_SRC lisp -((code - (label main - (ldi $0 &terminal-namespace) ; get terminal device - (ldi $1 0) - (syscall OPEN $0 $0 $1) - (ldi $1 &hello-str) ; load hello string ptr - (strlen $2 $1) - (syscall WRITE $0 $1 $2) - (halt 0))) -(data - (label terminal-namespace "/dev/term/0") - (label hello-str "nuqneH 'u'?\n"))) +#+BEGIN_SRC sh +function main () + str hello is $0 + + load_heap_immediate "nuqneH 'u'?" -> hello + call pln hello + exit 0 + +function pln (str message is $0) + str ts is $1 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + int mode is $5 + + load_heap_immediate "/dev/term/0" -> ts # get terminal device + load_immediate 0 -> mode + syscall OPEN ts mode -> ts + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length + return #+END_SRC #+BEGIN_SRC sh @@ -82,33 +92,43 @@ memory is managed via frame based arenas. function scopes defines a memory frame heap allocations using the internal malloc opcode push pointers within this frame. when a frame exits, the pointer is reset like stack based gc. 
-#+BEGIN_SRC lisp -((code - (label main - (ldi $0 &terminal-namespace) ; get terminal device - (ldi $11 0) - (syscall OPEN $0 $0 $11) +#+BEGIN_SRC sh +function main () + int mode is $11 + str term is $10 - (ldi $1 &help) ; print help message - (fcall &pln ($0 $1) nil) + load_heap_immediate "/dev/term/0" -> term + load_immediate 0 -> mode + syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); - (ldi $1 32) ; read in a string of max 32 char length - (malloc $4 $1) ; allocate memory for the string - (syscall READ $0 $4 $1) ; read the string + load_heap_immediate "Enter a string:" -> $7 + string_length $7 -> $8 + syscall WRITE term $7 $8 # print prompt + + str user_string is $9 + load_immediate 32 -> $8 + malloc $8 -> user_string + syscall READ term user_string $8 # read in max 32 byte string + + call pln user_string + exit 0 + +function pln (str message is $0) + str ts is $1 + int mode is $5 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + + load_heap_immediate "/dev/term/0" -> ts + load_immediate 0 -> mode + syscall OPEN ts mode -> ts # get terminal device + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length - (fcall &pln ($0 $4) nil) ; print the string - (halt)) - (label pln - (ldi $3 &new-line) - (strlen $2 $1) - (syscall WRITE $0 $1 $2) - (strlen $4 $3) - (syscall WRITE $0 $3 $4) - (return nil))) -(data - (label terminal-namespace "/dev/term/0") - (label help "Enter a string: ") - (label new-line "\n"))) #+END_SRC values passed to functions must be explicitly returned to propagate. heap values are copy on write, so if a value is modified in a child function it will change the parents value, unless the size of the structure changes then it will copy the parents value and append it to its own frame with the modification. this allows for the low resource usage of a C but the convenience of a Java/Go without the garbage collection. 
diff --git a/ROADMAP.org b/ROADMAP.org index d3ad267..79f4043 100644 --- a/ROADMAP.org +++ b/ROADMAP.org @@ -170,7 +170,7 @@ function main(int argc, str[] argv) { } } - exits("Client Closed Successfully"); + exit(0); } #+END_SRC @@ -184,7 +184,7 @@ function main(int argc, str[] argv) { Player[] players = [Player("user", [0.0, 0.0, 0.0], RED)]; while (running) { if (Client client = s.accept("players")) { - if (Message message = client.get()) { + if (Message message = client.read()) { if (message.t == "close") { client.close(); running = false; @@ -196,7 +196,7 @@ function main(int argc, str[] argv) { } } } - exits(nil); + exit(0); } #+END_SRC diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 3626374..17e8bd1 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,6 +1,5 @@ -#include "../../tools/assembler.h" -#include "../../tools/compiler.h" -#include "../../tools/parser.h" +#include "../../tools/assembler/assembler.h" +#include "../../tools/assembler/parser.h" #include "../../vm/vm.h" #include "devices.h" #include @@ -146,19 +145,7 @@ bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { source[read] = '\0'; fclose(f); - initLexer(source); - Token token; - do { - token = nextToken(); - if (token.type == TOKEN_ERROR) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); - break; // Stop on error, or continue if you want to see more - } - if (token.type != TOKEN_EOF) { - printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), - token.length, token.start); - } - } while (token.type != TOKEN_EOF); + return true; } diff --git a/src/tools/assembler.c b/src/tools/assembler/assembler.c similarity index 100% rename from src/tools/assembler.c rename to src/tools/assembler/assembler.c diff --git a/src/tools/assembler.h b/src/tools/assembler/assembler.h similarity index 86% rename from src/tools/assembler.h rename to src/tools/assembler/assembler.h index 7c49cf9..26864a3 100644 
--- a/src/tools/assembler.h +++ b/src/tools/assembler/assembler.h @@ -1,8 +1,8 @@ #ifndef ASSEMBLER_H #define ASSEMBLER_H -#include "../vm/common.h" -#include "../vm/vm.h" +#include "../../vm/common.h" +#include "../../vm/vm.h" #include "parser.h" #include diff --git a/src/tools/parser.c b/src/tools/assembler/parser.c similarity index 100% rename from src/tools/parser.c rename to src/tools/assembler/parser.c diff --git a/src/tools/parser.h b/src/tools/assembler/parser.h similarity index 100% rename from src/tools/parser.h rename to src/tools/assembler/parser.h diff --git a/src/tools/compiler.c b/src/tools/compiler.c deleted file mode 100644 index d531caf..0000000 --- a/src/tools/compiler.c +++ /dev/null @@ -1,200 +0,0 @@ -#include "compiler.h" -#include "lexer.h" -#include -#include - -//typedef struct { -// Token current; -// Token previous; -// bool hadError; -// bool panicMode; -//} Parser; -// -//typedef enum { -// PREC_NONE, -// PREC_ASSIGNMENT, /* = */ -// PREC_OR, /* or */ -// PREC_AND, /* and */ -// PREC_EQUALITY, /* == != */ -// PREC_COMPARISON, /* < > <= >= */ -// PREC_TERM, /* + - */ -// PREC_FACTOR, /* * / */ -// PREC_UNARY, /* not */ -// PREC_CALL, /* . 
() */ -// PREC_PRIMARY -//} Precedence; -// -//typedef void (*ParseFn)(char *program); -// -//typedef struct { -// ParseFn prefix; -// ParseFn infix; -// Precedence precedence; -//} ParseRule; -// -//typedef struct { -// i8 rp; // Next free register -//} Compiler; -// -//Parser parser; -// -//const char *internalErrorMsg = -// "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes."; -// -//void errorAt(Token *token, const char *message) { -// if (parser.panicMode) -// return; -// parser.panicMode = true; -// fprintf(stderr, "[line %d] Error", token->line); -// -// if (token->type == TOKEN_EOF) { -// fprintf(stderr, " at end"); -// } else if (token->type == TOKEN_ERROR) { -// } else { -// fprintf(stderr, " at '%.*s'", token->length, token->start); -// } -// -// fprintf(stderr, ": %s\n", message); -// parser.hadError = true; -//} -// -//void error(const char *message) { errorAt(&parser.previous, message); } -// -//void errorAtCurrent(const char *message) { errorAt(&parser.current, message); } -// -//void advance() { -// parser.previous = parser.current; -// -// for (;;) { -// parser.current = nextToken(); -// if (parser.current.type != TOKEN_ERROR) -// break; -// -// errorAtCurrent(parser.current.start); -// } -//} -// -//void consume(TokenType type, const char *message) { -// if (parser.current.type == type) { -// advance(); -// return; -// } -// -// errorAtCurrent(message); -//} -// -//static int allocateRegister(Compiler *c) { -// char buffer[38]; -// if (c->rp > 28) { -// sprintf(buffer, "Out of registers (used %d, max 28)", c->rp); -// error(buffer); -// return -1; -// } -// -// return c->rp++; -//} - -//static void freeRegister(Compiler *c, u8 reg) { -// if (reg == c->rp - 1) { -// c->rp--; -// } -//} - -//void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } -// -//void emit_u32(VM *vm, u32 value) { -// write_u32(vm, code, vm->cp, value); -// vm->cp += 4; -//} -// -//void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } -// -//static bool 
check(TokenType type) { return parser.current.type == type; } -// -//static bool match(TokenType type) { -// if (!check(type)) -// return false; -// advance(); -// return true; -//} -// -//static void expression(Compiler *c, VM *vm) { -// USED(c); -// USED(vm); -//} -// -//void number(Compiler *c, VM *vm) { -// emit_opcode(vm, OP_LOAD_IMM); -// int reg = allocateRegister(c); -// if (reg < 0) -// return; -// emit_byte(vm, reg); -// -// switch (parser.previous.type) { -// case TOKEN_INT_LITERAL: { -// char *endptr; -// i32 value = (i32)strtol(parser.previous.start, &endptr, 10); -// emit_u32(vm, value); -// return; -// } -// case TOKEN_UINT_LITERAL: { -// long value = atol(parser.previous.start); -// emit_u32(vm, value); -// return; -// } -// case TOKEN_FLOAT_LITERAL: { -// float value = atof(parser.previous.start); -// fixed_t fvalue = float_to_fixed(value); -// emit_u32(vm, fvalue); -// return; -// } -// default: -// return; // Unreachable. -// } -// -// errorAtCurrent("Invalid number format"); -//} -// -//static void unary(Compiler *c, VM *vm) { -// TokenType operatorType = parser.previous.type; -// -// // Compile the operand. -// expression(c, vm); -// -// // Emit the operator instruction. -// switch (operatorType) { -// default: -// return; // Unreachable. 
-// } -//} -// -//static void emitHalt(Compiler *c, VM *vm) { -// emit_opcode(vm, OP_HALT); -// advance(); -// number(c, vm); -//} -// -//static void endCompiler(Compiler *c, VM *vm) { emitHalt(c, vm); } -// -//static void grouping(Compiler *c, VM *vm) { -// expression(c, vm); -// consume(TOKEN_RPAREN, "Expect ')' after expression."); -//} - -bool compile(const char *source, VM *vm) { - USED(source); - USED(vm); - //initLexer(source); -// - //parser.hadError = false; - //parser.panicMode = false; -// - //Compiler compiler; - //advance(); - //expression(&compiler, vm); - //consume(TOKEN_EOF, "Expect end of expression."); - //endCompiler(&compiler, vm); -// - //return parser.hadError; - return false; -} diff --git a/src/tools/compiler.h b/src/tools/compiler.h deleted file mode 100644 index c657898..0000000 --- a/src/tools/compiler.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef UNDAR_COMPILER_H -#define UNDAR_COMPILER_H - -#include "../vm/common.h" -#include "../vm/vm.h" -#include "../vm/fixed.h" -#include "lexer.h" - -#include -#include -#include -#include - -typedef struct field_s { - char* name; // "handle" - TokenType type; // TOKEN_TYPE_NAT - u32 offset; // 4 (for first field in heap object) - u32 size; // 4 (bytes for nat) -} Field; - -typedef struct plex_def_s { - char* name; - u32 field_count; - u32 logical_size; // Data size (e.g., 12 for Vec3) - u32 physical_size; // Total allocation (logical + 4) - Field *fields; // All offsets are PHYSICAL -} PlexDef; - -typedef struct array_def_s { - TokenType type; - u32 length; - u32 logical_size; // length * element_size - u32 physical_size; // logical_size + 4 -} ArrayDef; - -bool compile(const char *source, VM *vm); - -#endif diff --git a/test/add.ul.ir b/test/add.ul.ir new file mode 100644 index 0000000..352746b --- /dev/null +++ b/test/add.ul.ir @@ -0,0 +1,37 @@ +global const int x = 1 +global const int y = 1 + +function main () + int a is $0 + int b is $1 + int ans is $2 + str ans_string is $3 + + load_absolute_32 &x 
-> a + load_absolute_32 &y -> b + call add a b -> ans + int_to_string ans -> ans_string + call pln ans_string + exit 0 + +function add (int a is $0, int b is $1) + int result is $2 + add_int a b -> result + return result + +function pln (str message is $0) + str ts is $1 + int mode is $5 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + + load_heap_immediate ts "/dev/term/0" # get terminal device + load_immediate 0 -> mode + syscall OPEN ts mode -> ts + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length + return diff --git a/test/fib.rom b/test/fib.rom deleted file mode 100644 index 969bdb4a741b7866d41b08c48cb5cbb9f13f8864..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 187 zcmYk0K@NgY3`A$z0*P!~xk5J_;ClgYz{yk#5;IuoJvUiNnfYGGf4s9quSqT z7wUmt_w?}G*=Wq`AW{)adng5@ n + call fib n -> n + int_to_string n -> str_n + call pln str_nn + exit 0 + +function fib (int n is $0) + load_immediate 2 -> $1 + + jump_lt_int &base_case n $1 + + load_immediate 2 -> $3 + sub_int n $3 -> $4 + call fib $4 -> $5 + + load_immediate 1 -> $3 + sub_int n $3 -> $4 + call fib $4 -> $6 + + add_int $6 $5 -> $7 + return $7 +&base_case + return n + +function pln (str message is $0) + str ts is $1 + int mode is $5 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + + load_heap_immediate ts "/dev/term/0" # get terminal device + load_immediate 0 -> mode + syscall OPEN ts mode -> ts + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length + return diff --git a/test/hello.ul.ir b/test/hello.ul.ir new file mode 100644 index 0000000..cd2a305 --- /dev/null +++ b/test/hello.ul.ir @@ -0,0 +1,23 @@ +function main () + str hello is $0 + + load_heap_immediate "nuqneH 'u'?" 
-> hello + call pln hello + exit 0 + +function pln (str message is $0) + str ts is $1 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + int mode is $5 + + load_heap_immediate "/dev/term/0" -> ts # get terminal device + load_immediate 0 -> mode + syscall OPEN ts mode -> ts + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length + return \ No newline at end of file diff --git a/test/loop.ul b/test/loop.ul index 28591b0..c84e524 100644 --- a/test/loop.ul +++ b/test/loop.ul @@ -1,8 +1,3 @@ -/** - * Constants - */ -const str nl = "\n"; - plex Terminal { nat handle; } @@ -17,17 +12,20 @@ function main() { a = a + 5.0; } nat b = a as nat; - pln(term, "Enter a string:"); + pln("Enter a string:"); str user_string = term.read(32); - pln(term, a.str); - pln(term, b.str); - pln(term, user_string); + pln(a as str); + pln(b as str); + pln(user_string); } /** * Print with a newline */ -function pln(Terminal term, str message) { +function pln(str message) { + const str nl = "\n"; + Terminal term = open("/dev/term/0", 0); write(term, message, message.length); write(term, nl, nl.length); } + diff --git a/test/loop.ul.ir b/test/loop.ul.ir new file mode 100644 index 0000000..542f148 --- /dev/null +++ b/test/loop.ul.ir @@ -0,0 +1,54 @@ + +function main () + real a is $0 + int i is $1 + int mode is $11 + str term is $10 + + load_immediate 5.0 -> a + load_immediate 5000 -> i + load_immediate 0 -> $2 + load_immediate -1 -> $3 + load_immediate 5.0 -> $5 + &loop_body + add_real a $5 -> a + add_int i $3 -> i + jump_ge_int &loop_body i $2 + load_heap_immediate "/dev/term/0" -> term + load_immediate 0 -> mode + syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); + + nat b is $1 + real_to_nat a -> b + load_heap_immediate "Enter a string:" -> $7 + string_length $7 -> $8 + syscall WRITE term $7 $8 # print prompt + + str user_string is $9 + load_immediate 32 -> $8 
+ malloc $8 -> user_string + syscall READ term user_string $8 # read in max 32 byte string + + call pln user_string + nat_to_string b -> $4 + call pln $4 + real_to_string a -> $3 + call pln $3 + exit 0 + +function pln (str message is $0) + str ts is $1 + int mode is $5 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + + load_heap_immediate "/dev/term/0" -> ts + load_immediate 0 -> mode + syscall OPEN ts mode -> ts # get terminal device + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length + return \ No newline at end of file diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir new file mode 100644 index 0000000..ea123d7 --- /dev/null +++ b/test/malloc.ul.ir @@ -0,0 +1,36 @@ + +function main () + int mode is $11 + str term is $10 + + load_heap_immediate "/dev/term/0" -> term + load_immediate 0 -> mode + syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); + + load_heap_immediate "Enter a string:" -> $7 + string_length $7 -> $8 + syscall WRITE term $7 $8 # print prompt + + str user_string is $9 + load_immediate 32 -> $8 + malloc $8 -> user_string + syscall READ term user_string $8 # read in max 32 byte string + + call pln user_string + exit 0 + +function pln (str message is $0) + str ts is $1 + int mode is $5 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + + load_heap_immediate "/dev/term/0" -> ts + load_immediate 0 -> mode + syscall OPEN ts mode -> ts # get terminal device + strlen message -> msg_length + syscall WRITE ts message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE ts nl nl_length diff --git a/test/paint-bw.ul b/test/paint-bw.ul index 9bdec58..a4d036c 100644 --- a/test/paint-bw.ul +++ b/test/paint-bw.ul @@ -21,7 +21,7 @@ plex Screen implements Device { draw() { unsafe { - write(this, this.buffer, this.buffer_size); + write(this, this.buffer, this.buffer.size); } } } diff 
--git a/test/paint-bw.ul.ir b/test/paint-bw.ul.ir new file mode 100644 index 0000000..dbcf55d --- /dev/null +++ b/test/paint-bw.ul.ir @@ -0,0 +1,184 @@ +global const str screen_namespace = "/dev/screen/0" +global const str mouse_namespace = "/dev/mouse/0" +global const byte BLACK = 0 +global const byte WHITE = 255 +global const byte DARK_GRAY = 73 +global const byte GRAY = 146 +global const byte LIGHT_GRAY = 182 +global byte SELECTED_COLOR = 255 + +function main () + # Open screen + plex screen is $0 + str screen_name is $18 + int mode is $11 + nat screen_buffer is $21 + + # use load immediate because it is a pointer to a string, not a value + load_address &screen_namespace -> screen_name + load_immediate 0 -> mode + syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); + + nat width is $20 + nat size is $22 + load_offset_32 screen 8 -> width # load width + load_offset_32 screen 12 -> size # load size + load_immediate 16 -> $1 # offset for screen buffer + add_nat screen $1 -> screen_buffer + + # open mouse + plex mouse is $15 + str mouse_name is $16 + load_address &mouse_namespace -> mouse_name + syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); + + byte color is $1 + nat x_pos is $12 + nat y_pos is $13 + + load_absolute_32 &BLACK -> color + load_immediate 1 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + + load_absolute_32 &WHITE -> color + load_immediate 21 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + + # screen.draw# + syscall WRITE screen screen_buffer size + + nat zero is $11 + + draw_loop: + # load mouse click data + syscall REFRESH mouse + + byte left_down is $9 + load_offset_8 mouse 16 -> left_down # load btn1 pressed + + jump_eq_nat &draw_loop left_down zero + + nat mouse_x is $7 + nat mouse_y is $8 + load_offset_32 mouse 8 -> mouse_x # load x + load_offset_32 mouse 12 -> mouse_y # 
load y + + nat box_size is $14 + load_immediate 20 -> box_size + + # first row + load_absolute_32 &BLACK -> color + load_immediate 1 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + + + load_absolute_32 &WHITE -> color + load_immediate 21 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + + syscall WRITE screen screen_buffer size + + byte selected_color is $25 + load_absolute_32 &SELECTED_COLOR -> selected_color + + nat brush_size is $19 + load_immediate 5 -> brush_size + + call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size + + jump &draw_loop + + # Flush and halt + exit 0 + +function set_color_if_clicked (int click_x is $0, int click_y is $1, + int box_x is $2, int box_y is $3, byte color is $4, int box_size is $5) + + # Compute right + int right_edge is $6 + add_int box_x box_size -> right_edge + + # Compute bottom = box_y + box_size + int bottom_edge is $7 + add_int box_y box_size -> bottom_edge + + # Bounds check: x in [box_x, right] and y in [box_y, bottom] + jump_lt_int &fail click_x box_x + jump_ge_int &fail click_x right_edge + jump_lt_int &fail click_y box_y + jump_ge_int &fail click_y bottom_edge + + store_absolute_8 &SELECTED_COLOR color + + fail: + return + +function draw_outlined_swatch(nat base is $0, + byte color is $1, int x is $2, int y is $3, int width is $4) + + # Constants + nat background_color is $5 + load_absolute_32 &GRAY -> background_color + + byte selected_color is $10 + load_absolute_32 &SELECTED_COLOR -> selected_color + + jump_eq_int &set_selected selected_color color + jump &end_set_selected + set_selected: + load_absolute_32 &DARK_GRAY -> background_color + end_set_selected: + + nat outline_size is $6 + load_immediate 20 -> outline_size + + nat 
fill_size is $7 + load_immediate 17 -> fill_size + + nat offset is $8 + load_immediate 2 -> offset + + call &draw_box base width background_color x y outline_size outline_size + + add_int x offset -> $9 # x + 2 + add_int y offset -> $10 # y + 2 + + call &draw_box base width color $9 $10 fill_size fill_size + + return + +function draw_box (nat base is $0, nat screen_width is $1, + byte color is $2, nat x_start is $3, nat y_start is $4, nat width is $5, nat height is $6) + + # Compute start address: base + y*640 + x + nat offset is $15 + mul_int y_start screen_width -> offset + add_int offset x_start -> offset + add_nat offset base -> offset + nat fat_ptr_size is $25 + load_immediate 4 -> fat_ptr_size + add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size + + int i is $30 + load_immediate 1 -> i + + int zero is $26 + load_immediate 0 -> zero + + int row_end is $27 + nat pixel_ptr is $29 + + draw_box_outer: + add_int offset width -> row_end # current + width + register_move offset -> pixel_ptr # set pixel point + memset_8 pixel_ptr color width # draw row + add_int offset screen_width -> offset # next row += 640 + sub_int height i -> height # decrement row count + jump_gt_int &draw_box_outer height zero + return diff --git a/test/paint.ul.ir b/test/paint.ul.ir new file mode 100644 index 0000000..dbcf55d --- /dev/null +++ b/test/paint.ul.ir @@ -0,0 +1,184 @@ +global const str screen_namespace = "/dev/screen/0" +global const str mouse_namespace = "/dev/mouse/0" +global const byte BLACK = 0 +global const byte WHITE = 255 +global const byte DARK_GRAY = 73 +global const byte GRAY = 146 +global const byte LIGHT_GRAY = 182 +global byte SELECTED_COLOR = 255 + +function main () + # Open screen + plex screen is $0 + str screen_name is $18 + int mode is $11 + nat screen_buffer is $21 + + # use load immediate because it is a pointer to a string, not a value + load_address &screen_namespace -> screen_name + load_immediate 0 -> mode + syscall OPEN 
screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); + + nat width is $20 + nat size is $22 + load_offset_32 screen 8 -> width # load width + load_offset_32 screen 12 -> size # load size + load_immediate 16 -> $1 # offset for screen buffer + add_nat screen $1 -> screen_buffer + + # open mouse + plex mouse is $15 + str mouse_name is $16 + load_address &mouse_namespace -> mouse_name + syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); + + byte color is $1 + nat x_pos is $12 + nat y_pos is $13 + + load_absolute_32 &BLACK -> color + load_immediate 1 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + + load_absolute_32 &WHITE -> color + load_immediate 21 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + + # screen.draw# + syscall WRITE screen screen_buffer size + + nat zero is $11 + + draw_loop: + # load mouse click data + syscall REFRESH mouse + + byte left_down is $9 + load_offset_8 mouse 16 -> left_down # load btn1 pressed + + jump_eq_nat &draw_loop left_down zero + + nat mouse_x is $7 + nat mouse_y is $8 + load_offset_32 mouse 8 -> mouse_x # load x + load_offset_32 mouse 12 -> mouse_y # load y + + nat box_size is $14 + load_immediate 20 -> box_size + + # first row + load_absolute_32 &BLACK -> color + load_immediate 1 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + + + load_absolute_32 &WHITE -> color + load_immediate 21 -> x_pos + load_immediate 1 -> y_pos + call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + + syscall WRITE screen screen_buffer size + + byte selected_color is $25 + load_absolute_32 &SELECTED_COLOR -> selected_color + + nat brush_size is $19 + load_immediate 5 -> brush_size + + call 
&draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size + + jump &draw_loop + + # Flush and halt + exit 0 + +function set_color_if_clicked (int click_x is $0, int click_y is $1, + int box_x is $2, int box_y is $3, byte color is $4, int box_size is $5) + + # Compute right + int right_edge is $6 + add_int box_x box_size -> right_edge + + # Compute bottom = box_y + box_size + int bottom_edge is $7 + add_int box_y box_size -> bottom_edge + + # Bounds check: x in [box_x, right] and y in [box_y, bottom] + jump_lt_int &fail click_x box_x + jump_ge_int &fail click_x right_edge + jump_lt_int &fail click_y box_y + jump_ge_int &fail click_y bottom_edge + + store_absolute_8 &SELECTED_COLOR color + + fail: + return + +function draw_outlined_swatch(nat base is $0, + byte color is $1, int x is $2, int y is $3, int width is $4) + + # Constants + nat background_color is $5 + load_absolute_32 &GRAY -> background_color + + byte selected_color is $10 + load_absolute_32 &SELECTED_COLOR -> selected_color + + jump_eq_int &set_selected selected_color color + jump &end_set_selected + set_selected: + load_absolute_32 &DARK_GRAY -> background_color + end_set_selected: + + nat outline_size is $6 + load_immediate 20 -> outline_size + + nat fill_size is $7 + load_immediate 17 -> fill_size + + nat offset is $8 + load_immediate 2 -> offset + + call &draw_box base width background_color x y outline_size outline_size + + add_int x offset -> $9 # x + 2 + add_int y offset -> $10 # y + 2 + + call &draw_box base width color $9 $10 fill_size fill_size + + return + +function draw_box (nat base is $0, nat screen_width is $1, + byte color is $2, nat x_start is $3, nat y_start is $4, nat width is $5, nat height is $6) + + # Compute start address: base + y*640 + x + nat offset is $15 + mul_int y_start screen_width -> offset + add_int offset x_start -> offset + add_nat offset base -> offset + nat fat_ptr_size is $25 + load_immediate 4 -> fat_ptr_size + add_nat offset fat_ptr_size -> 
offset # need to add offset for fat pointer size + + int i is $30 + load_immediate 1 -> i + + int zero is $26 + load_immediate 0 -> zero + + int row_end is $27 + nat pixel_ptr is $29 + + draw_box_outer: + add_int offset width -> row_end # current + width + register_move offset -> pixel_ptr # set pixel point + memset_8 pixel_ptr color width # draw row + add_int offset screen_width -> offset # next row += 640 + sub_int height i -> height # decrement row count + jump_gt_int &draw_box_outer height zero + return diff --git a/test/simple.ul.ir b/test/simple.ul.ir new file mode 100644 index 0000000..be015d4 --- /dev/null +++ b/test/simple.ul.ir @@ -0,0 +1,31 @@ +global const real x = 1.0 +global const real y = 1.0 + +function main () + real x is $0 + load_absolute_32 &x -> x + real y is $1 + load_absolute_32 &y -> y + real result is $2 + add_real x y -> result + str result_str is $3 + real_to_string result -> result_str + call &pln result_str + exit 0 + +function pln (str message is $0) + str term is $1 + int msg_length is $2 + str nl is $3 + int nl_length is $4 + int mode is $5 + + load_heap_immediate "/dev/term/0" -> term # get terminal device + load_immediate 0 -> mode + syscall OPEN term mode -> term + strlen message -> msg_length + syscall WRITE term message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE term nl nl_length + return diff --git a/test/window.ul b/test/window.ul index a946203..a578d96 100644 --- a/test/window.ul +++ b/test/window.ul @@ -49,7 +49,7 @@ function main() { screen.draw(); loop { - if (mouse.btn1) { + if (mouse.left) { unsafe { screen.buffer[mouse.y * width + mouse.x + screen.buffer.ptr + 4] = WHITE; diff --git a/test/window.ul.ir b/test/window.ul.ir new file mode 100644 index 0000000..c133112 --- /dev/null +++ b/test/window.ul.ir @@ -0,0 +1,89 @@ +global const str screen_namespace = "/dev/screen/0" +global const str mouse_namespace = "/dev/mouse/0" +global const str terminal_namespace = "/dev/term/0" 
+global const str new_line = "\n" +global const byte WHITE = 255 + +function main () + # Open screen + # use load immediate because it is a pointer to a string, not a value + plex screen + load_address &screen_namespace -> $18 + int mode is $11 + load_immediate 0 -> mode + syscall OPEN $18 mode -> screen # openout Plex screen, in namespace, in flags + + nat_to_string screen -> $5 + call &pln $5 + + nat width is $20 + load_offset_32 screen 8 -> width # load width + nat_to_string width -> $5 + call &pln $5 + + nat buffer_size is $22 + load_offset_32 screen 12 -> buffer_size # load size + nat_to_string buffer_size -> $5 + call &pln $5 + + nat screen_buffer is $21 + load_immediate $1 16 # offset for screen buffer + add_nat screen $1 -> screen_buffer + + nat_to_string screen_buffer -> $5 + call &pln $5 + + # open mouse + plex mouse is $15 + load_address &mouse_namespace -> $16 + syscall OPEN $16 mode -> mouse # openout Plex mouse, in namespace, in flags + + syscall WRITE screen screen_buffer buffer_size # redraw + + draw_loop: + # load mouse click data + syscall STAT mouse + + byte left_down is $9 + load_offset_8 mouse 16 -> left_down # load btn1 pressed + + jump_eq_nat &draw_loop left_down mode # mode is 0 which is an alias for false + + nat x is $7 + load_offset_32 mouse 8 -> x # load x + nat y is $8 + load_offset_32 mouse 12 -> y # load y + + # Compute start address: y*width + x + nat pixel_pos is $30 + mul_nat y $20 -> pixel_pos # = y * width + add_nat x pixel_pos -> pixel_pos # += x + add_nat screen_buffer pixel_pos -> pixel_pos # += pixel_offset + nat fat_ptr_size is $1 + load_immediate 4 -> fat_ptr_size # need to add offset for fat pointer size + add_nat pixel_pos fat_ptr_size -> pixel_pos + + byte color is $3 + load_absolute_32 &WHITE -> color + store_absolute_8 pixel_pos color # draw color at screen [x,y] + syscall WRITE screen screen_buffer buffer_size # redraw + + jump &draw_loop + exit 0 + +function pln (str message is $0) + str term is $1 + int msg_length 
is $2 + str nl is $3 + int nl_length is $4 + int mode is $5 + + load_heap_immediate "/dev/term/0" -> term # get terminal device + load_immediate 0 -> mode + syscall OPEN term mode -> term + strlen message -> msg_length + syscall WRITE term message msg_length + load_heap_immediate "\n" -> nl + strlen nl -> nl_length + syscall WRITE term nl nl_length + return \ No newline at end of file From a1197e8b431d2da46dd72dda4391a687b3d254f5 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 8 Nov 2025 22:29:32 -0800 Subject: [PATCH 03/27] fix small bug --- test/window.ul.ir | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/window.ul.ir b/test/window.ul.ir index c133112..9a72395 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -7,7 +7,7 @@ global const byte WHITE = 255 function main () # Open screen # use load immediate because it is a pointer to a string, not a value - plex screen + plex screen is $0 load_address &screen_namespace -> $18 int mode is $11 load_immediate 0 -> mode @@ -78,12 +78,12 @@ function pln (str message is $0) int nl_length is $4 int mode is $5 - load_heap_immediate "/dev/term/0" -> term # get terminal device + load_address &terminal_namespace -> term # get terminal device load_immediate 0 -> mode syscall OPEN term mode -> term strlen message -> msg_length syscall WRITE term message msg_length - load_heap_immediate "\n" -> nl + load_address &new_line -> nl strlen nl -> nl_length syscall WRITE term nl nl_length - return \ No newline at end of file + return From 99e2f2c0c36b2088455aa438be584f687c40bb59 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 8 Nov 2025 23:44:42 -0800 Subject: [PATCH 04/27] some experimental syntax --- test/window.ul | 21 +++++++------ test/window.ul.ir | 54 +++++++++++++++++---------------- test/window.ul.uir | 73 +++++++++++++++++++++++++++++++++++++++++++++ test/window.ul.vuir | 72 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 35 deletions(-) create mode 100644 
test/window.ul.uir create mode 100644 test/window.ul.vuir diff --git a/test/window.ul b/test/window.ul index a578d96..8b31ac5 100644 --- a/test/window.ul +++ b/test/window.ul @@ -1,8 +1,11 @@ /** * Constants */ -const str nl = "\n"; -const nat WHITE = 255; +const str screen_namespace = "/dev/screen/0" +const str mouse_namespace = "/dev/mouse/0" +const str terminal_namespace = "/dev/term/0" +const str new_line = "\n" +const byte WHITE = 255 /** * Devices @@ -37,15 +40,15 @@ plex Mouse { * Main function */ function main() { - Screen screen = open("/dev/screen/0", 0); - pln(screen.handle.str); - pln(screen.width.str); - pln(screen.size.str); + Screen screen = open(screen_namespace, 0); + pln(screen.handle as str); + pln(screen.width as str); + pln(screen.size as str); unsafe { - pln(screen.screen_buffer.ptr.str); + pln(screen.screen_buffer.ptr as str); } - Mouse mouse = open("/dev/mouse/0", 0); + Mouse mouse = open(mouse_namespace, 0); screen.draw(); loop { @@ -63,7 +66,7 @@ function main() { * Print with a newline */ function pln(str message) { - Terminal term = open("/dev/term/0", 0); + Terminal term = open(terminal_namespace, 0); write(term, message, message.length); write(term, nl, nl.length); } diff --git a/test/window.ul.ir b/test/window.ul.ir index 9a72395..a5a60b2 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -8,35 +8,43 @@ function main () # Open screen # use load immediate because it is a pointer to a string, not a value plex screen is $0 - load_address &screen_namespace -> $18 - int mode is $11 + plex mouse is $1 + str tmp_str is $2 + byte color is $3 + byte left_down is $4 + int mode is $5 + nat offset_temp is $6 + nat x is $7 + nat y is $8 + nat width is $9 + nat screen_buffer is $10 + nat buffer_size is $11 + nat pixel_pos is $12 + + load_address &screen_namespace -> tmp_str load_immediate 0 -> mode - syscall OPEN $18 mode -> screen # openout Plex screen, in namespace, in flags + syscall OPEN tmp_str mode -> screen # openout Plex screen, in 
namespace, in flags - nat_to_string screen -> $5 - call &pln $5 + nat_to_string screen -> tmp_str + call &pln tmp_str - nat width is $20 load_offset_32 screen 8 -> width # load width - nat_to_string width -> $5 - call &pln $5 + nat_to_string width -> tmp_str + call &pln tmp_str - nat buffer_size is $22 load_offset_32 screen 12 -> buffer_size # load size - nat_to_string buffer_size -> $5 - call &pln $5 + nat_to_string buffer_size -> tmp_str + call &pln tmp_str - nat screen_buffer is $21 - load_immediate $1 16 # offset for screen buffer - add_nat screen $1 -> screen_buffer + load_immediate 16 -> offset_temp # offset for screen buffer + add_nat screen offset_temp -> screen_buffer - nat_to_string screen_buffer -> $5 - call &pln $5 + nat_to_string screen_buffer -> tmp_str + call &pln tmp_str # open mouse - plex mouse is $15 - load_address &mouse_namespace -> $16 - syscall OPEN $16 mode -> mouse # openout Plex mouse, in namespace, in flags + load_address &mouse_namespace -> tmp_str + syscall OPEN tmp_str mode -> mouse # openout Plex mouse, in namespace, in flags syscall WRITE screen screen_buffer buffer_size # redraw @@ -44,26 +52,20 @@ function main () # load mouse click data syscall STAT mouse - byte left_down is $9 load_offset_8 mouse 16 -> left_down # load btn1 pressed jump_eq_nat &draw_loop left_down mode # mode is 0 which is an alias for false - nat x is $7 load_offset_32 mouse 8 -> x # load x - nat y is $8 load_offset_32 mouse 12 -> y # load y # Compute start address: y*width + x - nat pixel_pos is $30 - mul_nat y $20 -> pixel_pos # = y * width + mul_nat y width -> pixel_pos # = y * width add_nat x pixel_pos -> pixel_pos # += x add_nat screen_buffer pixel_pos -> pixel_pos # += pixel_offset - nat fat_ptr_size is $1 load_immediate 4 -> fat_ptr_size # need to add offset for fat pointer size add_nat pixel_pos fat_ptr_size -> pixel_pos - byte color is $3 load_absolute_32 &WHITE -> color store_absolute_8 pixel_pos color # draw color at screen [x,y] syscall WRITE screen 
screen_buffer buffer_size # redraw diff --git a/test/window.ul.uir b/test/window.ul.uir new file mode 100644 index 0000000..b75e516 --- /dev/null +++ b/test/window.ul.uir @@ -0,0 +1,73 @@ +global const str screen_namespace = "/dev/screen/0" +global const str mouse_namespace = "/dev/mouse/0" +global const str terminal_namespace = "/dev/term/0" +global const str new_line = "\n" +global const byte WHITE = 255 + +function main () + # open screen + # use load immediate because it is a pointer to a string, not a value + + nat tmp_ptr = load_address screen_namespace + int mode = load_immediate 0 + plex screen = open tmp_ptr, mode + + nat screen_handle = load_offset_32 screen, 4 + str tmp_str = nat_to_string screen_handle + call pln tmp_str + + nat width = load_offset_32 screen, 8 + tmp_str = nat_to_string width + call pln tmp_str + + nat buffer_size = load_offset_32 screen, 12 + tmp_str = nat_to_string buffer_size + call pln tmp_str + + nat offset_temp = load_immediate 16 + nat screen_buffer = add_nat screen, offset_temp + + tmp_str = nat_to_string screen_buffer + call pln tmp_str + + # open mouse + tmp_ptr = load_address mouse_namespace + plex mouse = open tmp_ptr, mode + + write screen, screen_buffer, buffer_size # redraw + + draw_loop: + # load mouse click data + stat mouse + + byte left_down = load_offset_8 mouse, 16 # load btn1 pressed + + jump_eq_nat draw_loop, left_down, mode # mode is 0 which is an alias for false + + nat x = load_offset_32 mouse, 8 + nat y = load_offset_32 mouse, 12 + + # Compute start address: y*width + x + nat pixel_pos = mul_nat y, width # = y * width + pixel_pos = add_nat x, pixel_pos # += x + pixel_pos = add_nat screen_buffer, pixel_pos # += pixel_offset + nat fat_ptr_size = load_immediate 4 # need to add offset for fat pointer size + pixel_pos = add_nat pixel_pos, fat_ptr_size + + byte color = load_absolute_32 WHITE + store_absolute_8 pixel_pos, color # draw color at screen [x,y] + write screen, screen_buffer, buffer_size # redraw + + jump 
draw_loop + exit 0 + +function pln (str message) + nat term_ns = load_address terminal_namespace # get terminal device + int mode = load_immediate 0 + plex term = open term_ns, mode + int msg_length = strlen message + write term, message, msg_length + str nl = load_address new_line + int nl_length = strlen nl + write term, nl, nl_length + return diff --git a/test/window.ul.vuir b/test/window.ul.vuir new file mode 100644 index 0000000..a7c9aac --- /dev/null +++ b/test/window.ul.vuir @@ -0,0 +1,72 @@ +const str screen_namespace = "/dev/screen/0"; +const str mouse_namespace = "/dev/mouse/0"; +const str terminal_namespace = "/dev/term/0"; +const str new_line = "\n"; +const byte WHITE = 255; + +function main () { + // open screen + // use load immediate because it is a pointer to a string, not a value + + nat tmp_ptr = &screen_namespace; + int mode = 0; + plex screen = open(tmp_ptr, mode); + + nat screen_handle = screen.handle; + str tmp_str = screen_handle as str; + pln(tmp_str); + + nat width = screen.width; + tmp_str = width as str; + pln(tmp_str); + + nat buffer_size = screen.size; + tmp_str = buffer_size as str; + pln(tmp_str); + + nat screen_buffer = screen.buffer.ptr; + tmp_str = screen_buffer as str; + pln(tmp_str); + + // open mouse + tmp_ptr = &mouse_namespace; + plex mouse = open(tmp_ptr, mode); + + write(screen, screen_buffer, buffer_size); // redraw + + loop { + // load mouse click data + stat(mouse); + + byte left_down = mouse.left; + + if (left_down == 0) continue; + + nat x = mouse.x; + nat y = mouse.y; + + // Compute start address: y*width + x + nat pixel_pos = y * width; // = y * width + pixel_pos = x + pixel_pos; // += x + pixel_pos = screen_buffer + pixel_pos; // += pixel_offset + nat fat_ptr_size = 4; // need to add offset for fat pointer size + pixel_pos = pixel_pos + fat_ptr_size; + + byte color = WHITE; + store_absolute_8(pixel_pos, color); // draw color at screen [x,y] + write(screen, screen_buffer, buffer_size); // redraw + } + exit(0); +} + 
+function pln (str message) { + nat term_ns = &terminal_namespace; // get terminal device + int mode = 0; + plex term = open(term_ns, mode); + int msg_length = message.length; + write(term, message, msg_length); + str nl = &new_line; + int nl_length = nl.length; + write(term, nl, nl_length); + return +} \ No newline at end of file From c90f236ab329e909502fafe86ab5f080edfe5dbc Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 9 Nov 2025 13:22:46 -0800 Subject: [PATCH 05/27] Refactor and rename to align with new IR better --- Makefile | 12 +- README.org | 16 +- ROADMAP.org | 10 + src/arch/linux/main.c | 227 +-- src/tools/assembler/assembler.c | 1240 +---------------- src/tools/assembler/assembler.h | 22 +- src/tools/{ => assembler}/lexer.c | 2 +- src/tools/{ => assembler}/lexer.h | 0 src/tools/old_assembler/assembler.c | 1211 ++++++++++++++++ src/tools/old_assembler/assembler.h | 20 + .../{assembler => old_assembler}/parser.c | 0 .../{assembler => old_assembler}/parser.h | 0 src/vm/opcodes.h | 171 +-- src/vm/vm.c | 198 +-- test/add.asm.lisp | 2 +- test/add.ul.ir | 4 +- test/fib.asm.lisp | 2 +- test/fib.ul.ir | 4 +- test/hello.asm.lisp | 2 +- test/hello.ul.ir | 6 +- test/loop.asm.lisp | 2 +- test/loop.ul.ir | 8 +- test/malloc.asm.lisp | 2 +- test/malloc.ul.ir | 8 +- test/paint-bw.asm.lisp | 4 +- test/paint-bw.ul.ir | 2 +- test/paint.asm.lisp | 4 +- test/paint.ul.ir | 2 +- test/simple.asm.lisp | 2 +- test/simple.ul.ir | 4 +- test/window.asm.lisp | 2 +- test/window.ul.vuir | 26 +- 32 files changed, 1541 insertions(+), 1674 deletions(-) rename src/tools/{ => assembler}/lexer.c (99%) rename src/tools/{ => assembler}/lexer.h (100%) create mode 100644 src/tools/old_assembler/assembler.c create mode 100644 src/tools/old_assembler/assembler.h rename src/tools/{assembler => old_assembler}/parser.c (100%) rename src/tools/{assembler => old_assembler}/parser.h (100%) diff --git a/Makefile b/Makefile index 48b58fc..88100b7 100644 --- a/Makefile +++ b/Makefile @@ -86,13 +86,17 @@ 
VM_SOURCES := \ ifeq ($(BUILD_MODE), release) PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c\ - $(SRC_DIR)/tools/assembler/parser.c \ - $(SRC_DIR)/tools/assembler/assembler.c + $(SRC_DIR)/tools/old_assembler/parser.c \ + $(SRC_DIR)/tools/old_assembler/assembler.c \ + $(SRC_DIR)/tools/assembler/lexer.c \ + $(SRC_DIR)/tools/assembler/assembler.c else PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c \ - $(SRC_DIR)/tools/assembler/parser.c \ - $(SRC_DIR)/tools/assembler/assembler.c + $(SRC_DIR)/tools/old_assembler/parser.c \ + $(SRC_DIR)/tools/old_assembler/assembler.c\ + $(SRC_DIR)/tools/assembler/lexer.c \ + $(SRC_DIR)/tools/assembler/assembler.c endif # --- OBJECT FILES --- diff --git a/README.org b/README.org index 6e56f06..640f1fb 100644 --- a/README.org +++ b/README.org @@ -21,7 +21,7 @@ Undâr is a programming language for the purpose of creating 3D games and graphi It has an internal REPL that allows for quick development as well as the ability to dump the program to a binary rom for preserving that program/game/etc. -It runs on the =Reality Engine=, a VM written in freestanding C89, has a CISC like instruction format of one byte opcode and a variable byte operand. 32 general purpose registers. +It runs on the =Reality Engine=, a VM written in freestanding C89, has a CISC like instruction format of one byte opcode and a variable byte operand. 32 local variables per frame. * Philosophy @@ -58,7 +58,7 @@ You can view some examples in the =.ul.ir= files in =/test= function main () str hello is $0 - load_heap_immediate "nuqneH 'u'?" -> hello + malloc_immediate "nuqneH 'u'?" 
-> hello call pln hello exit 0 @@ -69,12 +69,12 @@ function pln (str message is $0) int nl_length is $4 int mode is $5 - load_heap_immediate "/dev/term/0" -> ts # get terminal device + malloc_immediate "/dev/term/0" -> ts # get terminal device load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return @@ -97,11 +97,11 @@ function main () int mode is $11 str term is $10 - load_heap_immediate "/dev/term/0" -> term + malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); - load_heap_immediate "Enter a string:" -> $7 + malloc_immediate "Enter a string:" -> $7 string_length $7 -> $8 syscall WRITE term $7 $8 # print prompt @@ -120,12 +120,12 @@ function pln (str message is $0) str nl is $3 int nl_length is $4 - load_heap_immediate "/dev/term/0" -> ts + malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts # get terminal device strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length diff --git a/ROADMAP.org b/ROADMAP.org index 79f4043..d408d62 100644 --- a/ROADMAP.org +++ b/ROADMAP.org @@ -10,6 +10,16 @@ * Roadmap +** Fixes for devices + +Devices should be moved into the Tunnel concept + +Make it so that instead of returning the whole plex from the OPEN syscall we only return the handle + +We should remove the "refesh" options and make it so that "stat" + +Stat is the one that actually returns the plex with that info + ** Example: Hello world (=hello.ul=) *WIP syntax, not final implementation** diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 17e8bd1..461daf9 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,5 +1,6 @@ 
+#include "../../tools/old_assembler/assembler.h" +#include "../../tools/old_assembler/parser.h" #include "../../tools/assembler/assembler.h" -#include "../../tools/assembler/parser.h" #include "../../vm/vm.h" #include "devices.h" #include @@ -52,7 +53,7 @@ bool saveVM(const char *filename, VM *vm) { return false; } - // Write VM state (registers and pointers) + // Write VM state (locals and pointers) if (fwrite(&vm->pc, sizeof(u32), 1, file) != 1 || fwrite(&vm->cp, sizeof(u32), 1, file) != 1 || fwrite(&vm->fp, sizeof(u32), 1, file) != 1 || @@ -90,7 +91,7 @@ bool loadVM(const char *filename, VM *vm) { return false; } - // Read VM state (registers and pointers) + // Read VM state (locals and pointers) if (fread(&vm->pc, sizeof(u32), 1, file) != 1 || fread(&vm->cp, sizeof(u32), 1, file) != 1 || fread(&vm->fp, sizeof(u32), 1, file) != 1 || @@ -145,7 +146,7 @@ bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { source[read] = '\0'; fclose(f); - + assemble(vm, source); return true; } @@ -177,7 +178,7 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { printf("Parse failed.\n"); return false; } else { - assemble(vm, ast); + old_assemble(vm, ast); expr_free(ast); // If output file specified, save the VM @@ -192,216 +193,6 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { } } -void repl(VM *vm) { - USED(vm); - - char buffer[1024 * 10] = {0}; // Larger buffer for multi-line input - char line[1024]; - - for (;;) { - // Count current parentheses balance - i32 paren_balance = 0; - for (i32 i = 0; buffer[i]; i++) { - if (buffer[i] == '(') - paren_balance++; - else if (buffer[i] == ')') - paren_balance--; - } - - // Show appropriate prompt - if (paren_balance > 0) { - printf(".. 
"); // Continuation prompt when unbalanced - } else { - printf("> "); // Normal prompt when balanced - } - fflush(stdout); - - if (!fgets(line, sizeof(line), stdin)) { - printf("\n"); - break; - } - - // Append the new line to buffer - strncat(buffer, line, sizeof(buffer) - strlen(buffer) - 1); - - // Recalculate balance after adding new line - paren_balance = 0; - for (i32 i = 0; buffer[i]; i++) { - if (buffer[i] == '(') - paren_balance++; - else if (buffer[i] == ')') - paren_balance--; - } - - // Only parse when parentheses are balanced - if (paren_balance == 0) { - // Check if buffer has actual content (not just whitespace) - i32 has_content = 0; - for (i32 i = 0; buffer[i]; i++) { - if (!isspace(buffer[i])) { - has_content = 1; - break; - } - } - - if (has_content) { - ExprNode *ast = expr_parse(buffer, strlen(buffer)); - if (!ast) { - printf("Parse failed.\n"); - } else { - assemble(vm, ast); - while (step_vm(vm)) { - } - expr_free(ast); - } - } - - // Reset buffer for next input - buffer[0] = '\0'; - } - // If unbalanced, continue reading more lines - } - exit(vm->flag); -} -#ifdef ASM_DEBUG -const char *opcode_to_string(Opcode op) { - static const char *names[] = { - [OP_HALT] = "halt", - [OP_JMP] = "jump", - [OP_JMPF] = "jmpf", - [OP_FCALL] = "fcall", - [OP_FRETURN] = "return", - - /* Immediate loads (only 32-bit variant needed) */ - [OP_LOAD_IMM] = "ldi", - - /* Register-indirect loads */ - [OP_LOAD_IND_8] = "ld8", - [OP_LOAD_IND_16] = "ld16", - [OP_LOAD_IND_32] = "ld32", - - /* Absolute address loads */ - [OP_LOAD_ABS_8] = "lda8", - [OP_LOAD_ABS_16] = "lda16", - [OP_LOAD_ABS_32] = "lda32", - - /* Base+offset loads */ - [OP_LOAD_OFF_8] = "ldo8", - [OP_LOAD_OFF_16] = "ldo16", - [OP_LOAD_OFF_32] = "ldo32", - - /* Absolute address stores */ - [OP_STORE_ABS_8] = "sta8", - [OP_STORE_ABS_16] = "sta16", - [OP_STORE_ABS_32] = "sta32", - - /* Register-indirect stores */ - [OP_STORE_IND_8] = "sti8", - [OP_STORE_IND_16] = "sti16", - [OP_STORE_IND_32] = "sti32", - - 
/* Base+offset stores */ - [OP_STORE_OFF_8] = "sto8", - [OP_STORE_OFF_16] = "sto16", - [OP_STORE_OFF_32] = "sto32", - - /* Memory operations */ - [OP_MALLOC] = "malloc", - [OP_MEMSET_8] = "set8", - [OP_MEMSET_16] = "set16", - [OP_MEMSET_32] = "set32", - - /* Register operations */ - [OP_REG_MOV] = "mov", - [OP_SYSCALL] = "syscall", - - /* Bit operations */ - [OP_BIT_SHIFT_LEFT] = "sll", - [OP_BIT_SHIFT_RIGHT] = "srl", - [OP_BIT_SHIFT_R_EXT] = "sre", - [OP_BAND] = "and", - [OP_BOR] = "or", - [OP_BXOR] = "xor", - - /* Integer arithmetic */ - [OP_ADD_INT] = "addi", - [OP_SUB_INT] = "subi", - [OP_MUL_INT] = "muli", - [OP_DIV_INT] = "divi", - [OP_ABS_INT] = "absi", // ← NEW - [OP_NEG_INT] = "negi", // ← NEW - - /* Natural number arithmetic */ - [OP_ADD_NAT] = "addn", - [OP_SUB_NAT] = "subn", - [OP_MUL_NAT] = "muln", - [OP_DIV_NAT] = "divn", - [OP_ABS_NAT] = "absn", // ← NEW - [OP_NEG_NAT] = "negn", // ← NEW - - /* Floating point operations */ - [OP_ADD_REAL] = "addr", - [OP_SUB_REAL] = "subr", - [OP_MUL_REAL] = "mulr", - [OP_DIV_REAL] = "divr", - [OP_ABS_REAL] = "absr", // ← NEW - [OP_NEG_REAL] = "negr", // ← NEW - - /* Type conversions */ - [OP_INT_TO_REAL] = "itor", - [OP_NAT_TO_REAL] = "ntor", - [OP_REAL_TO_INT] = "rtoi", - [OP_REAL_TO_NAT] = "rton", - - /* Integer comparisons */ - [OP_JEQ_INT] = "jeqi", - [OP_JNEQ_INT] = "jneqi", - [OP_JGT_INT] = "jgti", - [OP_JLT_INT] = "jlti", - [OP_JLE_INT] = "jlei", - [OP_JGE_INT] = "jgei", - - /* Natural number comparisons */ - [OP_JEQ_NAT] = "jeqn", - [OP_JNEQ_NAT] = "jneqn", - [OP_JGT_NAT] = "jgtn", - [OP_JLT_NAT] = "jltn", - [OP_JLE_NAT] = "jlen", - [OP_JGE_NAT] = "jgen", - - /* Floating point comparisons */ - [OP_JEQ_REAL] = "jeqr", - [OP_JNEQ_REAL] = "jneqr", - [OP_JGE_REAL] = "jger", - [OP_JGT_REAL] = "jgtr", - [OP_JLT_REAL] = "jltr", - [OP_JLE_REAL] = "jler", - - /* String operations */ - [OP_STRLEN] = "strlen", - [OP_STREQ] = "streq", - [OP_STRCAT] = "strcat", - [OP_STR_GET_CHAR] = "getch", - [OP_STR_FIND_CHAR] = 
"findch", - [OP_STR_SLICE] = "strcut", - - /* String conversions */ - [OP_INT_TO_STRING] = "itos", - [OP_NAT_TO_STRING] = "ntos", - [OP_REAL_TO_STRING] = "rtos", - [OP_STRING_TO_INT] = "stoi", - [OP_STRING_TO_NAT] = "ston", - [OP_STRING_TO_REAL] = "stor"}; - - if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { - return ""; - } - - const char *name = names[op]; - return name ? name : ""; -} -#endif - i32 main(i32 argc, char *argv[]) { bool dump_rom = false; char *input_file = nil; @@ -453,8 +244,7 @@ i32 main(i32 argc, char *argv[]) { } } } else { - // No input file - enter REPL mode - repl(&vm); + printf("usage: undar ..."); return 0; } @@ -567,9 +357,6 @@ i32 main(i32 argc, char *argv[]) { int cycles_this_frame = 0; int max_cycles_per_frame = 100; // Adjust this value while (cycles_this_frame < max_cycles_per_frame) { -#ifdef ASM_DEBUG - printf("| %s %d\n", opcode_to_string(vm.code[vm.pc]), vm.pc); -#endif if (!step_vm(&vm)) { running = false; break; diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 49b6be4..93d8a19 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,1211 +1,55 @@ #include "assembler.h" -#include "parser.h" -typedef enum { SYMBOL_CODE, SYMBOL_DATA } SymbolType; +#include -typedef struct { +typedef struct field_s { + char* name; + TokenType type; + u32 offset; + u32 size; +} Field; + +typedef struct plex_def_s { + char* name; + u32 field_count; + u32 logical_size; + u32 physical_size; + Field *fields; +} PlexDef; + +typedef struct array_def_s { + TokenType type; + u32 length; + u32 logical_size; + u32 physical_size; // logical_size * type_size + 4 +} ArrayDef; + +typedef struct symbol_s { char *name; u32 address; - SymbolType type; - int size; // How much memory this symbol occupies - int is_constant; // 1 = constant, 0 = variable + TokenType type; + u32 logical_size; + u32 physical_size; // logical_size * type_size + 4 } Symbol; -typedef struct { +typedef struct 
symbol_tab_s { Symbol *symbols; int count; int capacity; } SymbolTable; -void symbol_table_init(SymbolTable *table) { - table->capacity = 32; - table->count = 0; - table->symbols = malloc(table->capacity * sizeof(Symbol)); -} - -void symbol_table_add(SymbolTable *table, const char *name, u32 address, - SymbolType type) { - // Check for duplicates - for (int i = 0; i < table->count; i++) { - if (strcmp(table->symbols[i].name, name) == 0) { - fprintf(stderr, "Error: Duplicate label '%s'\n", name); - exit(1); +void assemble(VM *vm, char *source) { + USED(vm); + initLexer(source); + Token token; + do { + token = nextToken(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + break; } - } - - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); - } - - Symbol *sym = &table->symbols[table->count++]; - sym->name = strdup(name); - sym->address = address; - sym->type = type; - sym->size = 4; // Default size - sym->is_constant = 0; -} - -Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { - for (int i = 0; i < table->count; i++) { - if (strcmp(table->symbols[i].name, name) == 0) { - return &table->symbols[i]; + if (token.type != TOKEN_EOF) { + printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), + token.length, token.start); } - } - return NULL; -} - -u32 find_label_in_table(SymbolTable *table, const char *name) { - Symbol *sym = symbol_table_lookup(table, name); - if (!sym) { - fprintf(stderr, "Error: Undefined label '%s'\n", name); - exit(1); - } - return sym->address; -} - -int get_instruction_byte_size(ExprNode *node) { - const char *opname = node->token; - - // Return (1 + 1) - if (strcmp(opname, "return") == 0) { - return 2; // 1 byte opcode + 1 byte return register - } - - if (strcmp(opname, "neg-int") == 0 || - strcmp(opname, "abs-int") == 0 || - strcmp(opname, "neg-nat") == 0 || - 
strcmp(opname, "abs-nat") == 0 || - strcmp(opname, "neg-real") == 0 || - strcmp(opname, "abs-real") == 0 || - strcmp(opname, "int-to-string") == 0 || - strcmp(opname, "load-indirect-8") == 0 || - strcmp(opname, "nat-to-string") == 0 || - strcmp(opname, "load-indirect-16") == 0 || - strcmp(opname, "real-to-string") == 0 || - strcmp(opname, "load-indirect-32") == 0 || - strcmp(opname, "int-to-real") == 0 || - strcmp(opname, "store-indirect-8") == 0 || - strcmp(opname, "nat-to-real") == 0 || - strcmp(opname, "store-indirect-16") == 0 || - strcmp(opname, "real-to-int") == 0 || - strcmp(opname, "store-indirect-32") == 0 || - strcmp(opname, "real-to-nat") == 0 || strcmp(opname, "nat-to-int") == 0 || - strcmp(opname, "int-to-nat") == 0 || - strcmp(opname, "string-length") == 0 || - strcmp(opname, "store-absolute-32") == 0 || - strcmp(opname, "store-absolute-8") == 0 || - strcmp(opname, "store-absolute-16") == 0 || - strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || - strcmp(opname, "memset-8") == 0 || strcmp(opname, "memset-16") == 0 || - strcmp(opname, "register-move") == 0 || strcmp(opname, "malloc") == 0) { - return 3; - } - - // Register-register-register opcodes (4 bytes: 1 + 3) - if (strcmp(opname, "add-int") == 0 || strcmp(opname, "sub-int") == 0 || - strcmp(opname, "mul-int") == 0 || strcmp(opname, "div-int") == 0 || - strcmp(opname, "add-nat") == 0 || strcmp(opname, "sub-nat") == 0 || - strcmp(opname, "mul-nat") == 0 || strcmp(opname, "div-nat") == 0 || - strcmp(opname, "add-real") == 0 || strcmp(opname, "sub-real") == 0 || - strcmp(opname, "bit-shift-left") == 0 || - strcmp(opname, "bit-shift-right") == 0 || - strcmp(opname, "bit-shift-r-ext") == 0 || - strcmp(opname, "bit-and") == 0 || strcmp(opname, "bit-or") == 0 || - strcmp(opname, "bit-xor") == 0 || strcmp(opname, "mul-real") == 0 || - strcmp(opname, "div-real") == 0) { - return 4; - } - - // (5 bytes: 1 + 4) - if (strcmp(opname, "halt") == 0 || strcmp(opname, "jump-if-flag") == 0 || - 
strcmp(opname, "jump") == 0) { - return 5; - } - - // Load, Load-immediate (6 bytes: 1 + 1 + 4) - if (strcmp(opname, "load-absolute-32") == 0 || - strcmp(opname, "load-immediate") == 0 || - strcmp(opname, "load-absolute-16") == 0 || - strcmp(opname, "load-absolute-8") == 0) { - return 6; - } - - // jump compare (7 bytes: 1 + 4 + 1 + 1) - if (strcmp(opname, "jump-eq-int") == 0 || - strcmp(opname, "jump-neq-int") == 0 || - strcmp(opname, "jump-gt-int") == 0 || - strcmp(opname, "jump-lt-int") == 0 || - strcmp(opname, "jump-le-int") == 0 || - strcmp(opname, "jump-ge-int") == 0 || - strcmp(opname, "jump-eq-nat") == 0 || - strcmp(opname, "jump-neq-nat") == 0 || - strcmp(opname, "jump-gt-nat") == 0 || - strcmp(opname, "jump-lt-nat") == 0 || - strcmp(opname, "jump-le-nat") == 0 || - strcmp(opname, "jump-ge-nat") == 0 || - strcmp(opname, "jump-eq-real") == 0 || - strcmp(opname, "jump-neq-real") == 0 || - strcmp(opname, "jump-gt-real") == 0 || - strcmp(opname, "jump-lt-real") == 0 || - strcmp(opname, "jump-le-real") == 0 || - strcmp(opname, "jump-ge-real") == 0 || - strcmp(opname, "store-offset-8") == 0 || - strcmp(opname, "store-offset-16") == 0 || - strcmp(opname, "store-offset-32") == 0 || - strcmp(opname, "load-offset-8") == 0 || - strcmp(opname, "load-offset-16") == 0 || - strcmp(opname, "load-offset-32") == 0) { - return 7; - } - - // Call (1 + 4 + 1 + args + 1) - if (strcmp(opname, "call") == 0) { - ExprNode *args_node = node->children[1]; - u32 args_count; - - if (strcmp(args_node->token, "nil") == 0) { - args_count = 0; - } else { - args_count = 1 + args_node->child_count; - } - - return 1 + 1 + 1 + 4 + args_count; - } - - // Syscall (1 + syscall_id (4) + args) - if (strcmp(opname, "syscall") == 0) { - return 1 + 4 + (node->child_count > 0 ? 
node->child_count - 1 : 0); - } - - fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); - exit(-1); -} - -int calculate_instruction_size(ExprNode *node) { - if (node->child_count == 0) - return 0; - - return get_instruction_byte_size(node); -} - -void collect_symbols_in_node(SymbolTable *table, ExprNode *node, - u32 *current_addr, int depth) { - char indent[32] = ""; - for (int i = 0; i < depth; i++) - strcat(indent, " "); - -#ifdef ASM_DEBUG - printf("%s%d %s ", indent, *current_addr, node->token); -#endif - - if (strcmp(node->token, "label") == 0) { - if (node->child_count >= 1) { - const char *name = node->children[0]->token; -#ifdef ASM_DEBUG - printf(" %s -> %d\n", name, *current_addr); -#endif - symbol_table_add(table, name, *current_addr, SYMBOL_CODE); - } - - for (size_t i = 1; i < node->child_count; i++) { - collect_symbols_in_node(table, node->children[i], current_addr, - depth + 1); - } - } else { - int size = get_instruction_byte_size(node); - *current_addr += size; -#ifdef ASM_DEBUG - printf(" +%d bytes -> %d\n", size, *current_addr); -#endif - } -} - -void collect_symbols(SymbolTable *table, ExprNode *program) { - // First, collect all data labels (with placeholder address) - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "data") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - ExprNode *item = section->children[j]; - if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { - const char *name = item->children[0]->token; - symbol_table_add(table, name, 0, SYMBOL_DATA); - } - } - } - } - - // Second, collect all code labels with proper nesting - u32 code_addr = 0; - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "code") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - collect_symbols_in_node(table, section->children[j], &code_addr, 0); - } - } - } -} - 
-u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) { - u32 addr = vm->mp; - vm->mp += size; - vm->frames[vm->fp].end += size; - - // Update the symbol's address - Symbol *sym = symbol_table_lookup(table, name); - if (sym && sym->type == SYMBOL_DATA) { - sym->address = addr; - sym->size = size; - } - - return addr; -} - -void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } - -void emit_u32(VM *vm, u32 value) { - write_u32(vm, code, vm->cp, value); - vm->cp += 4; -} - -void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } - -int parse_register(const char *reg_str) { - if (reg_str[0] != '$') - return -1; - return atoi(reg_str + 1); -} - -u32 resolve_symbol(SymbolTable *table, const char *ref) { - // Handle symbol references (e.g., &label) - if (ref[0] == '&') { - return find_label_in_table(table, ref + 1); - } - - // Handle fixed-point numbers (e.g., 0.5) - if (strchr(ref, '.')) { - return TO_FIXED(atof(ref)); - } - - // Handle hexadecimal literals (e.g., 0x7) - if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) { - char *endptr; - u32 value = (u32)strtoul(ref + 2, &endptr, 16); // Skip "0x" - - if (endptr == ref + 2 || *endptr != '\0') { - fprintf(stderr, "Invalid hex literal: %s\n", ref); - exit(1); - } - return value; - } - - // Handle decimal literals (e.g., 7) - char *endptr; - u32 value = (u32)strtoul(ref, &endptr, 10); - - if (endptr == ref || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: %s\n", ref); - exit(1); - } - return value; -} - -static char *unwrap_string(const char *quoted_str) { - if (!quoted_str) - return NULL; - - size_t len = strlen(quoted_str); - if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { - // Remove quotes and process escape sequences - const char *src = quoted_str + 1; - size_t src_len = len - 2; - - // First pass: calculate the actual length needed after escape processing - size_t actual_len = 0; - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' 
&& i + 1 < src_len) { - // Escape sequence - actual_len++; - i++; // Skip the next character - } else { - actual_len++; - } - } - - char *unwrapped = (char *)malloc(actual_len + 1); - size_t dst_idx = 0; - - // Second pass: process escape sequences - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Handle escape sequences - switch (src[i + 1]) { - case 'n': - unwrapped[dst_idx++] = '\n'; - break; - case 't': - unwrapped[dst_idx++] = '\t'; - break; - case 'r': - unwrapped[dst_idx++] = '\r'; - break; - case '\\': - unwrapped[dst_idx++] = '\\'; - break; - case '"': - unwrapped[dst_idx++] = '"'; - break; - case '\'': - unwrapped[dst_idx++] = '\''; - break; - default: - // Unknown escape, keep both characters - unwrapped[dst_idx++] = src[i]; - unwrapped[dst_idx++] = src[i + 1]; - break; - } - i++; // Skip the next character - } else { - unwrapped[dst_idx++] = src[i]; - } - } - unwrapped[dst_idx] = '\0'; - return unwrapped; - } - // Not quoted, return copy - return strdup(quoted_str); -} - -void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) { - for (size_t i = 0; i < block->child_count; ++i) { - ExprNode *item = block->children[i]; - if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { - const char *name = item->children[0]->token; - ExprNode *val = item->children[1]; - - if (val->child_count == 0) { - const char *token = val->token; - - // Case 1: String literal (enclosed in quotes) - if (token[0] == '"' && token[strlen(token) - 1] == '"') { - char *unwrapped = unwrap_string(token); - int len = strlen(unwrapped); - u32 addr = allocate_data(vm, table, name, len + 1 + 4); - - write_u32(vm, memory, addr, len); - for (int i = 0; i < len; i++) { - write_u8(vm, memory, addr + 4 + i, unwrapped[i]); - } - write_u8(vm, memory, addr + 4 + len, '\0'); - free(unwrapped); - } - // Case 2: Hexadecimal integer (0x...) 
- else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) { - char *endptr; - u32 value = (u32)strtoul(token + 2, &endptr, 16); - - if (endptr != token + strlen(token)) { - fprintf(stderr, "Invalid hex in data block: %s\n", token); - exit(1); - } - - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, value); - } - // Case 3: Floating-point (has decimal point) - else if (strchr(token, '.')) { - float f = atof(token); - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, TO_FIXED(f)); - } - // Case 4: Decimal integer - else { - char *endptr; - u32 value = (u32)strtoul(token, &endptr, 10); - - if (endptr != token + strlen(token)) { - fprintf(stderr, "Invalid decimal in data block: %s\n", token); - exit(1); - } - - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, value); - //vm->mp += 4; - } - } else { - fprintf(stderr, "Unsupported data item\n"); - exit(1); - } - } - } -} - -void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) { - const char *opname = node->token; - if (strcmp(opname, "label") == 0) { - for (size_t i = 1; i < node->child_count; i++) { - process_code_expr(vm, table, node->children[i]); - } - } else if (strcmp(opname, "halt") == 0) { - emit_opcode(vm, OP_HALT); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "jump") == 0) { - emit_opcode(vm, OP_JMP); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "jump-if-flag") == 0) { - emit_opcode(vm, OP_JMPF); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "call") == 0) { - emit_opcode(vm, OP_CALL); - - if (node->child_count < 3) { - fprintf(stderr, "Error: call requires (args) and return register\n"); - return; - } - - // Parse function address (first child) - u32 addr = resolve_symbol(table, node->children[0]->token); - if 
(addr == (u32)-1) { - fprintf(stderr, "Error: undefined symbol '%s'\n", - node->children[0]->token); - return; - } - emit_u32(vm, addr); - - // Parse argument list (second child) - ExprNode *args_node = node->children[1]; - u8 arg_count = 0; - - if (args_node->child_count > 0) { - // Multiple arguments case - arg_count = args_node->child_count + 1; // +1 for the token - } else { - // Single argument case - token is the argument - arg_count = (args_node->token[0] != '\0') ? 1 : 0; - } - emit_byte(vm, arg_count); - - // Emit arguments based on representation - if (arg_count > 0) { - // First argument is always the token - const char *reg_str = args_node->token; - int reg = parse_register(reg_str); - if (reg < 0) { - fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); - return; - } - emit_byte(vm, (u8)reg); - - // Emit children if present - for (size_t i = 0; i < args_node->child_count; i++) { - reg_str = args_node->children[i]->token; - reg = parse_register(reg_str); - if (reg < 0) { - fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); - return; - } - emit_byte(vm, (u8)reg); - } - } - // Parse return register (third child) - const char *return_reg_str = node->children[2]->token; - int return_reg = parse_register(return_reg_str); - - if (return_reg < 0) { - if (strcmp(return_reg_str, "nil") == 0) { - return_reg = 0xFF; - } else { - fprintf(stderr, "Error: invalid return register '%s'\n", - return_reg_str); - return; - } - } - emit_byte(vm, (u8)return_reg); - -} else if (strcmp(opname, "return") == 0) { - emit_opcode(vm, OP_RETURN); - - if (node->child_count != 1) { - fprintf(stderr, "Error: return requires exactly one argument\n"); - return; - } - - const char *reg_str = node->children[0]->token; - int reg = parse_register(reg_str); - - // Handle "nil" as special case (no return value) - if (reg < 0) { - if (strcmp(reg_str, "nil") == 0) { - reg = 0xFF; // Special value for "no return" - } else { - fprintf(stderr, "Error: invalid 
return register '%s'\n", reg_str); - return; - } - } - emit_byte(vm, (u8)reg); - } else if (strcmp(opname, "load-immediate") == 0) { - emit_opcode(vm, OP_LOAD_IMM); - int reg = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, reg); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-8") == 0) { - emit_opcode(vm, OP_LOAD_ABS_8); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-16") == 0) { - emit_opcode(vm, OP_LOAD_ABS_16); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-32") == 0) { - emit_opcode(vm, OP_LOAD_ABS_32); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-indirect-8") == 0) { - emit_opcode(vm, OP_LOAD_IND_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "load-indirect-16") == 0) { - emit_opcode(vm, OP_LOAD_IND_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "load-indirect-32") == 0) { - emit_opcode(vm, OP_LOAD_IND_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "malloc") == 0) { - emit_opcode(vm, OP_MALLOC); - int dest = parse_register(node->children[0]->token); - int src1 = 
parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "memset-8") == 0) { - emit_opcode(vm, OP_MEMSET_8); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "memset-16") == 0) { - emit_opcode(vm, OP_MEMSET_16); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "memset") == 0) { - emit_opcode(vm, OP_MEMSET_32); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "store-absolute-8") == 0) { - emit_opcode(vm, OP_STORE_ABS_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-absolute-16") == 0) { - emit_opcode(vm, OP_STORE_ABS_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-absolute-32") == 0) { - emit_opcode(vm, OP_STORE_ABS_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-8") == 0) { - emit_opcode(vm, OP_STORE_IND_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - 
emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-16") == 0) { - emit_opcode(vm, OP_STORE_IND_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-32") == 0) { - emit_opcode(vm, OP_STORE_IND_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-offset-8") == 0) { - emit_opcode(vm, OP_STORE_OFF_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "store-offset-16") == 0) { - emit_opcode(vm, OP_STORE_OFF_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "store-offset-32") == 0) { - emit_opcode(vm, OP_STORE_OFF_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-8") == 0) { - emit_opcode(vm, OP_LOAD_OFF_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-16") == 0) { - emit_opcode(vm, OP_LOAD_OFF_16); - int dest = 
parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-32") == 0) { - emit_opcode(vm, OP_LOAD_OFF_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "register-move") == 0) { - emit_opcode(vm, OP_REG_MOV); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "syscall") == 0) { - emit_opcode(vm, OP_SYSCALL); - - // Parse syscall ID - u32 syscall_id = 0; - const char *syscall_name = node->children[0]->token; - if (strcmp(syscall_name, "EXIT") == 0) - syscall_id = SYSCALL_EXIT; - else if (strcmp(syscall_name, "OPEN") == 0) - syscall_id = SYSCALL_DEVICE_OPEN; - else if (strcmp(syscall_name, "READ") == 0) - syscall_id = SYSCALL_DEVICE_READ; - else if (strcmp(syscall_name, "WRITE") == 0) - syscall_id = SYSCALL_DEVICE_WRITE; - else if (strcmp(syscall_name, "CLOSE") == 0) - syscall_id = SYSCALL_DEVICE_CLOSE; - else if (strcmp(syscall_name, "IOCTL") == 0) - syscall_id = SYSCALL_DEVICE_IOCTL; - else if (strcmp(syscall_name, "REFRESH") == 0) - syscall_id = SYSCALL_DEVICE_REFRESH; - - emit_u32(vm, syscall_id); - - // Emit register arguments - for (size_t i = 1; i < node->child_count; ++i) { - int reg = parse_register(node->children[i]->token); - emit_byte(vm, reg); - } - } else if (strcmp(opname, "bit-shift-left") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_LEFT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - 
emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-shift-right") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_RIGHT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-shift-r-ext") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_R_EXT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-and") == 0) { - emit_opcode(vm, OP_BAND); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-or") == 0) { - emit_opcode(vm, OP_BOR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-xor") == 0) { - emit_opcode(vm, OP_BXOR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "add-int") == 0) { - emit_opcode(vm, OP_ADD_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-int") 
== 0) { - emit_opcode(vm, OP_SUB_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-int") == 0) { - emit_opcode(vm, OP_MUL_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-int") == 0) { - emit_opcode(vm, OP_DIV_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-int") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-int") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "add-nat") == 0) { - emit_opcode(vm, OP_ADD_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-nat") == 0) { - emit_opcode(vm, OP_SUB_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else 
if (strcmp(opname, "mul-nat") == 0) { - emit_opcode(vm, OP_MUL_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-nat") == 0) { - emit_opcode(vm, OP_DIV_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-nat") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-nat") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "add-real") == 0) { - emit_opcode(vm, OP_ADD_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-real") == 0) { - emit_opcode(vm, OP_SUB_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-real") == 0) { - emit_opcode(vm, OP_MUL_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, 
src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-real") == 0) { - emit_opcode(vm, OP_DIV_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-real") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-real") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "int-to-real") == 0) { - emit_opcode(vm, OP_INT_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "nat-to-real") == 0) { - emit_opcode(vm, OP_NAT_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-int") == 0) { - emit_opcode(vm, OP_REAL_TO_INT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-nat") == 0) { - emit_opcode(vm, OP_REAL_TO_NAT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "jump-eq-int") == 0) { - emit_opcode(vm, OP_JEQ_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = 
parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-int") == 0) { - emit_opcode(vm, OP_JNEQ_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-int") == 0) { - emit_opcode(vm, OP_JGT_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-int") == 0) { - emit_opcode(vm, OP_JLT_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-int") == 0) { - emit_opcode(vm, OP_JLE_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-int") == 0) { - emit_opcode(vm, OP_JGE_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-eq-nat") == 0) { - emit_opcode(vm, OP_JEQ_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - 
emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-nat") == 0) { - emit_opcode(vm, OP_JNEQ_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-nat") == 0) { - emit_opcode(vm, OP_JGT_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-nat") == 0) { - emit_opcode(vm, OP_JLT_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-nat") == 0) { - emit_opcode(vm, OP_JLE_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-nat") == 0) { - emit_opcode(vm, OP_JGE_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-eq-real") == 0) { - emit_opcode(vm, OP_JEQ_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if 
(strcmp(opname, "jump-neq-real") == 0) { - emit_opcode(vm, OP_JNEQ_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-real") == 0) { - emit_opcode(vm, OP_JGT_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-real") == 0) { - emit_opcode(vm, OP_JLT_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-real") == 0) { - emit_opcode(vm, OP_JLE_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-real") == 0) { - emit_opcode(vm, OP_JGE_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-length") == 0) { - emit_opcode(vm, OP_STRLEN); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-eq") == 0) { - emit_opcode(vm, OP_STREQ); - int dest = parse_register(node->children[0]->token); - int src1 = 
parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-concat") == 0) { - emit_opcode(vm, OP_STRCAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-get-char") == 0) { - emit_opcode(vm, OP_STR_GET_CHAR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-find-char") == 0) { - emit_opcode(vm, OP_STR_FIND_CHAR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-slice") == 0) { - emit_opcode(vm, OP_STR_SLICE); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - int src3 = parse_register(node->children[3]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - emit_byte(vm, src3); - } else if (strcmp(opname, "int-to-string") == 0) { - emit_opcode(vm, OP_INT_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "nat-to-string") == 0) { - emit_opcode(vm, OP_NAT_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - 
emit_byte(vm, src); - } else if (strcmp(opname, "real-to-string") == 0) { - emit_opcode(vm, OP_REAL_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-int") == 0) { - emit_opcode(vm, OP_STRING_TO_INT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-nat") == 0) { - emit_opcode(vm, OP_STRING_TO_NAT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-real") == 0) { - emit_opcode(vm, OP_STRING_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else { - fprintf(stderr, "Unknown opcode: %s\n", opname); - } -} - -void assemble(VM *vm, ExprNode *program) { - SymbolTable table; - symbol_table_init(&table); - - // PASS 1: Collect all symbols (both code and data) - collect_symbols(&table, program); - - // PASS 2: Process data section using symbol table - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "data") == 0) { - process_data_block(vm, &table, section); - } - } - - // PASS 3: Process code section using complete symbol table - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "code") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - process_code_expr(vm, &table, section->children[j]); - } - } - } - - // Cleanup symbol table - for (int i = 0; i < table.count; i++) { -#ifdef ASM_DEBUG - Symbol s = table.symbols[i]; - printf("%s[%d]\n", s.name, s.address); 
-#endif - free(table.symbols[i].name); - } - free(table.symbols); -} + } while (token.type != TOKEN_EOF); +} \ No newline at end of file diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 26864a3..1b3c38a 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -1,20 +1,10 @@ -#ifndef ASSEMBLER_H -#define ASSEMBLER_H +#ifndef UNDAR_IR_ASSEMBLER_H +#define UNDAR_IR_ASSEMBLER_H #include "../../vm/common.h" -#include "../../vm/vm.h" -#include "parser.h" +#include "../../vm/opcodes.h" +#include "lexer.h" -#include -#include -#include -#include +void assemble(VM *vm, char *source); -#define AS_FIXED(v) ((float)(i32)(v) / 65536.0f) -#define TO_FIXED(f) ((i32)( \ - ((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \ -)) - -void assemble(VM *vm, ExprNode *program); - -#endif +#endif \ No newline at end of file diff --git a/src/tools/lexer.c b/src/tools/assembler/lexer.c similarity index 99% rename from src/tools/lexer.c rename to src/tools/assembler/lexer.c index 82c88fc..b634dc5 100644 --- a/src/tools/lexer.c +++ b/src/tools/assembler/lexer.c @@ -1,6 +1,6 @@ #include -#include "../vm/common.h" +#include "../../vm/common.h" #include "lexer.h" typedef struct { diff --git a/src/tools/lexer.h b/src/tools/assembler/lexer.h similarity index 100% rename from src/tools/lexer.h rename to src/tools/assembler/lexer.h diff --git a/src/tools/old_assembler/assembler.c b/src/tools/old_assembler/assembler.c new file mode 100644 index 0000000..e66be6b --- /dev/null +++ b/src/tools/old_assembler/assembler.c @@ -0,0 +1,1211 @@ +#include "assembler.h" +#include "parser.h" +typedef enum { SYMBOL_CODE, SYMBOL_DATA } SymbolType; + +typedef struct { + char *name; + u32 address; + SymbolType type; + int size; // How much memory this symbol occupies + int is_constant; // 1 = constant, 0 = variable +} Symbol; + +typedef struct { + Symbol *symbols; + int count; + int capacity; +} SymbolTable; + +void 
symbol_table_init(SymbolTable *table) { + table->capacity = 32; + table->count = 0; + table->symbols = malloc(table->capacity * sizeof(Symbol)); +} + +void symbol_table_add(SymbolTable *table, const char *name, u32 address, + SymbolType type) { + // Check for duplicates + for (int i = 0; i < table->count; i++) { + if (strcmp(table->symbols[i].name, name) == 0) { + fprintf(stderr, "Error: Duplicate label '%s'\n", name); + exit(1); + } + } + + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); + } + + Symbol *sym = &table->symbols[table->count++]; + sym->name = strdup(name); + sym->address = address; + sym->type = type; + sym->size = 4; // Default size + sym->is_constant = 0; +} + +Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { + for (int i = 0; i < table->count; i++) { + if (strcmp(table->symbols[i].name, name) == 0) { + return &table->symbols[i]; + } + } + return NULL; +} + +u32 find_label_in_table(SymbolTable *table, const char *name) { + Symbol *sym = symbol_table_lookup(table, name); + if (!sym) { + fprintf(stderr, "Error: Undefined label '%s'\n", name); + exit(1); + } + return sym->address; +} + +int get_instruction_byte_size(ExprNode *node) { + const char *opname = node->token; + + // Return (1 + 1) + if (strcmp(opname, "return") == 0) { + return 2; // 1 byte opcode + 1 byte return register + } + + if (strcmp(opname, "neg-int") == 0 || + strcmp(opname, "abs-int") == 0 || + strcmp(opname, "neg-nat") == 0 || + strcmp(opname, "abs-nat") == 0 || + strcmp(opname, "neg-real") == 0 || + strcmp(opname, "abs-real") == 0 || + strcmp(opname, "int-to-string") == 0 || + strcmp(opname, "load-indirect-8") == 0 || + strcmp(opname, "nat-to-string") == 0 || + strcmp(opname, "load-indirect-16") == 0 || + strcmp(opname, "real-to-string") == 0 || + strcmp(opname, "load-indirect-32") == 0 || + strcmp(opname, "int-to-real") == 0 || + strcmp(opname, "store-indirect-8") == 0 
|| + strcmp(opname, "nat-to-real") == 0 || + strcmp(opname, "store-indirect-16") == 0 || + strcmp(opname, "real-to-int") == 0 || + strcmp(opname, "store-indirect-32") == 0 || + strcmp(opname, "real-to-nat") == 0 || strcmp(opname, "nat-to-int") == 0 || + strcmp(opname, "int-to-nat") == 0 || + strcmp(opname, "string-length") == 0 || + strcmp(opname, "store-absolute-32") == 0 || + strcmp(opname, "store-absolute-8") == 0 || + strcmp(opname, "store-absolute-16") == 0 || + strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || + strcmp(opname, "memset-8") == 0 || strcmp(opname, "memset-16") == 0 || + strcmp(opname, "register-move") == 0 || strcmp(opname, "malloc") == 0) { + return 3; + } + + // Register-register-register opcodes (4 bytes: 1 + 3) + if (strcmp(opname, "add-int") == 0 || strcmp(opname, "sub-int") == 0 || + strcmp(opname, "mul-int") == 0 || strcmp(opname, "div-int") == 0 || + strcmp(opname, "add-nat") == 0 || strcmp(opname, "sub-nat") == 0 || + strcmp(opname, "mul-nat") == 0 || strcmp(opname, "div-nat") == 0 || + strcmp(opname, "add-real") == 0 || strcmp(opname, "sub-real") == 0 || + strcmp(opname, "bit-shift-left") == 0 || + strcmp(opname, "bit-shift-right") == 0 || + strcmp(opname, "bit-shift-r-ext") == 0 || + strcmp(opname, "bit-and") == 0 || strcmp(opname, "bit-or") == 0 || + strcmp(opname, "bit-xor") == 0 || strcmp(opname, "mul-real") == 0 || + strcmp(opname, "div-real") == 0) { + return 4; + } + + // (5 bytes: 1 + 4) + if (strcmp(opname, "exit") == 0 || strcmp(opname, "jump-if-flag") == 0 || + strcmp(opname, "jump") == 0) { + return 5; + } + + // Load, Load-immediate (6 bytes: 1 + 1 + 4) + if (strcmp(opname, "load-absolute-32") == 0 || + strcmp(opname, "load-immediate") == 0 || + strcmp(opname, "load-absolute-16") == 0 || + strcmp(opname, "load-absolute-8") == 0) { + return 6; + } + + // jump compare (7 bytes: 1 + 4 + 1 + 1) + if (strcmp(opname, "jump-eq-int") == 0 || + strcmp(opname, "jump-neq-int") == 0 || + strcmp(opname, "jump-gt-int") 
== 0 || + strcmp(opname, "jump-lt-int") == 0 || + strcmp(opname, "jump-le-int") == 0 || + strcmp(opname, "jump-ge-int") == 0 || + strcmp(opname, "jump-eq-nat") == 0 || + strcmp(opname, "jump-neq-nat") == 0 || + strcmp(opname, "jump-gt-nat") == 0 || + strcmp(opname, "jump-lt-nat") == 0 || + strcmp(opname, "jump-le-nat") == 0 || + strcmp(opname, "jump-ge-nat") == 0 || + strcmp(opname, "jump-eq-real") == 0 || + strcmp(opname, "jump-neq-real") == 0 || + strcmp(opname, "jump-gt-real") == 0 || + strcmp(opname, "jump-lt-real") == 0 || + strcmp(opname, "jump-le-real") == 0 || + strcmp(opname, "jump-ge-real") == 0 || + strcmp(opname, "store-offset-8") == 0 || + strcmp(opname, "store-offset-16") == 0 || + strcmp(opname, "store-offset-32") == 0 || + strcmp(opname, "load-offset-8") == 0 || + strcmp(opname, "load-offset-16") == 0 || + strcmp(opname, "load-offset-32") == 0) { + return 7; + } + + // Call (1 + 4 + 1 + args + 1) + if (strcmp(opname, "call") == 0) { + ExprNode *args_node = node->children[1]; + u32 args_count; + + if (strcmp(args_node->token, "nil") == 0) { + args_count = 0; + } else { + args_count = 1 + args_node->child_count; + } + + return 1 + 1 + 1 + 4 + args_count; + } + + // Syscall (1 + syscall_id (4) + args) + if (strcmp(opname, "syscall") == 0) { + return 1 + 4 + (node->child_count > 0 ? 
node->child_count - 1 : 0); + } + + fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); + exit(-1); +} + +int calculate_instruction_size(ExprNode *node) { + if (node->child_count == 0) + return 0; + + return get_instruction_byte_size(node); +} + +void collect_symbols_in_node(SymbolTable *table, ExprNode *node, + u32 *current_addr, int depth) { + char indent[32] = ""; + for (int i = 0; i < depth; i++) + strcat(indent, " "); + +#ifdef ASM_DEBUG + printf("%s%d %s ", indent, *current_addr, node->token); +#endif + + if (strcmp(node->token, "label") == 0) { + if (node->child_count >= 1) { + const char *name = node->children[0]->token; +#ifdef ASM_DEBUG + printf(" %s -> %d\n", name, *current_addr); +#endif + symbol_table_add(table, name, *current_addr, SYMBOL_CODE); + } + + for (size_t i = 1; i < node->child_count; i++) { + collect_symbols_in_node(table, node->children[i], current_addr, + depth + 1); + } + } else { + int size = get_instruction_byte_size(node); + *current_addr += size; +#ifdef ASM_DEBUG + printf(" +%d bytes -> %d\n", size, *current_addr); +#endif + } +} + +void collect_symbols(SymbolTable *table, ExprNode *program) { + // First, collect all data labels (with placeholder address) + for (size_t i = 0; i < program->child_count; ++i) { + ExprNode *section = program->children[i]; + if (strcmp(section->token, "data") == 0) { + for (size_t j = 0; j < section->child_count; ++j) { + ExprNode *item = section->children[j]; + if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { + const char *name = item->children[0]->token; + symbol_table_add(table, name, 0, SYMBOL_DATA); + } + } + } + } + + // Second, collect all code labels with proper nesting + u32 code_addr = 0; + for (size_t i = 0; i < program->child_count; ++i) { + ExprNode *section = program->children[i]; + if (strcmp(section->token, "code") == 0) { + for (size_t j = 0; j < section->child_count; ++j) { + collect_symbols_in_node(table, section->children[j], &code_addr, 0); + } + } + } +} + 
+u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) { + u32 addr = vm->mp; + vm->mp += size; + vm->frames[vm->fp].end += size; + + // Update the symbol's address + Symbol *sym = symbol_table_lookup(table, name); + if (sym && sym->type == SYMBOL_DATA) { + sym->address = addr; + sym->size = size; + } + + return addr; +} + +void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } + +void emit_u32(VM *vm, u32 value) { + write_u32(vm, code, vm->cp, value); + vm->cp += 4; +} + +void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } + +int parse_register(const char *reg_str) { + if (reg_str[0] != '$') + return -1; + return atoi(reg_str + 1); +} + +u32 resolve_symbol(SymbolTable *table, const char *ref) { + // Handle symbol references (e.g., &label) + if (ref[0] == '&') { + return find_label_in_table(table, ref + 1); + } + + // Handle fixed-point numbers (e.g., 0.5) + if (strchr(ref, '.')) { + return TO_FIXED(atof(ref)); + } + + // Handle hexadecimal literals (e.g., 0x7) + if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) { + char *endptr; + u32 value = (u32)strtoul(ref + 2, &endptr, 16); // Skip "0x" + + if (endptr == ref + 2 || *endptr != '\0') { + fprintf(stderr, "Invalid hex literal: %s\n", ref); + exit(1); + } + return value; + } + + // Handle decimal literals (e.g., 7) + char *endptr; + u32 value = (u32)strtoul(ref, &endptr, 10); + + if (endptr == ref || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", ref); + exit(1); + } + return value; +} + +static char *unwrap_string(const char *quoted_str) { + if (!quoted_str) + return NULL; + + size_t len = strlen(quoted_str); + if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { + // Remove quotes and process escape sequences + const char *src = quoted_str + 1; + size_t src_len = len - 2; + + // First pass: calculate the actual length needed after escape processing + size_t actual_len = 0; + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' 
&& i + 1 < src_len) { + // Escape sequence + actual_len++; + i++; // Skip the next character + } else { + actual_len++; + } + } + + char *unwrapped = (char *)malloc(actual_len + 1); + size_t dst_idx = 0; + + // Second pass: process escape sequences + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Handle escape sequences + switch (src[i + 1]) { + case 'n': + unwrapped[dst_idx++] = '\n'; + break; + case 't': + unwrapped[dst_idx++] = '\t'; + break; + case 'r': + unwrapped[dst_idx++] = '\r'; + break; + case '\\': + unwrapped[dst_idx++] = '\\'; + break; + case '"': + unwrapped[dst_idx++] = '"'; + break; + case '\'': + unwrapped[dst_idx++] = '\''; + break; + default: + // Unknown escape, keep both characters + unwrapped[dst_idx++] = src[i]; + unwrapped[dst_idx++] = src[i + 1]; + break; + } + i++; // Skip the next character + } else { + unwrapped[dst_idx++] = src[i]; + } + } + unwrapped[dst_idx] = '\0'; + return unwrapped; + } + // Not quoted, return copy + return strdup(quoted_str); +} + +void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) { + for (size_t i = 0; i < block->child_count; ++i) { + ExprNode *item = block->children[i]; + if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { + const char *name = item->children[0]->token; + ExprNode *val = item->children[1]; + + if (val->child_count == 0) { + const char *token = val->token; + + // Case 1: String literal (enclosed in quotes) + if (token[0] == '"' && token[strlen(token) - 1] == '"') { + char *unwrapped = unwrap_string(token); + int len = strlen(unwrapped); + u32 addr = allocate_data(vm, table, name, len + 1 + 4); + + write_u32(vm, memory, addr, len); + for (int i = 0; i < len; i++) { + write_u8(vm, memory, addr + 4 + i, unwrapped[i]); + } + write_u8(vm, memory, addr + 4 + len, '\0'); + free(unwrapped); + } + // Case 2: Hexadecimal integer (0x...) 
+ else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) { + char *endptr; + u32 value = (u32)strtoul(token + 2, &endptr, 16); + + if (endptr != token + strlen(token)) { + fprintf(stderr, "Invalid hex in data block: %s\n", token); + exit(1); + } + + u32 addr = allocate_data(vm, table, name, 4); + write_u32(vm, memory, addr, value); + } + // Case 3: Floating-point (has decimal point) + else if (strchr(token, '.')) { + float f = atof(token); + u32 addr = allocate_data(vm, table, name, 4); + write_u32(vm, memory, addr, TO_FIXED(f)); + } + // Case 4: Decimal integer + else { + char *endptr; + u32 value = (u32)strtoul(token, &endptr, 10); + + if (endptr != token + strlen(token)) { + fprintf(stderr, "Invalid decimal in data block: %s\n", token); + exit(1); + } + + u32 addr = allocate_data(vm, table, name, 4); + write_u32(vm, memory, addr, value); + //vm->mp += 4; + } + } else { + fprintf(stderr, "Unsupported data item\n"); + exit(1); + } + } + } +} + +void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) { + const char *opname = node->token; + if (strcmp(opname, "label") == 0) { + for (size_t i = 1; i < node->child_count; i++) { + process_code_expr(vm, table, node->children[i]); + } + } else if (strcmp(opname, "exit") == 0) { + emit_opcode(vm, OP_EXIT); + u32 addr = resolve_symbol(table, node->children[0]->token); + emit_u32(vm, addr); + } else if (strcmp(opname, "jump") == 0) { + emit_opcode(vm, OP_JMP); + u32 addr = resolve_symbol(table, node->children[0]->token); + emit_u32(vm, addr); + } else if (strcmp(opname, "jump-if-flag") == 0) { + emit_opcode(vm, OP_JMPF); + u32 addr = resolve_symbol(table, node->children[0]->token); + emit_u32(vm, addr); + } else if (strcmp(opname, "call") == 0) { + emit_opcode(vm, OP_CALL); + + if (node->child_count < 3) { + fprintf(stderr, "Error: call requires (args) and return register\n"); + return; + } + + // Parse function address (first child) + u32 addr = resolve_symbol(table, node->children[0]->token); + if 
(addr == (u32)-1) { + fprintf(stderr, "Error: undefined symbol '%s'\n", + node->children[0]->token); + return; + } + emit_u32(vm, addr); + + // Parse argument list (second child) + ExprNode *args_node = node->children[1]; + u8 arg_count = 0; + + if (args_node->child_count > 0) { + // Multiple arguments case + arg_count = args_node->child_count + 1; // +1 for the token + } else { + // Single argument case - token is the argument + arg_count = (args_node->token[0] != '\0') ? 1 : 0; + } + emit_byte(vm, arg_count); + + // Emit arguments based on representation + if (arg_count > 0) { + // First argument is always the token + const char *reg_str = args_node->token; + int reg = parse_register(reg_str); + if (reg < 0) { + fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); + return; + } + emit_byte(vm, (u8)reg); + + // Emit children if present + for (size_t i = 0; i < args_node->child_count; i++) { + reg_str = args_node->children[i]->token; + reg = parse_register(reg_str); + if (reg < 0) { + fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); + return; + } + emit_byte(vm, (u8)reg); + } + } + // Parse return register (third child) + const char *return_reg_str = node->children[2]->token; + int return_reg = parse_register(return_reg_str); + + if (return_reg < 0) { + if (strcmp(return_reg_str, "nil") == 0) { + return_reg = 0xFF; + } else { + fprintf(stderr, "Error: invalid return register '%s'\n", + return_reg_str); + return; + } + } + emit_byte(vm, (u8)return_reg); + +} else if (strcmp(opname, "return") == 0) { + emit_opcode(vm, OP_RETURN); + + if (node->child_count != 1) { + fprintf(stderr, "Error: return requires exactly one argument\n"); + return; + } + + const char *reg_str = node->children[0]->token; + int reg = parse_register(reg_str); + + // Handle "nil" as special case (no return value) + if (reg < 0) { + if (strcmp(reg_str, "nil") == 0) { + reg = 0xFF; // Special value for "no return" + } else { + fprintf(stderr, "Error: invalid 
return register '%s'\n", reg_str); + return; + } + } + emit_byte(vm, (u8)reg); + } else if (strcmp(opname, "load-immediate") == 0) { + emit_opcode(vm, OP_LOAD_IMM); + int reg = parse_register(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[1]->token); + emit_byte(vm, reg); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-absolute-8") == 0) { + emit_opcode(vm, OP_LOAD_ABS_8); + int dest = parse_register(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[1]->token); + emit_byte(vm, dest); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-absolute-16") == 0) { + emit_opcode(vm, OP_LOAD_ABS_16); + int dest = parse_register(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[1]->token); + emit_byte(vm, dest); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-absolute-32") == 0) { + emit_opcode(vm, OP_LOAD_ABS_32); + int dest = parse_register(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[1]->token); + emit_byte(vm, dest); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-indirect-8") == 0) { + emit_opcode(vm, OP_LOAD_IND_8); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "load-indirect-16") == 0) { + emit_opcode(vm, OP_LOAD_IND_16); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "load-indirect-32") == 0) { + emit_opcode(vm, OP_LOAD_IND_32); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "malloc") == 0) { + emit_opcode(vm, OP_MALLOC); + int dest = parse_register(node->children[0]->token); + int src1 = 
parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "memset-8") == 0) { + emit_opcode(vm, OP_MEMSET_8); + int dest = parse_register(node->children[0]->token); + int value = parse_register(node->children[1]->token); + int count = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, value); + emit_byte(vm, count); + } else if (strcmp(opname, "memset-16") == 0) { + emit_opcode(vm, OP_MEMSET_16); + int dest = parse_register(node->children[0]->token); + int value = parse_register(node->children[1]->token); + int count = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, value); + emit_byte(vm, count); + } else if (strcmp(opname, "memset") == 0) { + emit_opcode(vm, OP_MEMSET_32); + int dest = parse_register(node->children[0]->token); + int value = parse_register(node->children[1]->token); + int count = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, value); + emit_byte(vm, count); + } else if (strcmp(opname, "store-absolute-8") == 0) { + emit_opcode(vm, OP_STORE_ABS_8); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "store-absolute-16") == 0) { + emit_opcode(vm, OP_STORE_ABS_16); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "store-absolute-32") == 0) { + emit_opcode(vm, OP_STORE_ABS_32); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "store-indirect-8") == 0) { + emit_opcode(vm, OP_STORE_IND_8); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + 
emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "store-indirect-16") == 0) { + emit_opcode(vm, OP_STORE_IND_16); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "store-indirect-32") == 0) { + emit_opcode(vm, OP_STORE_IND_32); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "store-offset-8") == 0) { + emit_opcode(vm, OP_STORE_OFF_8); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_u32(vm, addr); + } else if (strcmp(opname, "store-offset-16") == 0) { + emit_opcode(vm, OP_STORE_OFF_16); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_u32(vm, addr); + } else if (strcmp(opname, "store-offset-32") == 0) { + emit_opcode(vm, OP_STORE_OFF_32); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-offset-8") == 0) { + emit_opcode(vm, OP_LOAD_OFF_8); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-offset-16") == 0) { + emit_opcode(vm, OP_LOAD_OFF_16); + int dest = 
parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_u32(vm, addr); + } else if (strcmp(opname, "load-offset-32") == 0) { + emit_opcode(vm, OP_LOAD_OFF_32); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_u32(vm, addr); + } else if (strcmp(opname, "register-move") == 0) { + emit_opcode(vm, OP_REG_MOV); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "syscall") == 0) { + emit_opcode(vm, OP_SYSCALL); + + // Parse syscall ID + u32 syscall_id = 0; + const char *syscall_name = node->children[0]->token; + if (strcmp(syscall_name, "EXIT") == 0) + syscall_id = SYSCALL_EXIT; + else if (strcmp(syscall_name, "OPEN") == 0) + syscall_id = SYSCALL_DEVICE_OPEN; + else if (strcmp(syscall_name, "READ") == 0) + syscall_id = SYSCALL_DEVICE_READ; + else if (strcmp(syscall_name, "WRITE") == 0) + syscall_id = SYSCALL_DEVICE_WRITE; + else if (strcmp(syscall_name, "CLOSE") == 0) + syscall_id = SYSCALL_DEVICE_CLOSE; + else if (strcmp(syscall_name, "IOCTL") == 0) + syscall_id = SYSCALL_DEVICE_IOCTL; + else if (strcmp(syscall_name, "REFRESH") == 0) + syscall_id = SYSCALL_DEVICE_REFRESH; + + emit_u32(vm, syscall_id); + + // Emit register arguments + for (size_t i = 1; i < node->child_count; ++i) { + int reg = parse_register(node->children[i]->token); + emit_byte(vm, reg); + } + } else if (strcmp(opname, "bit-shift-left") == 0) { + emit_opcode(vm, OP_BIT_SHIFT_LEFT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + 
emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "bit-shift-right") == 0) { + emit_opcode(vm, OP_BIT_SHIFT_RIGHT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "bit-shift-r-ext") == 0) { + emit_opcode(vm, OP_BIT_SHIFT_R_EXT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "bit-and") == 0) { + emit_opcode(vm, OP_BAND); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "bit-or") == 0) { + emit_opcode(vm, OP_BOR); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "bit-xor") == 0) { + emit_opcode(vm, OP_BXOR); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "add-int") == 0) { + emit_opcode(vm, OP_ADD_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "sub-int") 
== 0) { + emit_opcode(vm, OP_SUB_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "mul-int") == 0) { + emit_opcode(vm, OP_MUL_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "div-int") == 0) { + emit_opcode(vm, OP_DIV_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "abs-int") == 0) { + emit_opcode(vm, OP_ABS_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "neg-int") == 0) { + emit_opcode(vm, OP_NEG_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "add-nat") == 0) { + emit_opcode(vm, OP_ADD_NAT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "sub-nat") == 0) { + emit_opcode(vm, OP_SUB_NAT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else 
if (strcmp(opname, "mul-nat") == 0) { + emit_opcode(vm, OP_MUL_NAT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "div-nat") == 0) { + emit_opcode(vm, OP_DIV_NAT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "abs-nat") == 0) { + emit_opcode(vm, OP_ABS_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "neg-nat") == 0) { + emit_opcode(vm, OP_NEG_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "add-real") == 0) { + emit_opcode(vm, OP_ADD_REAL); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "sub-real") == 0) { + emit_opcode(vm, OP_SUB_REAL); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "mul-real") == 0) { + emit_opcode(vm, OP_MUL_REAL); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, 
src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "div-real") == 0) { + emit_opcode(vm, OP_DIV_REAL); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "abs-real") == 0) { + emit_opcode(vm, OP_ABS_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "neg-real") == 0) { + emit_opcode(vm, OP_NEG_INT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + } else if (strcmp(opname, "int-to-real") == 0) { + emit_opcode(vm, OP_INT_TO_REAL); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "nat-to-real") == 0) { + emit_opcode(vm, OP_NAT_TO_REAL); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "real-to-int") == 0) { + emit_opcode(vm, OP_REAL_TO_INT); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "real-to-nat") == 0) { + emit_opcode(vm, OP_REAL_TO_NAT); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "jump-eq-int") == 0) { + emit_opcode(vm, OP_JEQ_INT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = 
parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-neq-int") == 0) { + emit_opcode(vm, OP_JNEQ_INT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-gt-int") == 0) { + emit_opcode(vm, OP_JGT_INT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-lt-int") == 0) { + emit_opcode(vm, OP_JLT_INT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-le-int") == 0) { + emit_opcode(vm, OP_JLE_INT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-ge-int") == 0) { + emit_opcode(vm, OP_JGE_INT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-eq-nat") == 0) { + emit_opcode(vm, OP_JEQ_NAT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + 
emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-neq-nat") == 0) { + emit_opcode(vm, OP_JNEQ_NAT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-gt-nat") == 0) { + emit_opcode(vm, OP_JGT_NAT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-lt-nat") == 0) { + emit_opcode(vm, OP_JLT_NAT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-le-nat") == 0) { + emit_opcode(vm, OP_JLE_NAT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-ge-nat") == 0) { + emit_opcode(vm, OP_JGE_NAT); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-eq-real") == 0) { + emit_opcode(vm, OP_JEQ_REAL); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if 
(strcmp(opname, "jump-neq-real") == 0) { + emit_opcode(vm, OP_JNEQ_REAL); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-gt-real") == 0) { + emit_opcode(vm, OP_JGT_REAL); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-lt-real") == 0) { + emit_opcode(vm, OP_JLT_REAL); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-le-real") == 0) { + emit_opcode(vm, OP_JLE_REAL); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "jump-ge-real") == 0) { + emit_opcode(vm, OP_JGE_REAL); + u32 addr = resolve_symbol(table, node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_u32(vm, addr); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "string-length") == 0) { + emit_opcode(vm, OP_STRLEN); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "string-eq") == 0) { + emit_opcode(vm, OP_STREQ); + int dest = parse_register(node->children[0]->token); + int src1 = 
parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "string-concat") == 0) { + emit_opcode(vm, OP_STRCAT); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "string-get-char") == 0) { + emit_opcode(vm, OP_STR_GET_CHAR); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "string-find-char") == 0) { + emit_opcode(vm, OP_STR_FIND_CHAR); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + } else if (strcmp(opname, "string-slice") == 0) { + emit_opcode(vm, OP_STR_SLICE); + int dest = parse_register(node->children[0]->token); + int src1 = parse_register(node->children[1]->token); + int src2 = parse_register(node->children[2]->token); + int src3 = parse_register(node->children[3]->token); + emit_byte(vm, dest); + emit_byte(vm, src1); + emit_byte(vm, src2); + emit_byte(vm, src3); + } else if (strcmp(opname, "int-to-string") == 0) { + emit_opcode(vm, OP_INT_TO_STRING); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "nat-to-string") == 0) { + emit_opcode(vm, OP_NAT_TO_STRING); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + 
emit_byte(vm, src); + } else if (strcmp(opname, "real-to-string") == 0) { + emit_opcode(vm, OP_REAL_TO_STRING); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "string-to-int") == 0) { + emit_opcode(vm, OP_STRING_TO_INT); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "string-to-nat") == 0) { + emit_opcode(vm, OP_STRING_TO_NAT); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else if (strcmp(opname, "string-to-real") == 0) { + emit_opcode(vm, OP_STRING_TO_REAL); + int dest = parse_register(node->children[0]->token); + int src = parse_register(node->children[1]->token); + emit_byte(vm, dest); + emit_byte(vm, src); + } else { + fprintf(stderr, "Unknown opcode: %s\n", opname); + } +} + +void old_assemble(VM *vm, ExprNode *program) { + SymbolTable table; + symbol_table_init(&table); + + // PASS 1: Collect all symbols (both code and data) + collect_symbols(&table, program); + + // PASS 2: Process data section using symbol table + for (size_t i = 0; i < program->child_count; ++i) { + ExprNode *section = program->children[i]; + if (strcmp(section->token, "data") == 0) { + process_data_block(vm, &table, section); + } + } + + // PASS 3: Process code section using complete symbol table + for (size_t i = 0; i < program->child_count; ++i) { + ExprNode *section = program->children[i]; + if (strcmp(section->token, "code") == 0) { + for (size_t j = 0; j < section->child_count; ++j) { + process_code_expr(vm, &table, section->children[j]); + } + } + } + + // Cleanup symbol table + for (int i = 0; i < table.count; i++) { +#ifdef ASM_DEBUG + Symbol s = table.symbols[i]; + printf("%s[%d]\n", s.name, s.address); 
+#endif + free(table.symbols[i].name); + } + free(table.symbols); +} diff --git a/src/tools/old_assembler/assembler.h b/src/tools/old_assembler/assembler.h new file mode 100644 index 0000000..76b9326 --- /dev/null +++ b/src/tools/old_assembler/assembler.h @@ -0,0 +1,20 @@ +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#include "../../vm/common.h" +#include "../../vm/vm.h" +#include "parser.h" + +#include +#include +#include +#include + +#define AS_FIXED(v) ((float)(i32)(v) / 65536.0f) +#define TO_FIXED(f) ((i32)( \ + ((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \ +)) + +void old_assemble(VM *vm, ExprNode *program); + +#endif diff --git a/src/tools/assembler/parser.c b/src/tools/old_assembler/parser.c similarity index 100% rename from src/tools/assembler/parser.c rename to src/tools/old_assembler/parser.c diff --git a/src/tools/assembler/parser.h b/src/tools/old_assembler/parser.h similarity index 100% rename from src/tools/assembler/parser.h rename to src/tools/old_assembler/parser.h diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index 979c612..f74a18f 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -4,99 +4,100 @@ #include "common.h" typedef enum { - OP_HALT, /* halt : terminate execution with code [src1] */ + OP_EXIT, /* exit : terminate execution with code [src1] */ OP_CALL, /* call : creates a new frame */ OP_RETURN, /* return : returns from a frame to the parent frame */ OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? 
does a system call based on args */ - OP_LOAD_IMM, /* load-immediate : registers[dest] = constant */ - OP_LOAD_IND_8, /* load-indirect-8 : registers[dest] = memory[registers[src1]] as u8 */ - OP_LOAD_IND_16, /* load-indirect-16 : registers[dest] = memory[registers[src1]] as u8 */ - OP_LOAD_IND_32, /* load-indirect-32 : registers[dest] = memory[registers[src1]] as u32 */ - OP_LOAD_ABS_8, /* load-absolute-8 : registers[dest] = memory[src1 as u32] */ - OP_LOAD_ABS_16, /* load-absolute-16 : registers[dest] = memory[src1 as u32] */ - OP_LOAD_ABS_32, /* load-absolute-32 : registers[dest] = memory[src1 as u32] */ - OP_LOAD_OFF_8, /* load-offset-8 : registers[dest] = memory[registers[src1] + offset] as u8 */ - OP_LOAD_OFF_16, /* load-offset-16 : registers[dest] = memory[registers[src1] + offset] as u16 */ - OP_LOAD_OFF_32, /* load-offset-32 : registers[dest] = memory[registers[src1] + offset] as u32 */ - OP_STORE_ABS_8, /* store-absolute-8 : memory[dest] = src1 && 0xFF */ - OP_STORE_ABS_16, /* store-absolute-16 : memory[dest] = src1 && 0xFFFF */ - OP_STORE_ABS_32, /* store-absolute-32 : memory[dest] = src1 */ - OP_STORE_IND_8, /* store-indirect-8 : memory[dest] = registers[src1] && 0xFF */ - OP_STORE_IND_16, /* store-indirect-16 : memory[dest] = registers[src1] && 0xFFFF*/ - OP_STORE_IND_32, /* store-indirect-32 : memory[dest] = registers[src1] */ - OP_STORE_OFF_8, /* store-offset-8 : memory[registers[dest] + offset] = registers[src1] && 0xFF */ - OP_STORE_OFF_16, /* store-offset-16 : memory[registers[dest] + offset] = registers[src1] && 0xFFFF */ - OP_STORE_OFF_32, /* store-offset-32 : memory[registers[dest] + offset] = registers[src1] */ + OP_LOAD_IMM, /* load_immediate : locals[dest] = constant */ + OP_LOAD_IND_8, /* load_indirect_8 : locals[dest] = memory[locals[src1]] as u8 */ + OP_LOAD_IND_16, /* load_indirect_16 : locals[dest] = memory[locals[src1]] as u8 */ + OP_LOAD_IND_32, /* load_indirect_32 : locals[dest] = memory[locals[src1]] as u32 */ + OP_LOAD_ABS_8, /* 
load_absolute_8 : locals[dest] = memory[src1 as u32] */ + OP_LOAD_ABS_16, /* load_absolute_16 : locals[dest] = memory[src1 as u32] */ + OP_LOAD_ABS_32, /* load_absolute_32 : locals[dest] = memory[src1 as u32] */ + OP_LOAD_OFF_8, /* load_offset_8 : locals[dest] = memory[locals[src1] + offset] as u8 */ + OP_LOAD_OFF_16, /* load_offset_16 : locals[dest] = memory[locals[src1] + offset] as u16 */ + OP_LOAD_OFF_32, /* load_offset_32 : locals[dest] = memory[locals[src1] + offset] as u32 */ + OP_STORE_ABS_8, /* store_absolute_8 : memory[dest] = src1 && 0xFF */ + OP_STORE_ABS_16, /* store_absolute_16 : memory[dest] = src1 && 0xFFFF */ + OP_STORE_ABS_32, /* store_absolute_32 : memory[dest] = src1 */ + OP_STORE_IND_8, /* store_indirect_8 : memory[dest] = locals[src1] && 0xFF */ + OP_STORE_IND_16, /* store_indirect_16 : memory[dest] = locals[src1] && 0xFFFF*/ + OP_STORE_IND_32, /* store_indirect_32 : memory[dest] = locals[src1] */ + OP_STORE_OFF_8, /* store_offset_8 : memory[locals[dest] + offset] = locals[src1] && 0xFF */ + OP_STORE_OFF_16, /* store_offset_16 : memory[locals[dest] + offset] = locals[src1] && 0xFFFF */ + OP_STORE_OFF_32, /* store_offset_32 : memory[locals[dest] + offset] = locals[src1] */ OP_MALLOC, /* malloc : dest = fat ptr to memory of ((src1 as size) + 4) */ - OP_MEMSET_8, /* memset-8 : dest <-> dest+count = src1 as u8 */ - OP_MEMSET_16, /* memset-16 : dest <-> dest+count = src1 as u8 */ - OP_MEMSET_32, /* memset-32 : dest <-> dest+count = src1 as u32 */ - OP_REG_MOV, /* register-move : registers[dest] = registers[src1] */ - OP_ADD_INT, /* add-int : registers[dest] = registers[src1] + registers[src2] */ - OP_SUB_INT, /* sub-int : registers[dest] = registers[src1] - registers[src2] */ - OP_MUL_INT, /* mul-int : registers[dest] = registers[src1] * registers[src2] */ - OP_DIV_INT, /* div-int : registers[dest] = registers[src1] / registers[src2] */ - OP_ABS_INT, /* abs-int : registers[dest] = | registers[src1] | */ - OP_NEG_INT, /* neg-int : registers[dest] = 
-registers[src1] */ - OP_ADD_NAT, /* add-nat : registers[dest] = registers[src1] + registers[src2] */ - OP_SUB_NAT, /* sub-nat : registers[dest] = registers[src1] - registers[src2] */ - OP_MUL_NAT, /* mul-nat : registers[dest] = registers[src1] * registers[src2] */ - OP_DIV_NAT, /* div-nat : registers[dest] = registers[src1] / registers[src2] */ - OP_ABS_NAT, /* abs-nat : registers[dest] = | registers[src1] | */ - OP_NEG_NAT, /* neg-nat : registers[dest] = -registers[src1] */ - OP_ADD_REAL, /* add-real : registers[dest] = registers[src1] + registers[src2] */ - OP_SUB_REAL, /* sub-real : registers[dest] = registers[src1] - registers[src2] */ - OP_MUL_REAL, /* mul-real : registers[dest] = registers[src1] * registers[src2] */ - OP_DIV_REAL, /* div-real : registers[dest] = registers[src1] / registers[src2] */ - OP_ABS_REAL, /* abs-real : registers[dest] = | registers[src1] | */ - OP_NEG_REAL, /* neg-real : registers[dest] = -registers[src1] */ - OP_INT_TO_REAL, /* int-to-real : registers[dest] = registers[src1] as real */ - OP_NAT_TO_REAL, /* nat-to-real : registers[dest] = registers[src1] as real */ - OP_REAL_TO_INT, /* real-to-int : registers[dest] = registers[src1] as int */ - OP_REAL_TO_NAT, /* real-to-nat : registers[dest] = registers[src1] as nat */ - OP_BIT_SHIFT_LEFT, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */ - OP_BIT_SHIFT_RIGHT,/* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */ - OP_BIT_SHIFT_R_EXT,/* bit-shift-r-ext : registers[dest] as i32 = registers[src1] >> registers[src2] */ - OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */ - OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */ - OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */ + OP_MALLOC_IMM, /* malloc_immediate : dest = fat ptr to memory of raw */ + OP_MEMSET_8, /* memset_8 : dest <-> dest+count = src1 as u8 */ + OP_MEMSET_16, /* memset_16 : dest <-> dest+count = 
src1 as u8 */ + OP_MEMSET_32, /* memset_32 : dest <-> dest+count = src1 as u32 */ + OP_REG_MOV, /* register_move : locals[dest] = locals[src1] */ + OP_ADD_INT, /* add_int : locals[dest] = locals[src1] + locals[src2] */ + OP_SUB_INT, /* sub_int : locals[dest] = locals[src1] _ locals[src2] */ + OP_MUL_INT, /* mul_int : locals[dest] = locals[src1] * locals[src2] */ + OP_DIV_INT, /* div_int : locals[dest] = locals[src1] / locals[src2] */ + OP_ABS_INT, /* abs_int : locals[dest] = | locals[src1] | */ + OP_NEG_INT, /* neg_int : locals[dest] = -locals[src1] */ + OP_ADD_NAT, /* add_nat : locals[dest] = locals[src1] + locals[src2] */ + OP_SUB_NAT, /* sub_nat : locals[dest] = locals[src1] _ locals[src2] */ + OP_MUL_NAT, /* mul_nat : locals[dest] = locals[src1] * locals[src2] */ + OP_DIV_NAT, /* div_nat : locals[dest] = locals[src1] / locals[src2] */ + OP_ABS_NAT, /* abs_nat : locals[dest] = | locals[src1] | */ + OP_NEG_NAT, /* neg_nat : locals[dest] = -locals[src1] */ + OP_ADD_REAL, /* add_real : locals[dest] = locals[src1] + locals[src2] */ + OP_SUB_REAL, /* sub_real : locals[dest] = locals[src1] _ locals[src2] */ + OP_MUL_REAL, /* mul_real : locals[dest] = locals[src1] * locals[src2] */ + OP_DIV_REAL, /* div_real : locals[dest] = locals[src1] / locals[src2] */ + OP_ABS_REAL, /* abs_real : locals[dest] = | locals[src1] | */ + OP_NEG_REAL, /* neg_real : locals[dest] = _locals[src1] */ + OP_INT_TO_REAL, /* int_to_real : locals[dest] = locals[src1] as real */ + OP_NAT_TO_REAL, /* nat_to_real : locals[dest] = locals[src1] as real */ + OP_REAL_TO_INT, /* real_to_int : locals[dest] = locals[src1] as int */ + OP_REAL_TO_NAT, /* real_to_nat : locals[dest] = locals[src1] as nat */ + OP_BIT_SHIFT_LEFT, /* bit_shift_left : locals[dest] = locals[src1] << locals[src2] */ + OP_BIT_SHIFT_RIGHT,/* bit_shift_right : locals[dest] = locals[src1] >> locals[src2] */ + OP_BIT_SHIFT_R_EXT,/* bit_shift_r_ext : locals[dest] as i32 = locals[src1] >> locals[src2] */ + OP_BAND, /* bit_and : 
locals[dest] = locals[src1] & locals[src2] */ + OP_BOR, /* bit_or : locals[dest] = locals[src1] | locals[src2] */ + OP_BXOR, /* bit_xor : locals[dest] = locals[src1] ^ locals[src2] */ OP_JMP, /* jump : jump to &dest unconditionally */ - OP_JMPF, /* jump-if-flag : jump to &dest if flag != 0 */ - OP_JEQ_INT, /* jump-eq-int : jump to &dest if registers[src1] as int == registers[src2] as int */ - OP_JNEQ_INT, /* jump-neq-int : jump to &dest if registers[src1] as int != registers[src2] as int */ - OP_JGT_INT, /* jump-gt-int : jump to &dest if registers[src1] as int > registers[src2] as int */ - OP_JLT_INT, /* jump-lt-int : jump to &dest if registers[src1] as int < registers[src2] as int */ - OP_JLE_INT, /* jump-le-int : jump to &dest if registers[src1] as int <= registers[src2] as int */ - OP_JGE_INT, /* jump-ge-int : jump to &dest if registers[src1] as int >= registers[src2] as int */ - OP_JEQ_NAT, /* jump-eq-nat : jump to &dest if registers[src1] as nat == registers[src2] as nat */ - OP_JNEQ_NAT, /* jump-neq-nat : jump to &dest if registers[src1] as nat != registers[src2] as nat */ - OP_JGT_NAT, /* jump-gt-nat : jump to &dest if registers[src1] as nat > registers[src2] as nat */ - OP_JLT_NAT, /* jump-lt-nat : jump to &dest if registers[src1] as nat < registers[src2] as nat */ - OP_JLE_NAT, /* jump-le-nat : jump to &dest if registers[src1] as nat <= registers[src2] as nat */ - OP_JGE_NAT, /* jump-ge-nat : jump to &dest if registers[src1] as nat >= registers[src2] as nat */ - OP_JEQ_REAL, /* jump-eq-real : jump to &dest if registers[src1] as real == registers[src2] as real */ - OP_JNEQ_REAL, /* jump-neq-real : jump to &dest if registers[src1] as real != registers[src2] as real */ - OP_JGE_REAL, /* jump-ge-real : jump to &dest if registers[src1] as real >= registers[src2] as real */ - OP_JGT_REAL, /* jump-gt-real : jump to &dest if registers[src1] as real > registers[src2] as real */ - OP_JLT_REAL, /* jump-lt-real : jump to &dest if registers[src1] as real < 
registers[src2] as real */ - OP_JLE_REAL, /* jump-le-real : jump to &dest if registers[src1] as real <= registers[src2] as real */ - OP_STRLEN, /* string-length : registers[dest] = length of str at src1 ptr */ - OP_STREQ, /* string-eq : registers[dest] = src1 ptr string == src2 ptr string */ - OP_STRCAT, /* string-concat : registers[dest] = ptr of src1 ptr string + src2 ptr string */ - OP_STR_GET_CHAR, /* string-get-char : registers[dest] = ptr of src1 ptr str, src2 index of str */ - OP_STR_FIND_CHAR, /* string-find-char : registers[dest] = ptr of src1 ptr string, src2 nat8 char */ - OP_STR_SLICE, /* string-slice : registers[dest] = ptr of src1 ptr str, src2 start index, src3 end index */ - OP_INT_TO_STRING, /* int-to-string : registers[dest] = src1 as str */ - OP_NAT_TO_STRING, /* nat-to-string : registers[dest] = src1 as str */ - OP_REAL_TO_STRING, /* real-to-string : registers[dest] = src1 as str */ - OP_STRING_TO_INT, /* string-to-int : registers[dest] = src1 as int */ - OP_STRING_TO_NAT, /* string-to-nat : registers[dest] = src1 as nat */ - OP_STRING_TO_REAL /* string-to-real : registers[dest] = src1 as real */ + OP_JMPF, /* jump_if_flag : jump to &dest if flag != 0 */ + OP_JEQ_INT, /* jump_eq_int : jump to &dest if locals[src1] as int == locals[src2] as int */ + OP_JNEQ_INT, /* jump_neq_int : jump to &dest if locals[src1] as int != locals[src2] as int */ + OP_JGT_INT, /* jump_gt_int : jump to &dest if locals[src1] as int > locals[src2] as int */ + OP_JLT_INT, /* jump_lt_int : jump to &dest if locals[src1] as int < locals[src2] as int */ + OP_JLE_INT, /* jump_le_int : jump to &dest if locals[src1] as int <= locals[src2] as int */ + OP_JGE_INT, /* jump_ge_int : jump to &dest if locals[src1] as int >= locals[src2] as int */ + OP_JEQ_NAT, /* jump_eq_nat : jump to &dest if locals[src1] as nat == locals[src2] as nat */ + OP_JNEQ_NAT, /* jump_neq_nat : jump to &dest if locals[src1] as nat != locals[src2] as nat */ + OP_JGT_NAT, /* jump_gt_nat : jump to &dest if 
locals[src1] as nat > locals[src2] as nat */ + OP_JLT_NAT, /* jump_lt_nat : jump to &dest if locals[src1] as nat < locals[src2] as nat */ + OP_JLE_NAT, /* jump_le_nat : jump to &dest if locals[src1] as nat <= locals[src2] as nat */ + OP_JGE_NAT, /* jump_ge_nat : jump to &dest if locals[src1] as nat >= locals[src2] as nat */ + OP_JEQ_REAL, /* jump_eq_real : jump to &dest if locals[src1] as real == locals[src2] as real */ + OP_JNEQ_REAL, /* jump_neq_real : jump to &dest if locals[src1] as real != locals[src2] as real */ + OP_JGE_REAL, /* jump_ge_real : jump to &dest if locals[src1] as real >= locals[src2] as real */ + OP_JGT_REAL, /* jump_gt_real : jump to &dest if locals[src1] as real > locals[src2] as real */ + OP_JLT_REAL, /* jump_lt_real : jump to &dest if locals[src1] as real < locals[src2] as real */ + OP_JLE_REAL, /* jump_le_real : jump to &dest if locals[src1] as real <= locals[src2] as real */ + OP_STRLEN, /* string_length : locals[dest] = length of str at src1 ptr */ + OP_STREQ, /* string_eq : locals[dest] = src1 ptr string == src2 ptr string */ + OP_STRCAT, /* string_concat : locals[dest] = ptr of src1 ptr string + src2 ptr string */ + OP_STR_GET_CHAR, /* string_get_char : locals[dest] = ptr of src1 ptr str, src2 index of str */ + OP_STR_FIND_CHAR, /* string_find_char : locals[dest] = ptr of src1 ptr string, src2 nat8 char */ + OP_STR_SLICE, /* string_slice : locals[dest] = ptr of src1 ptr str, src2 start index, src3 end index */ + OP_INT_TO_STRING, /* int_to_string : locals[dest] = src1 as str */ + OP_NAT_TO_STRING, /* nat_to_string : locals[dest] = src1 as str */ + OP_REAL_TO_STRING, /* real_to_string : locals[dest] = src1 as str */ + OP_STRING_TO_INT, /* string_to_int : locals[dest] = src1 as int */ + OP_STRING_TO_NAT, /* string_to_nat : locals[dest] = src1 as nat */ + OP_STRING_TO_REAL /* string_to_real : locals[dest] = src1 as real */ } Opcode; #define MAX_REGS 32 typedef struct frame_s { - u32 registers[MAX_REGS]; /* R0-R31 */ + u32 locals[MAX_REGS]; 
/* R0-R31 */ u32 start; /* start of memory block */ u32 end; /* end of memory block */ u32 return_reg; /* register to store return value in parent */ @@ -130,7 +131,7 @@ typedef struct device_s { char type[DEVICE_TYPE_MAX_LENGTH]; /* e.g., "screen", "mouse", "gpio" */ char path[DEVICE_PATH_MAX_LENGTH]; /* "/dev/screen", "/dev/input/mouse/0", etc. */ - void *data; /* device-specific data */ + void *data; /* device_specific data */ DeviceOps *ops; /* operations vtable */ u32 flags; /* permissions, status, etc. */ u32 handle; /* id for fast access in VM */ diff --git a/src/vm/vm.c b/src/vm/vm.c index ba92c73..a9a27b3 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -17,8 +17,8 @@ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ - value = (type)frame->registers[src1]; \ - value2 = (type)frame->registers[src2]; \ + value = (type)frame->locals[src1]; \ + value2 = (type)frame->locals[src2]; \ cond = !!(value op value2); \ mask = -(u32)cond; \ vm->pc = (target & mask) | (vm->pc & ~mask); \ @@ -27,7 +27,7 @@ #define MATH_OP(type, op) \ do { \ - u32 *regs = frame->registers; \ + u32 *regs = frame->locals; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ @@ -40,7 +40,7 @@ #define BIT_OP(op) \ do { \ - u32 *regs = frame->registers; \ + u32 *regs = frame->locals; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ @@ -93,7 +93,7 @@ bool step_vm(VM *vm) { frame = &vm->frames[vm->fp]; switch (opcode) { - case OP_HALT: { + case OP_EXIT: { vm->flag = read_u32(vm, code, vm->pc); return false; } @@ -130,7 +130,7 @@ bool step_vm(VM *vm) { heap_mask = 0; for (i = 0; i < N; i++) { src_reg = args[i]; - child->registers[i] = frame->registers[src_reg]; + child->locals[i] = frame->locals[src_reg]; /* Bitmask operation instead of conditional branch */ heap_mask |= ((frame->heap_mask >> src_reg) & 1) << i; @@ -154,7 +154,7 @@ bool step_vm(VM *vm) { parent = &vm->frames[vm->fp - 1]; if (child_return_reg != 0xFF && 
parent->return_reg != 0xFF) { - value = child->registers[child_return_reg]; + value = child->locals[child_return_reg]; if (is_heap_value(vm, child_return_reg)) { ptr = value; @@ -171,7 +171,7 @@ bool step_vm(VM *vm) { memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); parent->end += size + 4; - parent->registers[parent->return_reg] = new_ptr; + parent->locals[parent->return_reg] = new_ptr; parent->heap_mask |= (1 << parent->return_reg); return true; } @@ -184,10 +184,10 @@ bool step_vm(VM *vm) { *(u32 *)(vm->memory + new_ptr) = size; memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); parent->end += size + 4; - parent->registers[parent->return_reg] = new_ptr; + parent->locals[parent->return_reg] = new_ptr; parent->heap_mask |= (1 << parent->return_reg); } else { - parent->registers[parent->return_reg] = value; + parent->locals[parent->return_reg] = value; parent->heap_mask &= ~(1 << parent->return_reg); } } @@ -203,8 +203,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = vm->mp; - size = frame->registers[src1]; + frame->locals[dest] = vm->mp; + size = frame->locals[src1]; write_u32(vm, memory, vm->mp, size); vm->mp += (size + 4); set_heap_status(vm, dest, true); /* Mark as heap pointer */ @@ -216,9 +216,9 @@ bool step_vm(VM *vm) { u8 value_reg = read_u8(vm, code, vm->pc++); u8 count_reg = read_u8(vm, code, vm->pc++); - u32 dest = frame->registers[dest_reg]; - u32 value = frame->registers[value_reg]; - u32 count = frame->registers[count_reg]; + u32 dest = frame->locals[dest_reg]; + u32 value = frame->locals[value_reg]; + u32 count = frame->locals[count_reg]; if (count == 0) { vm->flag = 1; @@ -237,7 +237,7 @@ bool step_vm(VM *vm) { write_u32(vm, memory, i, value); } - frame->registers[0] = dest; + frame->locals[0] = dest; vm->flag = 1; return true; } @@ -247,9 +247,9 @@ bool step_vm(VM *vm) { u8 value_reg = read_u8(vm, code, vm->pc++); u8 count_reg = read_u8(vm, code, vm->pc++); - u32 
dest = frame->registers[dest_reg]; - u16 value = (u16)(frame->registers[value_reg]); - u32 count = frame->registers[count_reg]; + u32 dest = frame->locals[dest_reg]; + u16 value = (u16)(frame->locals[value_reg]); + u32 count = frame->locals[count_reg]; if (count == 0) { vm->flag = 1; @@ -268,7 +268,7 @@ bool step_vm(VM *vm) { write_u16(vm, memory, i, value); } - frame->registers[0] = dest; + frame->locals[0] = dest; vm->flag = 1; return true; } @@ -278,9 +278,9 @@ bool step_vm(VM *vm) { u8 value_reg = read_u8(vm, code, vm->pc++); u8 count_reg = read_u8(vm, code, vm->pc++); - u32 dest = frame->registers[dest_reg]; - u8 value = (u8)(frame->registers[value_reg]); - u32 count = frame->registers[count_reg]; + u32 dest = frame->locals[dest_reg]; + u8 value = (u8)(frame->locals[value_reg]); + u32 count = frame->locals[count_reg]; if (count == 0) { vm->flag = 1; @@ -299,7 +299,7 @@ bool step_vm(VM *vm) { write_u8(vm, memory, i, value); } - frame->registers[0] = dest; + frame->locals[0] = dest; vm->flag = 1; return true; } @@ -308,7 +308,7 @@ bool step_vm(VM *vm) { vm->pc++; v = read_u32(vm, code, vm->pc); vm->pc += 4; - frame->registers[dest] = v; + frame->locals[dest] = v; return true; } case OP_LOAD_ABS_32: { @@ -317,7 +317,7 @@ bool step_vm(VM *vm) { ptr = read_u32(vm, code, vm->pc); vm->pc += 4; v = read_u32(vm, memory, ptr); - frame->registers[dest] = v; + frame->locals[dest] = v; return true; } case OP_LOAD_ABS_16: { @@ -326,7 +326,7 @@ bool step_vm(VM *vm) { ptr = read_u32(vm, code, vm->pc); vm->pc += 4; v = read_u16(vm, memory, ptr); - frame->registers[dest] = v; + frame->locals[dest] = v; return true; } case OP_LOAD_ABS_8: { @@ -335,7 +335,7 @@ bool step_vm(VM *vm) { ptr = read_u32(vm, code, vm->pc); vm->pc += 4; v = read_u8(vm, memory, ptr); - frame->registers[dest] = v; + frame->locals[dest] = v; return true; } case OP_LOAD_IND_32: { @@ -343,9 +343,9 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - v = frame->registers[src1]; + v 
= frame->locals[src1]; ptr = read_u32(vm, memory, v); - frame->registers[dest] = ptr; + frame->locals[dest] = ptr; return true; } case OP_LOAD_IND_16: { @@ -354,9 +354,9 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - v = frame->registers[src1]; + v = frame->locals[src1]; v16 = read_u16(vm, memory, v); - frame->registers[dest] = v16; + frame->locals[dest] = v16; return true; } case OP_LOAD_IND_8: { @@ -365,9 +365,9 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - v = frame->registers[src1]; + v = frame->locals[src1]; v8 = read_u8(vm, memory, v); - frame->registers[dest] = v8; + frame->locals[dest] = v8; return true; } case OP_LOAD_OFF_8: { @@ -379,9 +379,9 @@ bool step_vm(VM *vm) { vm->pc++; offset = read_u32(vm, code, vm->pc); vm->pc += 4; - v = frame->registers[src1]; + v = frame->locals[src1]; v8 = read_u8(vm, memory, (v + offset)); - frame->registers[dest] = v8; + frame->locals[dest] = v8; return true; } case OP_LOAD_OFF_16: { @@ -393,9 +393,9 @@ bool step_vm(VM *vm) { vm->pc++; offset = read_u32(vm, code, vm->pc); vm->pc += 4; - v = frame->registers[src1]; + v = frame->locals[src1]; v16 = read_u16(vm, memory, (v + offset)); - frame->registers[dest] = v16; + frame->locals[dest] = v16; return true; } case OP_LOAD_OFF_32: { @@ -406,9 +406,9 @@ bool step_vm(VM *vm) { vm->pc++; offset = read_u32(vm, code, vm->pc); vm->pc += 4; - v = frame->registers[src1]; + v = frame->locals[src1]; ptr = read_u32(vm, memory, (v + offset)); - frame->registers[dest] = ptr; + frame->locals[dest] = ptr; return true; } case OP_STORE_ABS_32: { @@ -416,8 +416,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - v = frame->registers[src1]; - ptr = frame->registers[dest]; + v = frame->locals[src1]; + ptr = frame->locals[dest]; write_u32(vm, memory, ptr, v); return true; } @@ -426,8 +426,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - v = 
frame->registers[src1]; - ptr = frame->registers[dest]; + v = frame->locals[src1]; + ptr = frame->locals[dest]; write_u16(vm, memory, ptr, v); return true; } @@ -436,8 +436,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - v = frame->registers[src1]; - ptr = frame->registers[dest]; + v = frame->locals[src1]; + ptr = frame->locals[dest]; write_u8(vm, memory, ptr, v); return true; } @@ -446,8 +446,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - ptr = frame->registers[dest]; - v = frame->registers[src1]; + ptr = frame->locals[dest]; + v = frame->locals[src1]; write_u32(vm, memory, ptr, v); return true; } @@ -457,8 +457,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - ptr = frame->registers[dest]; - v16 = frame->registers[src1]; + ptr = frame->locals[dest]; + v16 = frame->locals[src1]; write_u16(vm, memory, ptr, v16); return true; } @@ -468,8 +468,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - ptr = frame->registers[dest]; - v8 = frame->registers[src1]; + ptr = frame->locals[dest]; + v8 = frame->locals[src1]; write_u8(vm, memory, ptr, v8); return true; } @@ -482,8 +482,8 @@ bool step_vm(VM *vm) { vm->pc++; offset = read_u32(vm, code, vm->pc); vm->pc += 4; - ptr = frame->registers[dest]; - v8 = frame->registers[src1]; + ptr = frame->locals[dest]; + v8 = frame->locals[src1]; write_u8(vm, memory, (ptr + offset), v8); return true; } @@ -496,8 +496,8 @@ bool step_vm(VM *vm) { vm->pc++; offset = read_u32(vm, code, vm->pc); vm->pc += 4; - ptr = frame->registers[dest]; - v16 = frame->registers[src1]; + ptr = frame->locals[dest]; + v16 = frame->locals[src1]; write_u16(vm, memory, (ptr + offset), v16); return true; } @@ -509,8 +509,8 @@ bool step_vm(VM *vm) { vm->pc++; offset = read_u32(vm, code, vm->pc); vm->pc += 4; - ptr = frame->registers[dest]; - v = frame->registers[src1]; + ptr = frame->locals[dest]; + v = frame->locals[src1]; 
write_u32(vm, memory, (ptr + offset), v); return true; } @@ -519,7 +519,7 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = frame->registers[src1]; + frame->locals[dest] = frame->locals[src1]; if (is_heap_value(vm, src1)) { set_heap_status(vm, dest, true); @@ -559,14 +559,14 @@ bool step_vm(VM *vm) { mode_reg = read_u8(vm, code, vm->pc); vm->pc++; - path_ptr = frame->registers[path_reg]; - mode = frame->registers[mode_reg]; + path_ptr = frame->locals[path_reg]; + mode = frame->locals[mode_reg]; dev = find_device_by_path(vm, (const char *)&vm->memory[path_ptr + 4]); if (dev) { if (dev->ops->open) { /* return device plex to user */ device_ptr = vm->mp; - frame->registers[dest_reg] = device_ptr; + frame->locals[dest_reg] = device_ptr; /* malloc size for device */ write_u32(vm, memory, device_ptr, dev->size); vm->mp += (dev->size + 4); @@ -594,9 +594,9 @@ bool step_vm(VM *vm) { size_reg = read_u8(vm, code, vm->pc); vm->pc++; - device_ptr = frame->registers[device_reg]; /* device pointer */ - buffer_ptr = frame->registers[buffer_reg]; - size = frame->registers[size_reg]; /* size */ + device_ptr = frame->locals[device_reg]; /* device pointer */ + buffer_ptr = frame->locals[buffer_reg]; + size = frame->locals[size_reg]; /* size */ handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; @@ -616,7 +616,7 @@ bool step_vm(VM *vm) { device_reg = read_u8(vm, code, vm->pc); vm->pc++; - device_ptr = frame->registers[device_reg]; /* device pointer */ + device_ptr = frame->locals[device_reg]; /* device pointer */ handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; if (dev && dev->ops->refresh) { @@ -639,9 +639,9 @@ bool step_vm(VM *vm) { size_reg = read_u8(vm, code, vm->pc); vm->pc++; - device_ptr = frame->registers[device_reg]; /* device pointer */ - buffer_ptr = frame->registers[buffer_reg]; /* R1: buffer pointer */ - size = frame->registers[size_reg]; /* 
R2: size */ + device_ptr = frame->locals[device_reg]; /* device pointer */ + buffer_ptr = frame->locals[buffer_reg]; /* R1: buffer pointer */ + size = frame->locals[size_reg]; /* R2: size */ handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; @@ -662,7 +662,7 @@ bool step_vm(VM *vm) { device_reg = read_u8(vm, code, vm->pc); vm->pc++; - device_ptr = frame->registers[device_reg]; /* device pointer */ + device_ptr = frame->locals[device_reg]; /* device pointer */ handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; @@ -687,9 +687,9 @@ bool step_vm(VM *vm) { args_ptr_reg = read_u8(vm, code, vm->pc); vm->pc++; - device_ptr = frame->registers[device_reg]; /* device pointer */ - cmd = frame->registers[cmd_reg]; /* R1: ioctl command */ - args_ptr = frame->registers[args_ptr_reg]; /* R2: args pointer */ + device_ptr = frame->locals[device_reg]; /* device pointer */ + cmd = frame->locals[cmd_reg]; /* R1: ioctl command */ + args_ptr = frame->locals[args_ptr_reg]; /* R2: args pointer */ handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; @@ -740,12 +740,12 @@ bool step_vm(VM *vm) { src1 = read_u8(vm, code, vm->pc); vm->pc++; - value = frame->registers[src1]; + value = frame->locals[src1]; if (value < 0) { value = -value; } - frame->registers[dest] = value; + frame->locals[dest] = value; return true; } case OP_NEG_INT: { @@ -754,8 +754,8 @@ bool step_vm(VM *vm) { src1 = read_u8(vm, code, vm->pc); vm->pc++; - value = frame->registers[src1]; - frame->registers[dest] = -value; + value = frame->locals[src1]; + frame->locals[dest] = -value; return true; } case OP_ADD_NAT: @@ -773,8 +773,8 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = - fixed_mul(frame->registers[src1], frame->registers[src2]); + frame->locals[dest] = + fixed_mul(frame->locals[src1], frame->locals[src2]); return true; } @@ -785,8 +785,8 @@ bool 
step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = - fixed_div(frame->registers[src1], frame->registers[src2]); + frame->locals[dest] = + fixed_div(frame->locals[src1], frame->locals[src2]); return true; } @@ -797,8 +797,8 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = - fixed_add(frame->registers[src1], frame->registers[src2]); + frame->locals[dest] = + fixed_add(frame->locals[src1], frame->locals[src2]); return true; } @@ -809,8 +809,8 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = - fixed_sub(frame->registers[src1], frame->registers[src2]); + frame->locals[dest] = + fixed_sub(frame->locals[src1], frame->locals[src2]); return true; } case OP_REAL_TO_INT: { @@ -818,9 +818,9 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - value = frame->registers[src1]; + value = frame->locals[src1]; - frame->registers[dest] = fixed_to_int(value); + frame->locals[dest] = fixed_to_int(value); return true; } @@ -829,7 +829,7 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = int_to_fixed(frame->registers[src1]); + frame->locals[dest] = int_to_fixed(frame->locals[src1]); return true; } case OP_REAL_TO_NAT: { @@ -837,8 +837,8 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - value = frame->registers[src1]; - frame->registers[dest] = fixed_to_int(value); + value = frame->locals[src1]; + frame->locals[dest] = fixed_to_int(value); return true; } case OP_NAT_TO_REAL: { @@ -846,7 +846,7 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - frame->registers[dest] = int_to_fixed(frame->registers[src1]); + frame->locals[dest] = int_to_fixed(frame->locals[src1]); return true; } case OP_JEQ_NAT: { @@ -909,9 +909,9 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); 
vm->pc++; - int_to_string(AS_INT(frame->registers[src1]), buffer); + int_to_string(AS_INT(frame->locals[src1]), buffer); ptr = str_alloc(vm, frame, buffer, strlength(buffer)); - frame->registers[dest] = ptr; + frame->locals[dest] = ptr; set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } @@ -921,9 +921,9 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - nat_to_string(frame->registers[src1], buffer); + nat_to_string(frame->locals[src1], buffer); ptr = str_alloc(vm, frame, buffer, strlength(buffer)); - frame->registers[dest] = ptr; + frame->locals[dest] = ptr; set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } @@ -933,10 +933,10 @@ bool step_vm(VM *vm) { vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; - fixed_to_string(AS_INT(frame->registers[src1]), buffer); + fixed_to_string(AS_INT(frame->locals[src1]), buffer); ptr = str_alloc(vm, frame, buffer, strlength(buffer)); /* copy buffer to dest */ - frame->registers[dest] = ptr; + frame->locals[dest] = ptr; set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } @@ -947,9 +947,9 @@ bool step_vm(VM *vm) { src1 = read_u8(vm, code, vm->pc); vm->pc++; - ptr = frame->registers[src1]; + ptr = frame->locals[src1]; length = read_u32(vm, memory, ptr); - frame->registers[dest] = length; + frame->locals[dest] = length; return true; } case OP_STRCAT: { diff --git a/test/add.asm.lisp b/test/add.asm.lisp index 488709f..1c783c7 100644 --- a/test/add.asm.lisp +++ b/test/add.asm.lisp @@ -5,7 +5,7 @@ (call &add ($0 $1) $2) (int-to-string $3 $2) (call &pln ($3) nil) - (halt 0)) + (exit 0)) (label add (add-int $2 $1 $0) diff --git a/test/add.ul.ir b/test/add.ul.ir index 352746b..3c54c04 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -26,12 +26,12 @@ function pln (str message is $0) str nl is $3 int nl_length is $4 - load_heap_immediate ts "/dev/term/0" # get terminal device + malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> 
mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/fib.asm.lisp b/test/fib.asm.lisp index 3e18589..6004071 100644 --- a/test/fib.asm.lisp +++ b/test/fib.asm.lisp @@ -4,7 +4,7 @@ (call &fib ($0) $0) (int-to-string $1 $0) (call &pln ($1) nil) - (halt 0)) + (exit 0)) (label fib (load-immediate $1 2) (jump-lt-int &base-case $0 $1) diff --git a/test/fib.ul.ir b/test/fib.ul.ir index b35c3fc..03760c8 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -33,12 +33,12 @@ function pln (str message is $0) str nl is $3 int nl_length is $4 - load_heap_immediate ts "/dev/term/0" # get terminal device + malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/hello.asm.lisp b/test/hello.asm.lisp index d5e2a58..f14933f 100644 --- a/test/hello.asm.lisp +++ b/test/hello.asm.lisp @@ -2,7 +2,7 @@ (label main (load-immediate $1 &hello-str) ; load hello string ptr (call &pln ($1) nil) - (halt 0)) ; done + (exit 0)) ; done (label pln (load-immediate $1 &terminal-namespace) ; get terminal device (load-immediate $11 0) diff --git a/test/hello.ul.ir b/test/hello.ul.ir index cd2a305..67e729e 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -1,7 +1,7 @@ function main () str hello is $0 - load_heap_immediate "nuqneH 'u'?" -> hello + malloc_immediate "nuqneH 'u'?" 
-> hello call pln hello exit 0 @@ -12,12 +12,12 @@ function pln (str message is $0) int nl_length is $4 int mode is $5 - load_heap_immediate "/dev/term/0" -> ts # get terminal device + malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return \ No newline at end of file diff --git a/test/loop.asm.lisp b/test/loop.asm.lisp index 6be6c73..7e2b95e 100644 --- a/test/loop.asm.lisp +++ b/test/loop.asm.lisp @@ -27,7 +27,7 @@ (call &pln ($4) nil) (real-to-string $3 $0) (call &pln ($3) nil) - (halt 0)) + (exit 0)) (label pln (load-immediate $1 &terminal-namespace) ; get terminal device (load-immediate $11 0) diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 542f148..88f7660 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -14,13 +14,13 @@ function main () add_real a $5 -> a add_int i $3 -> i jump_ge_int &loop_body i $2 - load_heap_immediate "/dev/term/0" -> term + malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); nat b is $1 real_to_nat a -> b - load_heap_immediate "Enter a string:" -> $7 + malloc_immediate "Enter a string:" -> $7 string_length $7 -> $8 syscall WRITE term $7 $8 # print prompt @@ -43,12 +43,12 @@ function pln (str message is $0) str nl is $3 int nl_length is $4 - load_heap_immediate "/dev/term/0" -> ts + malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts # get terminal device strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return \ No newline at end of file diff --git a/test/malloc.asm.lisp b/test/malloc.asm.lisp index 382e36a..4f76cf6 100644 --- a/test/malloc.asm.lisp +++ 
b/test/malloc.asm.lisp @@ -12,7 +12,7 @@ (syscall READ $0 $4 $1) ; read the string (call &pln ($0 $4) nil) ; print the string - (halt 0)) + (exit 0)) (label pln (load-immediate $3 &new-line) (string-length $2 $1) diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index ea123d7..ef7f66b 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -3,11 +3,11 @@ function main () int mode is $11 str term is $10 - load_heap_immediate "/dev/term/0" -> term + malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); - load_heap_immediate "Enter a string:" -> $7 + malloc_immediate "Enter a string:" -> $7 string_length $7 -> $8 syscall WRITE term $7 $8 # print prompt @@ -26,11 +26,11 @@ function pln (str message is $0) str nl is $3 int nl_length is $4 - load_heap_immediate "/dev/term/0" -> ts + malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts # get terminal device strlen message -> msg_length syscall WRITE ts message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length diff --git a/test/paint-bw.asm.lisp b/test/paint-bw.asm.lisp index d927e30..81bf808 100644 --- a/test/paint-bw.asm.lisp +++ b/test/paint-bw.asm.lisp @@ -66,8 +66,8 @@ (jump &draw-loop)) - ; Flush and halt - (halt 0)) + ; Flush and exit + (exit 0)) (label set-color-if-clicked ; (click_x, click_y, box_x, box_y, color, box_size) diff --git a/test/paint-bw.ul.ir b/test/paint-bw.ul.ir index dbcf55d..ab40219 100644 --- a/test/paint-bw.ul.ir +++ b/test/paint-bw.ul.ir @@ -94,7 +94,7 @@ function main () jump &draw_loop - # Flush and halt + # Flush and exit exit 0 function set_color_if_clicked (int click_x is $0, int click_y is $1, diff --git a/test/paint.asm.lisp b/test/paint.asm.lisp index 63d3b80..2e946d5 100644 --- a/test/paint.asm.lisp +++ b/test/paint.asm.lisp @@ -153,8 +153,8 @@ (jump &draw-loop)) - ; Flush and halt - (halt 
0)) + ; Flush and exit + (exit 0)) (label set-color-if-clicked ; (click_x, click_y, box_x, box_y, color, box_size) diff --git a/test/paint.ul.ir b/test/paint.ul.ir index dbcf55d..ab40219 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -94,7 +94,7 @@ function main () jump &draw_loop - # Flush and halt + # Flush and exit exit 0 function set_color_if_clicked (int click_x is $0, int click_y is $1, diff --git a/test/simple.asm.lisp b/test/simple.asm.lisp index faa33ff..1cd4d01 100644 --- a/test/simple.asm.lisp +++ b/test/simple.asm.lisp @@ -5,7 +5,7 @@ (add-real $2 $1 $0) (real-to-string $3 $2) (call &pln ($3) nil) - (halt 0)) + (exit 0)) (label pln (load-immediate $1 &terminal-namespace) ; get terminal device (load-immediate $11 0) diff --git a/test/simple.ul.ir b/test/simple.ul.ir index be015d4..8f549d9 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -20,12 +20,12 @@ function pln (str message is $0) int nl_length is $4 int mode is $5 - load_heap_immediate "/dev/term/0" -> term # get terminal device + malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode syscall OPEN term mode -> term strlen message -> msg_length syscall WRITE term message msg_length - load_heap_immediate "\n" -> nl + malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE term nl nl_length return diff --git a/test/window.asm.lisp b/test/window.asm.lisp index 0340856..b697ebe 100644 --- a/test/window.asm.lisp +++ b/test/window.asm.lisp @@ -52,7 +52,7 @@ (syscall WRITE $0 $21 $22) ; redraw (jump &draw-loop)) - (halt 0)) + (exit 0)) (label pln (load-immediate $1 &terminal-namespace) ; get terminal device (load-immediate $11 0) diff --git a/test/window.ul.vuir b/test/window.ul.vuir index a7c9aac..8ac09b0 100644 --- a/test/window.ul.vuir +++ b/test/window.ul.vuir @@ -40,21 +40,21 @@ function main () { byte left_down = mouse.left; - if (left_down == 0) continue; + if (left_down == 1) { + nat x = mouse.x; + nat y = mouse.y; - nat x = mouse.x; - nat y = mouse.y; + // 
Compute start address: y*width + x + nat pixel_pos = y * width; // = y * width + pixel_pos = x + pixel_pos; // += x + pixel_pos = screen_buffer + pixel_pos; // += pixel_offset + nat fat_ptr_size = 4; // need to add offset for fat pointer size + pixel_pos = pixel_pos + fat_ptr_size; - // Compute start address: y*width + x - nat pixel_pos = y * width; // = y * width - pixel_pos = x + pixel_pos; // += x - pixel_pos = screen_buffer + pixel_pos; // += pixel_offset - nat fat_ptr_size = 4; // need to add offset for fat pointer size - pixel_pos = pixel_pos + fat_ptr_size; - - byte color = WHITE; - store_absolute_8(pixel_pos, color); // draw color at screen [x,y] - write(screen, screen_buffer, buffer_size); // redraw + byte color = WHITE; + store_absolute_8(pixel_pos, color); // draw color at screen [x,y] + write(screen, screen_buffer, buffer_size); // redraw + } } exit(0); } From 0113411f899e21521b4dc740dc11ff37a7b0002c Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 9 Nov 2025 22:23:55 -0800 Subject: [PATCH 06/27] rename some things, create new parser defs. --- README.org | 2 +- ROADMAP.org | 6 ++-- docs/SPECIFICATION.org | 2 +- src/tools/assembler/assembler.c | 64 ++++++++++++++++++++++++++++----- src/vm/device.c | 9 ++--- src/vm/opcodes.h | 11 +++--- src/vm/vm.c | 2 +- test/paint-bw.ul | 2 +- test/paint.ul | 2 +- 9 files changed, 71 insertions(+), 29 deletions(-) diff --git a/README.org b/README.org index 640f1fb..5386796 100644 --- a/README.org +++ b/README.org @@ -17,7 +17,7 @@ * Undâr -Undâr is a programming language for the purpose of creating 3D games and graphical user interfaces that work on constrained systems, microcontrollers, retro consoles, and the web using emscripten. The language emphasizes hardware longevity, energy efficiency, and the preservation of digital art and games for future generations. 
+Undâr is a programming language for the purpose of creating 3D games and graphical user traits that work on constrained systems, microcontrollers, retro consoles, and the web using emscripten. The language emphasizes hardware longevity, energy efficiency, and the preservation of digital art and games for future generations. It has an internal REPL that allows for quick development as well as the ability to dump the program to a binary rom for preserving that program/game/etc. diff --git a/ROADMAP.org b/ROADMAP.org index d408d62..a85fabe 100644 --- a/ROADMAP.org +++ b/ROADMAP.org @@ -214,7 +214,7 @@ function main(int argc, str[] argv) { A =plex= is a structure which, works like a struct but syntactically looks like a class. the naming of "plex" comes from douglas ross's paper "a generalized technique for symbol manipulation and numerical calculation". It is used instead of "class" or "struct" is to make a break from the historical baggage of "classes". unlike classes it does not describe a object in real life and copy it, but it allows for a convenient way to mediate states and handling. i.e. compositions + encodings instead of oop/polymorphisms. for example, instead of having a struct with a bool flag in it (like for a game alive/dead), we can create a multilist where the structs move from a "alive" list to a "dead" list; or things like that. instances of a plex in memory are called "atoms". -Plexes support permacomputing by allowing the developer to use lower level interfaces with a friendly syntax. +Plexes support permacomputing by allowing the developer to use lower level traits with a friendly syntax. **WIP syntax, not final implementation** @@ -260,7 +260,7 @@ core devices include - terminal - tunnel -Devices are accessed via a namespace "path/to/device" and are implemented outside of the VM's runtime. 
This allows for the interface of the system to be the same within the VM but allow for specific variations on a concept depending on the device it is running on. +Devices are accessed via a namespace "path/to/device" and are implemented outside of the VM's runtime. This allows for the trait of the system to be the same within the VM but allow for specific variations on a concept depending on the device it is running on. *** Immediate Mode GUI @@ -272,7 +272,7 @@ UI elements are draw directly on the canvas and syntactically are spacial to sho ** Tunnels: Unified I/O (Plan 9 / 9P-Inspired) -Tunnels are an abstraction called a tunnel which acts like a device like a screen, mouse, etc. that is inspired by Plan9. Plan 9 was an operating system developed at Bell Labs that treated all resources (including network connections and UI elements) as files in a hierarchical namespace. it allows files, web requests, sockets, etc. to be viewed through a simple unified interface. it is very similar to the 9p protocol where everything is a file in a filesystem, it just might be a file located on a server halfway across the world or on another planet. the only thing that is the difference is that it takes longer to read the file on mars compared to the file on your local system. the way the device system works is that it is written by a developer in another language like c, but from Undâr's pov the interface remains the same, its just a namespaced device like all the other devices, so that it is easy to understand by a new developer. so it is comparable with tcp/udp sockets or something esoteric like a lorawan network, or some new communication method that hasn't been invented yet. +Tunnels are an abstraction called a tunnel which acts like a device like a screen, mouse, etc. that is inspired by Plan9. Plan 9 was an operating system developed at Bell Labs that treated all resources (including network connections and UI elements) as files in a hierarchical namespace. 
it allows files, web requests, sockets, etc. to be viewed through a simple unified trait. it is very similar to the 9p protocol where everything is a file in a filesystem, it just might be a file located on a server halfway across the world or on another planet. the only thing that is the difference is that it takes longer to read the file on mars compared to the file on your local system. the way the device system works is that it is written by a developer in another language like c, but from Undâr's pov the trait remains the same, its just a namespaced device like all the other devices, so that it is easy to understand by a new developer. so it is comparable with tcp/udp sockets or something esoteric like a lorawan network, or some new communication method that hasn't been invented yet. **WIP syntax, not final implementation** diff --git a/docs/SPECIFICATION.org b/docs/SPECIFICATION.org index 00770f1..7d099db 100644 --- a/docs/SPECIFICATION.org +++ b/docs/SPECIFICATION.org @@ -261,7 +261,7 @@ scope closes the tunnel note the plex must always be of a plex which is "tunnel-able" i.e. Files, sockets, etc -Tunnels have almost the same interface as 9p since they are closely +Tunnels have almost the same trait as 9p since they are closely based on 9p. 
*** transplexs for tunnels diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 93d8a19..ca8ca68 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,34 +1,82 @@ #include "assembler.h" #include +typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION + } SymbolType; + typedef struct field_s { - char* name; - TokenType type; + char *name; + SymbolType type; u32 offset; u32 size; } Field; -typedef struct plex_def_s { - char* name; +typedef struct function_def_s { + char *name; + SymbolType args[8]; + u8 arg_count; + SymbolType return_type; +} FunctionDef; + +typedef struct trait_def_s { + char *name; + Field *fields; u32 field_count; + FunctionDef *methods; + u32 method_count; +} TraitDef; + +typedef struct plex_def_s { + char *name; u32 logical_size; u32 physical_size; - Field *fields; + Field *fields; + u32 field_count; + TraitDef *traits; + u32 trait_count; + FunctionDef *methods; + u32 method_count; } PlexDef; typedef struct array_def_s { - TokenType type; + SymbolType type; u32 length; u32 logical_size; u32 physical_size; // logical_size * type_size + 4 + union { + PlexDef *plex; + struct array_def_s *array; + } ref; } ArrayDef; typedef struct symbol_s { char *name; u32 address; - TokenType type; + ScopeType scope; + SymbolType type; u32 logical_size; u32 physical_size; // logical_size * type_size + 4 + union { + PlexDef *plex; + ArrayDef *array; + FunctionDef *function; + } ref; } Symbol; typedef struct symbol_tab_s { @@ -52,4 +100,4 @@ void assemble(VM *vm, char *source) { token.length, token.start); } } while (token.type != TOKEN_EOF); -} \ No newline at end of file +} diff --git a/src/vm/device.c b/src/vm/device.c index ff2cb7d..30b472e 100644 --- a/src/vm/device.c +++ b/src/vm/device.c @@ -10,12 +10,9 @@ i32 vm_register_device(VM *vm, const char *path, const char *type, void 
*data, dev = &vm->devices[vm->dc]; dev->handle = vm->dc++; - strcopy(dev->path, path, DEVICE_PATH_MAX_LENGTH); - dev->path[DEVICE_PATH_MAX_LENGTH - 1] = '\0'; - - strcopy(dev->type, type, DEVICE_TYPE_MAX_LENGTH); - dev->type[DEVICE_TYPE_MAX_LENGTH - 1] = '\0'; - + + dev->path = path; + dev->type = type; dev->data = data; dev->ops = ops; dev->size = size; diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index f74a18f..fe22ff4 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -95,9 +95,9 @@ typedef enum { OP_STRING_TO_REAL /* string_to_real : locals[dest] = src1 as real */ } Opcode; -#define MAX_REGS 32 +#define MAX_LOCALS 32 typedef struct frame_s { - u32 locals[MAX_REGS]; /* R0-R31 */ + u32 locals[MAX_LOCALS]; /* $0-$31 */ u32 start; /* start of memory block */ u32 end; /* end of memory block */ u32 return_reg; /* register to store return value in parent */ @@ -124,13 +124,10 @@ typedef struct device_ops_s { i32 (*refresh)(void *device_data, u8 *buffer); } DeviceOps; -#define DEVICE_TYPE_MAX_LENGTH 16 /* 15 chars + null terminator */ -#define DEVICE_PATH_MAX_LENGTH 64 /* 63 chars + null terminator */ typedef struct device_s { - char type[DEVICE_TYPE_MAX_LENGTH]; /* e.g., "screen", "mouse", "gpio" */ - char path[DEVICE_PATH_MAX_LENGTH]; /* "/dev/screen", "/dev/input/mouse/0", - etc. */ + const char *type; /* e.g., "screen", "mouse", "gpio" */ + const char *path; /* "/dev/screen", "/dev/input/mouse/0", etc. */ void *data; /* device_specific data */ DeviceOps *ops; /* operations vtable */ u32 flags; /* permissions, status, etc. 
*/ diff --git a/src/vm/vm.c b/src/vm/vm.c index a9a27b3..e606efd 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -98,7 +98,7 @@ bool step_vm(VM *vm) { return false; } case OP_CALL: { - u8 N, return_reg, src_reg, args[MAX_REGS]; + u8 N, return_reg, src_reg, args[MAX_LOCALS]; Frame *child; u32 jmp, heap_mask, i; diff --git a/test/paint-bw.ul b/test/paint-bw.ul index a4d036c..802eccd 100644 --- a/test/paint-bw.ul +++ b/test/paint-bw.ul @@ -9,7 +9,7 @@ const byte LIGHT_GRAY = 182; byte selected_color = 255; -interface Device { +trait Device { nat handle; } diff --git a/test/paint.ul b/test/paint.ul index 9419005..1ae3af2 100644 --- a/test/paint.ul +++ b/test/paint.ul @@ -9,7 +9,7 @@ const byte LIGHT_GRAY = 182; byte selected_color = 255; -interface Device { +trait Device { nat handle; } From 32365a889532c78b2ab77ef42b8d1491adf16c2a Mon Sep 17 00:00:00 2001 From: zongor Date: Mon, 10 Nov 2025 23:45:09 -0800 Subject: [PATCH 07/27] wip assembler v2 --- src/tools/assembler/assembler.c | 196 +++++++++++++++----------- src/tools/assembler/lexer.c | 238 ++++++++++++++++++++++---------- src/tools/assembler/lexer.h | 21 ++- src/tools/compiler/compiler.h | 90 ++++++++++++ 4 files changed, 384 insertions(+), 161 deletions(-) create mode 100644 src/tools/compiler/compiler.h diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index ca8ca68..c137e64 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,89 +1,22 @@ +#include "../../vm/libc.h" #include "assembler.h" #include -typedef enum { GLOBAL, LOCAL } ScopeType; -typedef enum { - VOID, - BOOL, - I8, - I16, - I32, - U8, - U16, - U32, - F8, - F16, - F32, - STR, - PLEX, - ARRAY, - FUNCTION - } SymbolType; +bool global() { + Token token = nextToken(); + if (token.type == TOKEN_KEYWORD_CONST) { + token = nextToken(); + } -typedef struct field_s { - char *name; - SymbolType type; - u32 offset; - u32 size; -} Field; + if (token.type == TOKEN_TYPE_INT || token.type == 
TOKEN_TYPE_NAT || + token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { + return true; + } -typedef struct function_def_s { - char *name; - SymbolType args[8]; - u8 arg_count; - SymbolType return_type; -} FunctionDef; + return false; +} -typedef struct trait_def_s { - char *name; - Field *fields; - u32 field_count; - FunctionDef *methods; - u32 method_count; -} TraitDef; - -typedef struct plex_def_s { - char *name; - u32 logical_size; - u32 physical_size; - Field *fields; - u32 field_count; - TraitDef *traits; - u32 trait_count; - FunctionDef *methods; - u32 method_count; -} PlexDef; - -typedef struct array_def_s { - SymbolType type; - u32 length; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - struct array_def_s *array; - } ref; -} ArrayDef; - -typedef struct symbol_s { - char *name; - u32 address; - ScopeType scope; - SymbolType type; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - ArrayDef *array; - FunctionDef *function; - } ref; -} Symbol; - -typedef struct symbol_tab_s { - Symbol *symbols; - int count; - int capacity; -} SymbolTable; +void function() {} void assemble(VM *vm, char *source) { USED(vm); @@ -98,6 +31,109 @@ void assemble(VM *vm, char *source) { if (token.type != TOKEN_EOF) { printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), token.length, token.start); + + if (token.type == TOKEN_KEYWORD_GLOBAL) { + if (!global()) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } + } + + if (token.type == TOKEN_KEYWORD_FN) { + function(); + } + + if (token.type == TOKEN_IDENTIFIER) { + if (streq(token.start, "exit")) { + } else if (streq(token.start, "call")) { + } else if (streq(token.start, "syscall")) { + } else if (streq(token.start, "load_immediate")) { + } else if (streq(token.start, "load_indirect_8")) { + } else if (streq(token.start, "load_indirect_16")) { + } else if 
(streq(token.start, "load_indirect_32")) { + } else if (streq(token.start, "load_absolute_8")) { + } else if (streq(token.start, "load_absolute_16")) { + } else if (streq(token.start, "load_absolute_32")) { + } else if (streq(token.start, "load_offset_8")) { + } else if (streq(token.start, "load_offset_16")) { + } else if (streq(token.start, "load_offset_32")) { + } else if (streq(token.start, "store_absolute_8")) { + } else if (streq(token.start, "store_absolute_16")) { + } else if (streq(token.start, "store_absolute_32")) { + } else if (streq(token.start, "store_indirect_8")) { + } else if (streq(token.start, "store_indirect_16")) { + } else if (streq(token.start, "store_indirect_32")) { + } else if (streq(token.start, "store_offset_8")) { + } else if (streq(token.start, "store_offset_16")) { + } else if (streq(token.start, "store_offset_32")) { + } else if (streq(token.start, "malloc")) { + } else if (streq(token.start, "malloc_immediate")) { + } else if (streq(token.start, "memset_8")) { + } else if (streq(token.start, "memset_16")) { + } else if (streq(token.start, "memset_32")) { + } else if (streq(token.start, "register_move")) { + } else if (streq(token.start, "add_int")) { + } else if (streq(token.start, "sub_int")) { + } else if (streq(token.start, "mul_int")) { + } else if (streq(token.start, "div_int")) { + } else if (streq(token.start, "abs_int")) { + } else if (streq(token.start, "neg_int")) { + } else if (streq(token.start, "add_nat")) { + } else if (streq(token.start, "sub_nat")) { + } else if (streq(token.start, "mul_nat")) { + } else if (streq(token.start, "div_nat")) { + } else if (streq(token.start, "abs_nat")) { + } else if (streq(token.start, "neg_nat")) { + } else if (streq(token.start, "add_real")) { + } else if (streq(token.start, "sub_real")) { + } else if (streq(token.start, "mul_real")) { + } else if (streq(token.start, "div_real")) { + } else if (streq(token.start, "abs_real")) { + } else if (streq(token.start, "neg_real")) { + } else 
if (streq(token.start, "int_to_real")) { + } else if (streq(token.start, "nat_to_real")) { + } else if (streq(token.start, "real_to_int")) { + } else if (streq(token.start, "real_to_nat")) { + } else if (streq(token.start, "bit_shift_left")) { + } else if (streq(token.start, "bit_shift_right")) { + } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (streq(token.start, "bit_and")) { + } else if (streq(token.start, "bit_or")) { + } else if (streq(token.start, "bit_xor")) { + } else if (streq(token.start, "jump")) { + } else if (streq(token.start, "jump_if_flag")) { + } else if (streq(token.start, "jump_eq_int")) { + } else if (streq(token.start, "jump_neq_int")) { + } else if (streq(token.start, "jump_gt_int")) { + } else if (streq(token.start, "jump_lt_int")) { + } else if (streq(token.start, "jump_le_int")) { + } else if (streq(token.start, "jump_ge_int")) { + } else if (streq(token.start, "jump_eq_nat")) { + } else if (streq(token.start, "jump_neq_nat")) { + } else if (streq(token.start, "jump_gt_nat")) { + } else if (streq(token.start, "jump_lt_nat")) { + } else if (streq(token.start, "jump_le_nat")) { + } else if (streq(token.start, "jump_ge_nat")) { + } else if (streq(token.start, "jump_eq_real")) { + } else if (streq(token.start, "jump_neq_real")) { + } else if (streq(token.start, "jump_ge_real")) { + } else if (streq(token.start, "jump_gt_real")) { + } else if (streq(token.start, "jump_lt_real")) { + } else if (streq(token.start, "jump_le_real")) { + } else if (streq(token.start, "string_length")) { + } else if (streq(token.start, "string_eq")) { + } else if (streq(token.start, "string_concat")) { + } else if (streq(token.start, "string_get_char")) { + } else if (streq(token.start, "string_find_char")) { + } else if (streq(token.start, "string_slice")) { + } else if (streq(token.start, "int_to_string")) { + } else if (streq(token.start, "nat_to_string")) { + } else if (streq(token.start, "real_to_string")) { + } else if (streq(token.start, 
"string_to_int")) { + } else if (streq(token.start, "string_to_nat")) { + } else if (streq(token.start, "string_to_real")) { + } + } } } while (token.type != TOKEN_EOF); } diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index b634dc5..8c26f61 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -81,19 +81,20 @@ static void skipWhitespace() { case '/': if (peekNext() == '/') { // Single-line comment: skip until newline or end of file - advance(); + advance(); while (peek() != '\n' && !isAtEnd()) advance(); } else if (peekNext() == '*') { // Multi-line comment: skip until '*/' or end of file - advance(); - advance(); + advance(); + advance(); while (!isAtEnd()) { - if (peek() == '\n') lexer.line++; + if (peek() == '\n') + lexer.line++; if (peek() == '*' && peekNext() == '/') { - advance(); - advance(); - break; // Exit loop, comment ended + advance(); + advance(); + break; // Exit loop, comment ended } advance(); } @@ -120,7 +121,15 @@ static TokenType checkKeyword(int start, int length, const char *rest, static TokenType identifierType() { switch (lexer.start[0]) { case 'a': - return checkKeyword(1, 2, "nd", TOKEN_OPERATOR_AND); + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'n': + return checkKeyword(2, 1, "d", TOKEN_OPERATOR_AND); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS); + } + } + break; case 'c': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { @@ -130,7 +139,7 @@ static TokenType identifierType() { return checkKeyword(2, 3, "nst", TOKEN_KEYWORD_CONST); } } - break; + break; case 'e': return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); case 'f': @@ -149,13 +158,15 @@ static TokenType identifierType() { switch (lexer.start[1]) { case 'f': return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); case 'n': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { case 'i': return 
checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT); case 't': - return checkKeyword(3, 1, "", TOKEN_TYPE_INT); + return checkKeyword(3, 0, "", TOKEN_TYPE_INT); } } break; @@ -215,7 +226,7 @@ static TokenType identifierType() { return checkKeyword(2, 1, "r", TOKEN_TYPE_STR); } } - break; + break; case 't': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { @@ -244,6 +255,8 @@ static TokenType identifierType() { } } break; + case 'g': + return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); } return TOKEN_IDENTIFIER; @@ -267,10 +280,10 @@ static Token number() { while (isDigit(peek())) advance(); - return makeToken(TOKEN_FLOAT_LITERAL); + return makeToken(TOKEN_LITERAL_REAL); } - return makeToken(TOKEN_INT_LITERAL); + return makeToken(TOKEN_LITERAL_INT); } static Token string() { @@ -285,7 +298,7 @@ static Token string() { /* The closing quote. */ advance(); - return makeToken(TOKEN_STRING_LITERAL); + return makeToken(TOKEN_LITERAL_STR); } Token nextToken() { @@ -321,11 +334,17 @@ Token nextToken() { case '.': return makeToken(TOKEN_DOT); case '-': - return makeToken(TOKEN_MINUS); + return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); case '+': return makeToken(TOKEN_PLUS); case '/': return makeToken(TOKEN_SLASH); + case '&': + return makeToken(match('&') ? 
TOKEN_AND_AND : TOKEN_AND); + case '#': + return makeToken(TOKEN_MESH); + case '$': + return makeToken(TOKEN_BIG_MONEY); case '*': return makeToken(TOKEN_STAR); case '!': @@ -343,64 +362,135 @@ Token nextToken() { return errorToken("Unexpected character."); } -const char* tokenTypeToString(TokenType type) { +const char *tokenTypeToString(TokenType type) { switch (type) { - case TOKEN_EOF: return "EOF"; - case TOKEN_IDENTIFIER: return "IDENTIFIER"; - case TOKEN_INT_LITERAL: return "INT_LITERAL"; - case TOKEN_UINT_LITERAL: return "UINT_LITERAL"; - case TOKEN_FLOAT_LITERAL: return "FLOAT_LITERAL"; - case TOKEN_STRING_LITERAL: return "STRING_LITERAL"; - case TOKEN_TYPE_INT: return "TYPE_INT"; - case TOKEN_TYPE_NAT: return "TYPE_NAT"; - case TOKEN_TYPE_REAL: return "TYPE_REAL"; - case TOKEN_TYPE_STR: return "TYPE_STR"; - case TOKEN_KEYWORD_PLEX: return "KEYWORD_PLEX"; - case TOKEN_KEYWORD_FN: return "KEYWORD_FN"; - case TOKEN_KEYWORD_CONST: return "KEYWORD_CONST"; - case TOKEN_KEYWORD_IF: return "KEYWORD_IF"; - case TOKEN_KEYWORD_ELSE: return "KEYWORD_ELSE"; - case TOKEN_KEYWORD_WHILE: return "KEYWORD_WHILE"; - case TOKEN_KEYWORD_FOR: return "KEYWORD_FOR"; - case TOKEN_KEYWORD_RETURN: return "KEYWORD_RETURN"; - case TOKEN_KEYWORD_USE: return "KEYWORD_USE"; - case TOKEN_KEYWORD_INIT: return "KEYWORD_INIT"; - case TOKEN_KEYWORD_THIS: return "KEYWORD_THIS"; - case TOKEN_KEYWORD_OPEN: return "TOKEN_KEYWORD_OPEN"; - case TOKEN_KEYWORD_READ: return "TOKEN_KEYWORD_READ"; - case TOKEN_KEYWORD_WRITE: return "TOKEN_KEYWORD_WRITE"; - case TOKEN_KEYWORD_REFRESH: return "TOKEN_KEYWORD_REFRESH"; - case TOKEN_KEYWORD_CLOSE: return "TOKEN_KEYWORD_CLOSE"; - case TOKEN_KEYWORD_NIL: return "KEYWORD_NIL"; - case TOKEN_KEYWORD_TRUE: return "KEYWORD_TRUE"; - case TOKEN_KEYWORD_FALSE: return "KEYWORD_FALSE"; - case TOKEN_OPERATOR_IS: return "OPERATOR_IS"; - case TOKEN_OPERATOR_NOT: return "OPERATOR_NOT"; - case TOKEN_OPERATOR_AND: return "OPERATOR_AND"; - case TOKEN_OPERATOR_OR: return 
"OPERATOR_OR"; - case TOKEN_BANG: return "BANG"; - case TOKEN_BANG_EQ: return "BANG_EQ"; - case TOKEN_EQ: return "EQ"; - case TOKEN_EQ_EQ: return "EQ_EQ"; - case TOKEN_GT: return "GT"; - case TOKEN_LT: return "LT"; - case TOKEN_GTE: return "GTE"; - case TOKEN_LTE: return "LTE"; - case TOKEN_DOT: return "DOT"; - case TOKEN_COMMA: return "COMMA"; - case TOKEN_COLON: return "COLON"; - case TOKEN_SEMICOLON: return "SEMICOLON"; - case TOKEN_PLUS: return "PLUS"; - case TOKEN_MINUS: return "MINUS"; - case TOKEN_STAR: return "STAR"; - case TOKEN_SLASH: return "SLASH"; - case TOKEN_LPAREN: return "LPAREN"; - case TOKEN_RPAREN: return "RPAREN"; - case TOKEN_LBRACE: return "LBRACE"; - case TOKEN_RBRACE: return "RBRACE"; - case TOKEN_LBRACKET: return "LBRACKET"; - case TOKEN_RBRACKET: return "RBRACKET"; - case TOKEN_ERROR: return "ERROR"; - default: return "UNKNOWN_TOKEN"; + case TOKEN_EOF: + return "EOF"; + case TOKEN_IDENTIFIER: + return "IDENTIFIER"; + case TOKEN_LITERAL_INT: + return "LITERAL_INT"; + case TOKEN_LITERAL_NAT: + return "LITERAL_NAT"; + case TOKEN_LITERAL_REAL: + return "LITERAL_REAL"; + case TOKEN_LITERAL_STR: + return "LITERAL_STR"; + case TOKEN_TYPE_INT: + return "TYPE_INT"; + case TOKEN_TYPE_NAT: + return "TYPE_NAT"; + case TOKEN_TYPE_REAL: + return "TYPE_REAL"; + case TOKEN_TYPE_STR: + return "TYPE_STR"; + case TOKEN_KEYWORD_PLEX: + return "KEYWORD_PLEX"; + case TOKEN_KEYWORD_FN: + return "KEYWORD_FN"; + case TOKEN_KEYWORD_CONST: + return "KEYWORD_CONST"; + case TOKEN_KEYWORD_IF: + return "KEYWORD_IF"; + case TOKEN_KEYWORD_IS: + return "IS"; + case TOKEN_KEYWORD_AS: + return "AS"; + case TOKEN_KEYWORD_ELSE: + return "KEYWORD_ELSE"; + case TOKEN_KEYWORD_WHILE: + return "KEYWORD_WHILE"; + case TOKEN_KEYWORD_FOR: + return "KEYWORD_FOR"; + case TOKEN_KEYWORD_RETURN: + return "KEYWORD_RETURN"; + case TOKEN_KEYWORD_USE: + return "KEYWORD_USE"; + case TOKEN_KEYWORD_INIT: + return "KEYWORD_INIT"; + case TOKEN_KEYWORD_THIS: + return "KEYWORD_THIS"; + case 
TOKEN_KEYWORD_OPEN: + return "TOKEN_KEYWORD_OPEN"; + case TOKEN_KEYWORD_READ: + return "TOKEN_KEYWORD_READ"; + case TOKEN_KEYWORD_WRITE: + return "TOKEN_KEYWORD_WRITE"; + case TOKEN_KEYWORD_REFRESH: + return "TOKEN_KEYWORD_REFRESH"; + case TOKEN_KEYWORD_CLOSE: + return "TOKEN_KEYWORD_CLOSE"; + case TOKEN_KEYWORD_NIL: + return "KEYWORD_NIL"; + case TOKEN_KEYWORD_TRUE: + return "KEYWORD_TRUE"; + case TOKEN_KEYWORD_FALSE: + return "KEYWORD_FALSE"; + case TOKEN_KEYWORD_GLOBAL: + return "KEYWORD_GLOBAL"; + case TOKEN_OPERATOR_NOT: + return "OPERATOR_NOT"; + case TOKEN_OPERATOR_AND: + return "OPERATOR_AND"; + case TOKEN_OPERATOR_OR: + return "OPERATOR_OR"; + case TOKEN_BANG: + return "BANG"; + case TOKEN_BANG_EQ: + return "BANG_EQ"; + case TOKEN_EQ: + return "EQ"; + case TOKEN_EQ_EQ: + return "EQ_EQ"; + case TOKEN_GT: + return "GT"; + case TOKEN_LT: + return "LT"; + case TOKEN_GTE: + return "GTE"; + case TOKEN_LTE: + return "LTE"; + case TOKEN_DOT: + return "DOT"; + case TOKEN_COMMA: + return "COMMA"; + case TOKEN_COLON: + return "COLON"; + case TOKEN_SEMICOLON: + return "SEMICOLON"; + case TOKEN_PLUS: + return "PLUS"; + case TOKEN_MINUS: + return "MINUS"; + case TOKEN_STAR: + return "STAR"; + case TOKEN_SLASH: + return "SLASH"; + case TOKEN_LPAREN: + return "LPAREN"; + case TOKEN_RPAREN: + return "RPAREN"; + case TOKEN_LBRACE: + return "LBRACE"; + case TOKEN_RBRACE: + return "RBRACE"; + case TOKEN_LBRACKET: + return "LBRACKET"; + case TOKEN_RBRACKET: + return "RBRACKET"; + case TOKEN_ARROW_LEFT: + return "ARROW_LEFT"; + case TOKEN_MESH: + return "MESH"; + case TOKEN_BIG_MONEY: + return "BIG_MONEY"; + case TOKEN_AND: + return "AND"; + case TOKEN_AND_AND: + return "AND_AND"; + case TOKEN_ERROR: + return "ERROR"; + default: + return "UNKNOWN_TOKEN"; } -} \ No newline at end of file +} diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index 086cc31..4b7a8ae 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -4,10 +4,10 @@ 
typedef enum { TOKEN_EOF, TOKEN_IDENTIFIER, - TOKEN_INT_LITERAL, - TOKEN_UINT_LITERAL, - TOKEN_FLOAT_LITERAL, - TOKEN_STRING_LITERAL, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_NAT, + TOKEN_LITERAL_REAL, + TOKEN_LITERAL_STR, TOKEN_TYPE_INT, TOKEN_TYPE_NAT, TOKEN_TYPE_REAL, @@ -16,6 +16,8 @@ typedef enum { TOKEN_KEYWORD_FN, TOKEN_KEYWORD_CONST, TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_IS, + TOKEN_KEYWORD_AS, TOKEN_KEYWORD_ELSE, TOKEN_KEYWORD_WHILE, TOKEN_KEYWORD_FOR, @@ -23,15 +25,15 @@ typedef enum { TOKEN_KEYWORD_USE, TOKEN_KEYWORD_INIT, TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_GLOBAL, TOKEN_KEYWORD_OPEN, TOKEN_KEYWORD_READ, TOKEN_KEYWORD_WRITE, TOKEN_KEYWORD_REFRESH, - TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_CLOSE, TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_FALSE, - TOKEN_OPERATOR_IS, TOKEN_OPERATOR_NOT, TOKEN_OPERATOR_AND, TOKEN_OPERATOR_OR, @@ -39,6 +41,8 @@ typedef enum { TOKEN_BANG_EQ, TOKEN_EQ, TOKEN_EQ_EQ, + TOKEN_AND, + TOKEN_AND_AND, TOKEN_GT, TOKEN_LT, TOKEN_GTE, @@ -51,12 +55,15 @@ typedef enum { TOKEN_MINUS, TOKEN_STAR, TOKEN_SLASH, + TOKEN_MESH, + TOKEN_BIG_MONEY, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_LBRACE, TOKEN_RBRACE, TOKEN_LBRACKET, TOKEN_RBRACKET, + TOKEN_ARROW_LEFT, TOKEN_ERROR } TokenType; @@ -71,4 +78,4 @@ void initLexer(const char *source); Token nextToken(); const char* tokenTypeToString(TokenType type); -#endif \ No newline at end of file +#endif diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h new file mode 100644 index 0000000..21f1a25 --- /dev/null +++ b/src/tools/compiler/compiler.h @@ -0,0 +1,90 @@ +#ifndef UNDAR_COMPILER_H +#define UNDAR_COMPILER_H + +#import "../../vm/common.h" + +typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION + } SymbolType; + +typedef struct field_s { + char *name; + SymbolType type; + u32 offset; + u32 size; +} Field; + +typedef struct function_def_s { + char *name; + SymbolType 
args[8]; + u8 arg_count; + SymbolType return_type; +} FunctionDef; + +typedef struct trait_def_s { + char *name; + Field *fields; + u32 field_count; + FunctionDef *methods; + u32 method_count; +} TraitDef; + +typedef struct plex_def_s { + char *name; + u32 logical_size; + u32 physical_size; + Field *fields; + u32 field_count; + TraitDef *traits; + u32 trait_count; + FunctionDef *methods; + u32 method_count; +} PlexDef; + +typedef struct array_def_s { + SymbolType type; + u32 length; + u32 logical_size; + u32 physical_size; // logical_size * type_size + 4 + union { + PlexDef *plex; + struct array_def_s *array; + } ref; +} ArrayDef; + +typedef struct symbol_s { + char *name; + u32 address; + ScopeType scope; + SymbolType type; + u32 logical_size; + u32 physical_size; // logical_size * type_size + 4 + union { + PlexDef *plex; + ArrayDef *array; + FunctionDef *function; + } ref; +} Symbol; + +typedef struct symbol_tab_s { + Symbol *symbols; + int count; + int capacity; +} SymbolTable; + +#endif From 96deaa9ff704578b9cefcb1c46cfbda92ddc6e14 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 22 Nov 2025 13:52:31 -0800 Subject: [PATCH 08/27] more syntax refinement --- test/window.ul.ir | 113 +++++++++++++++++++------------------- test/window.ul.ir2 | 73 +++++++++++++++++++++++++ test/window.ul.uir | 73 ------------------------- test/window.ul.vuir | 129 ++++++++++++++++++++++++++------------------ 4 files changed, 206 insertions(+), 182 deletions(-) create mode 100644 test/window.ul.ir2 delete mode 100644 test/window.ul.uir diff --git a/test/window.ul.ir b/test/window.ul.ir index a5a60b2..7f52163 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -1,91 +1,92 @@ -global const str screen_namespace = "/dev/screen/0" -global const str mouse_namespace = "/dev/mouse/0" -global const str terminal_namespace = "/dev/term/0" -global const str new_line = "\n" -global const byte WHITE = 255 +global str screen_namespace = "/dev/screen/0" +global str mouse_namespace = 
"/dev/mouse/0" +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global byte WHITE = 255 function main () - # Open screen - # use load immediate because it is a pointer to a string, not a value - plex screen is $0 - plex mouse is $1 - str tmp_str is $2 - byte color is $3 - byte left_down is $4 - int mode is $5 - nat offset_temp is $6 - nat x is $7 - nat y is $8 - nat width is $9 - nat screen_buffer is $10 - nat buffer_size is $11 - nat pixel_pos is $12 + // Open screen + // use load immediate because it a pointer to a string, not a value - load_address &screen_namespace -> tmp_str + plex screen $0 + plex mouse $1 + str tmp_str $2 + byte color $3 + byte left_down $4 + int mode $5 + nat offset_temp $6 + nat x $7 + nat y $8 + nat width $9 + nat screen_buffer $10 + nat buffer_size $11 + nat pixel_pos $12 + + load_address screen_namespace -> tmp_str load_immediate 0 -> mode - syscall OPEN tmp_str mode -> screen # openout Plex screen, in namespace, in flags + syscall OPEN tmp_str mode -> screen // open Plex screen, in namespace, in flags nat_to_string screen -> tmp_str - call &pln tmp_str + call pln tmp_str - load_offset_32 screen 8 -> width # load width + load_offset_32 screen 8 -> width // load width nat_to_string width -> tmp_str - call &pln tmp_str + call pln tmp_str - load_offset_32 screen 12 -> buffer_size # load size + load_offset_32 screen 12 -> buffer_size // load size nat_to_string buffer_size -> tmp_str - call &pln tmp_str + call pln tmp_str - load_immediate 16 -> offset_temp # offset for screen buffer + load_immediate 16 -> offset_temp // offset for screen buffer add_nat screen offset_temp -> screen_buffer nat_to_string screen_buffer -> tmp_str - call &pln tmp_str + call pln tmp_str - # open mouse - load_address &mouse_namespace -> tmp_str - syscall OPEN tmp_str mode -> mouse # openout Plex mouse, in namespace, in flags + // open mouse + load_address mouse_namespace -> tmp_str + syscall OPEN tmp_str mode -> mouse // open Plex mouse, 
in namespace, in flags - syscall WRITE screen screen_buffer buffer_size # redraw + syscall WRITE screen screen_buffer buffer_size // redraw - draw_loop: - # load mouse click data + loop draw_loop + // load mouse click data syscall STAT mouse - load_offset_8 mouse 16 -> left_down # load btn1 pressed + load_offset_8 mouse 16 -> left_down // load btn1 pressed - jump_eq_nat &draw_loop left_down mode # mode is 0 which is an alias for false + jump_eq_nat draw_loop left_down mode // mode 0 which an alias for false - load_offset_32 mouse 8 -> x # load x - load_offset_32 mouse 12 -> y # load y + load_offset_32 mouse 8 -> x // load x + load_offset_32 mouse 12 -> y // load y - # Compute start address: y*width + x - mul_nat y width -> pixel_pos # = y * width - add_nat x pixel_pos -> pixel_pos # += x - add_nat screen_buffer pixel_pos -> pixel_pos # += pixel_offset - load_immediate 4 -> fat_ptr_size # need to add offset for fat pointer size + // Compute start address: y*width + x + mul_nat y width -> pixel_pos // = y * width + add_nat x pixel_pos -> pixel_pos // += x + add_nat screen_buffer pixel_pos -> pixel_pos // += pixel_offset + load_immediate 4 -> fat_ptr_size // need to add offset for fat pointer size add_nat pixel_pos fat_ptr_size -> pixel_pos - load_absolute_32 &WHITE -> color - store_absolute_8 pixel_pos color # draw color at screen [x,y] - syscall WRITE screen screen_buffer buffer_size # redraw + load_absolute_32 WHITE -> color + store_absolute_8 pixel_pos color // draw color at screen [x,y] + syscall WRITE screen screen_buffer buffer_size // redraw - jump &draw_loop + jump draw_loop exit 0 -function pln (str message is $0) - str term is $1 - int msg_length is $2 - str nl is $3 - int nl_length is $4 - int mode is $5 +function pln (str message $0) + str term $1 + int msg_length $2 + str nl $3 + int nl_length $4 + int mode $5 - load_address &terminal_namespace -> term # get terminal device + load_address terminal_namespace -> term // get terminal device load_immediate 0 
-> mode syscall OPEN term mode -> term strlen message -> msg_length syscall WRITE term message msg_length - load_address &new_line -> nl + load_address new_line -> nl strlen nl -> nl_length syscall WRITE term nl nl_length return diff --git a/test/window.ul.ir2 b/test/window.ul.ir2 new file mode 100644 index 0000000..e47579c --- /dev/null +++ b/test/window.ul.ir2 @@ -0,0 +1,73 @@ +global str screen_namespace = "/dev/screen/0" +global str mouse_namespace = "/dev/mouse/0" +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global byte WHITE = 255 + +function main () + // open screen + // use load immediate because it is a pointer to a string not a value + + ptr tmp_ptr $0 = &screen_namespace + int mode $1 = 0 + ptr screen $2 = open tmp_ptr mode + + nat screen_handle $3 = @memory.u32[screen + 4] + str tmp_str $4 = nat_to_string screen_handle + pln(tmp_str) + + nat width $5 = @memory.u32[screen + 8] + tmp_str = nat_to_string width + pln(tmp_str) + + nat buffer_size $6 = @memory.u32[screen + 12] + tmp_str = nat_to_string buffer_size + pln(tmp_str) + + nat offset_temp $7 = 16 + ptr screen_buffer = add_nat screen offset_temp + + tmp_str = nat_to_string screen_buffer + pln(tmp_str) + + // open mouse + tmp_ptr = &mouse_namespace + ptr mouse $8 = open tmp_ptr mode + + write screen screen_buffer buffer_size // redraw + + loop draw_loop + // load mouse click data + stat mouse + + bool left_down $9 = @memory.u8[mouse + 16] // load btn1 pressed + + jump_eq_nat &draw_loop left_down mode // mode is 0 which is an alias for false + + nat x $10 = @memory.u32[mouse + 8] + nat y $11 = @memory.u32[mouse + 12] + + // Compute start address: y*width + x + nat pixel_pos $12 = mul_nat y width + pixel_pos = add_nat x pixel_pos + pixel_pos = add_nat screen_buffer pixel_pos + nat fat_ptr_size $13 = 4 // need to add offset for fat pointer size + pixel_pos = add_nat pixel_pos fat_ptr_size + + byte color $14 = @memory.u8[ &WHITE ] + @memory.u8[pixel_pos] = color // draw 
color at screen [xy] + write screen screen_buffer buffer_size // redraw + + jump &draw_loop + exit 0 + +function pln (str message $0) + nat term_ns $1 = &terminal_namespace // get terminal device + int mode $2 = 0 + ptr term $3 = open term_ns mode + int msg_length $4 = strlen message + write term message msg_length + str nl $5 = &new_line + int nl_length $6 = strlen nl + write term nl nl_length + return diff --git a/test/window.ul.uir b/test/window.ul.uir deleted file mode 100644 index b75e516..0000000 --- a/test/window.ul.uir +++ /dev/null @@ -1,73 +0,0 @@ -global const str screen_namespace = "/dev/screen/0" -global const str mouse_namespace = "/dev/mouse/0" -global const str terminal_namespace = "/dev/term/0" -global const str new_line = "\n" -global const byte WHITE = 255 - -function main () - # open screen - # use load immediate because it is a pointer to a string, not a value - - nat tmp_ptr = load_address screen_namespace - int mode = load_immediate 0 - plex screen = open tmp_ptr, mode - - nat screen_handle = load_offset_32 screen, 4 - str tmp_str = nat_to_string screen_handle - call pln tmp_str - - nat width = load_offset_32 screen, 8 - tmp_str = nat_to_string width - call pln tmp_str - - nat buffer_size = load_offset_32 screen, 12 - tmp_str = nat_to_string buffer_size - call pln tmp_str - - nat offset_temp = load_immediate 16 - nat screen_buffer = add_nat screen, offset_temp - - tmp_str = nat_to_string screen_buffer - call pln tmp_str - - # open mouse - tmp_ptr = load_address mouse_namespace - plex mouse = open tmp_ptr, mode - - write screen, screen_buffer, buffer_size # redraw - - draw_loop: - # load mouse click data - stat mouse - - byte left_down = load_offset_8 mouse, 16 # load btn1 pressed - - jump_eq_nat draw_loop, left_down, mode # mode is 0 which is an alias for false - - nat x = load_offset_32 mouse, 8 - nat y = load_offset_32 mouse, 12 - - # Compute start address: y*width + x - nat pixel_pos = mul_nat y, width # = y * width - pixel_pos = add_nat 
x, pixel_pos # += x - pixel_pos = add_nat screen_buffer, pixel_pos # += pixel_offset - nat fat_ptr_size = load_immediate 4 # need to add offset for fat pointer size - pixel_pos = add_nat pixel_pos, fat_ptr_size - - byte color = load_absolute_32 WHITE - store_absolute_8 pixel_pos, color # draw color at screen [x,y] - write screen, screen_buffer, buffer_size # redraw - - jump draw_loop - exit 0 - -function pln (str message) - nat term_ns = load_address terminal_namespace # get terminal device - int mode = load_immediate 0 - plex term = open term_ns, mode - int msg_length = strlen message - write term, message, msg_length - str nl = load_address new_line - int nl_length = strlen nl - write term, nl, nl_length - return diff --git a/test/window.ul.vuir b/test/window.ul.vuir index 8ac09b0..aacf89b 100644 --- a/test/window.ul.vuir +++ b/test/window.ul.vuir @@ -1,72 +1,95 @@ -const str screen_namespace = "/dev/screen/0"; -const str mouse_namespace = "/dev/mouse/0"; -const str terminal_namespace = "/dev/term/0"; -const str new_line = "\n"; -const byte WHITE = 255; +global str screen_namespace = "/dev/screen/0" +global str mouse_namespace = "/dev/mouse/0" +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global byte WHITE = 255 -function main () { +/** + * Devices + */ +plex Terminal + nat handle + +plex Screen + nat handle + nat width + nat height + byte[] buffer + +plex Mouse + nat handle + nat x + nat y + bool left + bool right + bool middle + bool btn4 + nat size + +function main () // open screen // use load immediate because it is a pointer to a string, not a value - nat tmp_ptr = &screen_namespace; - int mode = 0; - plex screen = open(tmp_ptr, mode); + nat tmp_ptr = &screen_namespace + int mode = 0 + ptr screen = open(tmp_ptr, mode) - nat screen_handle = screen.handle; - str tmp_str = screen_handle as str; - pln(tmp_str); + nat screen_handle = screen.handle + str tmp_str = nat_to_string(screen_handle) + pln(tmp_str) - nat width = 
screen.width; - tmp_str = width as str; - pln(tmp_str); + nat width = screen.width + tmp_str = nat_to_string(width) + pln(tmp_str) - nat buffer_size = screen.size; - tmp_str = buffer_size as str; - pln(tmp_str); + nat buffer_size = screen.buffer + tmp_str = nat_to_string(buffer_size) + pln(tmp_str) - nat screen_buffer = screen.buffer.ptr; - tmp_str = screen_buffer as str; - pln(tmp_str); + nat offset_temp = 16 + nat screen_buffer = add_nat(screen, offset_temp) + + tmp_str = nat_to_string(screen_buffer) + pln(tmp_str) // open mouse - tmp_ptr = &mouse_namespace; - plex mouse = open(tmp_ptr, mode); + tmp_ptr = &mouse_namespace + ptr mouse = open(tmp_ptr, mode) - write(screen, screen_buffer, buffer_size); // redraw + write(screen, screen_buffer, buffer_size) // redraw - loop { + loop draw_loop // load mouse click data - stat(mouse); + stat(mouse) - byte left_down = mouse.left; + byte left_down = mouse.left // load btn1 pressed - if (left_down == 1) { - nat x = mouse.x; - nat y = mouse.y; + jump_eq_nat(&draw_loop, left_down, mode) // mode is 0 which is an alias for false - // Compute start address: y*width + x - nat pixel_pos = y * width; // = y * width - pixel_pos = x + pixel_pos; // += x - pixel_pos = screen_buffer + pixel_pos; // += pixel_offset - nat fat_ptr_size = 4; // need to add offset for fat pointer size - pixel_pos = pixel_pos + fat_ptr_size; + nat x = mouse.x + nat y = mouse.y - byte color = WHITE; - store_absolute_8(pixel_pos, color); // draw color at screen [x,y] - write(screen, screen_buffer, buffer_size); // redraw - } - } - exit(0); -} + // Compute start address: y*width + x + nat pixel_pos = mul_nat(y, width) + pixel_pos = add_nat(x, pixel_pos) + pixel_pos = add_nat(screen_buffer, pixel_pos) + nat fat_ptr_size = 4 // need to add offset for fat pointer size + pixel_pos = add_nat(pixel_pos, fat_ptr_size) -function pln (str message) { - nat term_ns = &terminal_namespace; // get terminal device - int mode = 0; - plex term = open(term_ns, mode); - int 
msg_length = message.length; - write(term, message, msg_length); - str nl = &new_line; - int nl_length = nl.length; - write(term, nl, nl_length); + byte color = WHITE + screen.buffer[pixel_pos] = color // draw color at screen [x,y] + write(screen, screen_buffer, buffer_size) // redraw + + jump(&draw_loop) + exit 0 + +function pln (str message) + nat term_ns = &terminal_namespace // get terminal device + int mode = 0 + ptr term = open(term_ns, mode) + int msg_length = strlen(message) + write(term, message, msg_length) + str nl = &new_line + int nl_length = strlen(nl) + write(term, nl, nl_length) return -} \ No newline at end of file From 6f47ee7ea17b82637547b5329dcf7294d269262e Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 23 Nov 2025 23:48:26 -0800 Subject: [PATCH 09/27] wip new assembler; start of compiler --- src/tools/assembler/assembler.c | 311 +++++++++++++++++-- src/tools/assembler/assembler.h | 51 +++- src/tools/assembler/lexer.c | 26 ++ src/tools/assembler/lexer.h | 4 + src/tools/compiler/compiler.c | 502 +++++++++++++++++++++++++++++++ src/tools/compiler/compiler.h | 176 ++++++----- src/tools/compiler/lexer.c | 510 ++++++++++++++++++++++++++++++++ src/tools/compiler/lexer.h | 85 ++++++ src/vm/vm.c | 44 +-- test/add.ul.ir | 40 +-- test/fib.ul.ir | 36 ++- test/hello.ul.ir | 29 +- test/window.ul.ir | 54 ++-- test/window.ul.ir2 | 73 ----- test/window.ul.vuir | 95 ------ 15 files changed, 1662 insertions(+), 374 deletions(-) create mode 100644 src/tools/compiler/compiler.c create mode 100644 src/tools/compiler/lexer.c create mode 100644 src/tools/compiler/lexer.h delete mode 100644 test/window.ul.ir2 delete mode 100644 test/window.ul.vuir diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index c137e64..8a21f2d 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,25 +1,291 @@ -#include "../../vm/libc.h" #include "assembler.h" +#include "../../vm/common.h" +#include "../../vm/fixed.h" +#include 
"../../vm/libc.h" +#include "../../vm/opcodes.h" #include +#include +#include -bool global() { - Token token = nextToken(); - if (token.type == TOKEN_KEYWORD_CONST) { - token = nextToken(); - } - - if (token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_NAT || - token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { - return true; - } - - return false; +SymbolTable *symbol_table_init() { + SymbolTable *table = malloc(sizeof(SymbolTable)); + table->symbols = malloc(16 * sizeof(Symbol)); + table->count = 0; + table->capacity = 16; + return table; } -void function() {} +NamesTable *names_table_init() { + NamesTable *table = malloc(sizeof(NamesTable)); + table->names = malloc(16 * sizeof(char *)); + table->count = 0; + table->capacity = 16; + return table; +} + +u32 names_table_add(NamesTable *table, const char *name) { + for (u32 i = 0; i < table->count; i++) { + if (strcmp(table->names[i], name) == 0) { + return i; + } + } + + if (table->count >= table->capacity) { + table->capacity *= 2; + table->names = realloc(table->names, table->capacity * sizeof(char *)); + } + + table->names[table->count] = malloc(strlen(name) + 1); + strcpy(table->names[table->count], name); + u32 index = table->count; + table->count++; + return index; +} + +u32 symbol_table_add(SymbolTable *table, Symbol s) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); + } + + table->symbols[table->count] = s; + u32 index = table->count; + table->count++; + return index; +} + +Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table, + const char *name) { + for (u32 i = 0; i < nt->count; i++) { + if (strcmp(nt->names[i], name) == 0) { + for (int j = 0; j < table->count; j++) { + if (table->symbols[j].name == i) { + return &table->symbols[j]; + } + } + } + } + return nil; +} + +u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) { + Symbol *sym = symbol_table_lookup(nt, table, 
name); + if (!sym) { + fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); + exit(1); + } + return sym->ref; +} + +void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } + +void emit_u32(VM *vm, u32 value) { + write_u32(vm, code, vm->cp, value); + vm->cp += 4; +} + +void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } + +int parse_register(const char *reg_str) { + if (reg_str[0] != '$') + return -1; + return atoi(reg_str + 1); +} + +u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { + // Handle symbol references (e.g., &label) + if (ref[0] == '&') { + return get_ref(nt, table, ref + 1); + } + + // fixed-point numbers (e.g., 0.5) + if (strchr(ref, '.')) { + return float_to_fixed(atof(ref)); + } + + // decimal literals (e.g., 7) + char *endptr; + u32 value = (u32)strtoul(ref, &endptr, 10); + + if (endptr == ref || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", ref); + exit(1); + } + return value; +} + +static char *unwrap_string(const char *quoted_str) { + if (!quoted_str) + return nil; + + size_t len = strlen(quoted_str); + if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { + // Remove quotes and process escape sequences + const char *src = quoted_str + 1; + size_t src_len = len - 2; + + // First pass: calculate the actual length needed after escape processing + size_t actual_len = 0; + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Escape sequence + actual_len++; + i++; // Skip the next character + } else { + actual_len++; + } + } + + char *unwrapped = (char *)malloc(actual_len + 1); + size_t dst_idx = 0; + + // Second pass: process escape sequences + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Handle escape sequences + switch (src[i + 1]) { + case 'n': + unwrapped[dst_idx++] = '\n'; + break; + case 't': + unwrapped[dst_idx++] = '\t'; + break; + case 'r': + unwrapped[dst_idx++] = '\r'; + break; + case 
'\\': + unwrapped[dst_idx++] = '\\'; + break; + case '"': + unwrapped[dst_idx++] = '"'; + break; + case '\'': + unwrapped[dst_idx++] = '\''; + break; + default: + // Unknown escape, keep both characters + unwrapped[dst_idx++] = src[i]; + unwrapped[dst_idx++] = src[i + 1]; + break; + } + i++; // Skip the next character + } else { + unwrapped[dst_idx++] = src[i]; + } + } + unwrapped[dst_idx] = '\0'; + return unwrapped; + } + // Not quoted, return copy + return strdup(quoted_str); +} + +Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { + Symbol *s = (Symbol *)malloc(sizeof(Symbol)); + ValueType t; + + Token token_type = nextToken(); + switch (token_type.type) { + case TOKEN_TYPE_I8: + t.type = I8; + t.size = 1; + break; + case TOKEN_TYPE_I16: + t.type = I16; + t.size = 2; + break; + case TOKEN_TYPE_U8: + t.type = U8; + t.size = 1; + break; + case TOKEN_TYPE_U16: + t.type = U16; + t.size = 2; + break; + case TOKEN_TYPE_INT: + t.type = I32; + t.size = 4; + break; + case TOKEN_TYPE_NAT: + t.type = U32; + t.size = 4; + break; + case TOKEN_TYPE_REAL: + t.type = F32; + t.size = 4; + break; + case TOKEN_TYPE_STR: + t.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: + return nil; + } + + Token eq = nextToken(); + if (eq.type != TOKEN_EQ) + return nil; + + Token name = nextToken(); + if (name.type != TOKEN_IDENTIFIER) + return nil; + + s->name = names_table_add(nt, name.start); + + u32 addr = vm->mp; + s->ref = addr; + + u32 result; + Token value = nextToken(); + switch (value.type) { + case TOKEN_LITERAL_INT: + case TOKEN_LITERAL_NAT: + case TOKEN_LITERAL_REAL: + result = resolve_symbol(nt, st, value.start); + write_u32(vm, memory, addr, result); + + vm->mp += t.size; + vm->frames[vm->fp].end += t.size; + break; + case TOKEN_LITERAL_STR: { + char *unwrapped = unwrap_string(value.start); + int len = strlen(unwrapped); + + u32 addr = vm->mp; + u32 size = len + 1 + 4; + t.size = size; + + vm->mp += size; + vm->frames[vm->fp].end += size; + + 
write_u32(vm, memory, addr, len); + for (int i = 0; i < len; i++) { + write_u8(vm, memory, addr + 4 + i, unwrapped[i]); + } + write_u8(vm, memory, addr + 4 + len, '\0'); + free(unwrapped); + break; + } + default: + return nil; + } + + s->type = t; + return s; +} + +Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) { + USED(vm); + USED(nt); + USED(st); + return nil; +} void assemble(VM *vm, char *source) { - USED(vm); + SymbolTable *st = symbol_table_init(); + NamesTable *nt = names_table_init(); + initLexer(source); Token token; do { @@ -33,17 +299,26 @@ void assemble(VM *vm, char *source) { token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { - if (!global()) { + if (!global(vm, nt, st)) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); } } if (token.type == TOKEN_KEYWORD_FN) { - function(); + function(vm, nt, st); + } + + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR) { + } if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first if (streq(token.start, "exit")) { } else if (streq(token.start, "call")) { } else if (streq(token.start, "syscall")) { @@ -132,6 +407,8 @@ void assemble(VM *vm, char *source) { } else if (streq(token.start, "string_to_int")) { } else if (streq(token.start, "string_to_nat")) { } else if (streq(token.start, "string_to_real")) { + } else { + // some other identifier } } } diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 1b3c38a..515a74c 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -5,6 +5,55 @@ #include "../../vm/opcodes.h" #include "lexer.h" +typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, 
+ U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION +} SymbolType; + +typedef struct names_tab_s NamesTable; +typedef struct value_type_s ValueType; +typedef struct symbol_s Symbol; +typedef struct symbol_tab_s SymbolTable; + +struct names_tab_s { + char **names; + u32 count; + u32 capacity; +}; + +struct value_type_s { + SymbolType type; + u32 name; + u32 size; +}; + +struct symbol_s { + u32 name; + ValueType type; + ScopeType scope; + u32 ref; // address if global, register if local +}; + +struct symbol_tab_s { + Symbol *symbols; + u32 count; + u32 capacity; +}; + void assemble(VM *vm, char *source); -#endif \ No newline at end of file +#endif diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index 8c26f61..ee0bdab 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -257,6 +257,32 @@ static TokenType identifierType() { break; case 'g': return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); + case 'I': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); + } + } + break; + case 'U': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); + } + } + break; + case 'F': + return checkKeyword(1, 2, "32", TOKEN_TYPE_REAL); } return TOKEN_IDENTIFIER; diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index 4b7a8ae..eaa137c 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -8,7 +8,11 @@ typedef enum { TOKEN_LITERAL_NAT, TOKEN_LITERAL_REAL, TOKEN_LITERAL_STR, + TOKEN_TYPE_I8, + TOKEN_TYPE_I16, TOKEN_TYPE_INT, + TOKEN_TYPE_U8, + TOKEN_TYPE_U16, TOKEN_TYPE_NAT, 
TOKEN_TYPE_REAL, TOKEN_TYPE_STR, diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c new file mode 100644 index 0000000..1b29e68 --- /dev/null +++ b/src/tools/compiler/compiler.c @@ -0,0 +1,502 @@ +#include "compiler.h" +#include "../../vm/common.h" +#include "../../vm/libc.h" +#include +#include +#include + +NamesTable *names_table_init() { + NamesTable *table = malloc(sizeof(NamesTable)); + table->names = malloc(16 * sizeof(char *)); + table->count = 0; + table->capacity = 16; + return table; +} + +FunctionTable *function_table_init() { + FunctionTable *table = malloc(sizeof(FunctionTable)); + table->symbols = malloc(16 * sizeof(FunctionDef)); + table->count = 0; + table->capacity = 16; + return table; +} + +ArrayTable *array_table_init() { + ArrayTable *table = malloc(sizeof(ArrayTable)); + table->symbols = malloc(16 * sizeof(ArrayDef)); + table->count = 0; + table->capacity = 16; + return table; +} + +PlexTable *plex_table_init() { + PlexTable *table = malloc(sizeof(PlexTable)); + table->symbols = malloc(16 * sizeof(PlexDef)); + table->count = 0; + table->capacity = 16; + return table; +} + +PlexFieldsTable *plex_fields_table_init() { + PlexFieldsTable *table = malloc(sizeof(PlexFieldsTable)); + table->plex_refs = malloc(64 * sizeof(u32)); + table->fields = malloc(64 * sizeof(ValueType)); + table->count = 0; + table->capacity = 64; + return table; +} + +u32 names_table_add(NamesTable *table, const char *name) { + for (u32 i = 0; i < table->count; i++) { + if (strcmp(table->names[i], name) == 0) { + return (u32)i; + } + } + + if (table->count >= table->capacity) { + table->capacity *= 2; + table->names = realloc(table->names, table->capacity * sizeof(char *)); + } + + table->names[table->count] = malloc(strlen(name) + 1); + strcpy(table->names[table->count], name); + u32 index = (u32)table->count; + table->count++; + return index; +} + +u32 function_table_add(FunctionTable *table, FunctionDef def) { + if (table->count >= table->capacity) { 
+ table->capacity *= 2; + table->symbols = + realloc(table->symbols, table->capacity * sizeof(FunctionDef)); + } + + table->symbols[table->count] = def; + u32 index = (u32)table->count; + table->count++; + return index; +} + +u32 array_table_add(ArrayTable *table, ArrayDef def) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(ArrayDef)); + } + + table->symbols[table->count] = def; + u32 index = (u32)table->count; + table->count++; + return index; +} + +u32 plex_add(PlexTable *plex_table, u32 name, u32 size, u32 field_start, + u32 field_count) { + if (plex_table->count >= plex_table->capacity) { + plex_table->capacity *= 2; + plex_table->symbols = + realloc(plex_table->symbols, plex_table->capacity * sizeof(PlexDef)); + } + + plex_table->symbols[plex_table->count].name = name; + plex_table->symbols[plex_table->count].size = size; + plex_table->symbols[plex_table->count].field_ref_start = field_start; + plex_table->symbols[plex_table->count].field_count = field_count; + + u32 index = (u32)plex_table->count; + plex_table->count++; + return index; +} + +u32 plex_fields_add(PlexFieldsTable *fields_table, u32 plex_ref, + ValueType field) { + if (fields_table->count + 1 > fields_table->capacity) { + u32 new_capacity = fields_table->capacity * 2; + if (new_capacity < fields_table->count + 1) { + new_capacity = fields_table->count + 1; + } + fields_table->plex_refs = + realloc(fields_table->plex_refs, new_capacity * sizeof(u32)); + fields_table->fields = + realloc(fields_table->fields, new_capacity * sizeof(ValueType)); + fields_table->capacity = new_capacity; + } + + u32 start_index = fields_table->count; + fields_table->plex_refs[start_index] = plex_ref; + fields_table->fields[start_index] = field; + fields_table->count++; + return start_index; +} + +int plex_get_field_index_by_name(PlexTable *plex_table, + PlexFieldsTable *fields_table, + NamesTable *names_table, u32 plex_index, + const 
char *field_name) { + if (plex_index >= plex_table->count) + return -1; + + PlexDef *plex_def = &plex_table->symbols[plex_index]; + u32 field_start = plex_def->field_ref_start; + u32 field_count = plex_def->field_count; + + for (u32 i = 0; i < field_count; i++) { + u32 field_table_index = field_start + i; + ValueType *field = &fields_table->fields[field_table_index]; + + if (field->name < names_table->count) { + if (strcmp(names_table->names[field->name], field_name) == 0) { + return (int)i; // Return field index within the plex + } + } + } + return -1; // Not found +} + +ValueType *plex_get_field(PlexTable *plex_table, PlexFieldsTable *fields_table, + u32 plex_index, u32 field_in_plex_index) { + if (plex_index >= plex_table->count) + return nil; + + PlexDef *plex_def = &plex_table->symbols[plex_index]; + if (field_in_plex_index >= plex_def->field_count) + return nil; + + u32 field_table_index = plex_def->field_ref_start + field_in_plex_index; + return &fields_table->fields[field_table_index]; +} + +ValueType *plex_get_field_by_name(PlexTable *plex_table, + PlexFieldsTable *fields_table, + NamesTable *names_table, u32 plex_index, + const char *field_name) { + int field_index = plex_get_field_index_by_name( + plex_table, fields_table, names_table, plex_index, field_name); + if (field_index == -1) + return nil; + + return plex_get_field(plex_table, fields_table, plex_index, (u32)field_index); +} + +Symbol *global(VM *vm) { + Symbol s; + ValueType t; + + s.ref.global = vm->mp; + + Token token_type = nextToken(); + Token array_or_eq = nextToken(); + if (array_or_eq.type == TOKEN_LBRACKET) { + Token rb = nextToken(); + if (rb.type != TOKEN_RBRACKET) + return nil; + + Token eq = nextToken(); + if (eq.type != TOKEN_EQ) + return nil; + + t.type = ARRAY; + ValueType array_type; + + switch (token_type.type) { + case TOKEN_TYPE_I8: + array_type.type = I8; + break; + case TOKEN_TYPE_I16: + array_type.type = I16; + break; + case TOKEN_TYPE_INT: + array_type.type = I32; + break; 
+ case TOKEN_TYPE_U8: + array_type.type = U8; + break; + case TOKEN_TYPE_U16: + array_type.type = U16; + break; + case TOKEN_TYPE_NAT: + array_type.type = U32; + break; + case TOKEN_TYPE_REAL: + array_type.type = F32; + break; + case TOKEN_TYPE_STR: + array_type.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: + return nil; + } + + } else { + // its not an array, so should be = + if (array_or_eq.type != TOKEN_EQ) + return nil; + + switch (token_type.type) { + case TOKEN_TYPE_I8: + t.type = I8; + break; + case TOKEN_TYPE_I16: + t.type = I16; + break; + case TOKEN_TYPE_INT: + t.type = I32; + break; + case TOKEN_TYPE_U8: + t.type = U8; + break; + case TOKEN_TYPE_U16: + t.type = U16; + break; + case TOKEN_TYPE_NAT: + t.type = U32; + break; + case TOKEN_TYPE_REAL: + t.type = F32; + break; + case TOKEN_TYPE_STR: + t.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: + return nil; + } + } + + s.type = t; + + Token value = nextToken(); + + return nil; +} + +typedef struct { + Token current; + Token previous; + bool hadError; + bool panicMode; +} Parser; + +typedef enum { + PREC_NONE, + PREC_ASSIGNMENT, /* = */ + PREC_OR, /* or */ + PREC_AND, /* and */ + PREC_EQUALITY, /* == != */ + PREC_COMPARISON, /* < > <= >= */ + PREC_TERM, /* + - */ + PREC_FACTOR, /* * / */ + PREC_UNARY, /* not */ + PREC_CALL, /* . 
() */ + PREC_PRIMARY +} Precedence; + +typedef void (*ParseFn)(char *program); + +typedef struct { + ParseFn prefix; + ParseFn infix; + Precedence precedence; +} ParseRule; + +typedef struct { + SymbolTable table; + Symbol current; + Symbol last; + i8 rp; // Next free register +} Compiler; + +Parser parser; + +const char *internalErrorMsg = + "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes."; + +bool isType(TokenType type) { + return type == TOKEN_TYPE_INT || type == TOKEN_TYPE_NAT || + type == TOKEN_TYPE_REAL || type == TOKEN_TYPE_STR || + type == TOKEN_TYPE_BOOL; +} + +void errorAt(Token *token, const char *message) { + if (parser.panicMode) + return; + parser.panicMode = true; + fprintf(stderr, "[line %d] Error", token->line); + + if (token->type == TOKEN_EOF) { + fprintf(stderr, " at end"); + } else if (token->type == TOKEN_ERROR) { + } else { + fprintf(stderr, " at '%.*s'", token->length, token->start); + } + + fprintf(stderr, ": %s\n", message); + parser.hadError = true; +} + +void error(const char *message) { errorAt(&parser.previous, message); } + +void errorAtCurrent(const char *message) { errorAt(&parser.current, message); } + +void advance() { + parser.previous = parser.current; + + for (;;) { + parser.current = nextToken(); + if (parser.current.type != TOKEN_ERROR) + break; + + errorAtCurrent(parser.current.start); + } +} + +void consume(TokenType type, const char *message) { + if (parser.current.type == type) { + advance(); + return; + } + + errorAtCurrent(message); +} + +static int allocateRegister(Compiler *c) { + char buffer[38]; + if (c->rp + 1 > 31) { + sprintf(buffer, "Out of registers (used %d, max 32)", c->rp + 1); + error(buffer); + return -1; + } + + return c->rp++; +} + +static void popRegister(Compiler *c) { + if (c->rp - 1 > 0) { + c->rp--; + } +} + +static void freeRegister(Compiler *c, u8 reg) { + if (reg == c->rp - 1) { + c->rp--; + } +} + +static void clearRegisters(Compiler *c, u8 reg) { c->rp = 0; } + +void emit_byte(VM 
*vm, u8 byte) { vm->code[vm->cp++] = byte; } + +void emit_u32(VM *vm, u32 value) { + write_u32(vm, code, vm->cp, value); + vm->cp += 4; +} + +void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } + +static bool check(TokenType type) { return parser.current.type == type; } + +static bool match(TokenType type) { + if (!check(type)) + return false; + advance(); + return true; +} + +static void expression(Compiler *c, VM *vm) { + USED(c); + USED(vm); +} + +void number(Compiler *c, VM *vm) { + emit_opcode(vm, OP_LOAD_IMM); + int reg = allocateRegister(c); + if (reg < 0) + return; + emit_byte(vm, reg); + + c->last = Symbol{ .type=parser.previous.type }; + + switch (parser.previous.type) { + case TOKEN_INT_LITERAL: { + char *endptr; + i32 value = (i32)strtol(parser.previous.start, &endptr, 10); + emit_u32(vm, value); + return; + } + case TOKEN_UINT_LITERAL: { + long value = atol(parser.previous.start); + emit_u32(vm, value); + return; + } + case TOKEN_FLOAT_LITERAL: { + float value = atof(parser.previous.start); + fixed_t fvalue = float_to_fixed(value); + emit_u32(vm, fvalue); + return; + } + default: + return; // Unreachable. + } + + errorAtCurrent("Invalid number format"); +} + +static void unary(Compiler *c, VM *vm) { + TokenType operatorType = parser.previous.type; + + // Compile the operand. + expression(c, vm); + + // Emit the operator instruction. + switch (operatorType) { + case TOKEN_MINUS: { + switch (c->last.type) { + case TOKEN_UINT_LITERAL: + emit_opcode(vm, OP_NEG_UINT); + case TOKEN_FLOAT_LITERAL: + emit_opcode(vm, OP_NEG_FLOAT); + default: + emit_opcode(vm, OP_NEG_INT); + } + + int dest = allocateRegister(); + emit_byte(vm, dest); + emit_byte(vm, dest); + } + default: + return; // Unreachable. 
+ } +} + +static void emitHalt(Compiler *c, VM *vm) { + emit_opcode(vm, OP_HALT); + advance(); + number(c, vm); +} + +static void endCompiler(Compiler *c, VM *vm) { emitHalt(c, vm); } + +static void grouping(Compiler *c, VM *vm) { + expression(c, vm); + consume(TOKEN_RPAREN, "Expect ')' after expression."); +} + +bool compile(const char *source, VM *vm) { + USED(source); + USED(vm); + initLexer(source); + + parser.hadError = false; + parser.panicMode = false; + + Compiler compiler; + advance(); + expression(&compiler, vm); + consume(TOKEN_EOF, "Expect end of expression."); + endCompiler(&compiler, vm); + + return parser.hadError; +} diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h index 21f1a25..e223513 100644 --- a/src/tools/compiler/compiler.h +++ b/src/tools/compiler/compiler.h @@ -4,87 +4,109 @@ #import "../../vm/common.h" typedef enum { GLOBAL, LOCAL } ScopeType; -typedef enum { - VOID, - BOOL, - I8, - I16, - I32, - U8, - U16, - U32, - F8, - F16, - F32, - STR, - PLEX, - ARRAY, - FUNCTION - } SymbolType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION +} SymbolType; -typedef struct field_s { - char *name; - SymbolType type; - u32 offset; - u32 size; -} Field; +typedef struct value_type_s ValueType; +typedef struct function_def_s FunctionDef; +typedef struct function_tab_s FunctionTable; +typedef struct plex_def_s PlexDef; +typedef struct plex_tab_s PlexTable; +typedef struct array_def_s ArrayDef; +typedef struct array_tab_s ArrayTable; +typedef struct symbol_s Symbol; +typedef struct symbol_tab_s SymbolTable; +typedef struct names_tab_s NamesTable; +typedef struct plex_fields_tab_s PlexFieldsTable; -typedef struct function_def_s { - char *name; - SymbolType args[8]; - u8 arg_count; - SymbolType return_type; -} FunctionDef; - -typedef struct trait_def_s { - char *name; - Field *fields; - u32 field_count; - FunctionDef *methods; - u32 method_count; -} 
TraitDef; - -typedef struct plex_def_s { - char *name; - u32 logical_size; - u32 physical_size; - Field *fields; - u32 field_count; - TraitDef *traits; - u32 trait_count; - FunctionDef *methods; - u32 method_count; -} PlexDef; - -typedef struct array_def_s { - SymbolType type; - u32 length; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - struct array_def_s *array; - } ref; -} ArrayDef; - -typedef struct symbol_s { - char *name; - u32 address; - ScopeType scope; +struct value_type_s { SymbolType type; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - ArrayDef *array; - FunctionDef *function; - } ref; -} Symbol; + u32 name; + u32 size; + u32 table_ref; // if it is a heap object +}; -typedef struct symbol_tab_s { +struct function_def_s { + u32 name; + ValueType args[8]; + u8 arg_count; + ValueType return_type; +}; + +struct plex_def_s { + u32 name; + u32 size; + u32 field_ref_start; + u32 field_count; +}; + +struct array_def_s { + ValueType type; + u32 length; + u32 logical_size; // length of the array + u32 physical_size; // logical_size * type_size + fat pointer +}; + +struct symbol_s { + u32 name; + ValueType type; + ScopeType scope; + union { + u32 local; // register + u32 global; // address + } ref; +}; + +struct plex_fields_tab_s { + u32 *plex_refs; + ValueType *fields; + u32 count; + u32 capacity; +}; + +struct plex_tab_s { + PlexDef *symbols; + u32 count; + u32 capacity; +}; + +struct array_tab_s { + ArrayDef *symbols; + u32 count; + u32 capacity; +}; + +struct function_tab_s { + FunctionDef *symbols; + u32 count; + u32 capacity; +}; + +struct names_tab_s { + char **names; + u32 count; + u32 capacity; +}; + +struct symbol_tab_s { Symbol *symbols; - int count; - int capacity; -} SymbolTable; + u32 count; + u32 capacity; +}; #endif diff --git a/src/tools/compiler/lexer.c b/src/tools/compiler/lexer.c new file mode 100644 index 0000000..397d7cc --- /dev/null 
+++ b/src/tools/compiler/lexer.c @@ -0,0 +1,510 @@ +#include + +#include "../../vm/common.h" +#include "lexer.h" + +typedef struct { + const char *start; + const char *current; + int line; +} Lexer; + +Lexer lexer; + +void initLexer(const char *source) { + lexer.start = source; + lexer.current = source; + lexer.line = 1; +} + +static bool isAlpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +static bool isDigit(char c) { return c >= '0' && c <= '9'; } + +static bool isAtEnd() { return *lexer.current == '\0'; } + +static char advance() { + lexer.current++; + return lexer.current[-1]; +} + +static char peek() { return *lexer.current; } + +static char peekNext() { + if (isAtEnd()) + return '\0'; + return lexer.current[1]; +} + +static bool match(char expected) { + if (isAtEnd()) + return false; + if (*lexer.current != expected) + return false; + lexer.current++; + return true; +} + +static Token makeToken(TokenType type) { + Token token; + token.type = type; + token.start = lexer.start; + token.length = (int)(lexer.current - lexer.start); + token.line = lexer.line; + return token; +} + +static Token errorToken(const char *message) { + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (int)strlen(message); + token.line = lexer.line; + return token; +} + +static void skipWhitespace() { + for (;;) { + char c = peek(); + switch (c) { + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + lexer.line++; + advance(); + break; + case '/': + if (peekNext() == '/') { + // Single-line comment: skip until newline or end of file + advance(); + while (peek() != '\n' && !isAtEnd()) + advance(); + } else if (peekNext() == '*') { + // Multi-line comment: skip until '*/' or end of file + advance(); + advance(); + while (!isAtEnd()) { + if (peek() == '\n') + lexer.line++; + if (peek() == '*' && peekNext() == '/') { + advance(); + advance(); + break; // Exit loop, comment ended + } + advance(); 
+ } + } else { + return; // Not a comment, let tokenization handle it + } + break; + default: + return; + } + } +} + +static TokenType checkKeyword(int start, int length, const char *rest, + TokenType type) { + if (lexer.current - lexer.start == start + length && + memcmp(lexer.start + start, rest, length) == 0) { + return type; + } + + return TOKEN_IDENTIFIER; +} + +static TokenType identifierType() { + switch (lexer.start[0]) { + case 'a': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'n': + return checkKeyword(2, 1, "d", TOKEN_OPERATOR_AND); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS); + } + } + break; + case 'c': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'l': + return checkKeyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); + case 'o': + return checkKeyword(2, 3, "nst", TOKEN_KEYWORD_CONST); + } + } + break; + case 'e': + return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); + case 'f': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'a': + return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); + case 'o': + return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_REAL); + } + return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN); + } + break; + case 'i': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'f': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); + case 'n': + if (lexer.current - lexer.start > 2) { + switch (lexer.start[2]) { + case 'i': + return checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT); + case 't': + return checkKeyword(3, 0, "", TOKEN_TYPE_INT); + } + } + break; + } + } + break; + case 'n': + if (lexer.current - lexer.start > 
1) { + switch (lexer.start[1]) { + case 'a': + return checkKeyword(2, 1, "t", TOKEN_TYPE_NAT); + case 'i': + return checkKeyword(2, 1, "l", TOKEN_KEYWORD_NIL); + } + } + break; + case 'o': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'p': + return checkKeyword(2, 2, "en", TOKEN_KEYWORD_OPEN); + case 'r': + return checkKeyword(2, 0, "", TOKEN_OPERATOR_OR); + } + } + break; + case 'p': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'l': + return checkKeyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); + } + } + break; + case 'r': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'e': + if (lexer.current - lexer.start > 2) { + switch (lexer.start[2]) { + case 'a': + return checkKeyword(3, 1, "d", TOKEN_KEYWORD_READ); + case 'f': + return checkKeyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); + case 't': + return checkKeyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); + } + } + break; + } + } + break; + case 's': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 't': + return checkKeyword(2, 1, "r", TOKEN_TYPE_STR); + } + } + break; + case 't': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'h': + return checkKeyword(2, 2, "is", TOKEN_KEYWORD_THIS); + case 'r': + return checkKeyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); + } + } + break; + case 'u': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 's': + return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); + } + } + break; + case 'w': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'h': + return checkKeyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); + case 'r': + return checkKeyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); + } + } + break; + case 'g': + return checkKeyword(1, 5, "lobal", 
TOKEN_KEYWORD_GLOBAL); + } + + return TOKEN_IDENTIFIER; +} + +static Token identifier() { + while (isAlpha(peek()) || isDigit(peek())) + advance(); + return makeToken(identifierType()); +} + +static Token number() { + while (isDigit(peek())) + advance(); + + /* Look for a fractional part. */ + if (peek() == '.' && isDigit(peekNext())) { + /* Consume the ".". */ + advance(); + + while (isDigit(peek())) + advance(); + + return makeToken(TOKEN_LITERAL_REAL); + } + + return makeToken(TOKEN_LITERAL_INT); +} + +static Token string() { + while (peek() != '"' && !isAtEnd()) { + if (peek() == '\n') + lexer.line++; + advance(); + } + + if (isAtEnd()) + return errorToken("Unterminated string."); + + /* The closing quote. */ + advance(); + return makeToken(TOKEN_LITERAL_STR); +} + +Token nextToken() { + skipWhitespace(); + lexer.start = lexer.current; + + if (isAtEnd()) + return makeToken(TOKEN_EOF); + + char c = advance(); + if (isAlpha(c)) + return identifier(); + if (isDigit(c)) + return number(); + + switch (c) { + case '(': + return makeToken(TOKEN_LPAREN); + case ')': + return makeToken(TOKEN_RPAREN); + case '{': + return makeToken(TOKEN_LBRACE); + case '}': + return makeToken(TOKEN_RBRACE); + case '[': + return makeToken(TOKEN_LBRACKET); + case ']': + return makeToken(TOKEN_RBRACKET); + case ';': + return makeToken(TOKEN_SEMICOLON); + case ',': + return makeToken(TOKEN_COMMA); + case '.': + return makeToken(TOKEN_DOT); + case '-': + return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); + case '+': + return makeToken(TOKEN_PLUS); + case '/': + return makeToken(TOKEN_SLASH); + case '&': + return makeToken(match('&') ? TOKEN_AND_AND : TOKEN_AND); + case '#': + return makeToken(TOKEN_MESH); + case '$': + return makeToken(TOKEN_BIG_MONEY); + case '*': + return makeToken(TOKEN_STAR); + case '!': + return makeToken(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + case '=': + return makeToken(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + case '<': + return makeToken(match('=') ? 
TOKEN_LTE : TOKEN_LT); + case '>': + return makeToken(match('=') ? TOKEN_GTE : TOKEN_GT); + case '"': + return string(); + } + + return errorToken("Unexpected character."); +} + +const char *tokenTypeToString(TokenType type) { + switch (type) { + case TOKEN_EOF: + return "EOF"; + case TOKEN_IDENTIFIER: + return "IDENTIFIER"; + case TOKEN_LITERAL_INT: + return "LITERAL_INT"; + case TOKEN_LITERAL_NAT: + return "LITERAL_NAT"; + case TOKEN_LITERAL_REAL: + return "LITERAL_REAL"; + case TOKEN_LITERAL_STR: + return "LITERAL_STR"; + case TOKEN_TYPE_INT: + return "TYPE_INT"; + case TOKEN_TYPE_NAT: + return "TYPE_NAT"; + case TOKEN_TYPE_REAL: + return "TYPE_REAL"; + case TOKEN_TYPE_STR: + return "TYPE_STR"; + case TOKEN_KEYWORD_PLEX: + return "KEYWORD_PLEX"; + case TOKEN_KEYWORD_FN: + return "KEYWORD_FN"; + case TOKEN_KEYWORD_CONST: + return "KEYWORD_CONST"; + case TOKEN_KEYWORD_IF: + return "KEYWORD_IF"; + case TOKEN_KEYWORD_IS: + return "IS"; + case TOKEN_KEYWORD_AS: + return "AS"; + case TOKEN_KEYWORD_ELSE: + return "KEYWORD_ELSE"; + case TOKEN_KEYWORD_WHILE: + return "KEYWORD_WHILE"; + case TOKEN_KEYWORD_FOR: + return "KEYWORD_FOR"; + case TOKEN_KEYWORD_RETURN: + return "KEYWORD_RETURN"; + case TOKEN_KEYWORD_USE: + return "KEYWORD_USE"; + case TOKEN_KEYWORD_INIT: + return "KEYWORD_INIT"; + case TOKEN_KEYWORD_THIS: + return "KEYWORD_THIS"; + case TOKEN_KEYWORD_OPEN: + return "TOKEN_KEYWORD_OPEN"; + case TOKEN_KEYWORD_READ: + return "TOKEN_KEYWORD_READ"; + case TOKEN_KEYWORD_WRITE: + return "TOKEN_KEYWORD_WRITE"; + case TOKEN_KEYWORD_REFRESH: + return "TOKEN_KEYWORD_REFRESH"; + case TOKEN_KEYWORD_CLOSE: + return "TOKEN_KEYWORD_CLOSE"; + case TOKEN_KEYWORD_NIL: + return "KEYWORD_NIL"; + case TOKEN_KEYWORD_TRUE: + return "KEYWORD_TRUE"; + case TOKEN_KEYWORD_FALSE: + return "KEYWORD_FALSE"; + case TOKEN_KEYWORD_GLOBAL: + return "KEYWORD_GLOBAL"; + case TOKEN_OPERATOR_NOT: + return "OPERATOR_NOT"; + case TOKEN_OPERATOR_AND: + return "OPERATOR_AND"; + case TOKEN_OPERATOR_OR: + 
return "OPERATOR_OR"; + case TOKEN_BANG: + return "BANG"; + case TOKEN_BANG_EQ: + return "BANG_EQ"; + case TOKEN_EQ: + return "EQ"; + case TOKEN_EQ_EQ: + return "EQ_EQ"; + case TOKEN_GT: + return "GT"; + case TOKEN_LT: + return "LT"; + case TOKEN_GTE: + return "GTE"; + case TOKEN_LTE: + return "LTE"; + case TOKEN_DOT: + return "DOT"; + case TOKEN_COMMA: + return "COMMA"; + case TOKEN_COLON: + return "COLON"; + case TOKEN_SEMICOLON: + return "SEMICOLON"; + case TOKEN_PLUS: + return "PLUS"; + case TOKEN_MINUS: + return "MINUS"; + case TOKEN_STAR: + return "STAR"; + case TOKEN_SLASH: + return "SLASH"; + case TOKEN_LPAREN: + return "LPAREN"; + case TOKEN_RPAREN: + return "RPAREN"; + case TOKEN_LBRACE: + return "LBRACE"; + case TOKEN_RBRACE: + return "RBRACE"; + case TOKEN_LBRACKET: + return "LBRACKET"; + case TOKEN_RBRACKET: + return "RBRACKET"; + case TOKEN_ARROW_LEFT: + return "ARROW_LEFT"; + case TOKEN_MESH: + return "MESH"; + case TOKEN_BIG_MONEY: + return "BIG_MONEY"; + case TOKEN_AND: + return "AND"; + case TOKEN_AND_AND: + return "AND_AND"; + case TOKEN_ERROR: + return "ERROR"; + default: + return "UNKNOWN_TOKEN"; + } +} diff --git a/src/tools/compiler/lexer.h b/src/tools/compiler/lexer.h new file mode 100644 index 0000000..eaa137c --- /dev/null +++ b/src/tools/compiler/lexer.h @@ -0,0 +1,85 @@ +#ifndef UNDAR_LEXER_H +#define UNDAR_LEXER_H + +typedef enum { + TOKEN_EOF, + TOKEN_IDENTIFIER, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_NAT, + TOKEN_LITERAL_REAL, + TOKEN_LITERAL_STR, + TOKEN_TYPE_I8, + TOKEN_TYPE_I16, + TOKEN_TYPE_INT, + TOKEN_TYPE_U8, + TOKEN_TYPE_U16, + TOKEN_TYPE_NAT, + TOKEN_TYPE_REAL, + TOKEN_TYPE_STR, + TOKEN_KEYWORD_PLEX, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_IS, + TOKEN_KEYWORD_AS, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_INIT, + TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_GLOBAL, + TOKEN_KEYWORD_OPEN, + TOKEN_KEYWORD_READ, + 
TOKEN_KEYWORD_WRITE, + TOKEN_KEYWORD_REFRESH, + TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_NIL, + TOKEN_KEYWORD_TRUE, + TOKEN_KEYWORD_FALSE, + TOKEN_OPERATOR_NOT, + TOKEN_OPERATOR_AND, + TOKEN_OPERATOR_OR, + TOKEN_BANG, + TOKEN_BANG_EQ, + TOKEN_EQ, + TOKEN_EQ_EQ, + TOKEN_AND, + TOKEN_AND_AND, + TOKEN_GT, + TOKEN_LT, + TOKEN_GTE, + TOKEN_LTE, + TOKEN_DOT, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_MESH, + TOKEN_BIG_MONEY, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_ARROW_LEFT, + TOKEN_ERROR +} TokenType; + +typedef struct { + TokenType type; + const char *start; + int length; + int line; +} Token; + +void initLexer(const char *source); +Token nextToken(); +const char* tokenTypeToString(TokenType type); + +#endif diff --git a/src/vm/vm.c b/src/vm/vm.c index e606efd..cca75e5 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -17,8 +17,8 @@ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ - value = (type)frame->locals[src1]; \ - value2 = (type)frame->locals[src2]; \ + value = (type)frame->locals[src1]; \ + value2 = (type)frame->locals[src2]; \ cond = !!(value op value2); \ mask = -(u32)cond; \ vm->pc = (target & mask) | (vm->pc & ~mask); \ @@ -27,7 +27,7 @@ #define MATH_OP(type, op) \ do { \ - u32 *regs = frame->locals; \ + u32 *regs = frame->locals; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ @@ -40,7 +40,7 @@ #define BIT_OP(op) \ do { \ - u32 *regs = frame->locals; \ + u32 *regs = frame->locals; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ @@ -94,7 +94,7 @@ bool step_vm(VM *vm) { switch (opcode) { case OP_EXIT: { - vm->flag = read_u32(vm, code, vm->pc); + vm->flag = read_u32(vm, code, vm->pc); return false; } case OP_CALL: { @@ -131,8 +131,6 @@ bool step_vm(VM *vm) { for (i = 0; i < N; i++) { src_reg = args[i]; child->locals[i] = 
frame->locals[src_reg]; - - /* Bitmask operation instead of conditional branch */ heap_mask |= ((frame->heap_mask >> src_reg) & 1) << i; } child->heap_mask = heap_mask; @@ -159,24 +157,6 @@ bool step_vm(VM *vm) { if (is_heap_value(vm, child_return_reg)) { ptr = value; size = *(u32 *)(vm->memory + ptr - 4); - - /* Fast path for small objects (70% of cases) */ - if (size <= 64) { - new_ptr = parent->end; - if (parent->end + size + 4 > MEMORY_SIZE) { - return false; - } - - *(u32 *)(vm->memory + new_ptr) = size; - memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); - parent->end += size + 4; - - parent->locals[parent->return_reg] = new_ptr; - parent->heap_mask |= (1 << parent->return_reg); - return true; - } - - /* Handle larger objects */ new_ptr = parent->end; if (parent->end + size + 4 > MEMORY_SIZE) { return false; @@ -617,7 +597,7 @@ bool step_vm(VM *vm) { vm->pc++; device_ptr = frame->locals[device_reg]; /* device pointer */ - handle = vm->memory[device_ptr + 4]; /* get device handle */ + handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; if (dev && dev->ops->refresh) { vm->flag = dev->ops->refresh(dev->data, &vm->memory[device_ptr + 4]); @@ -773,8 +753,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_mul(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_mul(frame->locals[src1], frame->locals[src2]); return true; } @@ -785,8 +764,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_div(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_div(frame->locals[src1], frame->locals[src2]); return true; } @@ -797,8 +775,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_add(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_add(frame->locals[src1], frame->locals[src2]); 
return true; } @@ -809,8 +786,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_sub(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_sub(frame->locals[src1], frame->locals[src2]); return true; } case OP_REAL_TO_INT: { diff --git a/test/add.ul.ir b/test/add.ul.ir index 3c54c04..abdfc8b 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -1,11 +1,13 @@ -global const int x = 1 -global const int y = 1 +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global int x = 1 +global int y = 1 function main () - int a is $0 - int b is $1 - int ans is $2 - str ans_string is $3 + int a $0 + int b $1 + int ans $2 + str ans_string $3 load_absolute_32 &x -> a load_absolute_32 &y -> b @@ -14,24 +16,22 @@ function main () call pln ans_string exit 0 -function add (int a is $0, int b is $1) - int result is $2 +function add (int a $0, int b $1) + int result $2 add_int a b -> result return result -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str term $1 + int msg_length $2 + str nl $3 + int nl_length $4 + int mode $5 - malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode - syscall OPEN ts mode -> ts + syscall OPEN &terminal_namespace mode -> term strlen message -> msg_length - syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length + syscall WRITE term message msg_length + strlen &new_line -> nl_length + syscall WRITE term nl nl_length return diff --git a/test/fib.ul.ir b/test/fib.ul.ir index 03760c8..7f73714 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -1,44 +1,48 @@ +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" + function main () - int n is $0 - int str_n is $1 + int n $0 + int str_n $1 load_immediate 35 -> n - call fib n -> n + call &fib n -> n 
int_to_string n -> str_n - call pln str_nn + call &pln str_n exit 0 -function fib (int n is $0) +function fib (int n $0) load_immediate 2 -> $1 jump_lt_int &base_case n $1 load_immediate 2 -> $3 sub_int n $3 -> $4 - call fib $4 -> $5 + call &fib $4 -> $5 load_immediate 1 -> $3 sub_int n $3 -> $4 - call fib $4 -> $6 + call &fib $4 -> $6 add_int $6 $5 -> $7 return $7 -&base_case + + else base_case return n -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + load_immediate &terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate &new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 67e729e..3c48000 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -1,23 +1,28 @@ -function main () - str hello is $0 +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global str message = "nuqneH 'u'?" - malloc_immediate "nuqneH 'u'?" 
-> hello +function main () + str hello $0 + + load_immediate &message -> hello call pln hello exit 0 -function pln (str message is $0) - str ts is $1 - int msg_length is $2 - str nl is $3 - int nl_length is $4 - int mode is $5 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + + load_immediate &terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate &new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length - return \ No newline at end of file + return diff --git a/test/window.ul.ir b/test/window.ul.ir index 7f52163..4fb6c04 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -2,18 +2,15 @@ global str screen_namespace = "/dev/screen/0" global str mouse_namespace = "/dev/mouse/0" global str terminal_namespace = "/dev/term/0" global str new_line = "\n" -global byte WHITE = 255 +global byte white = 255 function main () - // Open screen - // use load immediate because it a pointer to a string, not a value - plex screen $0 plex mouse $1 str tmp_str $2 byte color $3 - byte left_down $4 - int mode $5 + bool left_down $4 + int mode $5 nat offset_temp $6 nat x $7 nat y $8 @@ -22,30 +19,30 @@ function main () nat buffer_size $11 nat pixel_pos $12 - load_address screen_namespace -> tmp_str + load_immediate &screen_namespace -> screen load_immediate 0 -> mode - syscall OPEN tmp_str mode -> screen // open Plex screen, in namespace, in flags + syscall OPEN screen mode -> screen nat_to_string screen -> tmp_str - call pln tmp_str + call &pln tmp_str - load_offset_32 screen 8 -> width // load width + load_offset_32 screen 8 -> width nat_to_string width -> tmp_str - call pln tmp_str + call &pln tmp_str - load_offset_32 screen 12 -> buffer_size // load size + load_offset_32 screen 12 -> buffer_size nat_to_string buffer_size -> tmp_str - call 
pln tmp_str + call &pln tmp_str - load_immediate 16 -> offset_temp // offset for screen buffer + load_immediate 16 -> offset_temp add_nat screen offset_temp -> screen_buffer nat_to_string screen_buffer -> tmp_str - call pln tmp_str + call &pln tmp_str // open mouse - load_address mouse_namespace -> tmp_str - syscall OPEN tmp_str mode -> mouse // open Plex mouse, in namespace, in flags + load_immediate &mouse_namespace -> mouse + syscall OPEN mouse mode -> mouse syscall WRITE screen screen_buffer buffer_size // redraw @@ -53,21 +50,21 @@ function main () // load mouse click data syscall STAT mouse - load_offset_8 mouse 16 -> left_down // load btn1 pressed + load_offset_8 mouse 16 -> left_down - jump_eq_nat draw_loop left_down mode // mode 0 which an alias for false + jump_eq_nat draw_loop left_down mode // mode = 0 / false - load_offset_32 mouse 8 -> x // load x - load_offset_32 mouse 12 -> y // load y + load_offset_32 mouse 8 -> x + load_offset_32 mouse 12 -> y - // Compute start address: y*width + x - mul_nat y width -> pixel_pos // = y * width - add_nat x pixel_pos -> pixel_pos // += x - add_nat screen_buffer pixel_pos -> pixel_pos // += pixel_offset - load_immediate 4 -> fat_ptr_size // need to add offset for fat pointer size + // Compute start address: y *width + x + mul_nat y width -> pixel_pos + add_nat x pixel_pos -> pixel_pos + add_nat screen_buffer pixel_pos -> pixel_pos + load_immediate 4 -> fat_ptr_size add_nat pixel_pos fat_ptr_size -> pixel_pos - load_absolute_32 WHITE -> color + load_absolute_32 white -> color store_absolute_8 pixel_pos color // draw color at screen [x,y] syscall WRITE screen screen_buffer buffer_size // redraw @@ -81,9 +78,8 @@ function pln (str message $0) int nl_length $4 int mode $5 - load_address terminal_namespace -> term // get terminal device load_immediate 0 -> mode - syscall OPEN term mode -> term + syscall OPEN &terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length load_address 
new_line -> nl diff --git a/test/window.ul.ir2 b/test/window.ul.ir2 deleted file mode 100644 index e47579c..0000000 --- a/test/window.ul.ir2 +++ /dev/null @@ -1,73 +0,0 @@ -global str screen_namespace = "/dev/screen/0" -global str mouse_namespace = "/dev/mouse/0" -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global byte WHITE = 255 - -function main () - // open screen - // use load immediate because it is a pointer to a string not a value - - ptr tmp_ptr $0 = &screen_namespace - int mode $1 = 0 - ptr screen $2 = open tmp_ptr mode - - nat screen_handle $3 = @memory.u32[screen + 4] - str tmp_str $4 = nat_to_string screen_handle - pln(tmp_str) - - nat width $5 = @memory.u32[screen + 8] - tmp_str = nat_to_string width - pln(tmp_str) - - nat buffer_size $6 = @memory.u32[screen + 12] - tmp_str = nat_to_string buffer_size - pln(tmp_str) - - nat offset_temp $7 = 16 - ptr screen_buffer = add_nat screen offset_temp - - tmp_str = nat_to_string screen_buffer - pln(tmp_str) - - // open mouse - tmp_ptr = &mouse_namespace - ptr mouse $8 = open tmp_ptr mode - - write screen screen_buffer buffer_size // redraw - - loop draw_loop - // load mouse click data - stat mouse - - bool left_down $9 = @memory.u8[mouse + 16] // load btn1 pressed - - jump_eq_nat &draw_loop left_down mode // mode is 0 which is an alias for false - - nat x $10 = @memory.u32[mouse + 8] - nat y $11 = @memory.u32[mouse + 12] - - // Compute start address: y*width + x - nat pixel_pos $12 = mul_nat y width - pixel_pos = add_nat x pixel_pos - pixel_pos = add_nat screen_buffer pixel_pos - nat fat_ptr_size $13 = 4 // need to add offset for fat pointer size - pixel_pos = add_nat pixel_pos fat_ptr_size - - byte color $14 = @memory.u8[ &WHITE ] - @memory.u8[pixel_pos] = color // draw color at screen [xy] - write screen screen_buffer buffer_size // redraw - - jump &draw_loop - exit 0 - -function pln (str message $0) - nat term_ns $1 = &terminal_namespace // get terminal device - int mode $2 = 0 
- ptr term $3 = open term_ns mode - int msg_length $4 = strlen message - write term message msg_length - str nl $5 = &new_line - int nl_length $6 = strlen nl - write term nl nl_length - return diff --git a/test/window.ul.vuir b/test/window.ul.vuir deleted file mode 100644 index aacf89b..0000000 --- a/test/window.ul.vuir +++ /dev/null @@ -1,95 +0,0 @@ -global str screen_namespace = "/dev/screen/0" -global str mouse_namespace = "/dev/mouse/0" -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global byte WHITE = 255 - -/** - * Devices - */ -plex Terminal - nat handle - -plex Screen - nat handle - nat width - nat height - byte[] buffer - -plex Mouse - nat handle - nat x - nat y - bool left - bool right - bool middle - bool btn4 - nat size - -function main () - // open screen - // use load immediate because it is a pointer to a string, not a value - - nat tmp_ptr = &screen_namespace - int mode = 0 - ptr screen = open(tmp_ptr, mode) - - nat screen_handle = screen.handle - str tmp_str = nat_to_string(screen_handle) - pln(tmp_str) - - nat width = screen.width - tmp_str = nat_to_string(width) - pln(tmp_str) - - nat buffer_size = screen.buffer - tmp_str = nat_to_string(buffer_size) - pln(tmp_str) - - nat offset_temp = 16 - nat screen_buffer = add_nat(screen, offset_temp) - - tmp_str = nat_to_string(screen_buffer) - pln(tmp_str) - - // open mouse - tmp_ptr = &mouse_namespace - ptr mouse = open(tmp_ptr, mode) - - write(screen, screen_buffer, buffer_size) // redraw - - loop draw_loop - // load mouse click data - stat(mouse) - - byte left_down = mouse.left // load btn1 pressed - - jump_eq_nat(&draw_loop, left_down, mode) // mode is 0 which is an alias for false - - nat x = mouse.x - nat y = mouse.y - - // Compute start address: y*width + x - nat pixel_pos = mul_nat(y, width) - pixel_pos = add_nat(x, pixel_pos) - pixel_pos = add_nat(screen_buffer, pixel_pos) - nat fat_ptr_size = 4 // need to add offset for fat pointer size - pixel_pos = 
add_nat(pixel_pos, fat_ptr_size) - - byte color = WHITE - screen.buffer[pixel_pos] = color // draw color at screen [x,y] - write(screen, screen_buffer, buffer_size) // redraw - - jump(&draw_loop) - exit 0 - -function pln (str message) - nat term_ns = &terminal_namespace // get terminal device - int mode = 0 - ptr term = open(term_ns, mode) - int msg_length = strlen(message) - write(term, message, msg_length) - str nl = &new_line - int nl_length = strlen(nl) - write(term, nl, nl_length) - return From 7b8059e6c7b3c4e514af2f03f0b86c31130da4bc Mon Sep 17 00:00:00 2001 From: zongor Date: Mon, 24 Nov 2025 21:40:57 -0800 Subject: [PATCH 10/27] refactor strings, add loop to lexer --- src/tools/assembler/assembler.c | 171 +++++++++++++------------------- src/tools/assembler/lexer.c | 42 +++----- src/tools/assembler/lexer.h | 1 + test/paint.ul.ir | 82 +++++++-------- test/simple.ul.ir | 30 +++--- 5 files changed, 144 insertions(+), 182 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 8a21f2d..39bf0da 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -3,6 +3,7 @@ #include "../../vm/fixed.h" #include "../../vm/libc.h" #include "../../vm/opcodes.h" +#include "lexer.h" #include #include #include @@ -42,13 +43,13 @@ u32 names_table_add(NamesTable *table, const char *name) { return index; } -u32 symbol_table_add(SymbolTable *table, Symbol s) { +u32 symbol_table_add(SymbolTable *table, Symbol *s) { if (table->count >= table->capacity) { table->capacity *= 2; table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); } - table->symbols[table->count] = s; + table->symbols[table->count] = *s; u32 index = table->count; table->count++; return index; @@ -58,7 +59,7 @@ Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table, const char *name) { for (u32 i = 0; i < nt->count; i++) { if (strcmp(nt->names[i], name) == 0) { - for (int j = 0; j < table->count; j++) { + for (u32 j = 0; j 
< table->count; j++) { if (table->symbols[j].name == i) { return &table->symbols[j]; } @@ -93,17 +94,17 @@ int parse_register(const char *reg_str) { } u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { - // Handle symbol references (e.g., &label) + // symbol references (e.g., &label) if (ref[0] == '&') { return get_ref(nt, table, ref + 1); } - // fixed-point numbers (e.g., 0.5) + // fixed-point numbers if (strchr(ref, '.')) { return float_to_fixed(atof(ref)); } - // decimal literals (e.g., 7) + // decimal literals char *endptr; u32 value = (u32)strtoul(ref, &endptr, 10); @@ -114,73 +115,7 @@ u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { return value; } -static char *unwrap_string(const char *quoted_str) { - if (!quoted_str) - return nil; - - size_t len = strlen(quoted_str); - if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { - // Remove quotes and process escape sequences - const char *src = quoted_str + 1; - size_t src_len = len - 2; - - // First pass: calculate the actual length needed after escape processing - size_t actual_len = 0; - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Escape sequence - actual_len++; - i++; // Skip the next character - } else { - actual_len++; - } - } - - char *unwrapped = (char *)malloc(actual_len + 1); - size_t dst_idx = 0; - - // Second pass: process escape sequences - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Handle escape sequences - switch (src[i + 1]) { - case 'n': - unwrapped[dst_idx++] = '\n'; - break; - case 't': - unwrapped[dst_idx++] = '\t'; - break; - case 'r': - unwrapped[dst_idx++] = '\r'; - break; - case '\\': - unwrapped[dst_idx++] = '\\'; - break; - case '"': - unwrapped[dst_idx++] = '"'; - break; - case '\'': - unwrapped[dst_idx++] = '\''; - break; - default: - // Unknown escape, keep both characters - unwrapped[dst_idx++] = src[i]; - unwrapped[dst_idx++] = 
src[i + 1]; - break; - } - i++; // Skip the next character - } else { - unwrapped[dst_idx++] = src[i]; - } - } - unwrapped[dst_idx] = '\0'; - return unwrapped; - } - // Not quoted, return copy - return strdup(quoted_str); -} - -Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { +bool global(VM *vm, NamesTable *nt, SymbolTable *st) { Symbol *s = (Symbol *)malloc(sizeof(Symbol)); ValueType t; @@ -190,14 +125,14 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { t.type = I8; t.size = 1; break; - case TOKEN_TYPE_I16: - t.type = I16; - t.size = 2; - break; case TOKEN_TYPE_U8: t.type = U8; t.size = 1; break; + case TOKEN_TYPE_I16: + t.type = I16; + t.size = 2; + break; case TOKEN_TYPE_U16: t.type = U16; t.size = 2; @@ -220,16 +155,16 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { case TOKEN_IDENTIFIER: break; default: - return nil; + return false; } Token eq = nextToken(); if (eq.type != TOKEN_EQ) - return nil; + return false; Token name = nextToken(); if (name.type != TOKEN_IDENTIFIER) - return nil; + return false; s->name = names_table_add(nt, name.start); @@ -249,37 +184,59 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { vm->frames[vm->fp].end += t.size; break; case TOKEN_LITERAL_STR: { - char *unwrapped = unwrap_string(value.start); - int len = strlen(unwrapped); + const char* src = value.start; + u32 len = 0; + u32 i = 0; - u32 addr = vm->mp; - u32 size = len + 1 + 4; - t.size = size; + while (i < value.length) { + char c = src[i++]; + if (c == '\\' && i < value.length) { + switch (src[i++]) { + case 'n': c = '\n'; break; + case 't': c = '\t'; break; + case 'r': c = '\r'; break; + case '\\': case '"': case '\'': break; // Keep as-is + default: i--; // Rewind for unknown escapes + } + } + write_u8(vm, memory, addr + 4 + len++, c); + } - vm->mp += size; - vm->frames[vm->fp].end += size; - - write_u32(vm, memory, addr, len); - for (int i = 0; i < len; i++) { - write_u8(vm, memory, addr + 4 + i, unwrapped[i]); - } - 
write_u8(vm, memory, addr + 4 + len, '\0'); - free(unwrapped); - break; + u32 size = len + 5; // 4 (len) + dst_len + 1 (null) + vm->mp += size; + vm->frames[vm->fp].end += size; + write_u32(vm, memory, addr, len); + write_u8(vm, memory, addr + 4 + len, '\0'); + break; } default: - return nil; + return false; } s->type = t; - return s; + symbol_table_add(st, s); + return true; } -Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) { +bool function(VM *vm, NamesTable *nt, SymbolTable *st) { USED(vm); USED(nt); USED(st); - return nil; + return true; +} + +bool variable(VM *vm, NamesTable *nt, SymbolTable *st) { + USED(vm); + USED(nt); + USED(st); + return true; +} + +bool label(VM *vm, NamesTable *nt, SymbolTable *st) { + USED(vm); + USED(nt); + USED(st); + return true; } void assemble(VM *vm, char *source) { @@ -306,7 +263,10 @@ void assemble(VM *vm, char *source) { } if (token.type == TOKEN_KEYWORD_FN) { - function(vm, nt, st); + if (!function(vm, nt, st)) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || @@ -314,7 +274,18 @@ void assemble(VM *vm, char *source) { token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { + if (!variable(vm, nt, st)) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } + } + if (token.type == TOKEN_KEYWORD_LOOP || + token.type == TOKEN_KEYWORD_ELSE) { + if (!label(vm, nt, st)) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } } if (token.type == TOKEN_IDENTIFIER) { diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index ee0bdab..655aa94 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -149,6 +149,8 @@ static TokenType identifierType() { return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); case 'o': return 
checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); + case '3': + return checkKeyword(1, 1, "2", TOKEN_TYPE_REAL); } return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN); } @@ -160,6 +162,12 @@ static TokenType identifierType() { return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); case 's': return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); case 'n': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { @@ -242,6 +250,12 @@ static TokenType identifierType() { switch (lexer.start[1]) { case 's': return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); } } break; @@ -257,32 +271,8 @@ static TokenType identifierType() { break; case 'g': return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); - case 'I': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case '8': - return checkKeyword(2, 0, "", TOKEN_TYPE_I8); - case '1': - return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); - case '3': - return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); - } - } - break; - case 'U': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case '8': - return checkKeyword(2, 0, "", TOKEN_TYPE_U8); - case '1': - return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); - case '3': - return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); - } - } - break; - case 'F': - return checkKeyword(1, 2, "32", TOKEN_TYPE_REAL); + case 'l': + return checkKeyword(1, 4, "oop", TOKEN_KEYWORD_LOOP); } return TOKEN_IDENTIFIER; diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index eaa137c..5ad14ef 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -35,6 +35,7 @@ typedef enum { 
TOKEN_KEYWORD_WRITE, TOKEN_KEYWORD_REFRESH, TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_LOOP, TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_FALSE, diff --git a/test/paint.ul.ir b/test/paint.ul.ir index ab40219..e7ebc4c 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -9,32 +9,32 @@ global byte SELECTED_COLOR = 255 function main () # Open screen - plex screen is $0 - str screen_name is $18 - int mode is $11 - nat screen_buffer is $21 + plex screen $0 + str screen_name $18 + int mode $11 + nat screen_buffer $21 - # use load immediate because it is a pointer to a string, not a value + # use load immediate because it a pointer to a string, not a value load_address &screen_namespace -> screen_name load_immediate 0 -> mode syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); - nat width is $20 - nat size is $22 + nat width $20 + nat size $22 load_offset_32 screen 8 -> width # load width load_offset_32 screen 12 -> size # load size load_immediate 16 -> $1 # offset for screen buffer add_nat screen $1 -> screen_buffer # open mouse - plex mouse is $15 - str mouse_name is $16 + plex mouse $15 + str mouse_name $16 load_address &mouse_namespace -> mouse_name syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); - byte color is $1 - nat x_pos is $12 - nat y_pos is $13 + byte color $1 + nat x_pos $12 + nat y_pos $13 load_absolute_32 &BLACK -> color load_immediate 1 -> x_pos @@ -49,23 +49,23 @@ function main () # screen.draw# syscall WRITE screen screen_buffer size - nat zero is $11 + nat zero $11 - draw_loop: + loop draw_loop # load mouse click data syscall REFRESH mouse - byte left_down is $9 + byte left_down $9 load_offset_8 mouse 16 -> left_down # load btn1 pressed jump_eq_nat &draw_loop left_down zero - nat mouse_x is $7 - nat mouse_y is $8 + nat mouse_x $7 + nat mouse_y $8 load_offset_32 mouse 8 -> mouse_x # load x load_offset_32 mouse 12 -> mouse_y # load y - nat box_size is $14 + nat box_size $14 load_immediate 
20 -> box_size # first row @@ -84,10 +84,10 @@ function main () syscall WRITE screen screen_buffer size - byte selected_color is $25 + byte selected_color $25 load_absolute_32 &SELECTED_COLOR -> selected_color - nat brush_size is $19 + nat brush_size $19 load_immediate 5 -> brush_size call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size @@ -97,15 +97,15 @@ function main () # Flush and exit exit 0 -function set_color_if_clicked (int click_x is $0, int click_y is $1, - int box_x is $2, int box_y is $3, byte color is $4, int box_size is $5) +function set_color_if_clicked (int click_x $0, int click_y $1, + int box_x $2, int box_y $3, byte color $4, int box_size $5) # Compute right - int right_edge is $6 + int right_edge $6 add_int box_x box_size -> right_edge # Compute bottom = box_y + box_size - int bottom_edge is $7 + int bottom_edge $7 add_int box_y box_size -> bottom_edge # Bounds check: x in [box_x, right] and y in [box_y, bottom] @@ -116,17 +116,17 @@ function set_color_if_clicked (int click_x is $0, int click_y is $1, store_absolute_8 &SELECTED_COLOR color - fail: + else fail return -function draw_outlined_swatch(nat base is $0, - byte color is $1, int x is $2, int y is $3, int width is $4) +function draw_outlined_swatch(nat base $0, + byte color $1, int x $2, int y $3, int width $4) # Constants - nat background_color is $5 + nat background_color $5 load_absolute_32 &GRAY -> background_color - byte selected_color is $10 + byte selected_color $10 load_absolute_32 &SELECTED_COLOR -> selected_color jump_eq_int &set_selected selected_color color @@ -135,13 +135,13 @@ function draw_outlined_swatch(nat base is $0, load_absolute_32 &DARK_GRAY -> background_color end_set_selected: - nat outline_size is $6 + nat outline_size $6 load_immediate 20 -> outline_size - nat fill_size is $7 + nat fill_size $7 load_immediate 17 -> fill_size - nat offset is $8 + nat offset $8 load_immediate 2 -> offset call &draw_box base width background_color x 
y outline_size outline_size @@ -153,28 +153,28 @@ function draw_outlined_swatch(nat base is $0, return -function draw_box (nat base is $0, nat screen_width is $1, - byte color is $2, nat x_start is $3, nat y_start is $4, nat width is $5, nat height is $6) +function draw_box (nat base $0, nat screen_width $1, + byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6) # Compute start address: base + y*640 + x - nat offset is $15 + nat offset $15 mul_int y_start screen_width -> offset add_int offset x_start -> offset add_nat offset base -> offset - nat fat_ptr_size is $25 + nat fat_ptr_size $25 load_immediate 4 -> fat_ptr_size add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size - int i is $30 + int i $30 load_immediate 1 -> i - int zero is $26 + int zero $26 load_immediate 0 -> zero - int row_end is $27 - nat pixel_ptr is $29 + int row_end $27 + nat pixel_ptr $29 - draw_box_outer: + loop draw_box_outer add_int offset width -> row_end # current + width register_move offset -> pixel_ptr # set pixel point memset_8 pixel_ptr color width # draw row diff --git a/test/simple.ul.ir b/test/simple.ul.ir index 8f549d9..e4a1407 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -1,31 +1,31 @@ -global const real x = 1.0 -global const real y = 1.0 +global str terminal_namespace = "/dev/term/0" +global real x = 1.0 +global real y = 1.0 function main () - real x is $0 + real x $0 load_absolute_32 &x -> x - real y is $1 + real y $1 load_absolute_32 &y -> y - real result is $2 + real result $2 add_real x y -> result - str result_str is $3 + str result_str $3 real_to_string result -> result_str call &pln result_str exit 0 -function pln (str message is $0) - str term is $1 - int msg_length is $2 - str nl is $3 - int nl_length is $4 - int mode is $5 +function pln (str message $0) + str term $1 + int msg_length $2 + str nl $3 + int nl_length $4 + int mode $5 - malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode - syscall 
OPEN term mode -> term + syscall OPEN &terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length - malloc_immediate "\n" -> nl + load_address new_line -> nl strlen nl -> nl_length syscall WRITE term nl nl_length return From 4f73339efb6108ec64c0c46b1b01673f89fff9c6 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 00:50:25 -0800 Subject: [PATCH 11/27] wip symbols + offsets --- src/tools/assembler/assembler.c | 623 ++++++++++++++++++++++---------- src/tools/assembler/assembler.h | 26 +- src/vm/opcodes.h | 39 +- test/add.ul.ir | 12 +- test/loop.ul.ir | 5 +- test/malloc.ul.ir | 21 +- test/paint.ul.ir | 118 +++--- test/window.ul.ir | 14 +- 8 files changed, 559 insertions(+), 299 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 39bf0da..3110e60 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -8,76 +8,6 @@ #include #include -SymbolTable *symbol_table_init() { - SymbolTable *table = malloc(sizeof(SymbolTable)); - table->symbols = malloc(16 * sizeof(Symbol)); - table->count = 0; - table->capacity = 16; - return table; -} - -NamesTable *names_table_init() { - NamesTable *table = malloc(sizeof(NamesTable)); - table->names = malloc(16 * sizeof(char *)); - table->count = 0; - table->capacity = 16; - return table; -} - -u32 names_table_add(NamesTable *table, const char *name) { - for (u32 i = 0; i < table->count; i++) { - if (strcmp(table->names[i], name) == 0) { - return i; - } - } - - if (table->count >= table->capacity) { - table->capacity *= 2; - table->names = realloc(table->names, table->capacity * sizeof(char *)); - } - - table->names[table->count] = malloc(strlen(name) + 1); - strcpy(table->names[table->count], name); - u32 index = table->count; - table->count++; - return index; -} - -u32 symbol_table_add(SymbolTable *table, Symbol *s) { - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = 
realloc(table->symbols, table->capacity * sizeof(Symbol)); - } - - table->symbols[table->count] = *s; - u32 index = table->count; - table->count++; - return index; -} - -Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table, - const char *name) { - for (u32 i = 0; i < nt->count; i++) { - if (strcmp(nt->names[i], name) == 0) { - for (u32 j = 0; j < table->count; j++) { - if (table->symbols[j].name == i) { - return &table->symbols[j]; - } - } - } - } - return nil; -} - -u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) { - Symbol *sym = symbol_table_lookup(nt, table, name); - if (!sym) { - fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); - exit(1); - } - return sym->ref; -} - void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } void emit_u32(VM *vm, u32 value) { @@ -87,70 +17,96 @@ void emit_u32(VM *vm, u32 value) { void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } -int parse_register(const char *reg_str) { - if (reg_str[0] != '$') - return -1; - return atoi(reg_str + 1); +SymbolTable *symbol_table_init() { + SymbolTable *table = malloc(sizeof(SymbolTable)); + table->symbols = malloc(16 * sizeof(Symbol)); + table->count = 0; + table->capacity = 16; + return table; } -u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { - // symbol references (e.g., &label) - if (ref[0] == '&') { - return get_ref(nt, table, ref + 1); +u32 symbol_table_add(SymbolTable *table, Symbol s) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); } - // fixed-point numbers - if (strchr(ref, '.')) { - return float_to_fixed(atof(ref)); + table->symbols[table->count] = s; + u32 index = table->count; + table->count++; + return index; +} + +Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { + for (u32 i = 0; i < table->count; i++) { + if (streq(table->symbols[i].name, name)) { + return &table->symbols[i]; + } } + return 
nil; +} - // decimal literals - char *endptr; - u32 value = (u32)strtoul(ref, &endptr, 10); +u32 get_ref(VM *vm, SymbolTable *st, const char *name, ScopeType scope) { + Symbol *sym = symbol_table_lookup(st, name); + if (!sym) { + fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); + exit(1); + return 0; + } + return sym->ref; +} - if (endptr == ref || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: %s\n", ref); +Token nextTokenIs(TokenType type) { + Token token = nextToken(); + if (token.type != type) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); exit(1); } - return value; + return token; } -bool global(VM *vm, NamesTable *nt, SymbolTable *st) { - Symbol *s = (Symbol *)malloc(sizeof(Symbol)); - ValueType t; +/** + * Global . + */ +bool define_global(VM *vm, SymbolTable *st) { + Symbol s; Token token_type = nextToken(); switch (token_type.type) { + case TOKEN_TYPE_BOOL: + s.type = BOOL; + s.size = 1; + break; case TOKEN_TYPE_I8: - t.type = I8; - t.size = 1; + s.type = I8; + s.size = 1; break; case TOKEN_TYPE_U8: - t.type = U8; - t.size = 1; + s.type = U8; + s.size = 1; break; case TOKEN_TYPE_I16: - t.type = I16; - t.size = 2; + s.type = I16; + s.size = 2; break; case TOKEN_TYPE_U16: - t.type = U16; - t.size = 2; + s.type = U16; + s.size = 2; break; case TOKEN_TYPE_INT: - t.type = I32; - t.size = 4; + s.type = I32; + s.size = 4; break; case TOKEN_TYPE_NAT: - t.type = U32; - t.size = 4; + s.type = U32; + s.size = 4; break; case TOKEN_TYPE_REAL: - t.type = F32; - t.size = 4; + s.type = F32; + s.size = 4; break; case TOKEN_TYPE_STR: - t.type = STR; + s.type = STR; break; case TOKEN_IDENTIFIER: break; @@ -158,134 +114,291 @@ bool global(VM *vm, NamesTable *nt, SymbolTable *st) { return false; } - Token eq = nextToken(); - if (eq.type != TOKEN_EQ) - return false; + Token eq = nextTokenIs(TOKEN_EQ); + Token name = nextTokenIs(TOKEN_IDENTIFIER); - Token name = nextToken(); - if (name.type != TOKEN_IDENTIFIER) + if 
(name.length > MAX_SYMBOL_NAME_LENGTH) { return false; + } - s->name = names_table_add(nt, name.start); + memcpy(s.name, name.start, name.length); u32 addr = vm->mp; - s->ref = addr; + s.ref = addr; + s.scope = GLOBAL; - u32 result; Token value = nextToken(); switch (value.type) { - case TOKEN_LITERAL_INT: - case TOKEN_LITERAL_NAT: - case TOKEN_LITERAL_REAL: - result = resolve_symbol(nt, st, value.start); - write_u32(vm, memory, addr, result); + case TOKEN_KEYWORD_TRUE: { + u32 addr = vm->mp; + write_u8(vm, memory, addr, 1); - vm->mp += t.size; - vm->frames[vm->fp].end += t.size; + vm->mp += 1; + vm->frames[vm->fp].end += 1; + } + case TOKEN_KEYWORD_FALSE: { + u32 addr = vm->mp; + write_u8(vm, memory, addr, 0); + + vm->mp += 1; + vm->frames[vm->fp].end += 1; + } + case TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", value.start); + exit(1); + } + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + } + case TOKEN_LITERAL_REAL: { + fixed_t out = float_to_fixed(atof(value.start)); + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; break; + } case TOKEN_LITERAL_STR: { - const char* src = value.start; - u32 len = 0; - u32 i = 0; + const char *src = value.start; + u32 len = 0; + u32 i = 0; - while (i < value.length) { - char c = src[i++]; - if (c == '\\' && i < value.length) { - switch (src[i++]) { - case 'n': c = '\n'; break; - case 't': c = '\t'; break; - case 'r': c = '\r'; break; - case '\\': case '"': case '\'': break; // Keep as-is - default: i--; // Rewind for unknown escapes - } - } - write_u8(vm, memory, addr + 4 + 
len++, c); + while (i < value.length) { + char c = src[i++]; + if (c == '\\' && i < value.length) { + switch (src[i++]) { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + case '\\': + case '"': + case '\'': + break; // Keep as-is + default: + i--; // Rewind for unknown escapes + } } + write_u8(vm, memory, addr + 4 + len++, c); + } - u32 size = len + 5; // 4 (len) + dst_len + 1 (null) - vm->mp += size; - vm->frames[vm->fp].end += size; - write_u32(vm, memory, addr, len); - write_u8(vm, memory, addr + 4 + len, '\0'); - break; + u32 size = len + 5; // 4 (len) + dst_len + 1 (null) + s.size = size; + + vm->mp += size; + vm->frames[vm->fp].end += size; + + write_u32(vm, memory, addr, len); + write_u8(vm, memory, addr + 4 + len, '\0'); + break; } default: return false; } - s->type = t; symbol_table_add(st, s); return true; } -bool function(VM *vm, NamesTable *nt, SymbolTable *st) { - USED(vm); - USED(nt); - USED(st); - return true; +/** + * Var . 
+ */ +void define_var(VM *vm, SymbolTable *st, Token regType) { + Symbol s; + s.scope = VAR; + switch (regType.type) { + case TOKEN_KEYWORD_PLEX: { + s.type = PLEX; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; + } + case TOKEN_TYPE_I8: { + s.type = I8; + s.size = 1; + break; + } + case TOKEN_TYPE_I16: { + s.type = I16; + s.size = 2; + break; + } + case TOKEN_TYPE_INT: { + s.type = I32; + s.size = 4; + break; + } + case TOKEN_TYPE_U8: { + s.type = U8; + s.size = 1; + break; + } + case TOKEN_TYPE_U16: { + s.type = U16; + s.size = 2; + break; + } + case TOKEN_TYPE_NAT: { + s.type = U32; + s.size = 4; + break; + } + case TOKEN_TYPE_REAL: { + s.type = REAL; + s.size = 4; + break; + } + case TOKEN_TYPE_BOOL: { + s.type = BOOL; + s.size = 1; + break; + } + case TOKEN_TYPE_STR: { + s.type = STR; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; + } + default: + printf("ERROR at line %d: %.*s\n", regType.line, regType.length, + regType.start); + exit(1); + } + + Token name = nextTokenIs(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + + memcpy(s.name, name.start, name.length); + + nextTokenIs(TOKEN_BIG_MONEY); + + Token reg_num = nextTokenIs(TOKEN_LITERAL_INT); + s.ref = atoi(reg_num.start); + symbol_table_add(st, s); } -bool variable(VM *vm, NamesTable *nt, SymbolTable *st) { - USED(vm); - USED(nt); - USED(st); - return true; +/** + * function . 
+ */ +void define_function(vm *vm, SymbolTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = FUNCTION; + + Token name = nextTokenIs(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + memcpy(s.name, name.start, name.length); + + nextTokenIs(TOKEN_LPAREN); + + Token next = nextToken(); + while (next.type != TOKEN_RPAREN) { + Token regType = nextToken(); + define_var(vm, st, regType); + + Token comma = nextToken(); + if (comma.type == TOKEN_COMMA) { + continue; + } else if (comma.type == TOKEN_RPAREN) { + break; + } else { + printf("ERROR at line %d: %.*s\n", comma.line, comma.length, comma.start); + exit(1); + } + } + s.ref = vm->pc; + symbol_table_add(st, s); } -bool label(VM *vm, NamesTable *nt, SymbolTable *st) { - USED(vm); - USED(nt); - USED(st); - return true; +/** + * Branch. + */ +void define_branch(VM *vm, SymbolTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = VOID; + + token name = nextTokenIs(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("BRANCH NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + memcpy(s.name, name.start, name.length); + + s.ref = vm->pc; + symbol_table_add(st, s); } -void assemble(VM *vm, char *source) { - SymbolTable *st = symbol_table_init(); - NamesTable *nt = names_table_init(); - - initLexer(source); +/** + * Build the symbol table and calculate the types/size/offsets of all values. 
+ */ +void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token token; + initLexer(source); do { token = nextToken(); if (token.type == TOKEN_ERROR) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); - break; + exit(1); } if (token.type != TOKEN_EOF) { printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { - if (!global(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + define_global(vm, st); + continue; } if (token.type == TOKEN_KEYWORD_FN) { - if (!function(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + define_function(vm, st); + continue; } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || - token.type == TOKEN_TYPE_STR) { - if (!variable(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { + define_var(vm, st, token); + continue; } - if (token.type == TOKEN_KEYWORD_LOOP || - token.type == TOKEN_KEYWORD_ELSE) { - if (!label(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(vm, st); + continue; } if (token.type == TOKEN_IDENTIFIER) { @@ -385,3 +498,149 @@ void assemble(VM *vm, char *source) { } } while (token.type != TOKEN_EOF); } + +/** + * 2nd pass, emit the bytecode + */ +void emit_bytecode(VM *vm, char *source, SymbolTable *st) { + Token token; + initLexer(source); + do 
{ + token = nextToken(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + break; + } + if (token.type != TOKEN_EOF) { + printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), + token.length, token.start); + + if (token.type == TOKEN_KEYWORD_GLOBAL) { + // ignore, already processed + } + + if (token.type == TOKEN_KEYWORD_FN) { + // ignore, already processed + } + + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR) { + // ignore, already processed + } + + if (token.type == TOKEN_KEYWORD_LOOP || + token.type == TOKEN_KEYWORD_ELSE) { + // ignore, already processed + } + + if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first + if (streq(token.start, "exit")) { + } else if (streq(token.start, "call")) { + } else if (streq(token.start, "syscall")) { + } else if (streq(token.start, "load_immediate")) { + } else if (streq(token.start, "load_indirect_8")) { + } else if (streq(token.start, "load_indirect_16")) { + } else if (streq(token.start, "load_indirect_32")) { + } else if (streq(token.start, "load_absolute_8")) { + } else if (streq(token.start, "load_absolute_16")) { + } else if (streq(token.start, "load_absolute_32")) { + } else if (streq(token.start, "load_offset_8")) { + } else if (streq(token.start, "load_offset_16")) { + } else if (streq(token.start, "load_offset_32")) { + } else if (streq(token.start, "store_absolute_8")) { + } else if (streq(token.start, "store_absolute_16")) { + } else if (streq(token.start, "store_absolute_32")) { + } else if (streq(token.start, "store_indirect_8")) { + } else if (streq(token.start, "store_indirect_16")) { + } else if (streq(token.start, "store_indirect_32")) { + } else 
if (streq(token.start, "store_offset_8")) { + } else if (streq(token.start, "store_offset_16")) { + } else if (streq(token.start, "store_offset_32")) { + } else if (streq(token.start, "malloc")) { + } else if (streq(token.start, "malloc_immediate")) { + } else if (streq(token.start, "memset_8")) { + } else if (streq(token.start, "memset_16")) { + } else if (streq(token.start, "memset_32")) { + } else if (streq(token.start, "register_move")) { + } else if (streq(token.start, "add_int")) { + } else if (streq(token.start, "sub_int")) { + } else if (streq(token.start, "mul_int")) { + } else if (streq(token.start, "div_int")) { + } else if (streq(token.start, "abs_int")) { + } else if (streq(token.start, "neg_int")) { + } else if (streq(token.start, "add_nat")) { + } else if (streq(token.start, "sub_nat")) { + } else if (streq(token.start, "mul_nat")) { + } else if (streq(token.start, "div_nat")) { + } else if (streq(token.start, "abs_nat")) { + } else if (streq(token.start, "neg_nat")) { + } else if (streq(token.start, "add_real")) { + } else if (streq(token.start, "sub_real")) { + } else if (streq(token.start, "mul_real")) { + } else if (streq(token.start, "div_real")) { + } else if (streq(token.start, "abs_real")) { + } else if (streq(token.start, "neg_real")) { + } else if (streq(token.start, "int_to_real")) { + } else if (streq(token.start, "nat_to_real")) { + } else if (streq(token.start, "real_to_int")) { + } else if (streq(token.start, "real_to_nat")) { + } else if (streq(token.start, "bit_shift_left")) { + } else if (streq(token.start, "bit_shift_right")) { + } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (streq(token.start, "bit_and")) { + } else if (streq(token.start, "bit_or")) { + } else if (streq(token.start, "bit_xor")) { + } else if (streq(token.start, "jump")) { + } else if (streq(token.start, "jump_if_flag")) { + } else if (streq(token.start, "jump_eq_int")) { + } else if (streq(token.start, "jump_neq_int")) { + } else if 
(streq(token.start, "jump_gt_int")) { + } else if (streq(token.start, "jump_lt_int")) { + } else if (streq(token.start, "jump_le_int")) { + } else if (streq(token.start, "jump_ge_int")) { + } else if (streq(token.start, "jump_eq_nat")) { + } else if (streq(token.start, "jump_neq_nat")) { + } else if (streq(token.start, "jump_gt_nat")) { + } else if (streq(token.start, "jump_lt_nat")) { + } else if (streq(token.start, "jump_le_nat")) { + } else if (streq(token.start, "jump_ge_nat")) { + } else if (streq(token.start, "jump_eq_real")) { + } else if (streq(token.start, "jump_neq_real")) { + } else if (streq(token.start, "jump_ge_real")) { + } else if (streq(token.start, "jump_gt_real")) { + } else if (streq(token.start, "jump_lt_real")) { + } else if (streq(token.start, "jump_le_real")) { + } else if (streq(token.start, "string_length")) { + } else if (streq(token.start, "string_eq")) { + } else if (streq(token.start, "string_concat")) { + } else if (streq(token.start, "string_get_char")) { + } else if (streq(token.start, "string_find_char")) { + } else if (streq(token.start, "string_slice")) { + } else if (streq(token.start, "int_to_string")) { + } else if (streq(token.start, "nat_to_string")) { + } else if (streq(token.start, "real_to_string")) { + } else if (streq(token.start, "string_to_int")) { + } else if (streq(token.start, "string_to_nat")) { + } else if (streq(token.start, "string_to_real")) { + } else { + // some other identifier + } + } + } + } while (token.type != TOKEN_EOF); +} + +/** + * Emit bytecode to the VM from the source string. 
+ */ +void assemble(VM *vm, char *source) { + SymbolTable *st = symbol_table_init(); + build_symbol_table(vm, source, st); + emit_bytecode(vm, source, st); + free(st->symbols); + free(st); +} diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 515a74c..657dd40 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -5,7 +5,7 @@ #include "../../vm/opcodes.h" #include "lexer.h" -typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { GLOBAL, LOCAL, VAR } ScopeType; typedef enum { VOID, BOOL, @@ -24,28 +24,16 @@ typedef enum { FUNCTION } SymbolType; -typedef struct names_tab_s NamesTable; -typedef struct value_type_s ValueType; typedef struct symbol_s Symbol; typedef struct symbol_tab_s SymbolTable; -struct names_tab_s { - char **names; - u32 count; - u32 capacity; -}; - -struct value_type_s { - SymbolType type; - u32 name; - u32 size; -}; - +#define MAX_SYMBOL_NAME_LENGTH 64 struct symbol_s { - u32 name; - ValueType type; - ScopeType scope; - u32 ref; // address if global, register if local + char name[MAX_SYMBOL_NAME_LENGTH]; + SymbolType type; + ScopeType scope; + u32 ref; // vm->mp if global, vm->pc local, register if var + u32 size; // size of symbol }; struct symbol_tab_s { diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index fe22ff4..6a53937 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -52,7 +52,9 @@ typedef enum { OP_ABS_REAL, /* abs_real : locals[dest] = | locals[src1] | */ OP_NEG_REAL, /* neg_real : locals[dest] = _locals[src1] */ OP_INT_TO_REAL, /* int_to_real : locals[dest] = locals[src1] as real */ + OP_INT_TO_NAT, /* int_to_nat : locals[dest] = locals[src1] as nat */ OP_NAT_TO_REAL, /* nat_to_real : locals[dest] = locals[src1] as real */ + OP_NAT_TO_INT, /* nat_to_int : locals[dest] = locals[src1] as int */ OP_REAL_TO_INT, /* real_to_int : locals[dest] = locals[src1] as int */ OP_REAL_TO_NAT, /* real_to_nat : locals[dest] = locals[src1] as nat */ OP_BIT_SHIFT_LEFT, /* 
bit_shift_left : locals[dest] = locals[src1] << locals[src2] */ @@ -92,7 +94,8 @@ typedef enum { OP_REAL_TO_STRING, /* real_to_string : locals[dest] = src1 as str */ OP_STRING_TO_INT, /* string_to_int : locals[dest] = src1 as int */ OP_STRING_TO_NAT, /* string_to_nat : locals[dest] = src1 as nat */ - OP_STRING_TO_REAL /* string_to_real : locals[dest] = src1 as real */ + OP_STRING_TO_REAL, /* string_to_real : locals[dest] = src1 as real */ + OP_MAX_OPCODE /* not really an opcode but used to check max length of ops */ } Opcode; #define MAX_LOCALS 32 @@ -141,20 +144,30 @@ typedef struct device_s { #define STACK_SIZE 256 #define DEVICES_SIZE 8 typedef struct vm_s { - u32 pc; /* program counter */ - u32 cp; /* code pointer (last allocated opcode) */ - u32 fp; /* frame pointer (current frame) */ - u32 sp; /* stack pointer (top of stack) */ - u32 mp; /* memory pointer (last allocated value) */ - u32 dc; /* device count */ - i32 flag; /* flag (temporary results like SYSCALL status) */ - Frame frames[FRAMES_SIZE]; /* function call frames */ - u32 stack[STACK_SIZE]; /* main stack */ - Device devices[DEVICES_SIZE]; /* device definitions */ - u8 code[CODE_SIZE]; /* code block */ - u8 memory[MEMORY_SIZE]; /* memory block */ + u32 pc; /* program counter */ + u32 cp; /* code pointer (last allocated opcode) */ + u32 fp; /* frame pointer (current frame) */ + u32 sp; /* stack pointer (top of stack) */ + u32 mp; /* memory pointer (last allocated value) */ + u32 dc; /* device count */ + i32 flag; /* flag (temporary results like SYSCALL status) */ + Frame *frames; /* function call frames */ + u32 frames_size; /* max frames */ + u32 *stack; /* main stack */ + u32 stack_size; /* max stack */ + Device *devices; /* device definitions */ + u32 device_size; /* max devices */ + u8 *code; /* code block */ + u32 code_size; /* max code size */ + u8 *memory; /* memory block */ + u32 memory_size; /* max memory size */ } VM; +/** + * Creates a new vm based on the arch. 
+ */ +bool init_vm(VM *vm); + #define read_u8(vm, location, addr) ((vm)->location[addr]) #define read_u16(vm, location, addr) \ diff --git a/test/add.ul.ir b/test/add.ul.ir index abdfc8b..f7e6a8a 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -4,14 +4,12 @@ global int x = 1 global int y = 1 function main () - int a $0 - int b $1 int ans $2 str ans_string $3 - load_absolute_32 &x -> a - load_absolute_32 &y -> b - call add a b -> ans + load_absolute_32 x -> $0 + load_absolute_32 y -> $1 + call add $0 $1 -> ans int_to_string ans -> ans_string call pln ans_string exit 0 @@ -29,9 +27,9 @@ function pln (str message $0) int mode $5 load_immediate 0 -> mode - syscall OPEN &terminal_namespace mode -> term + syscall OPEN terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length - strlen &new_line -> nl_length + strlen new_line -> nl_length syscall WRITE term nl nl_length return diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 88f7660..a72d97c 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -10,10 +10,11 @@ function main () load_immediate 0 -> $2 load_immediate -1 -> $3 load_immediate 5.0 -> $5 - &loop_body + loop loop_body add_real a $5 -> a add_int i $3 -> i - jump_ge_int &loop_body i $2 + jump_ge_int loop_body i $2 + malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index ef7f66b..cc69b12 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -1,7 +1,7 @@ function main () - int mode is $11 - str term is $10 + int mode $11; + str term $10; malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode @@ -11,20 +11,20 @@ function main () string_length $7 -> $8 syscall WRITE term $7 $8 # print prompt - str user_string is $9 + str user_string $9 load_immediate 32 -> $8 malloc $8 -> user_string syscall READ term user_string $8 # read in max 32 byte string - call pln user_string + 
call pln user_string; exit 0 -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode @@ -34,3 +34,4 @@ function pln (str message is $0) malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length + return diff --git a/test/paint.ul.ir b/test/paint.ul.ir index e7ebc4c..3dac05e 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -8,113 +8,113 @@ global const byte LIGHT_GRAY = 182 global byte SELECTED_COLOR = 255 function main () - # Open screen + // Open screen plex screen $0 str screen_name $18 int mode $11 nat screen_buffer $21 - # use load immediate because it a pointer to a string, not a value - load_address &screen_namespace -> screen_name + // use load immediate because it a pointer to a string, not a value + load_address screen_namespace -> screen_name load_immediate 0 -> mode - syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); + syscall OPEN screen_name mode -> screen // Screen screen = open("/dev/screen/0", 0); nat width $20 nat size $22 - load_offset_32 screen 8 -> width # load width - load_offset_32 screen 12 -> size # load size - load_immediate 16 -> $1 # offset for screen buffer + load_offset_32 screen 8 -> width // load width + load_offset_32 screen 12 -> size // load size + load_immediate 16 -> $1 // offset for screen buffer add_nat screen $1 -> screen_buffer - # open mouse + // open mouse plex mouse $15 str mouse_name $16 - load_address &mouse_namespace -> mouse_name - syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); + load_address mouse_namespace -> mouse_name + syscall OPEN mouse_name mode -> mouse // Mouse mouse = open("/dev/mouse/0", 0); byte color $1 nat x_pos $12 nat y_pos $13 - load_absolute_32 &BLACK -> color + 
load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width - load_absolute_32 &WHITE -> color + load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width - # screen.draw# + // screen.draw// syscall WRITE screen screen_buffer size nat zero $11 loop draw_loop - # load mouse click data + // load mouse click data syscall REFRESH mouse byte left_down $9 - load_offset_8 mouse 16 -> left_down # load btn1 pressed + load_offset_8 mouse 16 -> left_down // load btn1 pressed - jump_eq_nat &draw_loop left_down zero + jump_eq_nat draw_loop left_down zero nat mouse_x $7 nat mouse_y $8 - load_offset_32 mouse 8 -> mouse_x # load x - load_offset_32 mouse 12 -> mouse_y # load y + load_offset_32 mouse 8 -> mouse_x // load x + load_offset_32 mouse 12 -> mouse_y // load y nat box_size $14 load_immediate 20 -> box_size - # first row - load_absolute_32 &BLACK -> color + // first row + load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width - call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size - load_absolute_32 &WHITE -> color + load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width - call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size syscall WRITE screen screen_buffer size byte 
selected_color $25 - load_absolute_32 &SELECTED_COLOR -> selected_color + load_absolute_32 SELECTED_COLOR -> selected_color nat brush_size $19 load_immediate 5 -> brush_size - call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size + call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size - jump &draw_loop + jump draw_loop - # Flush and exit + // Flush and exit exit 0 function set_color_if_clicked (int click_x $0, int click_y $1, int box_x $2, int box_y $3, byte color $4, int box_size $5) - # Compute right + // Compute right int right_edge $6 add_int box_x box_size -> right_edge - # Compute bottom = box_y + box_size + // Compute bottom = box_y + box_size int bottom_edge $7 add_int box_y box_size -> bottom_edge - # Bounds check: x in [box_x, right] and y in [box_y, bottom] - jump_lt_int &fail click_x box_x - jump_ge_int &fail click_x right_edge - jump_lt_int &fail click_y box_y - jump_ge_int &fail click_y bottom_edge + // Bounds check: x in [box_x, right] and y in [box_y, bottom] + jump_lt_int fail click_x box_x + jump_ge_int fail click_x right_edge + jump_lt_int fail click_y box_y + jump_ge_int fail click_y bottom_edge - store_absolute_8 &SELECTED_COLOR color + store_absolute_8 SELECTED_COLOR color else fail return @@ -122,18 +122,18 @@ function set_color_if_clicked (int click_x $0, int click_y $1, function draw_outlined_swatch(nat base $0, byte color $1, int x $2, int y $3, int width $4) - # Constants + // Constants nat background_color $5 - load_absolute_32 &GRAY -> background_color + load_absolute_32 GRAY -> background_color byte selected_color $10 - load_absolute_32 &SELECTED_COLOR -> selected_color + load_absolute_32 SELECTED_COLOR -> selected_color - jump_eq_int &set_selected selected_color color - jump &end_set_selected - set_selected: - load_absolute_32 &DARK_GRAY -> background_color - end_set_selected: + jump_eq_int set_selected selected_color color + jump end_set_selected + do set_selected + 
load_absolute_32 DARK_GRAY -> background_color + else end_set_selected nat outline_size $6 load_immediate 20 -> outline_size @@ -144,26 +144,26 @@ function draw_outlined_swatch(nat base $0, nat offset $8 load_immediate 2 -> offset - call &draw_box base width background_color x y outline_size outline_size + call draw_box base width background_color x y outline_size outline_size - add_int x offset -> $9 # x + 2 - add_int y offset -> $10 # y + 2 + add_int x offset -> $9 // x + 2 + add_int y offset -> $10 // y + 2 - call &draw_box base width color $9 $10 fill_size fill_size + call draw_box base width color $9 $10 fill_size fill_size return function draw_box (nat base $0, nat screen_width $1, byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6) - # Compute start address: base + y*640 + x + // Compute start address: base + y*640 + x nat offset $15 mul_int y_start screen_width -> offset add_int offset x_start -> offset add_nat offset base -> offset nat fat_ptr_size $25 load_immediate 4 -> fat_ptr_size - add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size + add_nat offset fat_ptr_size -> offset // need to add offset for fat pointer size int i $30 load_immediate 1 -> i @@ -175,10 +175,10 @@ function draw_box (nat base $0, nat screen_width $1, nat pixel_ptr $29 loop draw_box_outer - add_int offset width -> row_end # current + width - register_move offset -> pixel_ptr # set pixel point - memset_8 pixel_ptr color width # draw row - add_int offset screen_width -> offset # next row += 640 - sub_int height i -> height # decrement row count - jump_gt_int &draw_box_outer height zero + add_int offset width -> row_end // current + width + register_move offset -> pixel_ptr // set pixel point + memset_8 pixel_ptr color width // draw row + add_int offset screen_width -> offset // next row += 640 + sub_int height i -> height // decrement row count + jump_gt_int draw_box_outer height zero return diff --git a/test/window.ul.ir 
b/test/window.ul.ir index 4fb6c04..c942eeb 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -19,29 +19,29 @@ function main () nat buffer_size $11 nat pixel_pos $12 - load_immediate &screen_namespace -> screen + load_immediate screen_namespace -> screen load_immediate 0 -> mode syscall OPEN screen mode -> screen nat_to_string screen -> tmp_str - call &pln tmp_str + call pln tmp_str load_offset_32 screen 8 -> width nat_to_string width -> tmp_str - call &pln tmp_str + call pln tmp_str load_offset_32 screen 12 -> buffer_size nat_to_string buffer_size -> tmp_str - call &pln tmp_str + call pln tmp_str load_immediate 16 -> offset_temp add_nat screen offset_temp -> screen_buffer nat_to_string screen_buffer -> tmp_str - call &pln tmp_str + call pln tmp_str // open mouse - load_immediate &mouse_namespace -> mouse + load_immediate mouse_namespace -> mouse syscall OPEN mouse mode -> mouse syscall WRITE screen screen_buffer buffer_size // redraw @@ -79,7 +79,7 @@ function pln (str message $0) int mode $5 load_immediate 0 -> mode - syscall OPEN &terminal_namespace mode -> term + syscall OPEN terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length load_address new_line -> nl From 9d2053aef077bee39fbcc71985222e952588344b Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 08:26:12 -0800 Subject: [PATCH 12/27] wip symbol table, remove malloc_imm, fix docs --- README.org | 30 +-- src/tools/assembler/assembler.c | 317 +++++++++++++++++--------------- src/tools/assembler/lexer.c | 194 +++++++++---------- src/tools/assembler/lexer.h | 9 +- src/tools/compiler/compiler.c | 12 +- src/tools/compiler/lexer.c | 4 +- src/tools/compiler/lexer.h | 4 +- src/vm/opcodes.h | 1 - test/add.ul.ir | 2 +- test/fib.ul.ir | 14 +- test/hello.ul.ir | 9 +- test/loop.ul.ir | 51 ++--- test/malloc.ul.ir | 25 +-- test/paint-bw.ul.ir | 195 ++++++++++---------- test/paint.ul.ir | 20 +- test/simple.ul.ir | 2 +- test/window.ul.ir | 25 +-- 17 files changed, 
471 insertions(+), 443 deletions(-) diff --git a/README.org b/README.org index 5386796..37686f5 100644 --- a/README.org +++ b/README.org @@ -55,33 +55,37 @@ You can view some examples in the =.ul.ir= files in =/test= **Sample Program: =hello.ul.ir=** #+BEGIN_SRC sh -function main () - str hello is $0 +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global str message = "nuqneH 'u'?" - malloc_immediate "nuqneH 'u'?" -> hello - call pln hello +function main () + str hello $0 + + load_immediate message -> hello + call pln hello -> void exit 0 -function pln (str message is $0) - str ts is $1 - int msg_length is $2 - str nl is $3 - int nl_length is $4 - int mode is $5 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts # get terminal device + load_immediate terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return #+END_SRC #+BEGIN_SRC sh -./build/linux/undar-linux-debug ./test/hello.asm.lisp +./build/linux/undar-linux-debug ./test/hello.ul.ir #+END_SRC Running the compiler without arguments will put it in "REPL" mode. It will function similar to a LISP repl. 
diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 3110e60..6b769d9 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -15,8 +15,6 @@ void emit_u32(VM *vm, u32 value) { vm->cp += 4; } -void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } - SymbolTable *symbol_table_init() { SymbolTable *table = malloc(sizeof(SymbolTable)); table->symbols = malloc(16 * sizeof(Symbol)); @@ -56,8 +54,8 @@ u32 get_ref(VM *vm, SymbolTable *st, const char *name, ScopeType scope) { return sym->ref; } -Token nextTokenIs(TokenType type) { - Token token = nextToken(); +Token next_token_is(TokenType type) { + Token token = next_token(); if (token.type != type) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); exit(1); @@ -71,7 +69,7 @@ Token nextTokenIs(TokenType type) { bool define_global(VM *vm, SymbolTable *st) { Symbol s; - Token token_type = nextToken(); + Token token_type = next_token(); switch (token_type.type) { case TOKEN_TYPE_BOOL: s.type = BOOL; @@ -114,8 +112,8 @@ bool define_global(VM *vm, SymbolTable *st) { return false; } - Token eq = nextTokenIs(TOKEN_EQ); - Token name = nextTokenIs(TOKEN_IDENTIFIER); + Token eq = next_token_is(TOKEN_EQ); + Token name = next_token_is(TOKEN_IDENTIFIER); if (name.length > MAX_SYMBOL_NAME_LENGTH) { return false; @@ -127,21 +125,21 @@ bool define_global(VM *vm, SymbolTable *st) { s.ref = addr; s.scope = GLOBAL; - Token value = nextToken(); + Token value = next_token(); switch (value.type) { case TOKEN_KEYWORD_TRUE: { u32 addr = vm->mp; write_u8(vm, memory, addr, 1); - vm->mp += 1; - vm->frames[vm->fp].end += 1; + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; } case TOKEN_KEYWORD_FALSE: { u32 addr = vm->mp; write_u8(vm, memory, addr, 0); - vm->mp += 1; - vm->frames[vm->fp].end += 1; + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; } case TOKEN_LITERAL_INT: { i32 out = atoi(value.start); @@ -266,7 +264,7 @@ void define_var(VM *vm, 
SymbolTable *st, Token regType) { break; } case TOKEN_TYPE_REAL: { - s.type = REAL; + s.type = F32; s.size = 4; break; } @@ -286,7 +284,7 @@ void define_var(VM *vm, SymbolTable *st, Token regType) { exit(1); } - Token name = nextTokenIs(TOKEN_IDENTIFIER); + Token name = next_token_is(TOKEN_IDENTIFIER); if (name.length > MAX_SYMBOL_NAME_LENGTH) { printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", regType.line, regType.length, regType.start); @@ -295,9 +293,9 @@ void define_var(VM *vm, SymbolTable *st, Token regType) { memcpy(s.name, name.start, name.length); - nextTokenIs(TOKEN_BIG_MONEY); + next_token_is(TOKEN_BIG_MONEY); - Token reg_num = nextTokenIs(TOKEN_LITERAL_INT); + Token reg_num = next_token_is(TOKEN_LITERAL_INT); s.ref = atoi(reg_num.start); symbol_table_add(st, s); } @@ -305,33 +303,33 @@ void define_var(VM *vm, SymbolTable *st, Token regType) { /** * function . */ -void define_function(vm *vm, SymbolTable *st) { +void define_function(VM *vm, SymbolTable *st) { Symbol s; s.scope = LOCAL; s.type = FUNCTION; - Token name = nextTokenIs(TOKEN_IDENTIFIER); + Token name = next_token_is(TOKEN_IDENTIFIER); if (name.length > MAX_SYMBOL_NAME_LENGTH) { - printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", regType.line, - regType.length, regType.start); + printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, + name.length, name.start); exit(1); } memcpy(s.name, name.start, name.length); - nextTokenIs(TOKEN_LPAREN); + next_token_is(TOKEN_LPAREN); - Token next = nextToken(); + Token next = next_token(); while (next.type != TOKEN_RPAREN) { - Token regType = nextToken(); + Token regType = next_token(); define_var(vm, st, regType); - Token comma = nextToken(); - if (comma.type == TOKEN_COMMA) { + Token next = next_token(); + if (next.type == TOKEN_COMMA) { continue; - } else if (comma.type == TOKEN_RPAREN) { + } else if (next.type == TOKEN_RPAREN) { break; } else { - printf("ERROR at line %d: %.*s\n", comma.line, comma.length, comma.start); + printf("ERROR at 
line %d: %.*s\n", next.line, next.length, next.start); exit(1); } } @@ -347,10 +345,10 @@ void define_branch(VM *vm, SymbolTable *st) { s.scope = LOCAL; s.type = VOID; - token name = nextTokenIs(TOKEN_IDENTIFIER); + Token name = next_token_is(TOKEN_IDENTIFIER); if (name.length > MAX_SYMBOL_NAME_LENGTH) { - printf("BRANCH NAME TOO LONG at line %d: %.*s\n", regType.line, - regType.length, regType.start); + printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, + name.length, name.start); exit(1); } memcpy(s.name, name.start, name.length); @@ -364,136 +362,150 @@ void define_branch(VM *vm, SymbolTable *st) { */ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token token; - initLexer(source); + init_lexer(source); do { - token = nextToken(); + token = next_token(); if (token.type == TOKEN_ERROR) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); exit(1); } - if (token.type != TOKEN_EOF) { - printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), - token.length, token.start); - if (token.type == TOKEN_KEYWORD_GLOBAL) { - define_global(vm, st); - continue; - } + if (token.type == TOKEN_KEYWORD_GLOBAL) { + define_global(vm, st); + continue; + } - if (token.type == TOKEN_KEYWORD_FN) { - define_function(vm, st); - continue; - } + if (token.type == TOKEN_KEYWORD_FN) { + define_function(vm, st); + continue; + } - if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || - token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || - token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || - token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || - token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { - define_var(vm, st, token); - continue; - } + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == 
TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { + define_var(vm, st, token); + continue; + } - if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || - token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || - token.type == TOKEN_KEYWORD_FOR) { - define_branch(vm, st); - continue; - } + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(vm, st); + continue; + } - if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first - if (streq(token.start, "exit")) { - } else if (streq(token.start, "call")) { - } else if (streq(token.start, "syscall")) { - } else if (streq(token.start, "load_immediate")) { - } else if (streq(token.start, "load_indirect_8")) { - } else if (streq(token.start, "load_indirect_16")) { - } else if (streq(token.start, "load_indirect_32")) { - } else if (streq(token.start, "load_absolute_8")) { - } else if (streq(token.start, "load_absolute_16")) { - } else if (streq(token.start, "load_absolute_32")) { - } else if (streq(token.start, "load_offset_8")) { - } else if (streq(token.start, "load_offset_16")) { - } else if (streq(token.start, "load_offset_32")) { - } else if (streq(token.start, "store_absolute_8")) { - } else if (streq(token.start, "store_absolute_16")) { - } else if (streq(token.start, "store_absolute_32")) { - } else if (streq(token.start, "store_indirect_8")) { - } else if (streq(token.start, "store_indirect_16")) { - } else if (streq(token.start, "store_indirect_32")) { - } else if (streq(token.start, "store_offset_8")) { - } else if (streq(token.start, "store_offset_16")) { - } else if (streq(token.start, "store_offset_32")) { - } else if (streq(token.start, "malloc")) { - } else if (streq(token.start, "malloc_immediate")) { - } else if (streq(token.start, 
"memset_8")) { - } else if (streq(token.start, "memset_16")) { - } else if (streq(token.start, "memset_32")) { - } else if (streq(token.start, "register_move")) { - } else if (streq(token.start, "add_int")) { - } else if (streq(token.start, "sub_int")) { - } else if (streq(token.start, "mul_int")) { - } else if (streq(token.start, "div_int")) { - } else if (streq(token.start, "abs_int")) { - } else if (streq(token.start, "neg_int")) { - } else if (streq(token.start, "add_nat")) { - } else if (streq(token.start, "sub_nat")) { - } else if (streq(token.start, "mul_nat")) { - } else if (streq(token.start, "div_nat")) { - } else if (streq(token.start, "abs_nat")) { - } else if (streq(token.start, "neg_nat")) { - } else if (streq(token.start, "add_real")) { - } else if (streq(token.start, "sub_real")) { - } else if (streq(token.start, "mul_real")) { - } else if (streq(token.start, "div_real")) { - } else if (streq(token.start, "abs_real")) { - } else if (streq(token.start, "neg_real")) { - } else if (streq(token.start, "int_to_real")) { - } else if (streq(token.start, "nat_to_real")) { - } else if (streq(token.start, "real_to_int")) { - } else if (streq(token.start, "real_to_nat")) { - } else if (streq(token.start, "bit_shift_left")) { - } else if (streq(token.start, "bit_shift_right")) { - } else if (streq(token.start, "bit_shift_r_ext")) { - } else if (streq(token.start, "bit_and")) { - } else if (streq(token.start, "bit_or")) { - } else if (streq(token.start, "bit_xor")) { - } else if (streq(token.start, "jump")) { - } else if (streq(token.start, "jump_if_flag")) { - } else if (streq(token.start, "jump_eq_int")) { - } else if (streq(token.start, "jump_neq_int")) { - } else if (streq(token.start, "jump_gt_int")) { - } else if (streq(token.start, "jump_lt_int")) { - } else if (streq(token.start, "jump_le_int")) { - } else if (streq(token.start, "jump_ge_int")) { - } else if (streq(token.start, "jump_eq_nat")) { - } else if (streq(token.start, "jump_neq_nat")) { - } else 
if (streq(token.start, "jump_gt_nat")) { - } else if (streq(token.start, "jump_lt_nat")) { - } else if (streq(token.start, "jump_le_nat")) { - } else if (streq(token.start, "jump_ge_nat")) { - } else if (streq(token.start, "jump_eq_real")) { - } else if (streq(token.start, "jump_neq_real")) { - } else if (streq(token.start, "jump_ge_real")) { - } else if (streq(token.start, "jump_gt_real")) { - } else if (streq(token.start, "jump_lt_real")) { - } else if (streq(token.start, "jump_le_real")) { - } else if (streq(token.start, "string_length")) { - } else if (streq(token.start, "string_eq")) { - } else if (streq(token.start, "string_concat")) { - } else if (streq(token.start, "string_get_char")) { - } else if (streq(token.start, "string_find_char")) { - } else if (streq(token.start, "string_slice")) { - } else if (streq(token.start, "int_to_string")) { - } else if (streq(token.start, "nat_to_string")) { - } else if (streq(token.start, "real_to_string")) { - } else if (streq(token.start, "string_to_int")) { - } else if (streq(token.start, "string_to_nat")) { - } else if (streq(token.start, "string_to_real")) { - } else { - // some other identifier + if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first + if (streq(token.start, "exit")) { + vm->pc++; + + next_token_is(TOKEN_LITERAL_NAT); + vm->pc+=4; + } else if (streq(token.start, "call")) { + vm->pc++; + + next_token_is(TOKEN_IDENTIFIER); + vm->pc+=4; + + vm->pc++; /* number of args (implied) */ + + Token next = next_token(); + while (next.type != TOKEN_ARROW_LEFT) { + vm->pc++; + Token next = next_token(); } + /* return type */ + next = next_token(); + vm->pc++; + } else if (streq(token.start, "syscall")) { + } else if (streq(token.start, "load_immediate")) { + } else if (streq(token.start, "load_indirect_8")) { + } else if (streq(token.start, "load_indirect_16")) { + } else if (streq(token.start, "load_indirect_32")) { + } else if (streq(token.start, "load_absolute_8")) { + } else if 
(streq(token.start, "load_absolute_16")) { + } else if (streq(token.start, "load_absolute_32")) { + } else if (streq(token.start, "load_offset_8")) { + } else if (streq(token.start, "load_offset_16")) { + } else if (streq(token.start, "load_offset_32")) { + } else if (streq(token.start, "store_absolute_8")) { + } else if (streq(token.start, "store_absolute_16")) { + } else if (streq(token.start, "store_absolute_32")) { + } else if (streq(token.start, "store_indirect_8")) { + } else if (streq(token.start, "store_indirect_16")) { + } else if (streq(token.start, "store_indirect_32")) { + } else if (streq(token.start, "store_offset_8")) { + } else if (streq(token.start, "store_offset_16")) { + } else if (streq(token.start, "store_offset_32")) { + } else if (streq(token.start, "malloc")) { + } else if (streq(token.start, "memset_8")) { + } else if (streq(token.start, "memset_16")) { + } else if (streq(token.start, "memset_32")) { + } else if (streq(token.start, "register_move")) { + } else if (streq(token.start, "add_int")) { + } else if (streq(token.start, "sub_int")) { + } else if (streq(token.start, "mul_int")) { + } else if (streq(token.start, "div_int")) { + } else if (streq(token.start, "abs_int")) { + } else if (streq(token.start, "neg_int")) { + } else if (streq(token.start, "add_nat")) { + } else if (streq(token.start, "sub_nat")) { + } else if (streq(token.start, "mul_nat")) { + } else if (streq(token.start, "div_nat")) { + } else if (streq(token.start, "abs_nat")) { + } else if (streq(token.start, "neg_nat")) { + } else if (streq(token.start, "add_real")) { + } else if (streq(token.start, "sub_real")) { + } else if (streq(token.start, "mul_real")) { + } else if (streq(token.start, "div_real")) { + } else if (streq(token.start, "abs_real")) { + } else if (streq(token.start, "neg_real")) { + } else if (streq(token.start, "int_to_real")) { + } else if (streq(token.start, "nat_to_real")) { + } else if (streq(token.start, "real_to_int")) { + } else if 
(streq(token.start, "real_to_nat")) { + } else if (streq(token.start, "bit_shift_left")) { + } else if (streq(token.start, "bit_shift_right")) { + } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (streq(token.start, "bit_and")) { + } else if (streq(token.start, "bit_or")) { + } else if (streq(token.start, "bit_xor")) { + } else if (streq(token.start, "jump")) { + } else if (streq(token.start, "jump_if_flag")) { + } else if (streq(token.start, "jump_eq_int")) { + } else if (streq(token.start, "jump_neq_int")) { + } else if (streq(token.start, "jump_gt_int")) { + } else if (streq(token.start, "jump_lt_int")) { + } else if (streq(token.start, "jump_le_int")) { + } else if (streq(token.start, "jump_ge_int")) { + } else if (streq(token.start, "jump_eq_nat")) { + } else if (streq(token.start, "jump_neq_nat")) { + } else if (streq(token.start, "jump_gt_nat")) { + } else if (streq(token.start, "jump_lt_nat")) { + } else if (streq(token.start, "jump_le_nat")) { + } else if (streq(token.start, "jump_ge_nat")) { + } else if (streq(token.start, "jump_eq_real")) { + } else if (streq(token.start, "jump_neq_real")) { + } else if (streq(token.start, "jump_ge_real")) { + } else if (streq(token.start, "jump_gt_real")) { + } else if (streq(token.start, "jump_lt_real")) { + } else if (streq(token.start, "jump_le_real")) { + } else if (streq(token.start, "string_length")) { + } else if (streq(token.start, "string_eq")) { + } else if (streq(token.start, "string_concat")) { + } else if (streq(token.start, "string_get_char")) { + } else if (streq(token.start, "string_find_char")) { + } else if (streq(token.start, "string_slice")) { + } else if (streq(token.start, "int_to_string")) { + } else if (streq(token.start, "nat_to_string")) { + } else if (streq(token.start, "real_to_string")) { + } else if (streq(token.start, "string_to_int")) { + } else if (streq(token.start, "string_to_nat")) { + } else if (streq(token.start, "string_to_real")) { + } else { + // some other 
identifier } } } while (token.type != TOKEN_EOF); @@ -504,15 +516,15 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { */ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { Token token; - initLexer(source); + init_lexer(source); do { - token = nextToken(); + token = next_token(); if (token.type == TOKEN_ERROR) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); break; } if (token.type != TOKEN_EOF) { - printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), + printf("Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { @@ -561,7 +573,6 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { } else if (streq(token.start, "store_offset_16")) { } else if (streq(token.start, "store_offset_32")) { } else if (streq(token.start, "malloc")) { - } else if (streq(token.start, "malloc_immediate")) { } else if (streq(token.start, "memset_8")) { } else if (streq(token.start, "memset_16")) { } else if (streq(token.start, "memset_32")) { diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index 655aa94..371bed0 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -6,24 +6,24 @@ typedef struct { const char *start; const char *current; - int line; + i32 line; } Lexer; Lexer lexer; -void initLexer(const char *source) { +void init_lexer(const char *source) { lexer.start = source; lexer.current = source; lexer.line = 1; } -static bool isAlpha(char c) { +static bool is_alpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; } -static bool isDigit(char c) { return c >= '0' && c <= '9'; } +static bool is_digit(char c) { return c >= '0' && c <= '9'; } -static bool isAtEnd() { return *lexer.current == '\0'; } +static bool is_at_end() { return *lexer.current == '\0'; } static char advance() { lexer.current++; @@ -32,14 +32,14 @@ static char advance() { static 
char peek() { return *lexer.current; } -static char peekNext() { - if (isAtEnd()) +static char peek_next() { + if (is_at_end()) return '\0'; return lexer.current[1]; } static bool match(char expected) { - if (isAtEnd()) + if (is_at_end()) return false; if (*lexer.current != expected) return false; @@ -47,25 +47,25 @@ static bool match(char expected) { return true; } -static Token makeToken(TokenType type) { +static Token make_token(TokenType type) { Token token; token.type = type; token.start = lexer.start; - token.length = (int)(lexer.current - lexer.start); + token.length = (i32)(lexer.current - lexer.start); token.line = lexer.line; return token; } -static Token errorToken(const char *message) { +static Token error_token(const char *message) { Token token; token.type = TOKEN_ERROR; token.start = message; - token.length = (int)strlen(message); + token.length = (i32)strlen(message); token.line = lexer.line; return token; } -static void skipWhitespace() { +static void skip_whitespace() { for (;;) { char c = peek(); switch (c) { @@ -79,19 +79,19 @@ static void skipWhitespace() { advance(); break; case '/': - if (peekNext() == '/') { + if (peek_next() == '/') { // Single-line comment: skip until newline or end of file advance(); - while (peek() != '\n' && !isAtEnd()) + while (peek() != '\n' && !is_at_end()) advance(); - } else if (peekNext() == '*') { + } else if (peek_next() == '*') { // Multi-line comment: skip until '*/' or end of file advance(); advance(); - while (!isAtEnd()) { + while (!is_at_end()) { if (peek() == '\n') lexer.line++; - if (peek() == '*' && peekNext() == '/') { + if (peek() == '*' && peek_next() == '/') { advance(); advance(); break; // Exit loop, comment ended @@ -108,7 +108,7 @@ static void skipWhitespace() { } } -static TokenType checkKeyword(int start, int length, const char *rest, +static TokenType check_keyword(i32 start, i32 length, const char *rest, TokenType type) { if (lexer.current - lexer.start == start + length && 
memcmp(lexer.start + start, rest, length) == 0) { @@ -124,9 +124,9 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'n': - return checkKeyword(2, 1, "d", TOKEN_OPERATOR_AND); + return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND); case 's': - return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS); + return check_keyword(2, 0, "", TOKEN_KEYWORD_AS); } } break; @@ -134,47 +134,47 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'l': - return checkKeyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); + return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); case 'o': - return checkKeyword(2, 3, "nst", TOKEN_KEYWORD_CONST); + return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST); } } break; case 'e': - return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); + return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); case 'f': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'a': - return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); + return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); case 'o': - return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); + return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR); case '3': - return checkKeyword(1, 1, "2", TOKEN_TYPE_REAL); + return check_keyword(1, 1, "2", TOKEN_TYPE_REAL); } - return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN); + return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN); } break; case 'i': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'f': - return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); + return check_keyword(2, 0, "", TOKEN_KEYWORD_IF); case 's': - return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); + return check_keyword(2, 0, "", TOKEN_KEYWORD_IS); case '8': - return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + return check_keyword(2, 0, "", TOKEN_TYPE_I8); case '1': - return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + return check_keyword(2, 1, "6", TOKEN_TYPE_I16); 
case '3': - return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); + return check_keyword(2, 1, "2", TOKEN_TYPE_INT); case 'n': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { case 'i': - return checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT); + return check_keyword(3, 2, "t", TOKEN_KEYWORD_INIT); case 't': - return checkKeyword(3, 0, "", TOKEN_TYPE_INT); + return check_keyword(3, 0, "", TOKEN_TYPE_INT); } } break; @@ -185,9 +185,9 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'a': - return checkKeyword(2, 1, "t", TOKEN_TYPE_NAT); + return check_keyword(2, 1, "t", TOKEN_TYPE_NAT); case 'i': - return checkKeyword(2, 1, "l", TOKEN_KEYWORD_NIL); + return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL); } } break; @@ -195,9 +195,9 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'p': - return checkKeyword(2, 2, "en", TOKEN_KEYWORD_OPEN); + return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN); case 'r': - return checkKeyword(2, 0, "", TOKEN_OPERATOR_OR); + return check_keyword(2, 0, "", TOKEN_OPERATOR_OR); } } break; @@ -205,7 +205,7 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'l': - return checkKeyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); + return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); } } break; @@ -216,11 +216,11 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { case 'a': - return checkKeyword(3, 1, "d", TOKEN_KEYWORD_READ); + return check_keyword(3, 1, "d", TOKEN_KEYWORD_READ); case 'f': - return checkKeyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); + return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); case 't': - return checkKeyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); + return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); } } break; @@ -231,7 +231,7 @@ static TokenType identifierType() { if (lexer.current - 
lexer.start > 1) { switch (lexer.start[1]) { case 't': - return checkKeyword(2, 1, "r", TOKEN_TYPE_STR); + return check_keyword(2, 1, "r", TOKEN_TYPE_STR); } } break; @@ -239,9 +239,9 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'h': - return checkKeyword(2, 2, "is", TOKEN_KEYWORD_THIS); + return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS); case 'r': - return checkKeyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); + return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); } } break; @@ -249,13 +249,13 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 's': - return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE); + return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE); case '8': - return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + return check_keyword(2, 0, "", TOKEN_TYPE_U8); case '1': - return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + return check_keyword(2, 1, "6", TOKEN_TYPE_U16); case '3': - return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); + return check_keyword(2, 1, "2", TOKEN_TYPE_NAT); } } break; @@ -263,129 +263,133 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 'h': - return checkKeyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); + return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); case 'r': - return checkKeyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); + return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); } } break; case 'g': - return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); + return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); case 'l': - return checkKeyword(1, 4, "oop", TOKEN_KEYWORD_LOOP); + return check_keyword(1, 4, "oop", TOKEN_KEYWORD_LOOP); + case 'd': + return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); + case 'v': + return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID); } return TOKEN_IDENTIFIER; } static Token identifier() { - while (isAlpha(peek()) || isDigit(peek())) + 
while (is_alpha(peek()) || is_digit(peek())) advance(); - return makeToken(identifierType()); + return make_token(identifierType()); } static Token number() { - while (isDigit(peek())) + while (is_digit(peek())) advance(); /* Look for a fractional part. */ - if (peek() == '.' && isDigit(peekNext())) { + if (peek() == '.' && is_digit(peek_next())) { /* Consume the ".". */ advance(); - while (isDigit(peek())) + while (is_digit(peek())) advance(); - return makeToken(TOKEN_LITERAL_REAL); + return make_token(TOKEN_LITERAL_REAL); } - return makeToken(TOKEN_LITERAL_INT); + return make_token(TOKEN_LITERAL_INT); } static Token string() { - while (peek() != '"' && !isAtEnd()) { + while (peek() != '"' && !is_at_end()) { if (peek() == '\n') lexer.line++; advance(); } - if (isAtEnd()) - return errorToken("Unterminated string."); + if (is_at_end()) + return error_token("Unterminated string."); /* The closing quote. */ advance(); - return makeToken(TOKEN_LITERAL_STR); + return make_token(TOKEN_LITERAL_STR); } -Token nextToken() { - skipWhitespace(); +Token next_token() { + skip_whitespace(); lexer.start = lexer.current; - if (isAtEnd()) - return makeToken(TOKEN_EOF); + if (is_at_end()) + return make_token(TOKEN_EOF); char c = advance(); - if (isAlpha(c)) + if (is_alpha(c)) return identifier(); - if (isDigit(c)) + if (is_digit(c)) return number(); switch (c) { case '(': - return makeToken(TOKEN_LPAREN); + return make_token(TOKEN_LPAREN); case ')': - return makeToken(TOKEN_RPAREN); + return make_token(TOKEN_RPAREN); case '{': - return makeToken(TOKEN_LBRACE); + return make_token(TOKEN_LBRACE); case '}': - return makeToken(TOKEN_RBRACE); + return make_token(TOKEN_RBRACE); case '[': - return makeToken(TOKEN_LBRACKET); + return make_token(TOKEN_LBRACKET); case ']': - return makeToken(TOKEN_RBRACKET); + return make_token(TOKEN_RBRACKET); case ';': - return makeToken(TOKEN_SEMICOLON); + return make_token(TOKEN_SEMICOLON); case ',': - return makeToken(TOKEN_COMMA); + return 
make_token(TOKEN_COMMA); case '.': - return makeToken(TOKEN_DOT); + return make_token(TOKEN_DOT); case '-': - return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); + return make_token(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); case '+': - return makeToken(TOKEN_PLUS); + return make_token(TOKEN_PLUS); case '/': - return makeToken(TOKEN_SLASH); + return make_token(TOKEN_SLASH); case '&': - return makeToken(match('&') ? TOKEN_AND_AND : TOKEN_AND); + return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND); case '#': - return makeToken(TOKEN_MESH); + return make_token(TOKEN_MESH); case '$': - return makeToken(TOKEN_BIG_MONEY); + return make_token(TOKEN_BIG_MONEY); case '*': - return makeToken(TOKEN_STAR); + return make_token(TOKEN_STAR); case '!': - return makeToken(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); case '=': - return makeToken(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); case '<': - return makeToken(match('=') ? TOKEN_LTE : TOKEN_LT); + return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); case '>': - return makeToken(match('=') ? TOKEN_GTE : TOKEN_GT); + return make_token(match('=') ? 
TOKEN_GTE : TOKEN_GT); case '"': return string(); } - return errorToken("Unexpected character."); + return error_token("Unexpected character."); } -const char *tokenTypeToString(TokenType type) { +const char *token_type_to_string(TokenType type) { switch (type) { case TOKEN_EOF: return "EOF"; case TOKEN_IDENTIFIER: return "IDENTIFIER"; case TOKEN_LITERAL_INT: - return "LITERAL_INT"; + return "LITERAL_i32"; case TOKEN_LITERAL_NAT: return "LITERAL_NAT"; case TOKEN_LITERAL_REAL: @@ -393,7 +397,7 @@ const char *tokenTypeToString(TokenType type) { case TOKEN_LITERAL_STR: return "LITERAL_STR"; case TOKEN_TYPE_INT: - return "TYPE_INT"; + return "TYPE_i32"; case TOKEN_TYPE_NAT: return "TYPE_NAT"; case TOKEN_TYPE_REAL: diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index 5ad14ef..3d8ad1a 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -16,6 +16,8 @@ typedef enum { TOKEN_TYPE_NAT, TOKEN_TYPE_REAL, TOKEN_TYPE_STR, + TOKEN_TYPE_BOOL, + TOKEN_TYPE_VOID, TOKEN_KEYWORD_PLEX, TOKEN_KEYWORD_FN, TOKEN_KEYWORD_CONST, @@ -36,6 +38,7 @@ typedef enum { TOKEN_KEYWORD_REFRESH, TOKEN_KEYWORD_CLOSE, TOKEN_KEYWORD_LOOP, + TOKEN_KEYWORD_DO, TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_FALSE, @@ -79,8 +82,8 @@ typedef struct { int line; } Token; -void initLexer(const char *source); -Token nextToken(); -const char* tokenTypeToString(TokenType type); +void init_lexer(const char *source); +Token next_token(); +const char* token_type_to_string(TokenType type); #endif diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c index 1b29e68..889c277 100644 --- a/src/tools/compiler/compiler.c +++ b/src/tools/compiler/compiler.c @@ -184,14 +184,14 @@ Symbol *global(VM *vm) { s.ref.global = vm->mp; - Token token_type = nextToken(); - Token array_or_eq = nextToken(); + Token token_type = next_token(); + Token array_or_eq = next_token(); if (array_or_eq.type == TOKEN_LBRACKET) { - Token rb = nextToken(); + Token rb = 
next_token(); if (rb.type != TOKEN_RBRACKET) return nil; - Token eq = nextToken(); + Token eq = next_token(); if (eq.type != TOKEN_EQ) return nil; @@ -268,7 +268,7 @@ Symbol *global(VM *vm) { s.type = t; - Token value = nextToken(); + Token value = next_token(); return nil; } @@ -345,7 +345,7 @@ void advance() { parser.previous = parser.current; for (;;) { - parser.current = nextToken(); + parser.current = next_token(); if (parser.current.type != TOKEN_ERROR) break; diff --git a/src/tools/compiler/lexer.c b/src/tools/compiler/lexer.c index 397d7cc..490e952 100644 --- a/src/tools/compiler/lexer.c +++ b/src/tools/compiler/lexer.c @@ -315,7 +315,7 @@ static Token string() { return makeToken(TOKEN_LITERAL_STR); } -Token nextToken() { +Token next_token() { skipWhitespace(); lexer.start = lexer.current; @@ -376,7 +376,7 @@ Token nextToken() { return errorToken("Unexpected character."); } -const char *tokenTypeToString(TokenType type) { +const char *token_type_to_string(TokenType type) { switch (type) { case TOKEN_EOF: return "EOF"; diff --git a/src/tools/compiler/lexer.h b/src/tools/compiler/lexer.h index eaa137c..3e0ab11 100644 --- a/src/tools/compiler/lexer.h +++ b/src/tools/compiler/lexer.h @@ -79,7 +79,7 @@ typedef struct { } Token; void initLexer(const char *source); -Token nextToken(); -const char* tokenTypeToString(TokenType type); +Token next_token(); +const char* token_type_to_string(TokenType type); #endif diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index 6a53937..91d2b57 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -28,7 +28,6 @@ typedef enum { OP_STORE_OFF_16, /* store_offset_16 : memory[locals[dest] + offset] = locals[src1] && 0xFFFF */ OP_STORE_OFF_32, /* store_offset_32 : memory[locals[dest] + offset] = locals[src1] */ OP_MALLOC, /* malloc : dest = fat ptr to memory of ((src1 as size) + 4) */ - OP_MALLOC_IMM, /* malloc_immediate : dest = fat ptr to memory of raw */ OP_MEMSET_8, /* memset_8 : dest <-> dest+count = src1 as u8 */ OP_MEMSET_16, 
/* memset_16 : dest <-> dest+count = src1 as u8 */ OP_MEMSET_32, /* memset_32 : dest <-> dest+count = src1 as u32 */ diff --git a/test/add.ul.ir b/test/add.ul.ir index f7e6a8a..3364204 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -11,7 +11,7 @@ function main () load_absolute_32 y -> $1 call add $0 $1 -> ans int_to_string ans -> ans_string - call pln ans_string + call pln ans_string -> void exit 0 function add (int a $0, int b $1) diff --git a/test/fib.ul.ir b/test/fib.ul.ir index 7f73714..d823c9a 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -6,23 +6,23 @@ function main () int str_n $1 load_immediate 35 -> n - call &fib n -> n + call fib n -> n int_to_string n -> str_n - call &pln str_n + call pln str_n -> void exit 0 function fib (int n $0) load_immediate 2 -> $1 - jump_lt_int &base_case n $1 + jump_lt_int base_case n $1 load_immediate 2 -> $3 sub_int n $3 -> $4 - call &fib $4 -> $5 + call fib $4 -> $5 load_immediate 1 -> $3 sub_int n $3 -> $4 - call &fib $4 -> $6 + call fib $4 -> $6 add_int $6 $5 -> $7 return $7 @@ -37,12 +37,12 @@ function pln (str message $0) str nl $3 int nl_length $4 - load_immediate &terminal_namespace -> ts + load_immediate terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - load_immediate &new_line -> nl + load_immediate new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 3c48000..204fab3 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -5,8 +5,8 @@ global str message = "nuqneH 'u'?" 
function main () str hello $0 - load_immediate &message -> hello - call pln hello + load_immediate message -> hello + call pln hello -> void exit 0 function pln (str message $0) @@ -16,13 +16,12 @@ function pln (str message $0) str nl $3 int nl_length $4 - - load_immediate &terminal_namespace -> ts + load_immediate terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - load_immediate &new_line -> nl + load_immediate new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/loop.ul.ir b/test/loop.ul.ir index a72d97c..26b2406 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -1,9 +1,12 @@ - +global str terminal_namespace = "/dev/term/0" +global str prompt = "Enter a string:" +global str new_line = "\n" + function main () - real a is $0 - int i is $1 - int mode is $11 - str term is $10 + real a $0 + int i $1 + int mode $11 + str term $10 load_immediate 5.0 -> a load_immediate 5000 -> i @@ -15,41 +18,41 @@ function main () add_int i $3 -> i jump_ge_int loop_body i $2 - malloc_immediate "/dev/term/0" -> term + load_immediate terminal_namespace -> term load_immediate 0 -> mode - syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); + syscall OPEN term mode -> term // Terminal term = open("/dev/term/0", 0); - nat b is $1 + nat b $1 real_to_nat a -> b - malloc_immediate "Enter a string:" -> $7 + load_immediate prompt -> $7 string_length $7 -> $8 - syscall WRITE term $7 $8 # print prompt + syscall WRITE term $7 $8 // print prompt - str user_string is $9 + str user_string $9 load_immediate 32 -> $8 malloc $8 -> user_string - syscall READ term user_string $8 # read in max 32 byte string + syscall READ term user_string $8 // read in max 32 byte string - call pln user_string + call pln user_string -> void nat_to_string b -> $4 - call pln $4 + call pln $4 -> void real_to_string a -> $3 - call pln $3 + call pln $3 -> void exit 0 
-function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + load_immediate terminal_namespace -> ts load_immediate 0 -> mode - syscall OPEN ts mode -> ts # get terminal device + syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return \ No newline at end of file diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index cc69b12..248ca99 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -1,22 +1,25 @@ - +global str terminal_namespace = "/dev/term/0" +global str prompt = "Enter a string:" +global str new_line = "\n" + function main () int mode $11; str term $10; - malloc_immediate "/dev/term/0" -> term + load_immediate terminal_namespace -> term load_immediate 0 -> mode - syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); + syscall OPEN term mode -> term // Terminal term = open("/dev/term/0", 0); - malloc_immediate "Enter a string:" -> $7 + load_immediate prompt -> $7 string_length $7 -> $8 - syscall WRITE term $7 $8 # print prompt + syscall WRITE term $7 $8 // print prompt str user_string $9 load_immediate 32 -> $8 malloc $8 -> user_string - syscall READ term user_string $8 # read in max 32 byte string + syscall READ term user_string $8 // read in max 32 byte string - call pln user_string; + call pln user_string -> void exit 0 function pln (str message $0) @@ -24,14 +27,14 @@ function pln (str message $0) int mode $5 int msg_length $2 str nl $3 - int nl_length $4 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + load_immediate terminal_namespace -> ts load_immediate 0 -> mode - syscall OPEN ts mode -> ts # get terminal device + syscall OPEN ts mode -> ts strlen 
message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/paint-bw.ul.ir b/test/paint-bw.ul.ir index ab40219..4c1fb02 100644 --- a/test/paint-bw.ul.ir +++ b/test/paint-bw.ul.ir @@ -8,177 +8,178 @@ global const byte LIGHT_GRAY = 182 global byte SELECTED_COLOR = 255 function main () - # Open screen - plex screen is $0 - str screen_name is $18 - int mode is $11 - nat screen_buffer is $21 + // Open screen + plex screen $0 + str screen_name $18 + int mode $11 + nat screen_buffer $21 - # use load immediate because it is a pointer to a string, not a value - load_address &screen_namespace -> screen_name + // use load immediate because it a pointer to a string, not a value + load_address screen_namespace -> screen_name load_immediate 0 -> mode - syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); + syscall OPEN screen_name mode -> screen // Screen screen = open("/dev/screen/0", 0); - nat width is $20 - nat size is $22 - load_offset_32 screen 8 -> width # load width - load_offset_32 screen 12 -> size # load size - load_immediate 16 -> $1 # offset for screen buffer + nat width $20 + nat size $22 + load_offset_32 screen 8 -> width // load width + load_offset_32 screen 12 -> size // load size + load_immediate 16 -> $1 // offset for screen buffer add_nat screen $1 -> screen_buffer - # open mouse - plex mouse is $15 - str mouse_name is $16 - load_address &mouse_namespace -> mouse_name - syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); + // open mouse + plex mouse $15 + str mouse_name $16 + load_address mouse_namespace -> mouse_name + syscall OPEN mouse_name mode -> mouse // Mouse mouse = open("/dev/mouse/0", 0); - byte color is $1 - nat x_pos is $12 - nat y_pos is $13 + byte color $1 + nat x_pos $12 + nat y_pos $13 - load_absolute_32 &BLACK -> color + load_absolute_32 BLACK -> 
color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void - load_absolute_32 &WHITE -> color + load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void - # screen.draw# + // screen.draw syscall WRITE screen screen_buffer size - nat zero is $11 + nat zero $11 - draw_loop: - # load mouse click data + loop draw_loop + // load mouse click data syscall REFRESH mouse - byte left_down is $9 - load_offset_8 mouse 16 -> left_down # load btn1 pressed + byte left_down $9 + load_offset_8 mouse 16 -> left_down // load btn1 pressed - jump_eq_nat &draw_loop left_down zero + jump_eq_nat draw_loop left_down zero - nat mouse_x is $7 - nat mouse_y is $8 - load_offset_32 mouse 8 -> mouse_x # load x - load_offset_32 mouse 12 -> mouse_y # load y + nat mouse_x $7 + nat mouse_y $8 + load_offset_32 mouse 8 -> mouse_x // load x + load_offset_32 mouse 12 -> mouse_y // load y - nat box_size is $14 + nat box_size $14 load_immediate 20 -> box_size - # first row - load_absolute_32 &BLACK -> color + // first row + load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width - call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void - load_absolute_32 &WHITE -> color + load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width - call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos 
y_pos width -> void + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void syscall WRITE screen screen_buffer size - byte selected_color is $25 - load_absolute_32 &SELECTED_COLOR -> selected_color + byte selected_color $25 + load_absolute_32 SELECTED_COLOR -> selected_color - nat brush_size is $19 + nat brush_size $19 load_immediate 5 -> brush_size - call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size + call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void - jump &draw_loop + jump draw_loop - # Flush and exit + // Flush and exit exit 0 -function set_color_if_clicked (int click_x is $0, int click_y is $1, - int box_x is $2, int box_y is $3, byte color is $4, int box_size is $5) +function set_color_if_clicked (int click_x $0, int click_y $1, + int box_x $2, int box_y $3, byte color $4, int box_size $5) - # Compute right - int right_edge is $6 + // Compute right + int right_edge $6 add_int box_x box_size -> right_edge - # Compute bottom = box_y + box_size - int bottom_edge is $7 + // Compute bottom = box_y + box_size + int bottom_edge $7 add_int box_y box_size -> bottom_edge - # Bounds check: x in [box_x, right] and y in [box_y, bottom] - jump_lt_int &fail click_x box_x - jump_ge_int &fail click_x right_edge - jump_lt_int &fail click_y box_y - jump_ge_int &fail click_y bottom_edge + // Bounds check: x in [box_x, right] and y in [box_y, bottom] + jump_lt_int fail click_x box_x + jump_ge_int fail click_x right_edge + jump_lt_int fail click_y box_y + jump_ge_int fail click_y bottom_edge - store_absolute_8 &SELECTED_COLOR color + store_absolute_8 SELECTED_COLOR color - fail: + else fail return -function draw_outlined_swatch(nat base is $0, - byte color is $1, int x is $2, int y is $3, int width is $4) +function draw_outlined_swatch(nat base $0, + byte color $1, int x $2, int y $3, int width $4) - # Constants - nat background_color is $5 - load_absolute_32 &GRAY -> 
background_color + // Constants + nat background_color $5 + load_absolute_32 GRAY -> background_color - byte selected_color is $10 - load_absolute_32 &SELECTED_COLOR -> selected_color + byte selected_color $10 + load_absolute_32 SELECTED_COLOR -> selected_color - jump_eq_int &set_selected selected_color color - jump &end_set_selected + jump_eq_int set_selected selected_color color + jump end_set_selected set_selected: - load_absolute_32 &DARK_GRAY -> background_color + load_absolute_32 DARK_GRAY -> background_color end_set_selected: - nat outline_size is $6 + nat outline_size $6 load_immediate 20 -> outline_size - nat fill_size is $7 + nat fill_size $7 load_immediate 17 -> fill_size - nat offset is $8 + nat offset $8 load_immediate 2 -> offset - call &draw_box base width background_color x y outline_size outline_size + call draw_box base width background_color x y outline_size outline_size -> void - add_int x offset -> $9 # x + 2 - add_int y offset -> $10 # y + 2 + add_int x offset -> $9 // x + 2 + add_int y offset -> $10 // y + 2 - call &draw_box base width color $9 $10 fill_size fill_size + call draw_box base width color $9 $10 fill_size fill_size -> void return -function draw_box (nat base is $0, nat screen_width is $1, - byte color is $2, nat x_start is $3, nat y_start is $4, nat width is $5, nat height is $6) +function draw_box (nat base $0, nat screen_width $1, + byte color $2, nat x_start $3, nat y_start $4, + nat width $5, nat height $6) - # Compute start address: base + y*640 + x - nat offset is $15 + // Compute start address: base + y*640 + x + nat offset $15 mul_int y_start screen_width -> offset add_int offset x_start -> offset add_nat offset base -> offset - nat fat_ptr_size is $25 + nat fat_ptr_size $25 load_immediate 4 -> fat_ptr_size - add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size + add_nat offset fat_ptr_size -> offset // need to add offset for fat pointer size - int i is $30 + int i $30 load_immediate 1 -> i - int 
zero is $26 + int zero $26 load_immediate 0 -> zero - int row_end is $27 - nat pixel_ptr is $29 + int row_end $27 + nat pixel_ptr $29 - draw_box_outer: - add_int offset width -> row_end # current + width - register_move offset -> pixel_ptr # set pixel point - memset_8 pixel_ptr color width # draw row - add_int offset screen_width -> offset # next row += 640 - sub_int height i -> height # decrement row count - jump_gt_int &draw_box_outer height zero + loop draw_box_outer + add_int offset width -> row_end // current + width + register_move offset -> pixel_ptr // set pixel point + memset_8 pixel_ptr color width // draw row + add_int offset screen_width -> offset // next row += 640 + sub_int height i -> height // decrement row count + jump_gt_int draw_box_outer height zero return diff --git a/test/paint.ul.ir b/test/paint.ul.ir index 3dac05e..0adc914 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -39,12 +39,12 @@ function main () load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch [screen_buffer color x_pos y_pos width] -> void load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void // screen.draw// syscall WRITE screen screen_buffer size @@ -72,15 +72,15 @@ function main () load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer 
color x_pos y_pos width - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void syscall WRITE screen screen_buffer size @@ -90,7 +90,7 @@ function main () nat brush_size $19 load_immediate 5 -> brush_size - call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size + call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void jump draw_loop @@ -144,12 +144,12 @@ function draw_outlined_swatch(nat base $0, nat offset $8 load_immediate 2 -> offset - call draw_box base width background_color x y outline_size outline_size + call draw_box base width background_color x y outline_size outline_size -> void add_int x offset -> $9 // x + 2 add_int y offset -> $10 // y + 2 - call draw_box base width color $9 $10 fill_size fill_size + call draw_box base width color $9 $10 fill_size fill_size -> void return @@ -180,5 +180,5 @@ function draw_box (nat base $0, nat screen_width $1, memset_8 pixel_ptr color width // draw row add_int offset screen_width -> offset // next row += 640 sub_int height i -> height // decrement row count - jump_gt_int draw_box_outer height zero + jump_gt_int draw_box_outer height zero return diff --git a/test/simple.ul.ir b/test/simple.ul.ir index e4a1407..572d680 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -11,7 +11,7 @@ function main () add_real x y -> result str result_str $3 real_to_string result -> result_str - call &pln result_str + call &pln result_str -> void exit 0 function pln (str message $0) diff --git a/test/window.ul.ir b/test/window.ul.ir index c942eeb..c860dfc 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -24,21 +24,21 @@ function main () syscall OPEN screen mode -> screen nat_to_string screen -> tmp_str - call pln tmp_str + call pln tmp_str -> void load_offset_32 screen 8 -> width 
nat_to_string width -> tmp_str - call pln tmp_str + call pln tmp_str -> void load_offset_32 screen 12 -> buffer_size nat_to_string buffer_size -> tmp_str - call pln tmp_str + call pln tmp_str -> void load_immediate 16 -> offset_temp add_nat screen offset_temp -> screen_buffer nat_to_string screen_buffer -> tmp_str - call pln tmp_str + call pln tmp_str -> void // open mouse load_immediate mouse_namespace -> mouse @@ -72,17 +72,18 @@ function main () exit 0 function pln (str message $0) - str term $1 - int msg_length $2 - str nl $3 - int nl_length $4 + str ts $1 int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 + load_immediate terminal_namespace -> ts load_immediate 0 -> mode - syscall OPEN terminal_namespace mode -> term + syscall OPEN ts mode -> ts strlen message -> msg_length - syscall WRITE term message msg_length - load_address new_line -> nl + syscall WRITE ts message msg_length + load_immediate new_line -> nl strlen nl -> nl_length - syscall WRITE term nl nl_length + syscall WRITE ts nl nl_length return From 32ae0eeb8f4267d9744c4868125b9f45ef57ce04 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 08:28:17 -0800 Subject: [PATCH 13/27] fix readme --- README.org | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/README.org b/README.org index 37686f5..4635c9a 100644 --- a/README.org +++ b/README.org @@ -97,41 +97,46 @@ memory is managed via frame based arenas. function scopes defines a memory frame heap allocations using the internal malloc opcode push pointers within this frame. when a frame exits, the pointer is reset like stack based gc. 
#+BEGIN_SRC sh +global str terminal_namespace = "/dev/term/0" +global str prompt = "Enter a string:" +global str new_line = "\n" + function main () - int mode is $11 - str term is $10 + int mode $11; + str term $10; - malloc_immediate "/dev/term/0" -> term + load_immediate terminal_namespace -> term load_immediate 0 -> mode - syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); + syscall OPEN term mode -> term // Terminal term = open("/dev/term/0", 0); - malloc_immediate "Enter a string:" -> $7 + load_immediate prompt -> $7 string_length $7 -> $8 - syscall WRITE term $7 $8 # print prompt + syscall WRITE term $7 $8 // print prompt - str user_string is $9 + str user_string $9 load_immediate 32 -> $8 malloc $8 -> user_string - syscall READ term user_string $8 # read in max 32 byte string + syscall READ term user_string $8 // read in max 32 byte string - call pln user_string + call pln user_string -> void exit 0 -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + load_immediate terminal_namespace -> ts load_immediate 0 -> mode - syscall OPEN ts mode -> ts # get terminal device + syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length + return #+END_SRC From 55b61450645bd0452ea72673061c7e7a786695b0 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 09:21:12 -0800 Subject: [PATCH 14/27] Add semicolon --- src/tools/assembler/assembler.c | 12 +- test/add.ul.ir | 60 ++++---- test/fib.ul.ir | 77 +++++------ test/hello.ul.ir | 36 ++--- test/loop.ul.ir | 102 +++++++------- test/malloc.ul.ir | 58 ++++---- test/paint-bw.ul.ir | 232 +++++++++++++++---------------- test/paint.ul.ir | 
233 ++++++++++++++++---------------- test/simple.ul.ir | 52 +++---- test/window.ul.ir | 130 +++++++++--------- 10 files changed, 502 insertions(+), 490 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 6b769d9..8f3b62c 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -216,6 +216,7 @@ bool define_global(VM *vm, SymbolTable *st) { default: return false; } + next_token_is(TOKEN_SEMICOLON); symbol_table_add(st, s); return true; @@ -297,6 +298,8 @@ void define_var(VM *vm, SymbolTable *st, Token regType) { Token reg_num = next_token_is(TOKEN_LITERAL_INT); s.ref = atoi(reg_num.start); + + next_token_is(TOKEN_SEMICOLON); symbol_table_add(st, s); } @@ -403,6 +406,8 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_LITERAL_NAT); vm->pc+=4; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "call")) { vm->pc++; @@ -418,7 +423,12 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { } /* return type */ next = next_token(); - vm->pc++; + vm->pc++; /* we emit a value regardless, a void is register 255 */ + if (next.type == TOKEN_SEMICOLON) { + continue; + } + /* if it is not void, then it was the value */ + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "syscall")) { } else if (streq(token.start, "load_immediate")) { } else if (streq(token.start, "load_indirect_8")) { diff --git a/test/add.ul.ir b/test/add.ul.ir index 3364204..5ed166d 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -1,35 +1,35 @@ -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global int x = 1 -global int y = 1 +global str terminal_namespace = "/dev/term/0"; +global str new_line = "\n"; +global int x = 1; +global int y = 1; -function main () - int ans $2 - str ans_string $3 +function main () + int ans $2; + str ans_string $3; - load_absolute_32 x -> $0 - load_absolute_32 y -> $1 - call add $0 $1 -> ans - 
int_to_string ans -> ans_string - call pln ans_string -> void - exit 0 + load_absolute_32 x -> $0; + load_absolute_32 y -> $1; + call add $0 $1 -> ans; + int_to_string ans -> ans_string; + call pln ans_string -> void; + exit 0; -function add (int a $0, int b $1) - int result $2 - add_int a b -> result - return result +function add (int a $0, int b $1) + int result $2; + add_int a b -> result; + return result; -function pln (str message $0) - str term $1 - int msg_length $2 - str nl $3 - int nl_length $4 - int mode $5 +function pln (str message $0) + str term $1; + int msg_length $2; + str nl $3; + int nl_length $4; + int mode $5; - load_immediate 0 -> mode - syscall OPEN terminal_namespace mode -> term - strlen message -> msg_length - syscall WRITE term message msg_length - strlen new_line -> nl_length - syscall WRITE term nl nl_length - return + load_immediate 0 -> mode; + syscall OPEN terminal_namespace mode -> term; + strlen message -> msg_length; + syscall WRITE term message msg_length; + strlen new_line -> nl_length; + syscall WRITE term nl nl_length; + return; diff --git a/test/fib.ul.ir b/test/fib.ul.ir index d823c9a..e073416 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -1,48 +1,49 @@ -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" +global str terminal_namespace = "/dev/term/0"; +global str new_line = "\n"; -function main () - int n $0 - int str_n $1 +function main () + int n $0; + int str_n $1; - load_immediate 35 -> n - call fib n -> n - int_to_string n -> str_n - call pln str_n -> void - exit 0 + load_immediate 35 -> n; + call fib n -> n; + int_to_string n -> str_n; + call pln str_n -> void; + exit 0; -function fib (int n $0) - load_immediate 2 -> $1 +function fib (int n $0) + load_immediate 2 -> $1; - jump_lt_int base_case n $1 + jump_lt_int base_case n $1; - load_immediate 2 -> $3 - sub_int n $3 -> $4 - call fib $4 -> $5 + load_immediate 2 -> $3; + sub_int n $3 -> $4; + call fib $4 -> $5; - load_immediate 1 -> $3 - 
sub_int n $3 -> $4 - call fib $4 -> $6 + load_immediate 1 -> $3; + sub_int n $3 -> $4; + call fib $4 -> $6; - add_int $6 $5 -> $7 - return $7 + add_int $6 $5 -> $7; + return $7; - else base_case - return n + else base_case; + return n; -function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 +function pln (str message $0) + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; + \ No newline at end of file diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 204fab3..7373ad9 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -3,25 +3,25 @@ global str new_line = "\n" global str message = "nuqneH 'u'?" 
function main () - str hello $0 + str hello $0; - load_immediate message -> hello - call pln hello -> void - exit 0 + load_immediate message -> hello; + call pln hello -> void; + exit 0; function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 26b2406..0cc95f3 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -1,58 +1,58 @@ -global str terminal_namespace = "/dev/term/0" -global str prompt = "Enter a string:" -global str new_line = "\n" +global str terminal_namespace = "/dev/term/0"; +global str prompt = "Enter a string:"; +global str new_line = "\n"; -function main () - real a $0 - int i $1 - int mode $11 - str term $10 +function main (); + real a $0; + int i $1; + int mode $11; + str term $10; - load_immediate 5.0 -> a - load_immediate 5000 -> i - load_immediate 0 -> $2 - load_immediate -1 -> $3 - load_immediate 5.0 -> $5 - loop loop_body - add_real a $5 -> a - add_int i $3 -> i - jump_ge_int loop_body i $2 - - load_immediate terminal_namespace -> term - load_immediate 0 -> mode - syscall OPEN term mode -> term // Terminal term = open("/dev/term/0", 0); + load_immediate 5.0 -> a; + load_immediate 5000 -> i; + load_immediate 0 -> $2; + load_immediate -1 -> $3; + load_immediate 5.0 -> $5; + loop loop_body; + add_real a $5 -> a; + add_int i $3 -> i; + 
jump_ge_int loop_body i $2; - nat b $1 - real_to_nat a -> b - load_immediate prompt -> $7 - string_length $7 -> $8 - syscall WRITE term $7 $8 // print prompt + load_immediate terminal_namespace -> term; + load_immediate 0 -> mode; + syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); - str user_string $9 - load_immediate 32 -> $8 - malloc $8 -> user_string - syscall READ term user_string $8 // read in max 32 byte string + nat b $1; + real_to_nat a -> b; + load_immediate prompt -> $7; + string_length $7 -> $8; + syscall WRITE term $7 $8; // print prompt - call pln user_string -> void - nat_to_string b -> $4 - call pln $4 -> void - real_to_string a -> $3 - call pln $3 -> void - exit 0 + str user_string $9; + load_immediate 32 -> $8; + malloc $8 -> user_string; + syscall READ term user_string; $8 // read in max 32 byte string -function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 + call pln user_string -> void; + nat_to_string b -> $4; + call pln $4 -> void; + real_to_string a -> $3; + call pln $3 -> void; + exit 0; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return \ No newline at end of file +function pln (str message $0); + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; + + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length ; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; \ No newline at end of file diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 248ca99..9cb5155 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -1,40 +1,40 @@ -global str terminal_namespace = 
"/dev/term/0" -global str prompt = "Enter a string:" -global str new_line = "\n" +global str terminal_namespace = "/dev/term/0"; +global str prompt = "Enter a string:"; +global str new_line = "\n"; function main () int mode $11; str term $10; - load_immediate terminal_namespace -> term - load_immediate 0 -> mode - syscall OPEN term mode -> term // Terminal term = open("/dev/term/0", 0); + load_immediate terminal_namespace -> term; + load_immediate 0 -> mode; + syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); - load_immediate prompt -> $7 - string_length $7 -> $8 - syscall WRITE term $7 $8 // print prompt + load_immediate prompt -> $7; + string_length $7 -> $8; + syscall WRITE term $7 $8; // print prompt - str user_string $9 - load_immediate 32 -> $8 - malloc $8 -> user_string - syscall READ term user_string $8 // read in max 32 byte string + str user_string $9; + load_immediate 32 -> $8; + malloc $8 -> user_string; + syscall READ term user_string $8; // read in max 32 byte string - call pln user_string -> void - exit 0 + call pln user_string -> void; + exit 0; function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; diff --git a/test/paint-bw.ul.ir b/test/paint-bw.ul.ir index 4c1fb02..97b8fdf 100644 --- a/test/paint-bw.ul.ir +++ b/test/paint-bw.ul.ir @@ -1,185 +1,185 @@ -global 
const str screen_namespace = "/dev/screen/0" -global const str mouse_namespace = "/dev/mouse/0" -global const byte BLACK = 0 -global const byte WHITE = 255 -global const byte DARK_GRAY = 73 -global const byte GRAY = 146 -global const byte LIGHT_GRAY = 182 -global byte SELECTED_COLOR = 255 +global const str screen_namespace = "/dev/screen/0"; +global const str mouse_namespace = "/dev/mouse/0"; +global const byte BLACK = 0; +global const byte WHITE = 255; +global const byte DARK_GRAY = 73; +global const byte GRAY = 146; +global const byte LIGHT_GRAY = 182; +global byte SELECTED_COLOR = 255; function main () // Open screen - plex screen $0 - str screen_name $18 - int mode $11 - nat screen_buffer $21 + plex screen $0; + str screen_name $18; + int mode $11; + nat screen_buffer $21; // use load immediate because it a pointer to a string, not a value - load_address screen_namespace -> screen_name - load_immediate 0 -> mode - syscall OPEN screen_name mode -> screen // Screen screen = open("/dev/screen/0", 0); + load_address screen_namespace -> screen_name; + load_immediate 0 -> mode; + syscall OPEN screen_name mode -> screen; // Screen screen = open("/dev/screen/0", 0); - nat width $20 - nat size $22 - load_offset_32 screen 8 -> width // load width - load_offset_32 screen 12 -> size // load size - load_immediate 16 -> $1 // offset for screen buffer - add_nat screen $1 -> screen_buffer + nat width $20; + nat size $22; + load_offset_32 screen 8 -> width; // load width + load_offset_32 screen 12 -> size; // load size + load_immediate 16 -> $1; // offset for screen buffer + add_nat screen $1 -> screen_buffer; // open mouse - plex mouse $15 - str mouse_name $16 - load_address mouse_namespace -> mouse_name - syscall OPEN mouse_name mode -> mouse // Mouse mouse = open("/dev/mouse/0", 0); + plex mouse $15; + str mouse_name $16; + load_address mouse_namespace -> mouse_name; + syscall OPEN mouse_name mode -> mouse; // Mouse mouse = open("/dev/mouse/0", 0); - byte color $1 - nat 
x_pos $12 - nat y_pos $13 + byte color $1; + nat x_pos $12; + nat y_pos $13; - load_absolute_32 BLACK -> color - load_immediate 1 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void + load_absolute_32 BLACK -> color; + load_immediate 1 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - load_absolute_32 WHITE -> color - load_immediate 21 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void + load_absolute_32 WHITE -> color; + load_immediate 21 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; // screen.draw - syscall WRITE screen screen_buffer size + syscall WRITE screen screen_buffer size; - nat zero $11 + nat zero $11; loop draw_loop // load mouse click data - syscall REFRESH mouse + syscall REFRESH mouse; - byte left_down $9 - load_offset_8 mouse 16 -> left_down // load btn1 pressed + byte left_down $9; + load_offset_8 mouse 16 -> left_down; // load btn1 pressed - jump_eq_nat draw_loop left_down zero + jump_eq_nat draw_loop left_down zero; - nat mouse_x $7 - nat mouse_y $8 - load_offset_32 mouse 8 -> mouse_x // load x - load_offset_32 mouse 12 -> mouse_y // load y + nat mouse_x $7; + nat mouse_y $8; + load_offset_32 mouse 8 -> mouse_x; // load x + load_offset_32 mouse 12 -> mouse_y; // load y - nat box_size $14 - load_immediate 20 -> box_size + nat box_size $14; + load_immediate 20 -> box_size; // first row - load_absolute_32 BLACK -> color - load_immediate 1 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void + load_absolute_32 BLACK -> color; + load_immediate 1 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call 
set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; - load_absolute_32 WHITE -> color - load_immediate 21 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void + load_absolute_32 WHITE -> color; + load_immediate 21 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; - syscall WRITE screen screen_buffer size + syscall WRITE screen screen_buffer size; - byte selected_color $25 - load_absolute_32 SELECTED_COLOR -> selected_color + byte selected_color $25; + load_absolute_32 SELECTED_COLOR -> selected_color; - nat brush_size $19 - load_immediate 5 -> brush_size + nat brush_size $19; + load_immediate 5 -> brush_size; - call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void + call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void; - jump draw_loop + jump draw_loop; // Flush and exit - exit 0 + exit 0; function set_color_if_clicked (int click_x $0, int click_y $1, int box_x $2, int box_y $3, byte color $4, int box_size $5) // Compute right - int right_edge $6 - add_int box_x box_size -> right_edge + int right_edge $6; + add_int box_x box_size -> right_edge; // Compute bottom = box_y + box_size - int bottom_edge $7 - add_int box_y box_size -> bottom_edge + int bottom_edge $7; + add_int box_y box_size -> bottom_edge; // Bounds check: x in [box_x, right] and y in [box_y, bottom] - jump_lt_int fail click_x box_x - jump_ge_int fail click_x right_edge - jump_lt_int fail click_y box_y - jump_ge_int fail click_y bottom_edge + jump_lt_int fail click_x box_x; + jump_ge_int fail click_x right_edge; + jump_lt_int fail click_y box_y; + jump_ge_int fail click_y bottom_edge; - store_absolute_8 SELECTED_COLOR color 
+ store_absolute_8 SELECTED_COLOR color; else fail - return + return; function draw_outlined_swatch(nat base $0, byte color $1, int x $2, int y $3, int width $4) // Constants - nat background_color $5 - load_absolute_32 GRAY -> background_color + nat background_color $5; + load_absolute_32 GRAY -> background_color; - byte selected_color $10 - load_absolute_32 SELECTED_COLOR -> selected_color + byte selected_color $10; + load_absolute_32 SELECTED_COLOR -> selected_color; - jump_eq_int set_selected selected_color color - jump end_set_selected - set_selected: - load_absolute_32 DARK_GRAY -> background_color - end_set_selected: + jump_eq_int set_selected selected_color color; + jump end_set_selected; + do set_selected + load_absolute_32 DARK_GRAY -> background_color; + else end_set_selected - nat outline_size $6 - load_immediate 20 -> outline_size + nat outline_size $6; + load_immediate 20 -> outline_size; - nat fill_size $7 - load_immediate 17 -> fill_size + nat fill_size $7; + load_immediate 17 -> fill_size; - nat offset $8 - load_immediate 2 -> offset + nat offset $8; + load_immediate 2 -> offset; - call draw_box base width background_color x y outline_size outline_size -> void + call draw_box base width background_color x y outline_size outline_size -> void; - add_int x offset -> $9 // x + 2 - add_int y offset -> $10 // y + 2 + add_int x offset -> $9; // x + 2 + add_int y offset -> $10; // y + 2 - call draw_box base width color $9 $10 fill_size fill_size -> void + call draw_box base width color $9 $10 fill_size fill_size -> void; - return + return; function draw_box (nat base $0, nat screen_width $1, byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6) // Compute start address: base + y*640 + x - nat offset $15 - mul_int y_start screen_width -> offset - add_int offset x_start -> offset - add_nat offset base -> offset - nat fat_ptr_size $25 - load_immediate 4 -> fat_ptr_size - add_nat offset fat_ptr_size -> offset // need to add offset for fat 
pointer size + nat offset $15; + mul_int y_start screen_width -> offset; + add_int offset x_start -> offset; + add_nat offset base -> offset; + nat fat_ptr_size $25; + load_immediate 4 -> fat_ptr_size; + add_nat offset fat_ptr_size -> offset; // need to add offset for fat pointer size - int i $30 - load_immediate 1 -> i + int i $30; + load_immediate 1 -> i; - int zero $26 - load_immediate 0 -> zero + int zero $26; + load_immediate 0 -> zero; - int row_end $27 - nat pixel_ptr $29 + int row_end $27; + nat pixel_ptr $29; loop draw_box_outer - add_int offset width -> row_end // current + width - register_move offset -> pixel_ptr // set pixel point - memset_8 pixel_ptr color width // draw row - add_int offset screen_width -> offset // next row += 640 - sub_int height i -> height // decrement row count - jump_gt_int draw_box_outer height zero - return + add_int offset width -> row_end; // current + width + register_move offset -> pixel_ptr; // set pixel point + memset_8 pixel_ptr color width; // draw row + add_int offset screen_width -> offset; // next row += 640 + sub_int height i -> height; // decrement row count + jump_gt_int draw_box_outer height zero; + return; diff --git a/test/paint.ul.ir b/test/paint.ul.ir index 0adc914..97b8fdf 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -1,184 +1,185 @@ -global const str screen_namespace = "/dev/screen/0" -global const str mouse_namespace = "/dev/mouse/0" -global const byte BLACK = 0 -global const byte WHITE = 255 -global const byte DARK_GRAY = 73 -global const byte GRAY = 146 -global const byte LIGHT_GRAY = 182 -global byte SELECTED_COLOR = 255 +global const str screen_namespace = "/dev/screen/0"; +global const str mouse_namespace = "/dev/mouse/0"; +global const byte BLACK = 0; +global const byte WHITE = 255; +global const byte DARK_GRAY = 73; +global const byte GRAY = 146; +global const byte LIGHT_GRAY = 182; +global byte SELECTED_COLOR = 255; function main () // Open screen - plex screen $0 - str screen_name $18 
- int mode $11 - nat screen_buffer $21 + plex screen $0; + str screen_name $18; + int mode $11; + nat screen_buffer $21; // use load immediate because it a pointer to a string, not a value - load_address screen_namespace -> screen_name - load_immediate 0 -> mode - syscall OPEN screen_name mode -> screen // Screen screen = open("/dev/screen/0", 0); + load_address screen_namespace -> screen_name; + load_immediate 0 -> mode; + syscall OPEN screen_name mode -> screen; // Screen screen = open("/dev/screen/0", 0); - nat width $20 - nat size $22 - load_offset_32 screen 8 -> width // load width - load_offset_32 screen 12 -> size // load size - load_immediate 16 -> $1 // offset for screen buffer - add_nat screen $1 -> screen_buffer + nat width $20; + nat size $22; + load_offset_32 screen 8 -> width; // load width + load_offset_32 screen 12 -> size; // load size + load_immediate 16 -> $1; // offset for screen buffer + add_nat screen $1 -> screen_buffer; // open mouse - plex mouse $15 - str mouse_name $16 - load_address mouse_namespace -> mouse_name - syscall OPEN mouse_name mode -> mouse // Mouse mouse = open("/dev/mouse/0", 0); + plex mouse $15; + str mouse_name $16; + load_address mouse_namespace -> mouse_name; + syscall OPEN mouse_name mode -> mouse; // Mouse mouse = open("/dev/mouse/0", 0); - byte color $1 - nat x_pos $12 - nat y_pos $13 + byte color $1; + nat x_pos $12; + nat y_pos $13; - load_absolute_32 BLACK -> color - load_immediate 1 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch [screen_buffer color x_pos y_pos width] -> void + load_absolute_32 BLACK -> color; + load_immediate 1 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - load_absolute_32 WHITE -> color - load_immediate 21 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void + load_absolute_32 WHITE -> color; + load_immediate 21 -> x_pos; + load_immediate 1 -> y_pos; + 
call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - // screen.draw// - syscall WRITE screen screen_buffer size + // screen.draw + syscall WRITE screen screen_buffer size; - nat zero $11 + nat zero $11; loop draw_loop // load mouse click data - syscall REFRESH mouse + syscall REFRESH mouse; - byte left_down $9 - load_offset_8 mouse 16 -> left_down // load btn1 pressed + byte left_down $9; + load_offset_8 mouse 16 -> left_down; // load btn1 pressed - jump_eq_nat draw_loop left_down zero + jump_eq_nat draw_loop left_down zero; - nat mouse_x $7 - nat mouse_y $8 - load_offset_32 mouse 8 -> mouse_x // load x - load_offset_32 mouse 12 -> mouse_y // load y + nat mouse_x $7; + nat mouse_y $8; + load_offset_32 mouse 8 -> mouse_x; // load x + load_offset_32 mouse 12 -> mouse_y; // load y - nat box_size $14 - load_immediate 20 -> box_size + nat box_size $14; + load_immediate 20 -> box_size; // first row - load_absolute_32 BLACK -> color - load_immediate 1 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void + load_absolute_32 BLACK -> color; + load_immediate 1 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; - load_absolute_32 WHITE -> color - load_immediate 21 -> x_pos - load_immediate 1 -> y_pos - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void + load_absolute_32 WHITE -> color; + load_immediate 21 -> x_pos; + load_immediate 1 -> y_pos; + call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; - syscall WRITE screen screen_buffer size + syscall WRITE screen screen_buffer size; - 
byte selected_color $25 - load_absolute_32 SELECTED_COLOR -> selected_color + byte selected_color $25; + load_absolute_32 SELECTED_COLOR -> selected_color; - nat brush_size $19 - load_immediate 5 -> brush_size + nat brush_size $19; + load_immediate 5 -> brush_size; - call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void + call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void; - jump draw_loop + jump draw_loop; // Flush and exit - exit 0 + exit 0; function set_color_if_clicked (int click_x $0, int click_y $1, int box_x $2, int box_y $3, byte color $4, int box_size $5) // Compute right - int right_edge $6 - add_int box_x box_size -> right_edge + int right_edge $6; + add_int box_x box_size -> right_edge; // Compute bottom = box_y + box_size - int bottom_edge $7 - add_int box_y box_size -> bottom_edge + int bottom_edge $7; + add_int box_y box_size -> bottom_edge; // Bounds check: x in [box_x, right] and y in [box_y, bottom] - jump_lt_int fail click_x box_x - jump_ge_int fail click_x right_edge - jump_lt_int fail click_y box_y - jump_ge_int fail click_y bottom_edge + jump_lt_int fail click_x box_x; + jump_ge_int fail click_x right_edge; + jump_lt_int fail click_y box_y; + jump_ge_int fail click_y bottom_edge; - store_absolute_8 SELECTED_COLOR color + store_absolute_8 SELECTED_COLOR color; else fail - return + return; function draw_outlined_swatch(nat base $0, byte color $1, int x $2, int y $3, int width $4) // Constants - nat background_color $5 - load_absolute_32 GRAY -> background_color + nat background_color $5; + load_absolute_32 GRAY -> background_color; - byte selected_color $10 - load_absolute_32 SELECTED_COLOR -> selected_color + byte selected_color $10; + load_absolute_32 SELECTED_COLOR -> selected_color; - jump_eq_int set_selected selected_color color - jump end_set_selected + jump_eq_int set_selected selected_color color; + jump end_set_selected; do set_selected - 
load_absolute_32 DARK_GRAY -> background_color + load_absolute_32 DARK_GRAY -> background_color; else end_set_selected - nat outline_size $6 - load_immediate 20 -> outline_size + nat outline_size $6; + load_immediate 20 -> outline_size; - nat fill_size $7 - load_immediate 17 -> fill_size + nat fill_size $7; + load_immediate 17 -> fill_size; - nat offset $8 - load_immediate 2 -> offset + nat offset $8; + load_immediate 2 -> offset; - call draw_box base width background_color x y outline_size outline_size -> void + call draw_box base width background_color x y outline_size outline_size -> void; - add_int x offset -> $9 // x + 2 - add_int y offset -> $10 // y + 2 + add_int x offset -> $9; // x + 2 + add_int y offset -> $10; // y + 2 - call draw_box base width color $9 $10 fill_size fill_size -> void + call draw_box base width color $9 $10 fill_size fill_size -> void; - return + return; function draw_box (nat base $0, nat screen_width $1, - byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6) + byte color $2, nat x_start $3, nat y_start $4, + nat width $5, nat height $6) // Compute start address: base + y*640 + x - nat offset $15 - mul_int y_start screen_width -> offset - add_int offset x_start -> offset - add_nat offset base -> offset - nat fat_ptr_size $25 - load_immediate 4 -> fat_ptr_size - add_nat offset fat_ptr_size -> offset // need to add offset for fat pointer size + nat offset $15; + mul_int y_start screen_width -> offset; + add_int offset x_start -> offset; + add_nat offset base -> offset; + nat fat_ptr_size $25; + load_immediate 4 -> fat_ptr_size; + add_nat offset fat_ptr_size -> offset; // need to add offset for fat pointer size - int i $30 - load_immediate 1 -> i + int i $30; + load_immediate 1 -> i; - int zero $26 - load_immediate 0 -> zero + int zero $26; + load_immediate 0 -> zero; - int row_end $27 - nat pixel_ptr $29 + int row_end $27; + nat pixel_ptr $29; loop draw_box_outer - add_int offset width -> row_end // current + width 
- register_move offset -> pixel_ptr // set pixel point - memset_8 pixel_ptr color width // draw row - add_int offset screen_width -> offset // next row += 640 - sub_int height i -> height // decrement row count - jump_gt_int draw_box_outer height zero - return + add_int offset width -> row_end; // current + width + register_move offset -> pixel_ptr; // set pixel point + memset_8 pixel_ptr color width; // draw row + add_int offset screen_width -> offset; // next row += 640 + sub_int height i -> height; // decrement row count + jump_gt_int draw_box_outer height zero; + return; diff --git a/test/simple.ul.ir b/test/simple.ul.ir index 572d680..c973ff5 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -1,31 +1,31 @@ -global str terminal_namespace = "/dev/term/0" -global real x = 1.0 -global real y = 1.0 +global str terminal_namespace = "/dev/term/0"; +global real x = 1.0; +global real y = 1.0; function main () - real x $0 - load_absolute_32 &x -> x - real y $1 - load_absolute_32 &y -> y - real result $2 - add_real x y -> result - str result_str $3 - real_to_string result -> result_str - call &pln result_str -> void - exit 0 + real x $0; + load_absolute_32 x -> x; + real y $1; + load_absolute_32 y -> y; + real result $2; + add_real x y -> result; + str result_str $3; + real_to_string result -> result_str; + call pln result_str -> void; + exit 0; function pln (str message $0) - str term $1 - int msg_length $2 - str nl $3 - int nl_length $4 - int mode $5 + str term $1; + int msg_length $2; + str nl $3; + int nl_length $4; + int mode $5; - load_immediate 0 -> mode - syscall OPEN &terminal_namespace mode -> term - strlen message -> msg_length - syscall WRITE term message msg_length - load_address new_line -> nl - strlen nl -> nl_length - syscall WRITE term nl nl_length - return + load_immediate 0 -> mode; + syscall OPEN terminal_namespace mode -> term; + strlen message -> msg_length; + syscall WRITE term message msg_length; + load_address new_line -> nl; + strlen nl 
-> nl_length; + syscall WRITE term nl nl_length; + return; diff --git a/test/window.ul.ir b/test/window.ul.ir index c860dfc..8fa51a5 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -1,89 +1,89 @@ -global str screen_namespace = "/dev/screen/0" -global str mouse_namespace = "/dev/mouse/0" -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global byte white = 255 +global str screen_namespace = "/dev/screen/0"; +global str mouse_namespace = "/dev/mouse/0"; +global str terminal_namespace = "/dev/term/0"; +global str new_line = "\n"; +global byte white = 255; function main () - plex screen $0 - plex mouse $1 - str tmp_str $2 - byte color $3 - bool left_down $4 - int mode $5 - nat offset_temp $6 - nat x $7 - nat y $8 - nat width $9 - nat screen_buffer $10 - nat buffer_size $11 - nat pixel_pos $12 + plex screen $0; + plex mouse $1; + str tmp_str $2; + byte color $3; + bool left_down $4; + int mode $5; + nat offset_temp $6; + nat x $7; + nat y $8; + nat width $9; + nat screen_buffer $10; + nat buffer_size $11; + nat pixel_pos $12; - load_immediate screen_namespace -> screen - load_immediate 0 -> mode - syscall OPEN screen mode -> screen + load_immediate screen_namespace -> screen; + load_immediate 0 -> mode; + syscall OPEN screen mode -> screen; - nat_to_string screen -> tmp_str - call pln tmp_str -> void + nat_to_string screen -> tmp_str; + call pln tmp_str -> void; - load_offset_32 screen 8 -> width - nat_to_string width -> tmp_str - call pln tmp_str -> void + load_offset_32 screen 8 -> width; + nat_to_string width -> tmp_str; + call pln tmp_str -> void; - load_offset_32 screen 12 -> buffer_size - nat_to_string buffer_size -> tmp_str - call pln tmp_str -> void + load_offset_32 screen 12 -> buffer_size; + nat_to_string buffer_size -> tmp_str; + call pln tmp_str -> void; - load_immediate 16 -> offset_temp - add_nat screen offset_temp -> screen_buffer + load_immediate 16 -> offset_temp; + add_nat screen offset_temp -> screen_buffer; - 
nat_to_string screen_buffer -> tmp_str - call pln tmp_str -> void + nat_to_string screen_buffer -> tmp_str; + call pln tmp_str -> void; // open mouse - load_immediate mouse_namespace -> mouse - syscall OPEN mouse mode -> mouse + load_immediate mouse_namespace -> mouse; + syscall OPEN mouse mode -> mouse; - syscall WRITE screen screen_buffer buffer_size // redraw + syscall WRITE screen screen_buffer buffer_size; // redraw loop draw_loop // load mouse click data - syscall STAT mouse + syscall STAT mouse; - load_offset_8 mouse 16 -> left_down + load_offset_8 mouse 16 -> left_down; - jump_eq_nat draw_loop left_down mode // mode = 0 / false + jump_eq_nat draw_loop left_down mode; // mode = 0 / false - load_offset_32 mouse 8 -> x - load_offset_32 mouse 12 -> y + load_offset_32 mouse 8 -> x; + load_offset_32 mouse 12 -> y; // Compute start address: y *width + x - mul_nat y width -> pixel_pos - add_nat x pixel_pos -> pixel_pos - add_nat screen_buffer pixel_pos -> pixel_pos - load_immediate 4 -> fat_ptr_size - add_nat pixel_pos fat_ptr_size -> pixel_pos + mul_nat y width -> pixel_pos; + add_nat x pixel_pos -> pixel_pos; + add_nat screen_buffer pixel_pos -> pixel_pos; + load_immediate 4 -> fat_ptr_size; + add_nat pixel_pos fat_ptr_size -> pixel_pos; - load_absolute_32 white -> color - store_absolute_8 pixel_pos color // draw color at screen [x,y] - syscall WRITE screen screen_buffer buffer_size // redraw + load_absolute_32 white -> color; + store_absolute_8 pixel_pos color; // draw color at screen [x,y] + syscall WRITE screen screen_buffer buffer_size; // redraw - jump draw_loop - exit 0 + jump draw_loop; + exit 0; function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - 
load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length ; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; From b638fedd81fddd4b3f8e1b5ba1a298f0e93a3a4c Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 09:26:35 -0800 Subject: [PATCH 15/27] Update readme --- README.org | 94 +++++++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/README.org b/README.org index 4635c9a..76d5fff 100644 --- a/README.org +++ b/README.org @@ -60,28 +60,28 @@ global str new_line = "\n" global str message = "nuqneH 'u'?" function main () - str hello $0 + str hello $0; - load_immediate message -> hello - call pln hello -> void - exit 0 + load_immediate message -> hello; + call pln hello -> void; + exit 0; function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; #+END_SRC #+BEGIN_SRC sh @@ -97,46 +97,46 @@ memory is managed via frame based arenas. function scopes defines a memory frame heap allocations using the internal malloc opcode push pointers within this frame. 
when a frame exits, the pointer is reset like stack based gc. #+BEGIN_SRC sh -global str terminal_namespace = "/dev/term/0" -global str prompt = "Enter a string:" -global str new_line = "\n" +global str terminal_namespace = "/dev/term/0"; +global str prompt = "Enter a string:"; +global str new_line = "\n"; function main () int mode $11; str term $10; - load_immediate terminal_namespace -> term - load_immediate 0 -> mode - syscall OPEN term mode -> term // Terminal term = open("/dev/term/0", 0); + load_immediate terminal_namespace -> term; + load_immediate 0 -> mode; + syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); - load_immediate prompt -> $7 - string_length $7 -> $8 - syscall WRITE term $7 $8 // print prompt + load_immediate prompt -> $7; + string_length $7 -> $8; + syscall WRITE term $7 $8; // print prompt - str user_string $9 - load_immediate 32 -> $8 - malloc $8 -> user_string - syscall READ term user_string $8 // read in max 32 byte string + str user_string $9; + load_immediate 32 -> $8; + malloc $8 -> user_string; + syscall READ term user_string $8; // read in max 32 byte string - call pln user_string -> void - exit 0 + call pln user_string -> void; + exit 0; function pln (str message $0) - str ts $1 - int mode $5 - int msg_length $2 - str nl $3 - int nl_length $4 + str ts $1; + int mode $5; + int msg_length $2; + str nl $3; + int nl_length $4; - load_immediate terminal_namespace -> ts - load_immediate 0 -> mode - syscall OPEN ts mode -> ts - strlen message -> msg_length - syscall WRITE ts message msg_length - load_immediate new_line -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length - return + load_immediate terminal_namespace -> ts; + load_immediate 0 -> mode; + syscall OPEN ts mode -> ts; + strlen message -> msg_length; + syscall WRITE ts message msg_length; + load_immediate new_line -> nl; + strlen nl -> nl_length; + syscall WRITE ts nl nl_length; + return; #+END_SRC From 89a17f06f00bee0a412e1d4f6a47f53bc4f42521 
Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 10:50:29 -0800 Subject: [PATCH 16/27] start making changes to init_vm --- src/arch/linux/main.c | 54 +++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 461daf9..ff72554 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -199,6 +199,7 @@ i32 main(i32 argc, char *argv[]) { char *output_file = nil; bool is_rom = false; bool is_assembly = false; + bool is_ir = false; // Parse command line arguments for (i32 i = 1; i < argc; i++) { @@ -216,36 +217,43 @@ i32 main(i32 argc, char *argv[]) { if (ext && (strcmp(ext, ".lisp") == 0)) { is_assembly = true; } + if (ext && (strcmp(ext, ".ir") == 0)) { + is_ir = true; + } } else if (output_file == nil && dump_rom) { // This is the output file for -o flag output_file = argv[i]; } } - VM vm = {0}; + VM *vm; + if (!init_vm(vm)) { + printf("vm did not initialize for some reason."); + return 1; + } bool compilation_success = true; if (input_file) { if (is_rom) { // Load ROM file directly - compilation_success = loadVM(input_file, &vm); + compilation_success = loadVM(input_file, vm); } else if (is_assembly) { // Compile Lisp file if (dump_rom && output_file) { - compilation_success = assembleAndSave(input_file, output_file, &vm); + compilation_success = assembleAndSave(input_file, output_file, vm); } else { - compilation_success = assembleAndSave(input_file, nil, &vm); + compilation_success = assembleAndSave(input_file, nil, vm); } } else { if (dump_rom && output_file) { - compilation_success = compileAndSave(input_file, output_file, &vm); + compilation_success = compileAndSave(input_file, output_file, vm); } else { - compilation_success = compileAndSave(input_file, nil, &vm); + compilation_success = compileAndSave(input_file, nil, vm); } } } else { printf("usage: undar ..."); - return 0; + return 1; } if (dump_rom) { @@ -254,7 +262,7 @@ i32 main(i32 argc, 
char *argv[]) { // If dump_rom flag was set without specifying output file, use default if (dump_rom && !is_rom && !output_file) { - if (!saveVM("memory_dump.bin", &vm)) { + if (!saveVM("memory_dump.bin", vm)) { printf("Failed to save VM to memory_dump.bin\n"); return EXIT_FAILURE; } @@ -262,7 +270,7 @@ i32 main(i32 argc, char *argv[]) { return EXIT_SUCCESS; } - vm_register_device(&vm, "/dev/term/0", "terminal", &console_data, + vm_register_device(vm, "/dev/term/0", "terminal", &console_data, &console_device_ops, 4); if (SDL_Init(SDL_INIT_VIDEO) < 0) { @@ -275,7 +283,7 @@ i32 main(i32 argc, char *argv[]) { screen_data.height = 480; screen_data.buffer_size = screen_data.width * screen_data.height; - vm_register_device(&vm, "/dev/screen/0", "screen", &screen_data, &screen_ops, + vm_register_device(vm, "/dev/screen/0", "screen", &screen_data, &screen_ops, 16 + screen_data.buffer_size); mouse_data.x = 0; @@ -285,10 +293,10 @@ i32 main(i32 argc, char *argv[]) { mouse_data.btn3 = 0; mouse_data.btn4 = 0; - vm_register_device(&vm, "/dev/mouse/0", "mouse", &mouse_data, &mouse_ops, 16); + vm_register_device(vm, "/dev/mouse/0", "mouse", &mouse_data, &mouse_ops, 16); keyboard_data.keys = SDL_GetKeyboardState(&keyboard_data.key_count); - vm_register_device(&vm, "/dev/keyboard/0", "keyboard", &keyboard_data, + vm_register_device(vm, "/dev/keyboard/0", "keyboard", &keyboard_data, &keyboard_ops, keyboard_data.key_count + 4); SDL_Event event; @@ -333,11 +341,11 @@ i32 main(i32 argc, char *argv[]) { case SDL_FINGERDOWN: case SDL_FINGERUP: { - float x = event.tfinger.x * 640; - float y = event.tfinger.y * 480; + f32 x = event.tfinger.x * 640; + f32 y = event.tfinger.y * 480; - mouse_data.x = (int)x; - mouse_data.y = (int)y; + mouse_data.x = (i32)x; + mouse_data.y = (i32)y; // Only treat the first finger as mouse input (ignore multi-touch // beyond 1 finger) @@ -354,10 +362,10 @@ i32 main(i32 argc, char *argv[]) { } // Run VM for a fixed number of cycles or a time slice - int 
cycles_this_frame = 0; - int max_cycles_per_frame = 100; // Adjust this value + i32 cycles_this_frame = 0; + i32 max_cycles_per_frame = 100; // Adjust this value while (cycles_this_frame < max_cycles_per_frame) { - if (!step_vm(&vm)) { + if (!step_vm(vm)) { running = false; break; } @@ -374,9 +382,9 @@ i32 main(i32 argc, char *argv[]) { SDL_RenderGetViewport(screen_data.renderer, &output_rect); // Calculate aspect ratio preserving scaling - float scale_x = (float)output_rect.w / screen_data.width; - float scale_y = (float)output_rect.h / screen_data.height; - float scale = SDL_min(scale_x, scale_y); + f32 scale_x = (f32)output_rect.w / screen_data.width; + f32 scale_y = (f32)output_rect.h / screen_data.height; + f32 scale = SDL_min(scale_x, scale_y); SDL_Rect dstrect = { (i32)((output_rect.w - screen_data.width * scale) / 2), @@ -392,5 +400,5 @@ i32 main(i32 argc, char *argv[]) { } } - return vm.flag; + return vm->flag; } From 733dfc0364097401269c2a2b7dcfa5db5e4168ac Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 13:52:12 -0800 Subject: [PATCH 17/27] wip, add all the sizing for symbols --- src/arch/linux-tui/devices.c | 98 --- src/arch/linux-tui/devices.h | 33 - src/arch/linux-tui/main.c | 264 -------- src/arch/linux/main.c | 47 +- src/tools/assembler/assembler.c | 1032 ++++++++++++++++++++++++++++++- src/tools/assembler/lexer.h | 2 +- src/tools/compiler/lexer.c | 6 +- src/tools/compiler/lexer.h | 2 +- test/paint.ul.ir | 16 +- 9 files changed, 1047 insertions(+), 453 deletions(-) delete mode 100644 src/arch/linux-tui/devices.c delete mode 100644 src/arch/linux-tui/devices.h delete mode 100644 src/arch/linux-tui/main.c diff --git a/src/arch/linux-tui/devices.c b/src/arch/linux-tui/devices.c deleted file mode 100644 index dcca021..0000000 --- a/src/arch/linux-tui/devices.c +++ /dev/null @@ -1,98 +0,0 @@ -#include "devices.h" -#include -#include - -i32 console_open(void *data, u32 mode) { - USED(mode); - USED(data); - /* Nothing to open — stdin/stdout 
are always available */ - return 0; /* Success */ -} - -i32 console_read(void *data, u8 *buffer, u32 size) { - USED(data); - ssize_t result = read(STDIN_FILENO, buffer, size); - if (result < 0) return -1; /* Error */ - return (i32)result; /* Bytes read */ -} - -i32 console_write(void *data, const u8 *buffer, u32 size) { - USED(data); - ssize_t result = write(STDOUT_FILENO, buffer, size); - if (result < 0) return -1; /* Error */ - return (i32)result; /* Bytes written */ -} - -i32 console_close(void *data) { - USED(data); - /* Nothing to close — stdin/stdout are process-owned */ - return 0; -} - -i32 console_ioctl(void *data, u32 cmd, void *args) { - USED(data); - USED(cmd); - USED(args); - return -1; /* Unsupported */ -} - -i32 screen_open(void *data, u32 mode) { - USED(mode); - ScreenDeviceData *screen = (ScreenDeviceData *)data; - USED(screen); - - return 0; -} - -i32 screen_read(void *data, u8 *buffer, u32 size) { - USED(data); - USED(buffer); - USED(size); - return -1; -} - -i32 screen_write(void *data, const u8 *buffer, u32 size) { - ScreenDeviceData *screen = (ScreenDeviceData *)data; - - if (size > screen->framebuffer_size * sizeof(u8)) { - return -1; - } - - memcpy(&screen->vm->memory[screen->framebuffer_pos], buffer, size); - return 0; -} - -i32 screen_close(void *data) { - ScreenDeviceData *screen = (ScreenDeviceData *)data; - USED(screen); - - return 0; -} - -i32 keyboard_open(void *data, u32 mode) { - USED(data); - USED(mode); - return 0; -} - -i32 keyboard_read(void *data, u8 *buffer, u32 size) { - KeyboardDeviceData *kbd = (KeyboardDeviceData *)data; - - if (size < (u32)kbd->key_count) - return -1; - - memcpy(buffer, kbd->keys, kbd->key_count); - return 0; -} - -i32 keyboard_write(void *data, const u8 *buffer, u32 size) { - USED(data); - USED(buffer); - USED(size); - return -1; /* not writable */ -} - -i32 keyboard_close(void *data) { - USED(data); - return 0; -} diff --git a/src/arch/linux-tui/devices.h b/src/arch/linux-tui/devices.h deleted file mode 
100644 index 7481a6c..0000000 --- a/src/arch/linux-tui/devices.h +++ /dev/null @@ -1,33 +0,0 @@ -#include "../../vm/device.h" -#include "../../vm/vm.h" - -/* Screen device data */ -typedef struct screen_device_data_s { - u32 width; - u32 height; - u32 framebuffer_pos; - u32 framebuffer_size; - VM* vm; -} ScreenDeviceData; - -/* Keyboard device data */ -typedef struct keyboard_device_data_s { - const u8 *keys; - i32 key_count; -} KeyboardDeviceData; - -i32 screen_open(void *data, u32 mode); -i32 screen_read(void *data, u8 *buffer, u32 size); -i32 screen_write(void *data, const u8 *buffer, u32 size); -i32 screen_close(void *data); - -i32 keyboard_open(void *data, u32 mode); -i32 keyboard_read(void *data, u8 *buffer, u32 size); -i32 keyboard_write(void *data, const u8 *buffer, u32 size); -i32 keyboard_close(void *data); - -i32 console_open(void *data, u32 mode); -i32 console_read(void *data, u8 *buffer, u32 size); -i32 console_write(void *data, const u8 *buffer, u32 size); -i32 console_close(void *data); -i32 console_ioctl(void *data, u32 cmd, void *args); diff --git a/src/arch/linux-tui/main.c b/src/arch/linux-tui/main.c deleted file mode 100644 index c8a47bb..0000000 --- a/src/arch/linux-tui/main.c +++ /dev/null @@ -1,264 +0,0 @@ -#include "../../tools/test.h" -#include "../../tools/parser.h" -#include "../../vm/vm.h" -#include "devices.h" - -#include -#include -#include -#include -#include -#include -#include - -#define MAX_SRC_SIZE 16384 - -static DeviceOps screen_ops = {.open = screen_open, - .read = screen_read, - .write = screen_write, - .close = screen_close, - .ioctl = nil}; - -static DeviceOps console_device_ops = { - .open = console_open, - .read = console_read, - .write = console_write, - .close = console_close, - .ioctl = console_ioctl, -}; - -static ScreenDeviceData screen_data = {0}; - -void compileFile(const char *path, VM *vm) { - USED(vm); - - FILE *f = fopen(path, "rb"); - if (!f) { - perror("fopen"); - exit(1); - } - - static char 
source[MAX_SRC_SIZE + 1]; - - fseek(f, 0, SEEK_END); - long len = ftell(f); - fseek(f, 0, SEEK_SET); - if (len >= MAX_SRC_SIZE) { - perror("source is larget than buffer"); - exit(1); - } - size_t read = fread(source, 1, len, f); - source[read] = '\0'; - fclose(f); -} - -void repl(VM *vm) { - USED(vm); - - char buffer[1024 * 10] = {0}; // Larger buffer for multi-line input - char line[1024]; - - for (;;) { - // Count current parentheses balance - int paren_balance = 0; - for (int i = 0; buffer[i]; i++) { - if (buffer[i] == '(') paren_balance++; - else if (buffer[i] == ')') paren_balance--; - } - - // Show appropriate prompt - if (paren_balance > 0) { - printf(".. "); // Continuation prompt when unbalanced - } else { - printf("> "); // Normal prompt when balanced - } - fflush(stdout); - - if (!fgets(line, sizeof(line), stdin)) { - printf("\n"); - break; - } - - // Append the new line to buffer - strncat(buffer, line, sizeof(buffer) - strlen(buffer) - 1); - - // Recalculate balance after adding new line - paren_balance = 0; - for (int i = 0; buffer[i]; i++) { - if (buffer[i] == '(') paren_balance++; - else if (buffer[i] == ')') paren_balance--; - } - - // Only parse when parentheses are balanced - if (paren_balance == 0) { - // Check if buffer has actual content (not just whitespace) - int has_content = 0; - for (int i = 0; buffer[i]; i++) { - if (!isspace(buffer[i])) { - has_content = 1; - break; - } - } - - if (has_content) { - ExprNode *ast = expr_parse(buffer, strlen(buffer)); - if (!ast) { - printf("Parse failed.\n"); - } else { - printf("AST:\n"); - expr_print(ast, 0); - expr_free(ast); - } - } - - // Reset buffer for next input - buffer[0] = '\0'; - } - // If unbalanced, continue reading more lines - } - exit(0); -} - -enum FlagType { - FLAG_NONE = 0, - FLAG_TEST_MODE = 1, - FLAG_DUMP_ROM = 2, - FLAG_GUI_MODE = 4, -}; - -#define MAX_INPUT_FILES 16 /* Adjust based on your system's constraints */ - -struct CompilerConfig { - u32 flags; - char 
*input_files[MAX_INPUT_FILES]; - i32 input_file_count; -}; - -i32 parse_arguments(i32 argc, char *argv[], struct CompilerConfig *config) { - i32 i; - - /* Initialize config */ - config->flags = 0; - config->input_file_count = 0; - - /* Zero out input files array for safety */ - for (i = 0; i < MAX_INPUT_FILES; i++) { - config->input_files[i] = NULL; - } - - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - /* Long and short flag handling */ - if (strcmp(argv[i], "-g") == 0 || strcmp(argv[i], "--gui") == 0) { - config->flags |= FLAG_GUI_MODE; - } else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--test") == 0) { - config->flags |= FLAG_TEST_MODE; - } else if (strcmp(argv[i], "-o") == 0 || - strcmp(argv[i], "--dump-rom") == 0) { - config->flags |= FLAG_DUMP_ROM; - } else { - fprintf(stderr, "Unknown flag: %s\n", argv[i]); - return -1; - } - } else if (strstr(argv[i], ".ul") != NULL) { - /* Collect input files */ - if (config->input_file_count >= MAX_INPUT_FILES) { - fprintf(stderr, "Too many input files. 
Maximum is %lld\n", - MAX_INPUT_FILES); - return -1; - } - config->input_files[config->input_file_count++] = argv[i]; - } - } - - return 0; -} - -/* - * This needs to be done dynamically eventually - */ -void register_sdl_devices(VM *vm) { - screen_data.vm = vm; - - struct winsize w; - ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); - - screen_data.width = w.ws_row * 2; - screen_data.height = w.ws_col; - screen_data.framebuffer_size = w.ws_row * w.ws_col * 2; - screen_data.framebuffer_pos = vm->mp; - vm->mp += screen_data.framebuffer_size; /* advance memory pointer */ - - vm_register_device(vm, "/dev/screen/0", "screen", &screen_data, &screen_ops); -} - -i32 main(i32 argc, char *argv[]) { - struct CompilerConfig config = {0}; - - if (parse_arguments(argc, argv, &config) != 0) { - fprintf(stderr, "Usage: %s [-d] [-g] [-o] [file2.ul] ...\n", - argv[0]); - return 64; - } - - VM vm = {0}; - if (config.input_file_count == 0) { - repl(&vm); - } else { - for (i32 j = 0; j < config.input_file_count; j++) { - compileFile(config.input_files[j], &vm); - } - - if (config.flags & FLAG_DUMP_ROM) { - FILE *file = fopen("memory_dump.bin", "wb"); - if (!file) { - perror("Failed to open file"); - return EXIT_FAILURE; - } - - size_t code_written = fwrite(vm.code, 1, CODE_SIZE, file); - if (code_written != CODE_SIZE) { - fprintf(stderr, "Incomplete code write: %zu bytes written out of %llu\n", - code_written, CODE_SIZE); - fclose(file); - return EXIT_FAILURE; - } - - size_t memory_written = fwrite(vm.memory, 1, MEMORY_SIZE, file); - if (memory_written != MEMORY_SIZE) { - fprintf(stderr, - "Incomplete memory write: %zu bytes written out of %llu\n", - memory_written, MEMORY_SIZE); - fclose(file); - return EXIT_FAILURE; - } - - fclose(file); - } - } - bool running = true; - vm_register_device(&vm, "/dev/term/0", "terminal", nil, &console_device_ops); - - if (config.flags & FLAG_GUI_MODE) { - u32 i; - register_sdl_devices(&vm); - while (running) { - step_vm(&vm); - - for (i = 0; i < vm.dc; i++) { - 
Device *dev = &vm.devices[i]; - if (strcmp(dev->type, "screen") == 0) { - //ScreenDeviceData *screen = (ScreenDeviceData *)dev->data; - //vm.memory[screen->framebuffer_pos], - break; - } - } - } - } else { - while (running) { - running = step_vm(&vm); - } - } - - return 0; -} diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index ff72554..cebdef0 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -193,6 +193,25 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { } } +bool init_vm(VM *vm) { /* Allocates the memory, code, frames, stack and devices buffers and records each size on vm; returns false if any allocation failed (buffers already allocated are not freed here). */ + vm->memory = (u8*)malloc(MEMORY_SIZE * sizeof(u8)); + vm->memory_size = MEMORY_SIZE; + + vm->code = (u8*)malloc(CODE_SIZE * sizeof(u8)); + vm->code_size = CODE_SIZE; + + vm->frames = (Frame*)malloc(FRAMES_SIZE * sizeof(Frame)); + vm->frames_size = FRAMES_SIZE; + + vm->stack = (u32*)malloc(STACK_SIZE * sizeof(u32)); + vm->stack_size = STACK_SIZE; + + vm->devices = (Device*)malloc(DEVICES_SIZE * sizeof(Device)); + vm->devices_size = DEVICES_SIZE; + + return vm->memory && vm->code && vm->frames && vm->stack && vm->devices; /* a NULL pointer means malloc failed */ +} + i32 main(i32 argc, char *argv[]) { bool dump_rom = false; char *input_file = nil; @@ -226,8 +245,8 @@ i32 main(i32 argc, char *argv[]) { } } - VM *vm; - if (!init_vm(vm)) { + VM vm = {0}; + if (!init_vm(&vm)) { printf("vm did not initialize for some reason."); return 1; } @@ -236,19 +255,19 @@ i32 main(i32 argc, char *argv[]) { if (input_file) { if (is_rom) { // Load ROM file directly - compilation_success = loadVM(input_file, vm); + compilation_success = loadVM(input_file, &vm); } else if (is_assembly) { // Compile Lisp file if (dump_rom && output_file) { - compilation_success = assembleAndSave(input_file, output_file, vm); + compilation_success = assembleAndSave(input_file, output_file, &vm); } else { - compilation_success = assembleAndSave(input_file, nil, vm); + compilation_success = assembleAndSave(input_file, nil, &vm); } } else { if (dump_rom && output_file) { - compilation_success = compileAndSave(input_file, output_file, vm); + compilation_success = 
compileAndSave(input_file, output_file, &vm); } else { - compilation_success = compileAndSave(input_file, nil, vm); + compilation_success = compileAndSave(input_file, nil, &vm); } } } else { @@ -262,7 +281,7 @@ i32 main(i32 argc, char *argv[]) { // If dump_rom flag was set without specifying output file, use default if (dump_rom && !is_rom && !output_file) { - if (!saveVM("memory_dump.bin", vm)) { + if (!saveVM("memory_dump.bin", &vm)) { printf("Failed to save VM to memory_dump.bin\n"); return EXIT_FAILURE; } @@ -270,7 +289,7 @@ i32 main(i32 argc, char *argv[]) { return EXIT_SUCCESS; } - vm_register_device(vm, "/dev/term/0", "terminal", &console_data, + vm_register_device(&vm, "/dev/term/0", "terminal", &console_data, &console_device_ops, 4); if (SDL_Init(SDL_INIT_VIDEO) < 0) { @@ -283,7 +302,7 @@ i32 main(i32 argc, char *argv[]) { screen_data.height = 480; screen_data.buffer_size = screen_data.width * screen_data.height; - vm_register_device(vm, "/dev/screen/0", "screen", &screen_data, &screen_ops, + vm_register_device(&vm, "/dev/screen/0", "screen", &screen_data, &screen_ops, 16 + screen_data.buffer_size); mouse_data.x = 0; @@ -293,10 +312,10 @@ i32 main(i32 argc, char *argv[]) { mouse_data.btn3 = 0; mouse_data.btn4 = 0; - vm_register_device(vm, "/dev/mouse/0", "mouse", &mouse_data, &mouse_ops, 16); + vm_register_device(&vm, "/dev/mouse/0", "mouse", &mouse_data, &mouse_ops, 16); keyboard_data.keys = SDL_GetKeyboardState(&keyboard_data.key_count); - vm_register_device(vm, "/dev/keyboard/0", "keyboard", &keyboard_data, + vm_register_device(&vm, "/dev/keyboard/0", "keyboard", &keyboard_data, &keyboard_ops, keyboard_data.key_count + 4); SDL_Event event; @@ -365,7 +384,7 @@ i32 main(i32 argc, char *argv[]) { i32 cycles_this_frame = 0; i32 max_cycles_per_frame = 100; // Adjust this value while (cycles_this_frame < max_cycles_per_frame) { - if (!step_vm(vm)) { + if (!step_vm(&vm)) { running = false; break; } @@ -400,5 +419,5 @@ i32 main(i32 argc, char *argv[]) { } } - 
return vm->flag; + return vm.flag; } diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 8f3b62c..7c3bf11 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -54,6 +54,34 @@ u32 get_ref(VM *vm, SymbolTable *st, const char *name, ScopeType scope) { return sym->ref; } +Token next_id_or_reg() { /* Returns the next token if it is an identifier; after a TOKEN_BIG_MONEY token returns the token that follows it; otherwise prints an error and exits. */ + Token token = next_token(); + if (token.type == TOKEN_IDENTIFIER) { + return token; + } + + if (token.type == TOKEN_BIG_MONEY) { + token = next_token(); + return token; + } + + printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + + return token; +} + +Token next_id_or_ptr() { /* Returns the next token when it is an identifier or a natural-number literal; otherwise prints an error and exits. */ + Token token = next_token(); + if (token.type != TOKEN_IDENTIFIER && token.type != TOKEN_LITERAL_NAT) { /* reject only when it is neither accepted kind */ + printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } + return token; +} + Token next_token_is(TokenType type) { Token token = next_token(); if (token.type != type) { @@ -313,8 +341,8 @@ void define_function(VM *vm, SymbolTable *st) { Token name = next_token_is(TOKEN_IDENTIFIER); if (name.length > MAX_SYMBOL_NAME_LENGTH) { - printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, - name.length, name.start); + printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); exit(1); } memcpy(s.name, name.start, name.length); @@ -350,12 +378,12 @@ void define_branch(VM *vm, SymbolTable *st) { Token name = next_token_is(TOKEN_IDENTIFIER); if (name.length > MAX_SYMBOL_NAME_LENGTH) { - printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, - name.length, name.start); + printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); exit(1); } memcpy(s.name, name.start, name.length); - + s.ref = vm->pc; symbol_table_add(st, s); } @@ -405,19 +433,19 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_LITERAL_NAT); - vm->pc+=4; 
+ vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "call")) { vm->pc++; next_token_is(TOKEN_IDENTIFIER); - vm->pc+=4; + vm->pc += 4; vm->pc++; /* number of args (implied) */ Token next = next_token(); - while (next.type != TOKEN_ARROW_LEFT) { + while (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; Token next = next_token(); } @@ -425,97 +453,1037 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next = next_token(); vm->pc++; /* we emit a value regardless, a void is register 255 */ if (next.type == TOKEN_SEMICOLON) { + /* exit early because no return type */ continue; } /* if it is not void, then it was the value */ next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "syscall")) { + vm->pc++; + + Token id_or_ptr = next_id_or_ptr(); + vm->pc += 4; + + Token next = next_token(); + while (next.type != TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + vm->pc++; + } + Token next = next_token(); + } + } else if (streq(token.start, "return")) { + vm->pc++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + vm->pc++; + continue; + } + + next_token_is(TOKEN_SEMICOLON); + vm->pc++; + } else if (streq(token.start, "load_immediate")) { - } else if (streq(token.start, "load_indirect_8")) { - } else if (streq(token.start, "load_indirect_16")) { - } else if (streq(token.start, "load_indirect_32")) { - } else if (streq(token.start, "load_absolute_8")) { - } else if (streq(token.start, "load_absolute_16")) { - } else if (streq(token.start, "load_absolute_32")) { - } else if (streq(token.start, "load_offset_8")) { - } else if (streq(token.start, "load_offset_16")) { - } else if (streq(token.start, "load_offset_32")) { - } else if (streq(token.start, "store_absolute_8")) { - } else if (streq(token.start, "store_absolute_16")) { - } else if (streq(token.start, "store_absolute_32")) { - } else if (streq(token.start, "store_indirect_8")) { - } else if (streq(token.start, 
"store_indirect_16")) { - } else if (streq(token.start, "store_indirect_32")) { - } else if (streq(token.start, "store_offset_8")) { - } else if (streq(token.start, "store_offset_16")) { - } else if (streq(token.start, "store_offset_32")) { + vm->pc++; + + Token id_or_ptr = next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token output = next_id_or_reg(); + vm->pc++; + } else if (streq(token.start, "malloc")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + } else if (streq(token.start, "memset_8")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + next_id_or_reg(); + vm->pc++; + next_id_or_reg(); + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "memset_16")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + next_id_or_reg(); + vm->pc++; + next_id_or_reg(); + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "memset_32")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + next_id_or_reg(); + vm->pc++; + next_id_or_reg(); + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + + } else if (streq(token.start, "load_offset_8")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_offset_16")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_offset_32")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_indirect_8")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + 
next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_indirect_16")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_indirect_32")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_absolute_8")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_absolute_16")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "load_absolute_32")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_absolute_8")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_absolute_16")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_absolute_32")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_indirect_8")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_ptr(); + vm->pc += 4; + + 
next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_indirect_16")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_indirect_32")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_offset_8")) { + vm->pc++; + + next_id_or_reg(); /* src1 */ + vm->pc++; + + next_token_is(TOKEN_LITERAL_NAT); /* offset */ + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); /* dest */ + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_offset_16")) { + vm->pc++; + + next_id_or_reg(); /* src1 */ + vm->pc++; + + next_token_is(TOKEN_LITERAL_NAT); /* offset */ + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); /* dest */ + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "store_offset_32")) { + vm->pc++; + + next_id_or_reg(); /* src1 */ + vm->pc++; + + next_token_is(TOKEN_LITERAL_NAT); /* offset */ + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); /* dest */ + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "register_move")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "add_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "sub_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + 
next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "mul_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "div_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "abs_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "neg_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "add_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "sub_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "mul_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "div_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "abs_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + 
next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "neg_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "add_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "sub_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "mul_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "div_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "abs_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "neg_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "int_to_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "nat_to_real")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, 
"real_to_int")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "real_to_nat")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "bit_shift_left")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "bit_shift_right")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "bit_shift_r_ext")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "bit_and")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "bit_or")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "bit_xor")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_if_flag")) { + vm->pc++; + + next_id_or_ptr(); + 
vm->pc += 4; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_eq_int")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_neq_int")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_gt_int")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_lt_int")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_le_int")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_ge_int")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_eq_nat")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_neq_nat")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_gt_nat")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_lt_nat")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + 
next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_le_nat")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_ge_nat")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_eq_real")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_neq_real")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_ge_real")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_gt_real")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_lt_real")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "jump_le_real")) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_id_or_reg(); + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "string_length")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "int_to_string")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + 
next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "nat_to_string")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (streq(token.start, "real_to_string")) { + vm->pc++; + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); } else if (streq(token.start, "string_eq")) { } else if (streq(token.start, "string_concat")) { } else if (streq(token.start, "string_get_char")) { } else if (streq(token.start, "string_find_char")) { } else if (streq(token.start, "string_slice")) { - } else if (streq(token.start, "int_to_string")) { - } else if (streq(token.start, "nat_to_string")) { - } else if (streq(token.start, "real_to_string")) { } else if (streq(token.start, "string_to_int")) { } else if (streq(token.start, "string_to_nat")) { } else if (streq(token.start, "string_to_real")) { } else { // some other identifier + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); } } } while (token.type != TOKEN_EOF); @@ -534,8 +1502,8 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - printf("Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), - token.length, token.start); + printf("Line %d [%s]: %.*s\n", token.line, + token_type_to_string(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed @@ -562,6 +1530,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { // check to see if it is an opcode first if (streq(token.start, "exit")) { } else if (streq(token.start, "call")) { + } else if (streq(token.start, "return")) { } else if (streq(token.start, "syscall")) { } else if (streq(token.start, "load_immediate")) { } else if 
(streq(token.start, "load_indirect_8")) { @@ -661,6 +1630,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { void assemble(VM *vm, char *source) { SymbolTable *st = symbol_table_init(); build_symbol_table(vm, source, st); + vm->pc = 0; /* actuall start emitting code */ emit_bytecode(vm, source, st); free(st->symbols); free(st); diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index 3d8ad1a..debd5ce 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -71,7 +71,7 @@ typedef enum { TOKEN_RBRACE, TOKEN_LBRACKET, TOKEN_RBRACKET, - TOKEN_ARROW_LEFT, + TOKEN_ARROW_RIGHT, TOKEN_ERROR } TokenType; diff --git a/src/tools/compiler/lexer.c b/src/tools/compiler/lexer.c index 490e952..291a3a3 100644 --- a/src/tools/compiler/lexer.c +++ b/src/tools/compiler/lexer.c @@ -348,7 +348,7 @@ Token next_token() { case '.': return makeToken(TOKEN_DOT); case '-': - return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); + return makeToken(match('>') ? 
TOKEN_ARROW_RIGHT : TOKEN_MINUS); case '+': return makeToken(TOKEN_PLUS); case '/': @@ -492,8 +492,8 @@ const char *token_type_to_string(TokenType type) { return "LBRACKET"; case TOKEN_RBRACKET: return "RBRACKET"; - case TOKEN_ARROW_LEFT: - return "ARROW_LEFT"; + case TOKEN_ARROW_RIGHT: + return "ARROW_RIGHT"; case TOKEN_MESH: return "MESH"; case TOKEN_BIG_MONEY: diff --git a/src/tools/compiler/lexer.h b/src/tools/compiler/lexer.h index 3e0ab11..5f83f08 100644 --- a/src/tools/compiler/lexer.h +++ b/src/tools/compiler/lexer.h @@ -67,7 +67,7 @@ typedef enum { TOKEN_RBRACE, TOKEN_LBRACKET, TOKEN_RBRACKET, - TOKEN_ARROW_LEFT, + TOKEN_ARROW_RIGHT, TOKEN_ERROR } TokenType; diff --git a/test/paint.ul.ir b/test/paint.ul.ir index 97b8fdf..a7854d4 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -1,10 +1,10 @@ -global const str screen_namespace = "/dev/screen/0"; -global const str mouse_namespace = "/dev/mouse/0"; -global const byte BLACK = 0; -global const byte WHITE = 255; -global const byte DARK_GRAY = 73; -global const byte GRAY = 146; -global const byte LIGHT_GRAY = 182; +global str screen_namespace = "/dev/screen/0"; +global str mouse_namespace = "/dev/mouse/0"; +global byte BLACK = 0; +global byte WHITE = 255; +global byte DARK_GRAY = 73; +global byte GRAY = 146; +global byte LIGHT_GRAY = 182; global byte SELECTED_COLOR = 255; function main () @@ -114,7 +114,7 @@ function set_color_if_clicked (int click_x $0, int click_y $1, jump_lt_int fail click_y box_y; jump_ge_int fail click_y bottom_edge; - store_absolute_8 SELECTED_COLOR color; + store_absolute_8 color -> SELECTED_COLOR; else fail return; From 2e5eb0322793fd86ee8680333469e7b28d9dc995 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 19:44:54 -0800 Subject: [PATCH 18/27] remove old assembler, finish symbol table pass, start working on code gen --- Makefile | 4 - src/arch/linux/main.c | 66 +- src/tools/assembler/assembler.c | 405 +++++---- src/tools/assembler/lexer.c | 26 +- 
src/tools/old_assembler/assembler.c | 1211 --------------------------- src/tools/old_assembler/assembler.h | 20 - src/tools/old_assembler/parser.c | 244 ------ src/tools/old_assembler/parser.h | 25 - src/vm/libc.c | 17 +- src/vm/libc.h | 1 + test/add.rom | Bin 143 -> 0 bytes test/add.ul.ir | 4 +- test/fib.ul.ir | 4 +- test/hello.rom | Bin 135 -> 0 bytes test/hello.ul.ir | 4 +- test/loop.rom | Bin 258 -> 0 bytes test/loop.ul.ir | 8 +- test/malloc.rom | Bin 167 -> 0 bytes test/malloc.ul.ir | 4 +- test/paint-bw.rom | Bin 574 -> 0 bytes test/paint.rom | Bin 1266 -> 0 bytes test/simple.rom | Bin 140 -> 0 bytes test/simple.ul.ir | 14 +- test/window.rom | Bin 326 -> 0 bytes test/window.ul.ir | 4 +- 25 files changed, 262 insertions(+), 1799 deletions(-) delete mode 100644 src/tools/old_assembler/assembler.c delete mode 100644 src/tools/old_assembler/assembler.h delete mode 100644 src/tools/old_assembler/parser.c delete mode 100644 src/tools/old_assembler/parser.h delete mode 100644 test/add.rom delete mode 100644 test/hello.rom delete mode 100644 test/loop.rom delete mode 100644 test/malloc.rom delete mode 100644 test/paint-bw.rom delete mode 100644 test/paint.rom delete mode 100644 test/simple.rom delete mode 100644 test/window.rom diff --git a/Makefile b/Makefile index 88100b7..6aa77f9 100644 --- a/Makefile +++ b/Makefile @@ -86,15 +86,11 @@ VM_SOURCES := \ ifeq ($(BUILD_MODE), release) PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c\ - $(SRC_DIR)/tools/old_assembler/parser.c \ - $(SRC_DIR)/tools/old_assembler/assembler.c \ $(SRC_DIR)/tools/assembler/lexer.c \ $(SRC_DIR)/tools/assembler/assembler.c else PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c \ - $(SRC_DIR)/tools/old_assembler/parser.c \ - $(SRC_DIR)/tools/old_assembler/assembler.c\ $(SRC_DIR)/tools/assembler/lexer.c \ $(SRC_DIR)/tools/assembler/assembler.c endif diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index cebdef0..94a4e2a 100644 --- a/src/arch/linux/main.c +++ 
b/src/arch/linux/main.c @@ -1,5 +1,3 @@ -#include "../../tools/old_assembler/assembler.h" -#include "../../tools/old_assembler/parser.h" #include "../../tools/assembler/assembler.h" #include "../../vm/vm.h" #include "devices.h" @@ -126,27 +124,7 @@ bool loadVM(const char *filename, VM *vm) { bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { USED(vm); USED(output_file); - FILE *f = fopen(source_file, "rb"); - if (!f) { - perror("fopen"); - return false; - } - - static char source[MAX_SRC_SIZE + 1]; - - fseek(f, 0, SEEK_END); - long len = ftell(f); - fseek(f, 0, SEEK_SET); - if (len >= MAX_SRC_SIZE) { - fprintf(stderr, "Source is larger than buffer\n"); - fclose(f); - return false; - } - size_t read = fread(source, 1, len, f); - source[read] = '\0'; - fclose(f); - - assemble(vm, source); + USED(source_file); return true; } @@ -173,42 +151,34 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { source[read] = '\0'; fclose(f); - ExprNode *ast = expr_parse(source, strlen(source)); - if (!ast) { - printf("Parse failed.\n"); - return false; - } else { - old_assemble(vm, ast); - expr_free(ast); + assemble(vm, source); - // If output file specified, save the VM - if (output_file) { - if (!saveVM(output_file, vm)) { - printf("Failed to save VM to %s\n", output_file); - return false; - } - printf("VM saved to %s\n", output_file); + if (output_file) { + if (!saveVM(output_file, vm)) { + printf("Failed to save VM to %s\n", output_file); + return false; } - return true; + printf("VM saved to %s\n", output_file); } + return true; } bool init_vm(VM *vm) { - vm->memory = (u8*)malloc(MEMORY_SIZE * sizeof(u8)); + vm->memory = (u8 *)malloc(MEMORY_SIZE * sizeof(u8)); vm->memory_size = MEMORY_SIZE; - vm->code = (u8*)malloc(CODE_SIZE * sizeof(u8)); + vm->code = (u8 *)malloc(CODE_SIZE * sizeof(u8)); vm->code_size = CODE_SIZE; - vm->frames = (Frame*)malloc(FRAMES_SIZE * sizeof(Frame)); + vm->frames = (Frame *)malloc(FRAMES_SIZE * 
sizeof(Frame)); vm->frames_size = FRAMES_SIZE; - vm->stack = (u32*)malloc(STACK_SIZE * sizeof(u32)) + vm->stack = (u32 *)malloc(STACK_SIZE * sizeof(u32)); vm->stack_size = STACK_SIZE; - vm->devices = (Device*)malloc(DEVICES_SIZE * sizeof(Device)); - vm->devices_size = DEVICES_SIZE; - + vm->devices = (Device *)malloc(DEVICES_SIZE * sizeof(Device)); + vm->device_size = DEVICES_SIZE; + return true; } @@ -217,7 +187,6 @@ i32 main(i32 argc, char *argv[]) { char *input_file = nil; char *output_file = nil; bool is_rom = false; - bool is_assembly = false; bool is_ir = false; // Parse command line arguments @@ -233,9 +202,6 @@ i32 main(i32 argc, char *argv[]) { if (ext && (strcmp(ext, ".rom") == 0)) { is_rom = true; } - if (ext && (strcmp(ext, ".lisp") == 0)) { - is_assembly = true; - } if (ext && (strcmp(ext, ".ir") == 0)) { is_ir = true; } @@ -256,7 +222,7 @@ i32 main(i32 argc, char *argv[]) { if (is_rom) { // Load ROM file directly compilation_success = loadVM(input_file, &vm); - } else if (is_assembly) { + } else if (is_ir) { // Compile Lisp file if (dump_rom && output_file) { compilation_success = assembleAndSave(input_file, output_file, &vm); diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 7c3bf11..b45d6f9 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -44,7 +44,7 @@ Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { return nil; } -u32 get_ref(VM *vm, SymbolTable *st, const char *name, ScopeType scope) { +u32 get_ref(SymbolTable *st, const char *name) { Symbol *sym = symbol_table_lookup(st, name); if (!sym) { fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); @@ -74,7 +74,11 @@ Token next_id_or_reg() { Token next_id_or_ptr() { Token token = next_token(); - if (token.type != TOKEN_IDENTIFIER || token.type != TOKEN_LITERAL_NAT) { + + if (token.type != TOKEN_IDENTIFIER && + token.type != TOKEN_LITERAL_NAT && + token.type != TOKEN_LITERAL_INT && + token.type != 
TOKEN_LITERAL_REAL) { printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, token.start); exit(1); @@ -91,6 +95,15 @@ Token next_token_is(TokenType type) { return token; } +Token next_token_is_either(TokenType type, TokenType type2) { + Token token = next_token(); + if (token.type != type && token.type != type2) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + exit(1); + } + return token; +} + /** * Global . */ @@ -140,9 +153,7 @@ bool define_global(VM *vm, SymbolTable *st) { return false; } - Token eq = next_token_is(TOKEN_EQ); Token name = next_token_is(TOKEN_IDENTIFIER); - if (name.length > MAX_SYMBOL_NAME_LENGTH) { return false; } @@ -153,6 +164,8 @@ bool define_global(VM *vm, SymbolTable *st) { s.ref = addr; s.scope = GLOBAL; + next_token_is(TOKEN_EQ); + Token value = next_token(); switch (value.type) { case TOKEN_KEYWORD_TRUE: { @@ -161,6 +174,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_KEYWORD_FALSE: { u32 addr = vm->mp; @@ -168,6 +182,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_LITERAL_INT: { i32 out = atoi(value.start); @@ -177,6 +192,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_LITERAL_NAT: { char *endptr; @@ -191,6 +207,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_LITERAL_REAL: { fixed_t out = float_to_fixed(atof(value.start)); @@ -204,8 +221,8 @@ bool define_global(VM *vm, SymbolTable *st) { } case TOKEN_LITERAL_STR: { const char *src = value.start; - u32 len = 0; - u32 i = 0; + i32 len = 0; + i32 i = 0; while (i < value.length) { char c = src[i++]; @@ -253,7 +270,7 @@ bool define_global(VM *vm, SymbolTable *st) { /** * Var . 
*/ -void define_var(VM *vm, SymbolTable *st, Token regType) { +void define_var(SymbolTable *st, Token regType) { Symbol s; s.scope = VAR; switch (regType.type) { @@ -326,8 +343,6 @@ void define_var(VM *vm, SymbolTable *st, Token regType) { Token reg_num = next_token_is(TOKEN_LITERAL_INT); s.ref = atoi(reg_num.start); - - next_token_is(TOKEN_SEMICOLON); symbol_table_add(st, s); } @@ -351,11 +366,11 @@ void define_function(VM *vm, SymbolTable *st) { Token next = next_token(); while (next.type != TOKEN_RPAREN) { - Token regType = next_token(); - define_var(vm, st, regType); + define_var(st, next); - Token next = next_token(); + next = next_token(); if (next.type == TOKEN_COMMA) { + next = next_token(); continue; } else if (next.type == TOKEN_RPAREN) { break; @@ -401,6 +416,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { exit(1); } + printf("Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), + token.length, token.start); + if (token.type == TOKEN_KEYWORD_GLOBAL) { define_global(vm, st); continue; @@ -416,7 +434,8 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { - define_var(vm, st, token); + define_var(st, token); + next_token_is(TOKEN_SEMICOLON); continue; } @@ -427,16 +446,31 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { continue; } + if (token.type == TOKEN_KEYWORD_RETURN) { + vm->pc++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + vm->pc++; + continue; + } + + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + continue; + } + if (token.type == TOKEN_IDENTIFIER) { // check to see if it is an opcode first - if (streq(token.start, "exit")) { + if (strleq(token.start, "exit", token.length)) { vm->pc++; - next_token_is(TOKEN_LITERAL_NAT); + 
next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "call")) { + } else if (strleq(token.start, "call", token.length)) { vm->pc++; next_token_is(TOKEN_IDENTIFIER); @@ -447,7 +481,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token next = next_token(); while (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; - Token next = next_token(); + next = next_token(); } /* return type */ next = next_token(); @@ -458,10 +492,10 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { } /* if it is not void, then it was the value */ next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "syscall")) { + } else if (strleq(token.start, "syscall", token.length)) { vm->pc++; - Token id_or_ptr = next_id_or_ptr(); + next_id_or_ptr(); vm->pc += 4; Token next = next_token(); @@ -469,33 +503,34 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { if (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; } - Token next = next_token(); - } - } else if (streq(token.start, "return")) { - vm->pc++; - - Token next = next_token(); - if (next.type == TOKEN_SEMICOLON) { - /* put 0xFF as return register */ - vm->pc++; - continue; + next = next_token(); } - next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_immediate", token.length)) { vm->pc++; - } else if (streq(token.start, "load_immediate")) { - vm->pc++; - - Token id_or_ptr = next_id_or_ptr(); + next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); - Token output = next_id_or_reg(); + next_id_or_reg(); vm->pc++; - } else if (streq(token.start, "malloc")) { + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_address", token.length)) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "malloc", token.length)) { vm->pc++; next_id_or_reg(); @@ -506,7 +541,8 @@ void 
build_symbol_table(VM *vm, char *source, SymbolTable *st) { next_id_or_reg(); vm->pc++; - } else if (streq(token.start, "memset_8")) { + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -517,7 +553,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "memset_16")) { + } else if (strleq(token.start, "memset_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -528,7 +564,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "memset_32")) { + } else if (strleq(token.start, "memset_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -539,7 +575,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_offset_8")) { + } else if (strleq(token.start, "load_offset_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -554,7 +590,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_offset_16")) { + } else if (strleq(token.start, "load_offset_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -569,7 +605,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_offset_32")) { + } else if (strleq(token.start, "load_offset_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -584,7 +620,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_indirect_8")) { + } else if (strleq(token.start, "load_indirect_8", token.length)) { vm->pc++; next_id_or_ptr(); @@ -596,7 +632,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; 
next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_indirect_16")) { + } else if (strleq(token.start, "load_indirect_16", token.length)) { vm->pc++; next_id_or_ptr(); @@ -608,7 +644,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_indirect_32")) { + } else if (strleq(token.start, "load_indirect_32", token.length)) { vm->pc++; next_id_or_ptr(); @@ -620,7 +656,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_absolute_8")) { + } else if (strleq(token.start, "load_absolute_8", token.length)) { vm->pc++; next_id_or_ptr(); @@ -632,7 +668,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_absolute_16")) { + } else if (strleq(token.start, "load_absolute_16", token.length)) { vm->pc++; next_id_or_ptr(); @@ -644,7 +680,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_absolute_32")) { + } else if (strleq(token.start, "load_absolute_32", token.length)) { vm->pc++; next_id_or_ptr(); @@ -656,7 +692,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_absolute_8")) { + } else if (strleq(token.start, "store_absolute_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -668,7 +704,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_absolute_16")) { + } else if (strleq(token.start, "store_absolute_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -680,7 +716,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - 
} else if (streq(token.start, "store_absolute_32")) { + } else if (strleq(token.start, "store_absolute_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -692,7 +728,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_indirect_8")) { + } else if (strleq(token.start, "store_indirect_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -704,7 +740,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_indirect_16")) { + } else if (strleq(token.start, "store_indirect_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -716,7 +752,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_indirect_32")) { + } else if (strleq(token.start, "store_indirect_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -728,7 +764,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_offset_8")) { + } else if (strleq(token.start, "store_offset_8", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ @@ -743,7 +779,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_offset_16")) { + } else if (strleq(token.start, "store_offset_16", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ @@ -758,7 +794,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_offset_32")) { + } else if (strleq(token.start, "store_offset_32", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ @@ -773,7 +809,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; 
next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "register_move")) { + } else if (strleq(token.start, "register_move", token.length)) { vm->pc++; next_id_or_reg(); @@ -785,7 +821,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "add_int")) { + } else if (strleq(token.start, "add_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -800,7 +836,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "sub_int")) { + } else if (strleq(token.start, "sub_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -815,7 +851,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "mul_int")) { + } else if (strleq(token.start, "mul_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -830,7 +866,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "div_int")) { + } else if (strleq(token.start, "div_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -845,7 +881,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "abs_int")) { + } else if (strleq(token.start, "abs_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -857,7 +893,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "neg_int")) { + } else if (strleq(token.start, "neg_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -869,7 +905,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "add_nat")) { + } else if (strleq(token.start, "add_nat", token.length)) { vm->pc++; 
next_id_or_reg(); @@ -884,7 +920,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "sub_nat")) { + } else if (strleq(token.start, "sub_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -899,7 +935,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "mul_nat")) { + } else if (strleq(token.start, "mul_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -914,7 +950,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "div_nat")) { + } else if (strleq(token.start, "div_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -929,7 +965,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "abs_nat")) { + } else if (strleq(token.start, "abs_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -941,7 +977,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "neg_nat")) { + } else if (strleq(token.start, "neg_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -953,7 +989,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "add_real")) { + } else if (strleq(token.start, "add_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -968,7 +1004,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "sub_real")) { + } else if (strleq(token.start, "sub_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -983,7 +1019,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if 
(streq(token.start, "mul_real")) { + } else if (strleq(token.start, "mul_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -998,7 +1034,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "div_real")) { + } else if (strleq(token.start, "div_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1013,7 +1049,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "abs_real")) { + } else if (strleq(token.start, "abs_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1025,7 +1061,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "neg_real")) { + } else if (strleq(token.start, "neg_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1037,7 +1073,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "int_to_real")) { + } else if (strleq(token.start, "int_to_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1049,7 +1085,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "nat_to_real")) { + } else if (strleq(token.start, "nat_to_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1061,7 +1097,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "real_to_int")) { + } else if (strleq(token.start, "real_to_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -1073,7 +1109,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "real_to_nat")) { + } else if (strleq(token.start, "real_to_nat", token.length)) { vm->pc++; 
next_id_or_reg(); @@ -1085,7 +1121,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_shift_left")) { + } else if (strleq(token.start, "bit_shift_left", token.length)) { vm->pc++; next_id_or_reg(); @@ -1100,7 +1136,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_shift_right")) { + } else if (strleq(token.start, "bit_shift_right", token.length)) { vm->pc++; next_id_or_reg(); @@ -1115,7 +1151,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { vm->pc++; next_id_or_reg(); @@ -1130,7 +1166,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_and")) { + } else if (strleq(token.start, "bit_and", token.length)) { vm->pc++; next_id_or_reg(); @@ -1145,7 +1181,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_or")) { + } else if (strleq(token.start, "bit_or", token.length)) { vm->pc++; next_id_or_reg(); @@ -1160,7 +1196,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_xor")) { + } else if (strleq(token.start, "bit_xor", token.length)) { vm->pc++; next_id_or_reg(); @@ -1175,21 +1211,21 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump")) { + } else if (strleq(token.start, "jump", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_if_flag")) 
{ + } else if (strleq(token.start, "jump_if_flag", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_eq_int")) { + } else if (strleq(token.start, "jump_eq_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1202,7 +1238,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_neq_int")) { + } else if (strleq(token.start, "jump_neq_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1215,7 +1251,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_gt_int")) { + } else if (strleq(token.start, "jump_gt_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1228,7 +1264,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_lt_int")) { + } else if (strleq(token.start, "jump_lt_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1241,7 +1277,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_le_int")) { + } else if (strleq(token.start, "jump_le_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1254,7 +1290,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_ge_int")) { + } else if (strleq(token.start, "jump_ge_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1267,7 +1303,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_eq_nat")) { + } else if (strleq(token.start, "jump_eq_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1280,7 +1316,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable 
*st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_neq_nat")) { + } else if (strleq(token.start, "jump_neq_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1293,7 +1329,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_gt_nat")) { + } else if (strleq(token.start, "jump_gt_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1306,7 +1342,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_lt_nat")) { + } else if (strleq(token.start, "jump_lt_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1319,7 +1355,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_le_nat")) { + } else if (strleq(token.start, "jump_le_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1332,7 +1368,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_ge_nat")) { + } else if (strleq(token.start, "jump_ge_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1345,7 +1381,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_eq_real")) { + } else if (strleq(token.start, "jump_eq_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1358,7 +1394,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_neq_real")) { + } else if (strleq(token.start, "jump_neq_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1371,7 +1407,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, 
"jump_ge_real")) { + } else if (strleq(token.start, "jump_ge_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1384,7 +1420,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_gt_real")) { + } else if (strleq(token.start, "jump_gt_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1397,7 +1433,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_lt_real")) { + } else if (strleq(token.start, "jump_lt_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1410,7 +1446,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_le_real")) { + } else if (strleq(token.start, "jump_le_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1423,7 +1459,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "string_length")) { + } else if (strleq(token.start, "string_length", token.length)) { vm->pc++; next_id_or_reg(); @@ -1435,7 +1471,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "int_to_string")) { + } else if (strleq(token.start, "int_to_string", token.length)) { vm->pc++; next_id_or_reg(); @@ -1447,7 +1483,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "nat_to_string")) { + } else if (strleq(token.start, "nat_to_string", token.length)) { vm->pc++; next_id_or_reg(); @@ -1459,7 +1495,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "real_to_string")) { + } else if (strleq(token.start, "real_to_string", 
token.length)) { vm->pc++; next_id_or_reg(); @@ -1471,19 +1507,19 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "string_eq")) { - } else if (streq(token.start, "string_concat")) { - } else if (streq(token.start, "string_get_char")) { - } else if (streq(token.start, "string_find_char")) { - } else if (streq(token.start, "string_slice")) { - } else if (streq(token.start, "string_to_int")) { - } else if (streq(token.start, "string_to_nat")) { - } else if (streq(token.start, "string_to_real")) { + } else if (strleq(token.start, "string_eq", token.length)) { + } else if (strleq(token.start, "string_concat", token.length)) { + } else if (strleq(token.start, "string_get_char", token.length)) { + } else if (strleq(token.start, "string_find_char", token.length)) { + } else if (strleq(token.start, "string_slice", token.length)) { + } else if (strleq(token.start, "string_to_int", token.length)) { + } else if (strleq(token.start, "string_to_nat", token.length)) { + } else if (strleq(token.start, "string_to_real", token.length)) { } else { // some other identifier printf("Unknown id at line %d: %.*s\n", token.line, token.length, token.start); - exit(1); + exit(1); } } } while (token.type != TOKEN_EOF); @@ -1493,6 +1529,8 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { * 2nd pass, emit the bytecode */ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { + USED(st); + Token token; init_lexer(source); do { @@ -1502,15 +1540,26 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - printf("Line %d [%s]: %.*s\n", token.line, + printf("[Generate Bytecode] Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed + next_token(); // type + next_token(); // var + next_token(); // eq + next_token(); // value + 
next_token(); // ; + continue; } if (token.type == TOKEN_KEYWORD_FN) { // ignore, already processed + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + next = next_token(); + } + continue; } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || @@ -1519,106 +1568,34 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { // ignore, already processed + next_token(); // type + next_token(); // var + next_token(); // reg + next_token(); // ; + continue; } - if (token.type == TOKEN_KEYWORD_LOOP || - token.type == TOKEN_KEYWORD_ELSE) { + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { // ignore, already processed + next_token(); // id } - if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first - if (streq(token.start, "exit")) { - } else if (streq(token.start, "call")) { - } else if (streq(token.start, "return")) { - } else if (streq(token.start, "syscall")) { - } else if (streq(token.start, "load_immediate")) { - } else if (streq(token.start, "load_indirect_8")) { - } else if (streq(token.start, "load_indirect_16")) { - } else if (streq(token.start, "load_indirect_32")) { - } else if (streq(token.start, "load_absolute_8")) { - } else if (streq(token.start, "load_absolute_16")) { - } else if (streq(token.start, "load_absolute_32")) { - } else if (streq(token.start, "load_offset_8")) { - } else if (streq(token.start, "load_offset_16")) { - } else if (streq(token.start, "load_offset_32")) { - } else if (streq(token.start, "store_absolute_8")) { - } else if (streq(token.start, "store_absolute_16")) { - } else if (streq(token.start, "store_absolute_32")) { - } else if (streq(token.start, "store_indirect_8")) { - } else if (streq(token.start, "store_indirect_16")) { - } else if 
(streq(token.start, "store_indirect_32")) { - } else if (streq(token.start, "store_offset_8")) { - } else if (streq(token.start, "store_offset_16")) { - } else if (streq(token.start, "store_offset_32")) { - } else if (streq(token.start, "malloc")) { - } else if (streq(token.start, "memset_8")) { - } else if (streq(token.start, "memset_16")) { - } else if (streq(token.start, "memset_32")) { - } else if (streq(token.start, "register_move")) { - } else if (streq(token.start, "add_int")) { - } else if (streq(token.start, "sub_int")) { - } else if (streq(token.start, "mul_int")) { - } else if (streq(token.start, "div_int")) { - } else if (streq(token.start, "abs_int")) { - } else if (streq(token.start, "neg_int")) { - } else if (streq(token.start, "add_nat")) { - } else if (streq(token.start, "sub_nat")) { - } else if (streq(token.start, "mul_nat")) { - } else if (streq(token.start, "div_nat")) { - } else if (streq(token.start, "abs_nat")) { - } else if (streq(token.start, "neg_nat")) { - } else if (streq(token.start, "add_real")) { - } else if (streq(token.start, "sub_real")) { - } else if (streq(token.start, "mul_real")) { - } else if (streq(token.start, "div_real")) { - } else if (streq(token.start, "abs_real")) { - } else if (streq(token.start, "neg_real")) { - } else if (streq(token.start, "int_to_real")) { - } else if (streq(token.start, "nat_to_real")) { - } else if (streq(token.start, "real_to_int")) { - } else if (streq(token.start, "real_to_nat")) { - } else if (streq(token.start, "bit_shift_left")) { - } else if (streq(token.start, "bit_shift_right")) { - } else if (streq(token.start, "bit_shift_r_ext")) { - } else if (streq(token.start, "bit_and")) { - } else if (streq(token.start, "bit_or")) { - } else if (streq(token.start, "bit_xor")) { - } else if (streq(token.start, "jump")) { - } else if (streq(token.start, "jump_if_flag")) { - } else if (streq(token.start, "jump_eq_int")) { - } else if (streq(token.start, "jump_neq_int")) { - } else if 
(streq(token.start, "jump_gt_int")) { - } else if (streq(token.start, "jump_lt_int")) { - } else if (streq(token.start, "jump_le_int")) { - } else if (streq(token.start, "jump_ge_int")) { - } else if (streq(token.start, "jump_eq_nat")) { - } else if (streq(token.start, "jump_neq_nat")) { - } else if (streq(token.start, "jump_gt_nat")) { - } else if (streq(token.start, "jump_lt_nat")) { - } else if (streq(token.start, "jump_le_nat")) { - } else if (streq(token.start, "jump_ge_nat")) { - } else if (streq(token.start, "jump_eq_real")) { - } else if (streq(token.start, "jump_neq_real")) { - } else if (streq(token.start, "jump_ge_real")) { - } else if (streq(token.start, "jump_gt_real")) { - } else if (streq(token.start, "jump_lt_real")) { - } else if (streq(token.start, "jump_le_real")) { - } else if (streq(token.start, "string_length")) { - } else if (streq(token.start, "string_eq")) { - } else if (streq(token.start, "string_concat")) { - } else if (streq(token.start, "string_get_char")) { - } else if (streq(token.start, "string_find_char")) { - } else if (streq(token.start, "string_slice")) { - } else if (streq(token.start, "int_to_string")) { - } else if (streq(token.start, "nat_to_string")) { - } else if (streq(token.start, "real_to_string")) { - } else if (streq(token.start, "string_to_int")) { - } else if (streq(token.start, "string_to_nat")) { - } else if (streq(token.start, "string_to_real")) { - } else { - // some other identifier + if (token.type == TOKEN_KEYWORD_RETURN) { + vm->pc++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + emit_u8(vm, 0xFF); + vm->pc++; + continue; } + + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + continue; } } } while (token.type != TOKEN_EOF); diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index 371bed0..d926380 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -215,12 +215,19 @@ static TokenType identifierType() { case 
'e': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { - case 'a': - return check_keyword(3, 1, "d", TOKEN_KEYWORD_READ); case 'f': return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); case 't': return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); + case 'a': + if (lexer.current - lexer.start > 3) { + switch(lexer.start[3]) { + case 'd': + return check_keyword(4, 0, "", TOKEN_KEYWORD_READ); + case 'l': + return check_keyword(4, 0, "", TOKEN_TYPE_REAL); + } + } } } break; @@ -272,7 +279,7 @@ static TokenType identifierType() { case 'g': return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); case 'l': - return check_keyword(1, 4, "oop", TOKEN_KEYWORD_LOOP); + return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP); case 'd': return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); case 'v': @@ -331,7 +338,8 @@ Token next_token() { char c = advance(); if (is_alpha(c)) return identifier(); - if (is_digit(c)) + char next = peek(); + if ((c == '-' && is_digit(next)) || is_digit(c)) return number(); switch (c) { @@ -354,7 +362,7 @@ Token next_token() { case '.': return make_token(TOKEN_DOT); case '-': - return make_token(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); + return make_token(match('>') ? 
TOKEN_ARROW_RIGHT : TOKEN_MINUS); case '+': return make_token(TOKEN_PLUS); case '/': @@ -389,7 +397,7 @@ const char *token_type_to_string(TokenType type) { case TOKEN_IDENTIFIER: return "IDENTIFIER"; case TOKEN_LITERAL_INT: - return "LITERAL_i32"; + return "LITERAL_INT"; case TOKEN_LITERAL_NAT: return "LITERAL_NAT"; case TOKEN_LITERAL_REAL: @@ -397,7 +405,7 @@ const char *token_type_to_string(TokenType type) { case TOKEN_LITERAL_STR: return "LITERAL_STR"; case TOKEN_TYPE_INT: - return "TYPE_i32"; + return "TYPE_INT"; case TOKEN_TYPE_NAT: return "TYPE_NAT"; case TOKEN_TYPE_REAL: @@ -498,8 +506,8 @@ const char *token_type_to_string(TokenType type) { return "LBRACKET"; case TOKEN_RBRACKET: return "RBRACKET"; - case TOKEN_ARROW_LEFT: - return "ARROW_LEFT"; + case TOKEN_ARROW_RIGHT: + return "ARROW_RIGHT"; case TOKEN_MESH: return "MESH"; case TOKEN_BIG_MONEY: diff --git a/src/tools/old_assembler/assembler.c b/src/tools/old_assembler/assembler.c deleted file mode 100644 index e66be6b..0000000 --- a/src/tools/old_assembler/assembler.c +++ /dev/null @@ -1,1211 +0,0 @@ -#include "assembler.h" -#include "parser.h" -typedef enum { SYMBOL_CODE, SYMBOL_DATA } SymbolType; - -typedef struct { - char *name; - u32 address; - SymbolType type; - int size; // How much memory this symbol occupies - int is_constant; // 1 = constant, 0 = variable -} Symbol; - -typedef struct { - Symbol *symbols; - int count; - int capacity; -} SymbolTable; - -void symbol_table_init(SymbolTable *table) { - table->capacity = 32; - table->count = 0; - table->symbols = malloc(table->capacity * sizeof(Symbol)); -} - -void symbol_table_add(SymbolTable *table, const char *name, u32 address, - SymbolType type) { - // Check for duplicates - for (int i = 0; i < table->count; i++) { - if (strcmp(table->symbols[i].name, name) == 0) { - fprintf(stderr, "Error: Duplicate label '%s'\n", name); - exit(1); - } - } - - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = 
realloc(table->symbols, table->capacity * sizeof(Symbol)); - } - - Symbol *sym = &table->symbols[table->count++]; - sym->name = strdup(name); - sym->address = address; - sym->type = type; - sym->size = 4; // Default size - sym->is_constant = 0; -} - -Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { - for (int i = 0; i < table->count; i++) { - if (strcmp(table->symbols[i].name, name) == 0) { - return &table->symbols[i]; - } - } - return NULL; -} - -u32 find_label_in_table(SymbolTable *table, const char *name) { - Symbol *sym = symbol_table_lookup(table, name); - if (!sym) { - fprintf(stderr, "Error: Undefined label '%s'\n", name); - exit(1); - } - return sym->address; -} - -int get_instruction_byte_size(ExprNode *node) { - const char *opname = node->token; - - // Return (1 + 1) - if (strcmp(opname, "return") == 0) { - return 2; // 1 byte opcode + 1 byte return register - } - - if (strcmp(opname, "neg-int") == 0 || - strcmp(opname, "abs-int") == 0 || - strcmp(opname, "neg-nat") == 0 || - strcmp(opname, "abs-nat") == 0 || - strcmp(opname, "neg-real") == 0 || - strcmp(opname, "abs-real") == 0 || - strcmp(opname, "int-to-string") == 0 || - strcmp(opname, "load-indirect-8") == 0 || - strcmp(opname, "nat-to-string") == 0 || - strcmp(opname, "load-indirect-16") == 0 || - strcmp(opname, "real-to-string") == 0 || - strcmp(opname, "load-indirect-32") == 0 || - strcmp(opname, "int-to-real") == 0 || - strcmp(opname, "store-indirect-8") == 0 || - strcmp(opname, "nat-to-real") == 0 || - strcmp(opname, "store-indirect-16") == 0 || - strcmp(opname, "real-to-int") == 0 || - strcmp(opname, "store-indirect-32") == 0 || - strcmp(opname, "real-to-nat") == 0 || strcmp(opname, "nat-to-int") == 0 || - strcmp(opname, "int-to-nat") == 0 || - strcmp(opname, "string-length") == 0 || - strcmp(opname, "store-absolute-32") == 0 || - strcmp(opname, "store-absolute-8") == 0 || - strcmp(opname, "store-absolute-16") == 0 || - strcmp(opname, "memset") == 0 || strcmp(opname, 
"memset") == 0 || - strcmp(opname, "memset-8") == 0 || strcmp(opname, "memset-16") == 0 || - strcmp(opname, "register-move") == 0 || strcmp(opname, "malloc") == 0) { - return 3; - } - - // Register-register-register opcodes (4 bytes: 1 + 3) - if (strcmp(opname, "add-int") == 0 || strcmp(opname, "sub-int") == 0 || - strcmp(opname, "mul-int") == 0 || strcmp(opname, "div-int") == 0 || - strcmp(opname, "add-nat") == 0 || strcmp(opname, "sub-nat") == 0 || - strcmp(opname, "mul-nat") == 0 || strcmp(opname, "div-nat") == 0 || - strcmp(opname, "add-real") == 0 || strcmp(opname, "sub-real") == 0 || - strcmp(opname, "bit-shift-left") == 0 || - strcmp(opname, "bit-shift-right") == 0 || - strcmp(opname, "bit-shift-r-ext") == 0 || - strcmp(opname, "bit-and") == 0 || strcmp(opname, "bit-or") == 0 || - strcmp(opname, "bit-xor") == 0 || strcmp(opname, "mul-real") == 0 || - strcmp(opname, "div-real") == 0) { - return 4; - } - - // (5 bytes: 1 + 4) - if (strcmp(opname, "exit") == 0 || strcmp(opname, "jump-if-flag") == 0 || - strcmp(opname, "jump") == 0) { - return 5; - } - - // Load, Load-immediate (6 bytes: 1 + 1 + 4) - if (strcmp(opname, "load-absolute-32") == 0 || - strcmp(opname, "load-immediate") == 0 || - strcmp(opname, "load-absolute-16") == 0 || - strcmp(opname, "load-absolute-8") == 0) { - return 6; - } - - // jump compare (7 bytes: 1 + 4 + 1 + 1) - if (strcmp(opname, "jump-eq-int") == 0 || - strcmp(opname, "jump-neq-int") == 0 || - strcmp(opname, "jump-gt-int") == 0 || - strcmp(opname, "jump-lt-int") == 0 || - strcmp(opname, "jump-le-int") == 0 || - strcmp(opname, "jump-ge-int") == 0 || - strcmp(opname, "jump-eq-nat") == 0 || - strcmp(opname, "jump-neq-nat") == 0 || - strcmp(opname, "jump-gt-nat") == 0 || - strcmp(opname, "jump-lt-nat") == 0 || - strcmp(opname, "jump-le-nat") == 0 || - strcmp(opname, "jump-ge-nat") == 0 || - strcmp(opname, "jump-eq-real") == 0 || - strcmp(opname, "jump-neq-real") == 0 || - strcmp(opname, "jump-gt-real") == 0 || - strcmp(opname, 
"jump-lt-real") == 0 || - strcmp(opname, "jump-le-real") == 0 || - strcmp(opname, "jump-ge-real") == 0 || - strcmp(opname, "store-offset-8") == 0 || - strcmp(opname, "store-offset-16") == 0 || - strcmp(opname, "store-offset-32") == 0 || - strcmp(opname, "load-offset-8") == 0 || - strcmp(opname, "load-offset-16") == 0 || - strcmp(opname, "load-offset-32") == 0) { - return 7; - } - - // Call (1 + 4 + 1 + args + 1) - if (strcmp(opname, "call") == 0) { - ExprNode *args_node = node->children[1]; - u32 args_count; - - if (strcmp(args_node->token, "nil") == 0) { - args_count = 0; - } else { - args_count = 1 + args_node->child_count; - } - - return 1 + 1 + 1 + 4 + args_count; - } - - // Syscall (1 + syscall_id (4) + args) - if (strcmp(opname, "syscall") == 0) { - return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0); - } - - fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); - exit(-1); -} - -int calculate_instruction_size(ExprNode *node) { - if (node->child_count == 0) - return 0; - - return get_instruction_byte_size(node); -} - -void collect_symbols_in_node(SymbolTable *table, ExprNode *node, - u32 *current_addr, int depth) { - char indent[32] = ""; - for (int i = 0; i < depth; i++) - strcat(indent, " "); - -#ifdef ASM_DEBUG - printf("%s%d %s ", indent, *current_addr, node->token); -#endif - - if (strcmp(node->token, "label") == 0) { - if (node->child_count >= 1) { - const char *name = node->children[0]->token; -#ifdef ASM_DEBUG - printf(" %s -> %d\n", name, *current_addr); -#endif - symbol_table_add(table, name, *current_addr, SYMBOL_CODE); - } - - for (size_t i = 1; i < node->child_count; i++) { - collect_symbols_in_node(table, node->children[i], current_addr, - depth + 1); - } - } else { - int size = get_instruction_byte_size(node); - *current_addr += size; -#ifdef ASM_DEBUG - printf(" +%d bytes -> %d\n", size, *current_addr); -#endif - } -} - -void collect_symbols(SymbolTable *table, ExprNode *program) { - // First, collect all data labels (with 
placeholder address) - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "data") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - ExprNode *item = section->children[j]; - if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { - const char *name = item->children[0]->token; - symbol_table_add(table, name, 0, SYMBOL_DATA); - } - } - } - } - - // Second, collect all code labels with proper nesting - u32 code_addr = 0; - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "code") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - collect_symbols_in_node(table, section->children[j], &code_addr, 0); - } - } - } -} - -u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) { - u32 addr = vm->mp; - vm->mp += size; - vm->frames[vm->fp].end += size; - - // Update the symbol's address - Symbol *sym = symbol_table_lookup(table, name); - if (sym && sym->type == SYMBOL_DATA) { - sym->address = addr; - sym->size = size; - } - - return addr; -} - -void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } - -void emit_u32(VM *vm, u32 value) { - write_u32(vm, code, vm->cp, value); - vm->cp += 4; -} - -void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } - -int parse_register(const char *reg_str) { - if (reg_str[0] != '$') - return -1; - return atoi(reg_str + 1); -} - -u32 resolve_symbol(SymbolTable *table, const char *ref) { - // Handle symbol references (e.g., &label) - if (ref[0] == '&') { - return find_label_in_table(table, ref + 1); - } - - // Handle fixed-point numbers (e.g., 0.5) - if (strchr(ref, '.')) { - return TO_FIXED(atof(ref)); - } - - // Handle hexadecimal literals (e.g., 0x7) - if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) { - char *endptr; - u32 value = (u32)strtoul(ref + 2, &endptr, 16); // Skip "0x" - - if (endptr == ref + 2 || *endptr != '\0') 
{ - fprintf(stderr, "Invalid hex literal: %s\n", ref); - exit(1); - } - return value; - } - - // Handle decimal literals (e.g., 7) - char *endptr; - u32 value = (u32)strtoul(ref, &endptr, 10); - - if (endptr == ref || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: %s\n", ref); - exit(1); - } - return value; -} - -static char *unwrap_string(const char *quoted_str) { - if (!quoted_str) - return NULL; - - size_t len = strlen(quoted_str); - if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { - // Remove quotes and process escape sequences - const char *src = quoted_str + 1; - size_t src_len = len - 2; - - // First pass: calculate the actual length needed after escape processing - size_t actual_len = 0; - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Escape sequence - actual_len++; - i++; // Skip the next character - } else { - actual_len++; - } - } - - char *unwrapped = (char *)malloc(actual_len + 1); - size_t dst_idx = 0; - - // Second pass: process escape sequences - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Handle escape sequences - switch (src[i + 1]) { - case 'n': - unwrapped[dst_idx++] = '\n'; - break; - case 't': - unwrapped[dst_idx++] = '\t'; - break; - case 'r': - unwrapped[dst_idx++] = '\r'; - break; - case '\\': - unwrapped[dst_idx++] = '\\'; - break; - case '"': - unwrapped[dst_idx++] = '"'; - break; - case '\'': - unwrapped[dst_idx++] = '\''; - break; - default: - // Unknown escape, keep both characters - unwrapped[dst_idx++] = src[i]; - unwrapped[dst_idx++] = src[i + 1]; - break; - } - i++; // Skip the next character - } else { - unwrapped[dst_idx++] = src[i]; - } - } - unwrapped[dst_idx] = '\0'; - return unwrapped; - } - // Not quoted, return copy - return strdup(quoted_str); -} - -void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) { - for (size_t i = 0; i < block->child_count; ++i) { - ExprNode *item = 
block->children[i]; - if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { - const char *name = item->children[0]->token; - ExprNode *val = item->children[1]; - - if (val->child_count == 0) { - const char *token = val->token; - - // Case 1: String literal (enclosed in quotes) - if (token[0] == '"' && token[strlen(token) - 1] == '"') { - char *unwrapped = unwrap_string(token); - int len = strlen(unwrapped); - u32 addr = allocate_data(vm, table, name, len + 1 + 4); - - write_u32(vm, memory, addr, len); - for (int i = 0; i < len; i++) { - write_u8(vm, memory, addr + 4 + i, unwrapped[i]); - } - write_u8(vm, memory, addr + 4 + len, '\0'); - free(unwrapped); - } - // Case 2: Hexadecimal integer (0x...) - else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) { - char *endptr; - u32 value = (u32)strtoul(token + 2, &endptr, 16); - - if (endptr != token + strlen(token)) { - fprintf(stderr, "Invalid hex in data block: %s\n", token); - exit(1); - } - - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, value); - } - // Case 3: Floating-point (has decimal point) - else if (strchr(token, '.')) { - float f = atof(token); - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, TO_FIXED(f)); - } - // Case 4: Decimal integer - else { - char *endptr; - u32 value = (u32)strtoul(token, &endptr, 10); - - if (endptr != token + strlen(token)) { - fprintf(stderr, "Invalid decimal in data block: %s\n", token); - exit(1); - } - - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, value); - //vm->mp += 4; - } - } else { - fprintf(stderr, "Unsupported data item\n"); - exit(1); - } - } - } -} - -void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) { - const char *opname = node->token; - if (strcmp(opname, "label") == 0) { - for (size_t i = 1; i < node->child_count; i++) { - process_code_expr(vm, table, node->children[i]); - } - } else if (strcmp(opname, "exit") == 0) { - 
emit_opcode(vm, OP_EXIT); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "jump") == 0) { - emit_opcode(vm, OP_JMP); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "jump-if-flag") == 0) { - emit_opcode(vm, OP_JMPF); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "call") == 0) { - emit_opcode(vm, OP_CALL); - - if (node->child_count < 3) { - fprintf(stderr, "Error: call requires (args) and return register\n"); - return; - } - - // Parse function address (first child) - u32 addr = resolve_symbol(table, node->children[0]->token); - if (addr == (u32)-1) { - fprintf(stderr, "Error: undefined symbol '%s'\n", - node->children[0]->token); - return; - } - emit_u32(vm, addr); - - // Parse argument list (second child) - ExprNode *args_node = node->children[1]; - u8 arg_count = 0; - - if (args_node->child_count > 0) { - // Multiple arguments case - arg_count = args_node->child_count + 1; // +1 for the token - } else { - // Single argument case - token is the argument - arg_count = (args_node->token[0] != '\0') ? 
1 : 0; - } - emit_byte(vm, arg_count); - - // Emit arguments based on representation - if (arg_count > 0) { - // First argument is always the token - const char *reg_str = args_node->token; - int reg = parse_register(reg_str); - if (reg < 0) { - fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); - return; - } - emit_byte(vm, (u8)reg); - - // Emit children if present - for (size_t i = 0; i < args_node->child_count; i++) { - reg_str = args_node->children[i]->token; - reg = parse_register(reg_str); - if (reg < 0) { - fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); - return; - } - emit_byte(vm, (u8)reg); - } - } - // Parse return register (third child) - const char *return_reg_str = node->children[2]->token; - int return_reg = parse_register(return_reg_str); - - if (return_reg < 0) { - if (strcmp(return_reg_str, "nil") == 0) { - return_reg = 0xFF; - } else { - fprintf(stderr, "Error: invalid return register '%s'\n", - return_reg_str); - return; - } - } - emit_byte(vm, (u8)return_reg); - -} else if (strcmp(opname, "return") == 0) { - emit_opcode(vm, OP_RETURN); - - if (node->child_count != 1) { - fprintf(stderr, "Error: return requires exactly one argument\n"); - return; - } - - const char *reg_str = node->children[0]->token; - int reg = parse_register(reg_str); - - // Handle "nil" as special case (no return value) - if (reg < 0) { - if (strcmp(reg_str, "nil") == 0) { - reg = 0xFF; // Special value for "no return" - } else { - fprintf(stderr, "Error: invalid return register '%s'\n", reg_str); - return; - } - } - emit_byte(vm, (u8)reg); - } else if (strcmp(opname, "load-immediate") == 0) { - emit_opcode(vm, OP_LOAD_IMM); - int reg = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, reg); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-8") == 0) { - emit_opcode(vm, OP_LOAD_ABS_8); - int dest = parse_register(node->children[0]->token); - u32 addr = 
resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-16") == 0) { - emit_opcode(vm, OP_LOAD_ABS_16); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-32") == 0) { - emit_opcode(vm, OP_LOAD_ABS_32); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-indirect-8") == 0) { - emit_opcode(vm, OP_LOAD_IND_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "load-indirect-16") == 0) { - emit_opcode(vm, OP_LOAD_IND_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "load-indirect-32") == 0) { - emit_opcode(vm, OP_LOAD_IND_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "malloc") == 0) { - emit_opcode(vm, OP_MALLOC); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "memset-8") == 0) { - emit_opcode(vm, OP_MEMSET_8); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "memset-16") == 0) { - emit_opcode(vm, OP_MEMSET_16); - int dest = 
parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "memset") == 0) { - emit_opcode(vm, OP_MEMSET_32); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "store-absolute-8") == 0) { - emit_opcode(vm, OP_STORE_ABS_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-absolute-16") == 0) { - emit_opcode(vm, OP_STORE_ABS_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-absolute-32") == 0) { - emit_opcode(vm, OP_STORE_ABS_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-8") == 0) { - emit_opcode(vm, OP_STORE_IND_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-16") == 0) { - emit_opcode(vm, OP_STORE_IND_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-32") == 0) { - emit_opcode(vm, OP_STORE_IND_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - 
emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-offset-8") == 0) { - emit_opcode(vm, OP_STORE_OFF_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "store-offset-16") == 0) { - emit_opcode(vm, OP_STORE_OFF_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "store-offset-32") == 0) { - emit_opcode(vm, OP_STORE_OFF_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-8") == 0) { - emit_opcode(vm, OP_LOAD_OFF_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-16") == 0) { - emit_opcode(vm, OP_LOAD_OFF_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-32") == 0) { - emit_opcode(vm, OP_LOAD_OFF_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - 
emit_u32(vm, addr); - } else if (strcmp(opname, "register-move") == 0) { - emit_opcode(vm, OP_REG_MOV); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "syscall") == 0) { - emit_opcode(vm, OP_SYSCALL); - - // Parse syscall ID - u32 syscall_id = 0; - const char *syscall_name = node->children[0]->token; - if (strcmp(syscall_name, "EXIT") == 0) - syscall_id = SYSCALL_EXIT; - else if (strcmp(syscall_name, "OPEN") == 0) - syscall_id = SYSCALL_DEVICE_OPEN; - else if (strcmp(syscall_name, "READ") == 0) - syscall_id = SYSCALL_DEVICE_READ; - else if (strcmp(syscall_name, "WRITE") == 0) - syscall_id = SYSCALL_DEVICE_WRITE; - else if (strcmp(syscall_name, "CLOSE") == 0) - syscall_id = SYSCALL_DEVICE_CLOSE; - else if (strcmp(syscall_name, "IOCTL") == 0) - syscall_id = SYSCALL_DEVICE_IOCTL; - else if (strcmp(syscall_name, "REFRESH") == 0) - syscall_id = SYSCALL_DEVICE_REFRESH; - - emit_u32(vm, syscall_id); - - // Emit register arguments - for (size_t i = 1; i < node->child_count; ++i) { - int reg = parse_register(node->children[i]->token); - emit_byte(vm, reg); - } - } else if (strcmp(opname, "bit-shift-left") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_LEFT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-shift-right") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_RIGHT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-shift-r-ext") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_R_EXT); - int dest = 
parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-and") == 0) { - emit_opcode(vm, OP_BAND); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-or") == 0) { - emit_opcode(vm, OP_BOR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-xor") == 0) { - emit_opcode(vm, OP_BXOR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "add-int") == 0) { - emit_opcode(vm, OP_ADD_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-int") == 0) { - emit_opcode(vm, OP_SUB_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-int") == 0) { - emit_opcode(vm, OP_MUL_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = 
parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-int") == 0) { - emit_opcode(vm, OP_DIV_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-int") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-int") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "add-nat") == 0) { - emit_opcode(vm, OP_ADD_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-nat") == 0) { - emit_opcode(vm, OP_SUB_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-nat") == 0) { - emit_opcode(vm, OP_MUL_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-nat") == 0) { - emit_opcode(vm, OP_DIV_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = 
parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-nat") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-nat") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "add-real") == 0) { - emit_opcode(vm, OP_ADD_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-real") == 0) { - emit_opcode(vm, OP_SUB_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-real") == 0) { - emit_opcode(vm, OP_MUL_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-real") == 0) { - emit_opcode(vm, OP_DIV_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-real") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = 
parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-real") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "int-to-real") == 0) { - emit_opcode(vm, OP_INT_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "nat-to-real") == 0) { - emit_opcode(vm, OP_NAT_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-int") == 0) { - emit_opcode(vm, OP_REAL_TO_INT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-nat") == 0) { - emit_opcode(vm, OP_REAL_TO_NAT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "jump-eq-int") == 0) { - emit_opcode(vm, OP_JEQ_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-int") == 0) { - emit_opcode(vm, OP_JNEQ_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if 
(strcmp(opname, "jump-gt-int") == 0) { - emit_opcode(vm, OP_JGT_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-int") == 0) { - emit_opcode(vm, OP_JLT_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-int") == 0) { - emit_opcode(vm, OP_JLE_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-int") == 0) { - emit_opcode(vm, OP_JGE_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-eq-nat") == 0) { - emit_opcode(vm, OP_JEQ_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-nat") == 0) { - emit_opcode(vm, OP_JNEQ_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-nat") == 0) { - emit_opcode(vm, 
OP_JGT_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-nat") == 0) { - emit_opcode(vm, OP_JLT_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-nat") == 0) { - emit_opcode(vm, OP_JLE_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-nat") == 0) { - emit_opcode(vm, OP_JGE_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-eq-real") == 0) { - emit_opcode(vm, OP_JEQ_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-real") == 0) { - emit_opcode(vm, OP_JNEQ_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-real") == 0) { - emit_opcode(vm, OP_JGT_REAL); - u32 addr = resolve_symbol(table, 
node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-real") == 0) { - emit_opcode(vm, OP_JLT_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-real") == 0) { - emit_opcode(vm, OP_JLE_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-real") == 0) { - emit_opcode(vm, OP_JGE_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-length") == 0) { - emit_opcode(vm, OP_STRLEN); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-eq") == 0) { - emit_opcode(vm, OP_STREQ); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-concat") == 0) { - emit_opcode(vm, OP_STRCAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - 
emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-get-char") == 0) { - emit_opcode(vm, OP_STR_GET_CHAR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-find-char") == 0) { - emit_opcode(vm, OP_STR_FIND_CHAR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-slice") == 0) { - emit_opcode(vm, OP_STR_SLICE); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - int src3 = parse_register(node->children[3]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - emit_byte(vm, src3); - } else if (strcmp(opname, "int-to-string") == 0) { - emit_opcode(vm, OP_INT_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "nat-to-string") == 0) { - emit_opcode(vm, OP_NAT_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-string") == 0) { - emit_opcode(vm, OP_REAL_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-int") == 0) { - emit_opcode(vm, OP_STRING_TO_INT); - int dest = parse_register(node->children[0]->token); - int src = 
parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-nat") == 0) { - emit_opcode(vm, OP_STRING_TO_NAT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-real") == 0) { - emit_opcode(vm, OP_STRING_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else { - fprintf(stderr, "Unknown opcode: %s\n", opname); - } -} - -void old_assemble(VM *vm, ExprNode *program) { - SymbolTable table; - symbol_table_init(&table); - - // PASS 1: Collect all symbols (both code and data) - collect_symbols(&table, program); - - // PASS 2: Process data section using symbol table - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "data") == 0) { - process_data_block(vm, &table, section); - } - } - - // PASS 3: Process code section using complete symbol table - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "code") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - process_code_expr(vm, &table, section->children[j]); - } - } - } - - // Cleanup symbol table - for (int i = 0; i < table.count; i++) { -#ifdef ASM_DEBUG - Symbol s = table.symbols[i]; - printf("%s[%d]\n", s.name, s.address); -#endif - free(table.symbols[i].name); - } - free(table.symbols); -} diff --git a/src/tools/old_assembler/assembler.h b/src/tools/old_assembler/assembler.h deleted file mode 100644 index 76b9326..0000000 --- a/src/tools/old_assembler/assembler.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef ASSEMBLER_H -#define ASSEMBLER_H - -#include "../../vm/common.h" -#include "../../vm/vm.h" -#include "parser.h" - -#include -#include 
-#include -#include - -#define AS_FIXED(v) ((float)(i32)(v) / 65536.0f) -#define TO_FIXED(f) ((i32)( \ - ((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \ -)) - -void old_assemble(VM *vm, ExprNode *program); - -#endif diff --git a/src/tools/old_assembler/parser.c b/src/tools/old_assembler/parser.c deleted file mode 100644 index 69610d9..0000000 --- a/src/tools/old_assembler/parser.c +++ /dev/null @@ -1,244 +0,0 @@ -#include "parser.h" -#include -#include -#include -#include -#include -#include - -// Helper function to create a new node -static ExprNode *expr_node_create(const char *token, int line) { - ExprNode *node = (ExprNode *)malloc(sizeof(ExprNode)); - node->token = strdup(token ? token : ""); - node->children = NULL; - node->child_count = 0; - node->line = line; - return node; -} - -// Forward declaration -static ExprNode *parse_expression(const char **ptr, int line); - -// Skip whitespace characters and comments -static const char *skip_whitespace(const char *ptr) { - while (*ptr) { - // Skip regular whitespace - if (isspace(*ptr)) { - ptr++; - continue; - } - - // Check for comment start - if (*ptr == ';') { - // Skip everything until end of line - while (*ptr && *ptr != '\n') { - ptr++; - } - continue; - } - break; - } - return ptr; -} - -// Parse a token (atom) -static char *parse_token(const char **ptr, int line) { - const char *start = *ptr; - - // Skip leading whitespace and comments - start = skip_whitespace(start); - if (!*start) { - printf("Error at line:%d\n", line); - return NULL; - } - - const char *end = start; - - // Handle quoted strings - if (*start == '"') { - end++; // Skip opening quote - // Read until closing quote or end of string - while (*end && *end != '"') { - if (*end == '\\' && *(end + 1)) { - end += 2; // Skip escaped character - } else { - end++; - } - } - if (*end == '"') { - end++; // Include closing quote - } - } - // Handle parentheses as separate tokens - else if (*end == '(' || *end == ')') { - end++; - } 
else { - // Read until whitespace, parentheses, or comment - while (*end && !isspace(*end) && *end != '(' && *end != ')' && - *end != ';') { - end++; - } - } - - if (end == start) { - printf("Error at line:%d\n", line); - return NULL; - } - - size_t len = end - start; - char *token = (char *)malloc(len + 1); - memcpy(token, start, len); - token[len] = '\0'; - - *ptr = end; - return token; -} - -// Parse a list (expression starting with '(') -static ExprNode *parse_list(const char **ptr, int line) { - // Skip the opening parenthesis - (*ptr)++; - - *ptr = skip_whitespace(*ptr); - if (**ptr == ')') { - // Empty list - (*ptr)++; - return expr_node_create("\0", line); - } - - // Parse all children first - ExprNode **temp_children = NULL; - size_t temp_count = 0; - - while (**ptr && **ptr != ')') { - ExprNode *child = parse_expression(ptr, line); - if (child) { - // Resize temp children array - ExprNode **new_temp = - (ExprNode **)malloc(sizeof(ExprNode *) * (temp_count + 1)); - - // Copy existing children - for (size_t i = 0; i < temp_count; i++) { - new_temp[i] = temp_children[i]; - } - - // Add new child - new_temp[temp_count] = child; - temp_count++; - - // Free old array and update - free(temp_children); - temp_children = new_temp; - } - - *ptr = skip_whitespace(*ptr); - } - - if (**ptr == ')') { - (*ptr)++; // Skip closing parenthesis - } else { - fprintf(stderr, "Error: Missing closing parenthesis at line %d\n", line); - } - - // Create the actual node - ExprNode *node; - if (temp_count > 0 && temp_children[0]->child_count == 0) { - // First child is an atom, use it as the operator - node = expr_node_create(temp_children[0]->token, line); - // Move remaining children - node->child_count = temp_count - 1; - if (node->child_count > 0) { - node->children = - (ExprNode **)malloc(sizeof(ExprNode *) * node->child_count); - for (size_t i = 0; i < node->child_count; i++) { - node->children[i] = temp_children[i + 1]; - } - } - // Free the first child since we used its 
token - expr_free(temp_children[0]); - } else { - // No operator or first child is a list - node = expr_node_create("list", line); - node->children = temp_children; - node->child_count = temp_count; - } - - if (temp_count == 0) { - free(temp_children); - } - - return node; -} - -// Parse an expression (either atom or list) -static ExprNode *parse_expression(const char **ptr, int line) { - *ptr = skip_whitespace(*ptr); - - if (!**ptr) - return NULL; - - if (**ptr == '(') { - return parse_list(ptr, line); - } else { - // Parse atom - char *token = parse_token(ptr, line); - if (token) { - ExprNode *node = expr_node_create(token, line); - free(token); - return node; - } - return NULL; - } -} - -// Main parsing function -ExprNode *expr_parse(const char *source, size_t source_len) { - if (!source || source_len == 0) - return NULL; - - const char *ptr = source; - int line = 1; - - ptr = skip_whitespace(ptr); - if (!*ptr) - return NULL; - - return parse_expression(&ptr, line); -} - -// Free an Expr AST (and all children) -void expr_free(ExprNode *node) { - if (!node) - return; - - free(node->token); - - for (size_t i = 0; i < node->child_count; i++) { - expr_free(node->children[i]); - } - free(node->children); - free(node); -} - -// Debug: print AST (for dev) -void expr_print(ExprNode *node, int indent) { - if (!node) - return; - - for (int i = 0; i < indent; i++) { - printf(" "); - } - - if (node->child_count == 0) { - // Atom - printf("Atom: '%s' (line %d)\n", node->token, node->line); - } else { - // List - printf("List: '%s' (line %d) [%zu children]\n", node->token, node->line, - node->child_count); - - for (size_t i = 0; i < node->child_count; i++) { - expr_print(node->children[i], indent + 1); - } - } -} diff --git a/src/tools/old_assembler/parser.h b/src/tools/old_assembler/parser.h deleted file mode 100644 index 53ac41b..0000000 --- a/src/tools/old_assembler/parser.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef PARSER_H -#define PARSER_H - -#include // for size_t - -// 
Forward declare -typedef struct ExprNode ExprNode; - -// Node type: atom or list -struct ExprNode { - char *token; // For atoms: the value ("123", "$0", "add") - // For lists: the operator (first token) - ExprNode **children; // Array of child nodes (NULL if atom) - size_t child_count; // 0 if atom - int line; // Source line number (for errors) -}; - -ExprNode *expr_parse(const char *source, size_t source_len); -ExprNode* expand_macros(ExprNode* node); -ExprNode* expand_lambda(ExprNode* lambda_node); -void expr_free(ExprNode *node); -void expr_print(ExprNode *node, int indent); -void *safe_malloc(size_t size); - -#endif diff --git a/src/vm/libc.c b/src/vm/libc.c index 05dcf3f..3a85ffe 100644 --- a/src/vm/libc.c +++ b/src/vm/libc.c @@ -95,6 +95,21 @@ bool streq(const char *s1, const char *s2) { return (*s1 == '\0' && *s2 == '\0'); } +bool strleq(const char *s1, const char *s2, u32 length) { + u32 i; + if (s1 == nil && s2 == nil) return true; + if (s1 == nil || s2 == nil) return false; + + i = 0; + while (i < length && *s1 && *s2) { + if (*s1 != *s2) return false; + s1++; + s2++; + i++; + } + if (i == length) return true; + return (*s1 == '\0' && *s2 == '\0'); +} u32 strlength(const char *str) { u32 i; @@ -238,4 +253,4 @@ void fixed_to_string(i32 value, char *buffer) { } strcopy(buffer, end, temp + sizeof(temp) - end); -} \ No newline at end of file +} diff --git a/src/vm/libc.h b/src/vm/libc.h index f9ffc45..4479f9a 100644 --- a/src/vm/libc.h +++ b/src/vm/libc.h @@ -4,6 +4,7 @@ #include "common.h" bool streq(const char *s1, const char *s2); +bool strleq(const char *s1, const char *s2, u32 length); i32 strcopy(char* to, const char *from, u32 length); u32 strlength(const char *str); u32 strnlength(const char *str, u32 max_len); diff --git a/test/add.rom b/test/add.rom deleted file mode 100644 index 0f1dd4cf7279a0013513ad42e030f59d47f9742f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 
zcmXwyp$>pB3`DQjDrQh9EDA9`WD1W1i6MmFA1!1}`g*-yQUKVzTMbS4G)Pc_GC2t0 us+2hltxjO1fV9ZKQc%CVq(Z3r?swDPMKS#iHP(pc{dn?yTxVXsR~=qfTnG{X diff --git a/test/add.ul.ir b/test/add.ul.ir index 5ed166d..64e269d 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -28,8 +28,8 @@ function pln (str message $0) load_immediate 0 -> mode; syscall OPEN terminal_namespace mode -> term; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE term message msg_length; - strlen new_line -> nl_length; + string_length new_line -> nl_length; syscall WRITE term nl nl_length; return; diff --git a/test/fib.ul.ir b/test/fib.ul.ir index e073416..3ceff47 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -40,10 +40,10 @@ function pln (str message $0) load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; \ No newline at end of file diff --git a/test/hello.rom b/test/hello.rom deleted file mode 100644 index 9632b4b38a5231fefdfc691030dca7459f1c33f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 135 zcmXwvu?m1N3`8$YCS3%<*~yOe51a&teuf<_DAwQCL@|)N$K3(ItpDHegSt?sO05aq vZy{tRwJ94D)xwQJbYQjsE((9h;%=LUHC^FM#}-EIfeM!9vgfq)zW50bk=_dT diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 7373ad9..c8d2475 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -19,9 +19,9 @@ function pln (str message $0) load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; diff --git a/test/loop.rom 
b/test/loop.rom deleted file mode 100644 index 1ae871122caf2b3379b7de38bba2408806ef034d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 258 zcmYL@u?oUK42F|SN{_omaBva1MGzdStBaFcH+=y?5eGZe;xp(o`Eci-mSP~uclpDA zAtL%B$5@YmuE>Iv-&3$NK_w%!u=xsQ&Mf-{1a=zwg=ke6C4{(T(sBZ>eJNtI(yLE^ zS@dF#A$tCxbIeEO0X3h529yj9-OVnRUKiirZrvLx| diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 0cc95f3..a9a03b9 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -2,7 +2,7 @@ global str terminal_namespace = "/dev/term/0"; global str prompt = "Enter a string:"; global str new_line = "\n"; -function main (); +function main () real a $0; int i $1; int mode $11; @@ -13,7 +13,7 @@ function main (); load_immediate 0 -> $2; load_immediate -1 -> $3; load_immediate 5.0 -> $5; - loop loop_body; + loop loop_body add_real a $5 -> a; add_int i $3 -> i; jump_ge_int loop_body i $2; @@ -50,9 +50,9 @@ function pln (str message $0); load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length ; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; \ No newline at end of file diff --git a/test/malloc.rom b/test/malloc.rom deleted file mode 100644 index f03a1bf9bfedf082c61fa5e08d8b8dc7fb61a933..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 167 zcmXwyu@1sO3`6b26J6L?Z((9MW=15oegsHVh=C5J;`fiF&`7bL70CgBYwS9Q6~eW! 
zx-{n!I&mAobBKXWEI~WbEga(iqLvZzeQ0lV&q(X#KXh5k;(8xGbsnZyJtCVgV}x=m O?|FKTk5fVZ-QWwl{|@Q^ diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 9cb5155..84a883d 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -32,9 +32,9 @@ function pln (str message $0) load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; diff --git a/test/paint-bw.rom b/test/paint-bw.rom deleted file mode 100644 index 197ed3d0d5278c5530b9e1bcff1dacfb6ecc8b1d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 574 zcmah`OHRWu5cT*UC+U>H1EyVZ!8NT#0L<3pX0$H*2&j7lS)04ysU($uxvy@q+v|BtOe-tWaz?i9$=TN z1rO7qnjO;6UwM=+7uDy)-KlXRIoHxQoXzCAG^)vec#`72kC25=KA(w(^Dc+ N3m@h)vKO*9vQGwOFP{Ja diff --git a/test/paint.rom b/test/paint.rom deleted file mode 100644 index 329532c926885b2cd01e1c11d741dd26b236c765..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1266 zcmai!&5G1O6ou=i{*p?kt93e!!=!uB3L-8dxKM@}5ZoxPT)Wg>iK95ETYolg1kt6) zEPVsPm9O9F@ZkSs<>rRmoeDLZYmu zBFCzli4lUL3Fc)jNKH;+6;yswIy%eQ5t~%SGaX4GbWNjb+BrHOvh_c*ZOE>@tU2zz z*z*pvS9{)b1Mj<@cbL7=^X?D4w|d@T_HNI6bKrf_^Tt_;!U$2%q(Sgy;kl$&4)FR# zSy>;do%>Mj@f4|aZRr7SXKX&IX#Af-N77W)BWbDyku=qhNSbO+Bu#ZHlBU`gNmD(H zq^X7;lWw}}W~b3RMH(~xLXT`co!OkIyUeW)qcNFotknDOkr&xs2qs*S3|C~8One*N3f%dh}W-)(m-Aw$%g>IX5DIPVza~Gl+n1T uV5iD#F04i%$j9iUyGRV|O|pXY1vCf#%AnPw=_pUS%XOyJ?*7GdfZ_#>D+nL} diff --git a/test/simple.ul.ir b/test/simple.ul.ir index c973ff5..803bbde 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -3,12 +3,12 @@ global real x = 1.0; global real y = 1.0; function main () - real x $0; - load_absolute_32 x -> x; - real y $1; - load_absolute_32 y -> y; + real a $0; + load_absolute_32 x -> a; + real b $1; + 
load_absolute_32 y -> b; real result $2; - add_real x y -> result; + add_real a b -> result; str result_str $3; real_to_string result -> result_str; call pln result_str -> void; @@ -23,9 +23,9 @@ function pln (str message $0) load_immediate 0 -> mode; syscall OPEN terminal_namespace mode -> term; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE term nl nl_length; return; diff --git a/test/window.rom b/test/window.rom deleted file mode 100644 index 1721b46b94eb2976d719953e36238f8e8f969092..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 326 zcmX|-O%8%E5QS&xplHPs11wpndraKu!o&lx;|SWsjhgs(InU%7@J$Ui>CAf{%xhXA zq7QbzcD+ECnHs}dXSR)r-%uQPX-)}L8|Qyg(j<_wC&C%PydZAl;tw;842dqY#=uZf zngN=#Bc~Jl6UxY@L#IciiPPL=mE`lP8e$j10s_TjvxS;grVrZaY*@kzbg^o}am-fC z8m@^b{LU0L@|$EMQJ3Odee|#Tez ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length ; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; From f901dafa2b90f4dc8d86cd2e5440c8fcaf79ede5 Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 30 Nov 2025 23:22:25 -0800 Subject: [PATCH 19/27] WIP Code generation, 'simple' works --- bench/add.lisp | 4 - bench/fib.lisp | 6 - bench/hello.lisp | 1 - bench/simple.lisp | 1 - src/arch/linux/main.c | 24 - src/tools/assembler/assembler.c | 2120 ++++++++++++++++++++++++++----- src/tools/assembler/assembler.h | 1 + src/tools/assembler/lexer.c | 10 + src/vm/opcodes.h | 15 +- src/vm/vm.c | 578 +++++---- test/add.ul.ir | 20 +- test/fib.ul.ir | 17 +- test/hello.ul.ir | 27 +- test/loop.ul.ir | 30 +- test/malloc.ul.ir | 21 +- test/simple.ul | 5 +- test/simple.ul.ir | 23 +- test/window.ul.ir | 21 +- 18 files changed, 2187 
insertions(+), 737 deletions(-) delete mode 100644 bench/add.lisp delete mode 100644 bench/fib.lisp delete mode 100644 bench/hello.lisp delete mode 100644 bench/simple.lisp diff --git a/bench/add.lisp b/bench/add.lisp deleted file mode 100644 index 95e29c0..0000000 --- a/bench/add.lisp +++ /dev/null @@ -1,4 +0,0 @@ -(lambda add-two (a b) - (return (+ a b))) - -(print (to-string (add-two 1 1))) diff --git a/bench/fib.lisp b/bench/fib.lisp deleted file mode 100644 index f73186a..0000000 --- a/bench/fib.lisp +++ /dev/null @@ -1,6 +0,0 @@ -(lambda fib (n) - (if (n < 2) - (return n)) - (return (+ (fib (- n 2)) (fib (- n 1))))) - -(print (fib 36)) \ No newline at end of file diff --git a/bench/hello.lisp b/bench/hello.lisp deleted file mode 100644 index 2856246..0000000 --- a/bench/hello.lisp +++ /dev/null @@ -1 +0,0 @@ -(print "nuqneH 'u'?") \ No newline at end of file diff --git a/bench/simple.lisp b/bench/simple.lisp deleted file mode 100644 index a20fde5..0000000 --- a/bench/simple.lisp +++ /dev/null @@ -1 +0,0 @@ -(print (+ 1.0 2.0)) \ No newline at end of file diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 94a4e2a..eef3085 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -163,25 +163,6 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { return true; } -bool init_vm(VM *vm) { - vm->memory = (u8 *)malloc(MEMORY_SIZE * sizeof(u8)); - vm->memory_size = MEMORY_SIZE; - - vm->code = (u8 *)malloc(CODE_SIZE * sizeof(u8)); - vm->code_size = CODE_SIZE; - - vm->frames = (Frame *)malloc(FRAMES_SIZE * sizeof(Frame)); - vm->frames_size = FRAMES_SIZE; - - vm->stack = (u32 *)malloc(STACK_SIZE * sizeof(u32)); - vm->stack_size = STACK_SIZE; - - vm->devices = (Device *)malloc(DEVICES_SIZE * sizeof(Device)); - vm->device_size = DEVICES_SIZE; - - return true; -} - i32 main(i32 argc, char *argv[]) { bool dump_rom = false; char *input_file = nil; @@ -212,11 +193,6 @@ i32 main(i32 argc, char *argv[]) { } VM vm = {0}; - if 
(!init_vm(&vm)) { - printf("vm did not initialize for some reason."); - return 1; - } - bool compilation_success = true; if (input_file) { if (is_rom) { diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index b45d6f9..3fdc9a9 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -8,19 +8,16 @@ #include #include -void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } +void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp] = byte; } void emit_u32(VM *vm, u32 value) { write_u32(vm, code, vm->cp, value); - vm->cp += 4; } -SymbolTable *symbol_table_init() { - SymbolTable *table = malloc(sizeof(SymbolTable)); - table->symbols = malloc(16 * sizeof(Symbol)); +void symbol_table_init(SymbolTable *table) { + table->symbols = calloc(16, sizeof(Symbol)); table->count = 0; table->capacity = 16; - return table; } u32 symbol_table_add(SymbolTable *table, Symbol s) { @@ -35,25 +32,64 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { return index; } -Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { +Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { for (u32 i = 0; i < table->count; i++) { - if (streq(table->symbols[i].name, name)) { - return &table->symbols[i]; + if (table->symbols[i].name_length == length) { + if (strleq(table->symbols[i].name, name, length)) { + return &table->symbols[i]; + } } } return nil; } -u32 get_ref(SymbolTable *st, const char *name) { - Symbol *sym = symbol_table_lookup(st, name); +u32 get_ref(SymbolTable *st, const char *name, u32 length) { + Symbol *sym = symbol_table_lookup(st, name, length); if (!sym) { - fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); + fprintf(stderr, "Error: Undefined Symbol '%.*s'\n", length, name); exit(1); return 0; } return sym->ref; } +u32 get_ptr(Token token, SymbolTable *st) { + if (token.type == TOKEN_IDENTIFIER) { + return get_ref(st, token.start, token.length); + } + + if (token.type == 
TOKEN_LITERAL_INT) { + return atoi(token.start); + } + + if (token.type == TOKEN_LITERAL_NAT) { + char *endptr; + u32 out = (u32)strtoul(token.start, &endptr, 10); + if (endptr == token.start || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: '%.*s'\n", token.length, token.start); + exit(1); + } + return out; + } + + fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, token.start); + exit(1); +} + +u32 get_reg(Token token, SymbolTable *st) { + if (token.type == TOKEN_IDENTIFIER) { + return get_ref(st, token.start, token.length); + } + + if (token.type == TOKEN_BIG_MONEY) { + token = next_token(); + return atoi(token.start); + } + + fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, token.start); + exit(1); +} + Token next_id_or_reg() { Token token = next_token(); if (token.type == TOKEN_IDENTIFIER) { @@ -75,10 +111,8 @@ Token next_id_or_reg() { Token next_id_or_ptr() { Token token = next_token(); - if (token.type != TOKEN_IDENTIFIER && - token.type != TOKEN_LITERAL_NAT && - token.type != TOKEN_LITERAL_INT && - token.type != TOKEN_LITERAL_REAL) { + if (token.type != TOKEN_IDENTIFIER && token.type != TOKEN_LITERAL_NAT && + token.type != TOKEN_LITERAL_INT && token.type != TOKEN_LITERAL_REAL) { printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, token.start); exit(1); @@ -95,15 +129,6 @@ Token next_token_is(TokenType type) { return token; } -Token next_token_is_either(TokenType type, TokenType type2) { - Token token = next_token(); - if (token.type != type && token.type != type2) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); - exit(1); - } - return token; -} - /** * Global . 
*/ @@ -159,6 +184,7 @@ bool define_global(VM *vm, SymbolTable *st) { } memcpy(s.name, name.start, name.length); + s.name_length = name.length; u32 addr = vm->mp; s.ref = addr; @@ -226,6 +252,9 @@ bool define_global(VM *vm, SymbolTable *st) { while (i < value.length) { char c = src[i++]; + if (c == '"') { + continue; + } if (c == '\\' && i < value.length) { switch (src[i++]) { case 'n': @@ -245,7 +274,8 @@ bool define_global(VM *vm, SymbolTable *st) { i--; // Rewind for unknown escapes } } - write_u8(vm, memory, addr + 4 + len++, c); + write_u8(vm, memory, addr + 4 + len, c); + len++; } u32 size = len + 5; // 4 (len) + dst_len + 1 (null) @@ -338,6 +368,8 @@ void define_var(SymbolTable *st, Token regType) { } memcpy(s.name, name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; next_token_is(TOKEN_BIG_MONEY); @@ -361,13 +393,14 @@ void define_function(VM *vm, SymbolTable *st) { exit(1); } memcpy(s.name, name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; next_token_is(TOKEN_LPAREN); Token next = next_token(); while (next.type != TOKEN_RPAREN) { define_var(st, next); - next = next_token(); if (next.type == TOKEN_COMMA) { next = next_token(); @@ -379,7 +412,7 @@ void define_function(VM *vm, SymbolTable *st) { exit(1); } } - s.ref = vm->pc; + s.ref = vm->cp; symbol_table_add(st, s); } @@ -398,8 +431,9 @@ void define_branch(VM *vm, SymbolTable *st) { exit(1); } memcpy(s.name, name.start, name.length); + s.name_length = name.length; - s.ref = vm->pc; + s.ref = vm->cp; symbol_table_add(st, s); } @@ -416,8 +450,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { exit(1); } - printf("Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), - token.length, token.start); + printf("Line %d [%s]: %.*s cp=%d mp=%d\n", token.line, + token_type_to_string(token.type), token.length, token.start, vm->cp, + vm->mp); if (token.type == TOKEN_KEYWORD_GLOBAL) { define_global(vm, st); @@ -434,7 
+469,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { - define_var(st, token); + define_var(st, token); next_token_is(TOKEN_SEMICOLON); continue; } @@ -447,16 +482,16 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { } if (token.type == TOKEN_KEYWORD_RETURN) { - vm->pc++; + vm->cp++; Token next = next_token(); if (next.type == TOKEN_SEMICOLON) { /* put 0xFF as return register */ - vm->pc++; + vm->cp++; continue; } - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); continue; } @@ -464,1047 +499,1043 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { if (token.type == TOKEN_IDENTIFIER) { // check to see if it is an opcode first if (strleq(token.start, "exit", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "call", token.length)) { - vm->pc++; + vm->cp++; next_token_is(TOKEN_IDENTIFIER); - vm->pc += 4; - vm->pc++; /* number of args (implied) */ - - Token next = next_token(); - while (next.type != TOKEN_ARROW_RIGHT) { - vm->pc++; - next = next_token(); - } - /* return type */ - next = next_token(); - vm->pc++; /* we emit a value regardless, a void is register 255 */ - if (next.type == TOKEN_SEMICOLON) { - /* exit early because no return type */ - continue; - } - /* if it is not void, then it was the value */ - next_token_is(TOKEN_SEMICOLON); - } else if (strleq(token.start, "syscall", token.length)) { - vm->pc++; - - next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; Token next = next_token(); while (next.type != TOKEN_SEMICOLON) { if (next.type != TOKEN_ARROW_RIGHT) { - vm->pc++; + vm->cp++; + } + next = next_token(); + } + + vm->cp++; /* number of args (implied) */ + + } else if (strleq(token.start, "syscall", 
token.length)) { + vm->cp++; + + next_token_is(TOKEN_IDENTIFIER); + + vm->cp += 4; + + Token next = next_token(); + while (next.type != TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + vm->cp++; } next = next_token(); } } else if (strleq(token.start, "load_immediate", token.length)) { - vm->pc++; + vm->cp++; - next_id_or_ptr(); - vm->pc += 4; + next_token(); // literal + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_address", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; - next_token_is(TOKEN_SEMICOLON); + next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "malloc", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_16", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + 
vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_16", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_16", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_16", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; 
next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_16", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_16", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_8", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); /* src1 */ - vm->pc++; + vm->cp++; next_token_is(TOKEN_LITERAL_NAT); /* offset */ - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); /* dest */ - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_16", 
token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); /* src1 */ - vm->pc++; + vm->cp++; next_token_is(TOKEN_LITERAL_NAT); /* offset */ - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); /* dest */ - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_32", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); /* src1 */ - vm->pc++; + vm->cp++; next_token_is(TOKEN_LITERAL_NAT); /* offset */ - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); /* dest */ - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "register_move", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, 
"abs_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; 
next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; 
next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_left", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_right", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_and", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_or", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if 
(strleq(token.start, "bit_xor", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_if_flag", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_int", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; 
next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_nat", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + 
vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_real", token.length)) { - vm->pc++; + vm->cp++; next_id_or_ptr(); - vm->pc += 4; + vm->cp += 4; next_id_or_reg(); - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_length", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_string", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_string", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_string", token.length)) { - vm->pc++; + vm->cp++; next_id_or_reg(); - vm->pc++; + vm->cp++; 
next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); - vm->pc++; + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_eq", token.length)) { @@ -1540,8 +1571,9 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - printf("[Generate Bytecode] Line %d [%s]: %.*s\n", token.line, - token_type_to_string(token.type), token.length, token.start); + printf("[Generate Bytecode cp=%d mp=%d ] Line %d [%s]: %.*s\n", vm->cp, + vm->mp, token.line, token_type_to_string(token.type), token.length, + token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed @@ -1583,20 +1615,1410 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { } if (token.type == TOKEN_KEYWORD_RETURN) { - vm->pc++; + emit_byte(vm, OP_RETURN); + vm->cp++; Token next = next_token(); if (next.type == TOKEN_SEMICOLON) { /* put 0xFF as return register */ - emit_u8(vm, 0xFF); - vm->pc++; + emit_byte(vm, 0xFF); + vm->cp++; continue; } - vm->pc++; + u32 reg = get_reg(next, st); + emit_byte(vm, reg); + vm->cp++; next_token_is(TOKEN_SEMICOLON); continue; } + + if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first + if (strleq(token.start, "exit", token.length)) { + + emit_byte(vm, OP_EXIT); + vm->cp++; + + Token next = next_token(); + u32 ptr = get_ptr(next, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "call", token.length)) { + + emit_byte(vm, OP_CALL); + vm->cp++; + + Token id = next_token_is(TOKEN_IDENTIFIER); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + bool has_return = false; + u8 arg_count = 0; + u32 arg_pos = vm->cp++; + + Token next = next_token(); + while (next.type != TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + arg_count++; + } else { + has_return = true; + } + next = next_token(); + } + + /* patch 
number of args */ + vm->code[arg_pos] = arg_count; + + if (!has_return) { + vm->cp++; + emit_byte(vm, 255); + continue; + } + } else if (strleq(token.start, "syscall", token.length)) { + + emit_byte(vm, OP_SYSCALL); + vm->cp++; + + Token next = next_token(); + + u32 syscall_id = 0; + const char *syscall_name = next.start; + if (strleq(syscall_name, "EXIT", next.length)) + syscall_id = SYSCALL_EXIT; + else if (strleq(syscall_name, "OPEN", next.length)) + syscall_id = SYSCALL_DEVICE_OPEN; + else if (strleq(syscall_name, "READ", next.length)) + syscall_id = SYSCALL_DEVICE_READ; + else if (strleq(syscall_name, "WRITE", next.length)) + syscall_id = SYSCALL_DEVICE_WRITE; + else if (strleq(syscall_name, "CLOSE", next.length)) + syscall_id = SYSCALL_DEVICE_CLOSE; + else if (strleq(syscall_name, "IOCTL", next.length)) + syscall_id = SYSCALL_DEVICE_IOCTL; + else if (strleq(syscall_name, "REFRESH", next.length)) + syscall_id = SYSCALL_DEVICE_REFRESH; + + emit_u32(vm, syscall_id); + vm->cp += 4; + + next = next_token(); + while (next.type != TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + } + next = next_token(); + } + + } else if (strleq(token.start, "load_immediate", token.length)) { + + emit_byte(vm, OP_LOAD_IMM); + vm->cp++; + + Token value = next_token(); + switch (value.type) { + case TOKEN_KEYWORD_TRUE: { + emit_u32(vm, 1); + break; + } + case TOKEN_KEYWORD_FALSE: { + emit_u32(vm, 0); + break; + } + case TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + emit_u32(vm, out); + break; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid 'real' number: '%.*s'\n", token.length, token.start); + exit(1); + } + emit_u32(vm, out); + break; + } + case TOKEN_LITERAL_REAL: { + fixed_t out = float_to_fixed(atof(value.start)); + emit_u32(vm, out); + break; + } + default:{ + 
fprintf(stderr, "Unknown immediate: '%.*s'\n", token.length, token.start); + exit(1); + } + } + + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_address", token.length)) { + emit_byte(vm, OP_LOAD_IMM); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "malloc", token.length)) { + emit_byte(vm, OP_MALLOC); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_8", token.length)) { + emit_byte(vm, OP_MEMSET_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_16", token.length)) { + emit_byte(vm, OP_MEMSET_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_32", token.length)) { + emit_byte(vm, OP_MEMSET_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + 
reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_offset_8", token.length)) { + emit_byte(vm, OP_LOAD_OFF_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_offset_16", token.length)) { + emit_byte(vm, OP_LOAD_OFF_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_offset_32", token.length)) { + emit_byte(vm, OP_LOAD_OFF_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_indirect_8", token.length)) { + emit_byte(vm, OP_LOAD_IND_8); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_indirect_16", 
token.length)) { + emit_byte(vm, OP_LOAD_IND_16); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_indirect_32", token.length)) { + emit_byte(vm, OP_LOAD_IND_32); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_absolute_8", token.length)) { + emit_byte(vm, OP_LOAD_ABS_8); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_absolute_16", token.length)) { + emit_byte(vm, OP_LOAD_ABS_16); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_absolute_32", token.length)) { + emit_byte(vm, OP_LOAD_ABS_32); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_absolute_8", token.length)) { + emit_byte(vm, OP_STORE_ABS_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); 
+ emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_absolute_16", token.length)) { + emit_byte(vm, OP_STORE_ABS_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_absolute_32", token.length)) { + emit_byte(vm, OP_STORE_ABS_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_indirect_8", token.length)) { + emit_byte(vm, OP_STORE_IND_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_indirect_16", token.length)) { + emit_byte(vm, OP_STORE_IND_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_indirect_32", token.length)) { + emit_byte(vm, OP_STORE_IND_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr 
= get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_offset_8", token.length)) { + emit_byte(vm, OP_STORE_OFF_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_offset_16", token.length)) { + emit_byte(vm, OP_STORE_OFF_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_offset_32", token.length)) { + emit_byte(vm, OP_STORE_OFF_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "register_move", token.length)) { + emit_byte(vm, OP_REG_MOV); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "add_int", token.length)) { + emit_byte(vm, OP_ADD_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, 
arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "sub_int", token.length)) { + emit_byte(vm, OP_SUB_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "mul_int", token.length)) { + emit_byte(vm, OP_MUL_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "div_int", token.length)) { + emit_byte(vm, OP_DIV_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "abs_int", token.length)) { + emit_byte(vm, OP_ABS_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "neg_int", token.length)) { + emit_byte(vm, 
OP_NEG_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "add_nat", token.length)) { + emit_byte(vm, OP_ADD_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "sub_nat", token.length)) { + emit_byte(vm, OP_SUB_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "mul_nat", token.length)) { + emit_byte(vm, OP_MUL_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "div_nat", token.length)) { + emit_byte(vm, OP_DIV_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + 
next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "abs_nat", token.length)) { + emit_byte(vm, OP_ABS_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "neg_nat", token.length)) { + emit_byte(vm, OP_NEG_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "add_real", token.length)) { + emit_byte(vm, OP_ADD_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON);; + } else if (strleq(token.start, "sub_real", token.length)) { + emit_byte(vm, OP_SUB_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "mul_real", token.length)) { + emit_byte(vm, OP_MUL_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + 
vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "div_real", token.length)) { + emit_byte(vm, OP_DIV_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "abs_real", token.length)) { + emit_byte(vm, OP_ABS_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "neg_real", token.length)) { + emit_byte(vm, OP_NEG_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "int_to_real", token.length)) { + emit_byte(vm, OP_INT_TO_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "nat_to_real", token.length)) { + emit_byte(vm, OP_NAT_TO_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "real_to_int", token.length)) { + emit_byte(vm, OP_REAL_TO_INT); + vm->cp++; + 
Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "real_to_nat", token.length)) { + emit_byte(vm, OP_REAL_TO_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_shift_left", token.length)) { + emit_byte(vm, OP_BIT_SHIFT_LEFT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_shift_right", token.length)) { + emit_byte(vm, OP_BIT_SHIFT_RIGHT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { + emit_byte(vm, OP_BIT_SHIFT_R_EXT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_and", 
token.length)) { + emit_byte(vm, OP_BAND); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_or", token.length)) { + emit_byte(vm, OP_BOR); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_xor", token.length)) { + emit_byte(vm, OP_BXOR); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump", token.length)) { + emit_byte(vm, OP_JMP); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_if_flag", token.length)) { + emit_byte(vm, OP_JMPF); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_eq_int", token.length)) { + emit_byte(vm, OP_JEQ_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + 
vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_neq_int", token.length)) { + emit_byte(vm, OP_JNEQ_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_gt_int", token.length)) { + emit_byte(vm, OP_JGT_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_lt_int", token.length)) { + emit_byte(vm, OP_JLT_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_le_int", token.length)) { + emit_byte(vm, OP_JLE_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_ge_int", token.length)) { + emit_byte(vm, OP_JGE_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, 
st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_eq_nat", token.length)) { + emit_byte(vm, OP_JEQ_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_neq_nat", token.length)) { + emit_byte(vm, OP_JNEQ_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_gt_nat", token.length)) { + emit_byte(vm, OP_JGT_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_lt_nat", token.length)) { + emit_byte(vm, OP_JLT_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_le_nat", token.length)) { + emit_byte(vm, OP_JLE_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = 
next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_ge_nat", token.length)) { + emit_byte(vm, OP_JGE_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_eq_real", token.length)) { + emit_byte(vm, OP_JEQ_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_neq_real", token.length)) { + emit_byte(vm, OP_JNEQ_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_ge_real", token.length)) { + emit_byte(vm, OP_JGE_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_gt_real", token.length)) { + emit_byte(vm, OP_JGT_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, 
ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_lt_real", token.length)) { + emit_byte(vm, OP_JLT_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_le_real", token.length)) { + emit_byte(vm, OP_JLE_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "string_length", token.length)) { + emit_byte(vm, OP_STRLEN); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "int_to_string", token.length)) { + emit_byte(vm, OP_INT_TO_STRING); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "nat_to_string", token.length)) { + emit_byte(vm, OP_NAT_TO_STRING); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = 
next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "real_to_string", token.length)) { + emit_byte(vm, OP_REAL_TO_STRING); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON);; + } else if (strleq(token.start, "string_eq", token.length)) { + } else if (strleq(token.start, "string_concat", token.length)) { + } else if (strleq(token.start, "string_get_char", token.length)) { + } else if (strleq(token.start, "string_find_char", token.length)) { + } else if (strleq(token.start, "string_slice", token.length)) { + } else if (strleq(token.start, "string_to_int", token.length)) { + } else if (strleq(token.start, "string_to_nat", token.length)) { + } else if (strleq(token.start, "string_to_real", token.length)) { + } else { + // some other identifier + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } + } } } while (token.type != TOKEN_EOF); } @@ -1605,10 +3027,10 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { * Emit bytecode to the VM from the source string. 
*/ void assemble(VM *vm, char *source) { - SymbolTable *st = symbol_table_init(); - build_symbol_table(vm, source, st); - vm->pc = 0; /* actuall start emitting code */ - emit_bytecode(vm, source, st); - free(st->symbols); - free(st); + SymbolTable st = {0}; + symbol_table_init(&st); + build_symbol_table(vm, source, &st); + vm->cp = 0; /* actuall start emitting code */ + emit_bytecode(vm, source, &st); + free(st.symbols); } diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 657dd40..107f2be 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -30,6 +30,7 @@ typedef struct symbol_tab_s SymbolTable; #define MAX_SYMBOL_NAME_LENGTH 64 struct symbol_s { char name[MAX_SYMBOL_NAME_LENGTH]; + u8 name_length; SymbolType type; ScopeType scope; u32 ref; // vm->mp if global, vm->pc local, register if var diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index d926380..58a2078 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -276,6 +276,16 @@ static TokenType identifierType() { } } break; + case 'b': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'y': + return check_keyword(2, 2, "te", TOKEN_TYPE_U8); + case 'o': + return check_keyword(2, 2, "ol", TOKEN_TYPE_U8); + } + } + break; case 'g': return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); case 'l': diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index 91d2b57..dbb7b73 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -150,16 +150,11 @@ typedef struct vm_s { u32 mp; /* memory pointer (last allocated value) */ u32 dc; /* device count */ i32 flag; /* flag (temporary results like SYSCALL status) */ - Frame *frames; /* function call frames */ - u32 frames_size; /* max frames */ - u32 *stack; /* main stack */ - u32 stack_size; /* max stack */ - Device *devices; /* device definitions */ - u32 device_size; /* max devices */ - u8 *code; /* code block */ - u32 code_size; /* max 
code size */ - u8 *memory; /* memory block */ - u32 memory_size; /* max memory size */ + Frame frames[FRAMES_SIZE]; /* function call frames */ + u32 stack[STACK_SIZE]; /* main stack */ + Device devices[DEVICES_SIZE]; /* device definitions */ + u8 code[CODE_SIZE]; /* code block */ + u8 memory[MEMORY_SIZE]; /* memory block */ } VM; /** diff --git a/src/vm/vm.c b/src/vm/vm.c index cca75e5..45c78af 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -27,26 +27,28 @@ #define MATH_OP(type, op) \ do { \ + u8 src1, src2, dest; \ u32 *regs = frame->locals; \ - dest = read_u8(vm, code, vm->pc); \ - vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ + dest = read_u8(vm, code, vm->pc); \ + vm->pc++; \ regs[dest] = (type)regs[src1] op(type) regs[src2]; \ return true; \ } while (0) #define BIT_OP(op) \ do { \ + u8 src1, src2, dest; \ u32 *regs = frame->locals; \ - dest = read_u8(vm, code, vm->pc); \ - vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ + dest = read_u8(vm, code, vm->pc); \ + vm->pc++; \ regs[dest] = regs[src1] op regs[src2]; \ return true; \ } while (0) @@ -83,14 +85,9 @@ u32 str_alloc(VM *vm, Frame *frame, const char *str, u32 length) { * Step to the next opcode in the vm. 
*/ bool step_vm(VM *vm) { - u16 opcode, dest, src1, src2; - u32 v, ptr; - i32 value; - Frame *frame; - /* Get current instruction & Advance to next instruction */ - opcode = vm->code[vm->pc++]; - frame = &vm->frames[vm->fp]; + u8 opcode = vm->code[vm->pc++]; + Frame *frame = &vm->frames[vm->fp]; switch (opcode) { case OP_EXIT: { @@ -177,12 +174,23 @@ bool step_vm(VM *vm) { vm->fp--; return true; } - case OP_MALLOC: { - u32 size; + case OP_LOAD_IMM: { + u32 v; + u8 dest; + v = read_u32(vm, code, vm->pc); + vm->pc += 4; dest = read_u8(vm, code, vm->pc); vm->pc++; + frame->locals[dest] = v; + return true; + } + case OP_MALLOC: { + u8 src1, dest; + u32 size; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = vm->mp; size = frame->locals[src1]; write_u32(vm, memory, vm->mp, size); @@ -192,9 +200,9 @@ bool step_vm(VM *vm) { } case OP_MEMSET_32: { u32 i, start, end; - u8 dest_reg = read_u8(vm, code, vm->pc++); u8 value_reg = read_u8(vm, code, vm->pc++); u8 count_reg = read_u8(vm, code, vm->pc++); + u8 dest_reg = read_u8(vm, code, vm->pc++); u32 dest = frame->locals[dest_reg]; u32 value = frame->locals[value_reg]; @@ -220,12 +228,251 @@ bool step_vm(VM *vm) { frame->locals[0] = dest; vm->flag = 1; return true; + } + case OP_LOAD_ABS_32: { + u32 v, ptr; + u8 dest; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v = read_u32(vm, memory, ptr); + dest = read_u8(vm, code, vm->pc); + vm->pc++; + frame->locals[dest] = v; + return true; + } + case OP_LOAD_ABS_16: { + u32 v, ptr; + u8 dest; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v = read_u16(vm, memory, ptr); + dest = read_u8(vm, code, vm->pc); + vm->pc++; + frame->locals[dest] = v; + return true; + } + case OP_LOAD_ABS_8: { + u32 v, ptr; + u8 dest; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v = read_u8(vm, memory, ptr); + dest = read_u8(vm, code, vm->pc); + vm->pc++; + frame->locals[dest] = v; + return true; + } + case OP_LOAD_IND_32: { + 
u32 v, ptr; + u8 dest, src1; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + ptr = read_u32(vm, memory, v); + frame->locals[dest] = ptr; + return true; + } + case OP_LOAD_IND_16: { + u32 v; + u8 dest, src1; + u16 v16; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + v16 = read_u16(vm, memory, v); + frame->locals[dest] = v16; + return true; + } + case OP_LOAD_IND_8: { + u32 v; + u8 dest, src1; + u8 v8; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + v8 = read_u8(vm, memory, v); + frame->locals[dest] = v8; + return true; + } + case OP_LOAD_OFF_8: { + u32 v; + u8 dest, src1; + u32 offset; + u8 v8; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + offset = read_u32(vm, code, vm->pc); + vm->pc += 4; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + v8 = read_u8(vm, memory, (v + offset)); + frame->locals[dest] = v8; + return true; + } + case OP_LOAD_OFF_16: { + u32 v; + u8 dest, src1; + u32 offset; + u16 v16; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + offset = read_u32(vm, code, vm->pc); + vm->pc += 4; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + v16 = read_u16(vm, memory, (v + offset)); + frame->locals[dest] = v16; + return true; + } + case OP_LOAD_OFF_32: { + u32 v, ptr; + u8 dest, src1; + u32 offset; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + offset = read_u32(vm, code, vm->pc); + vm->pc += 4; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + ptr = read_u32(vm, memory, (v + offset)); + frame->locals[dest] = ptr; + return true; + } + case OP_STORE_ABS_32: { + u32 v, ptr; + u8 dest, src1; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + ptr = frame->locals[dest]; + 
write_u32(vm, memory, ptr, v); + return true; + } + case OP_STORE_ABS_16: { + u32 v, ptr; + u8 dest, src1; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + ptr = frame->locals[dest]; + write_u16(vm, memory, ptr, v); + return true; + } + case OP_STORE_ABS_8: { + u32 v, ptr; + u8 dest, src1; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + v = frame->locals[src1]; + ptr = frame->locals[dest]; + write_u8(vm, memory, ptr, v); + return true; + } + case OP_STORE_IND_32: { + u32 v, ptr; + u8 dest, src1; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = frame->locals[dest]; + v = frame->locals[src1]; + write_u32(vm, memory, ptr, v); + return true; + } + case OP_STORE_IND_16: { + u32 ptr; + u8 dest, src1; + u16 v16; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = frame->locals[dest]; + v16 = frame->locals[src1]; + write_u16(vm, memory, ptr, v16); + return true; + } + case OP_STORE_IND_8: { + u32 ptr; + u8 dest, src1; + u8 v8; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = frame->locals[dest]; + v8 = frame->locals[src1]; + write_u8(vm, memory, ptr, v8); + return true; + } + case OP_STORE_OFF_8: { + u32 ptr; + u8 dest, src1; + u32 offset; + u8 v8; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + offset = read_u32(vm, code, vm->pc); + vm->pc += 4; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = frame->locals[dest]; + v8 = frame->locals[src1]; + write_u8(vm, memory, (ptr + offset), v8); + return true; + } + case OP_STORE_OFF_16: { + u32 ptr; + u8 dest, src1; + u32 offset; + u16 v16; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + offset = read_u32(vm, code, vm->pc); + vm->pc += 4; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = frame->locals[dest]; + v16 = 
frame->locals[src1]; + write_u16(vm, memory, (ptr + offset), v16); + return true; + } + case OP_STORE_OFF_32: { + u32 v, ptr; + u8 dest, src1; + u32 offset; + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + offset = read_u32(vm, code, vm->pc); + vm->pc += 4; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = frame->locals[dest]; + v = frame->locals[src1]; + write_u32(vm, memory, (ptr + offset), v); + return true; } case OP_MEMSET_16: { u32 i, start, end; - u8 dest_reg = read_u8(vm, code, vm->pc++); u8 value_reg = read_u8(vm, code, vm->pc++); u8 count_reg = read_u8(vm, code, vm->pc++); + u8 dest_reg = read_u8(vm, code, vm->pc++); u32 dest = frame->locals[dest_reg]; u16 value = (u16)(frame->locals[value_reg]); @@ -254,9 +501,9 @@ bool step_vm(VM *vm) { } case OP_MEMSET_8: { u32 i, start, end; - u8 dest_reg = read_u8(vm, code, vm->pc++); u8 value_reg = read_u8(vm, code, vm->pc++); u8 count_reg = read_u8(vm, code, vm->pc++); + u8 dest_reg = read_u8(vm, code, vm->pc++); u32 dest = frame->locals[dest_reg]; u8 value = (u8)(frame->locals[value_reg]); @@ -282,223 +529,13 @@ bool step_vm(VM *vm) { frame->locals[0] = dest; vm->flag = 1; return true; - } - case OP_LOAD_IMM: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - v = read_u32(vm, code, vm->pc); - vm->pc += 4; - frame->locals[dest] = v; - return true; - } - case OP_LOAD_ABS_32: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - ptr = read_u32(vm, code, vm->pc); - vm->pc += 4; - v = read_u32(vm, memory, ptr); - frame->locals[dest] = v; - return true; - } - case OP_LOAD_ABS_16: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - ptr = read_u32(vm, code, vm->pc); - vm->pc += 4; - v = read_u16(vm, memory, ptr); - frame->locals[dest] = v; - return true; - } - case OP_LOAD_ABS_8: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - ptr = read_u32(vm, code, vm->pc); - vm->pc += 4; - v = read_u8(vm, memory, ptr); - frame->locals[dest] = v; - return true; - } - case OP_LOAD_IND_32: { - dest = read_u8(vm, code, vm->pc); 
- vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - v = frame->locals[src1]; - ptr = read_u32(vm, memory, v); - frame->locals[dest] = ptr; - return true; - } - case OP_LOAD_IND_16: { - u16 v16; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - v = frame->locals[src1]; - v16 = read_u16(vm, memory, v); - frame->locals[dest] = v16; - return true; - } - case OP_LOAD_IND_8: { - u8 v8; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - v = frame->locals[src1]; - v8 = read_u8(vm, memory, v); - frame->locals[dest] = v8; - return true; - } - case OP_LOAD_OFF_8: { - u32 offset; - u8 v8; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - offset = read_u32(vm, code, vm->pc); - vm->pc += 4; - v = frame->locals[src1]; - v8 = read_u8(vm, memory, (v + offset)); - frame->locals[dest] = v8; - return true; - } - case OP_LOAD_OFF_16: { - u32 offset; - u16 v16; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - offset = read_u32(vm, code, vm->pc); - vm->pc += 4; - v = frame->locals[src1]; - v16 = read_u16(vm, memory, (v + offset)); - frame->locals[dest] = v16; - return true; - } - case OP_LOAD_OFF_32: { - u32 offset; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - offset = read_u32(vm, code, vm->pc); - vm->pc += 4; - v = frame->locals[src1]; - ptr = read_u32(vm, memory, (v + offset)); - frame->locals[dest] = ptr; - return true; - } - case OP_STORE_ABS_32: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - v = frame->locals[src1]; - ptr = frame->locals[dest]; - write_u32(vm, memory, ptr, v); - return true; - } - case OP_STORE_ABS_16: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - v = frame->locals[src1]; - ptr = frame->locals[dest]; - 
write_u16(vm, memory, ptr, v); - return true; - } - case OP_STORE_ABS_8: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - v = frame->locals[src1]; - ptr = frame->locals[dest]; - write_u8(vm, memory, ptr, v); - return true; - } - case OP_STORE_IND_32: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - ptr = frame->locals[dest]; - v = frame->locals[src1]; - write_u32(vm, memory, ptr, v); - return true; - } - case OP_STORE_IND_16: { - u16 v16; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - ptr = frame->locals[dest]; - v16 = frame->locals[src1]; - write_u16(vm, memory, ptr, v16); - return true; - } - case OP_STORE_IND_8: { - u8 v8; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - ptr = frame->locals[dest]; - v8 = frame->locals[src1]; - write_u8(vm, memory, ptr, v8); - return true; - } - case OP_STORE_OFF_8: { - u32 offset; - u8 v8; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - offset = read_u32(vm, code, vm->pc); - vm->pc += 4; - ptr = frame->locals[dest]; - v8 = frame->locals[src1]; - write_u8(vm, memory, (ptr + offset), v8); - return true; - } - case OP_STORE_OFF_16: { - u32 offset; - u16 v16; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - offset = read_u32(vm, code, vm->pc); - vm->pc += 4; - ptr = frame->locals[dest]; - v16 = frame->locals[src1]; - write_u16(vm, memory, (ptr + offset), v16); - return true; - } - case OP_STORE_OFF_32: { - u32 offset; - dest = read_u8(vm, code, vm->pc); - vm->pc++; - src1 = read_u8(vm, code, vm->pc); - vm->pc++; - offset = read_u32(vm, code, vm->pc); - vm->pc += 4; - ptr = frame->locals[dest]; - v = frame->locals[src1]; - write_u32(vm, memory, (ptr + offset), v); - return true; - } + } case OP_REG_MOV: { - dest = read_u8(vm, code, 
vm->pc); - vm->pc++; + u8 dest, src1; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = frame->locals[src1]; if (is_heap_value(vm, src1)) { @@ -532,12 +569,12 @@ bool step_vm(VM *vm) { Device *dev; u32 path_ptr, mode, device_ptr; u8 path_reg, mode_reg, dest_reg; - dest_reg = read_u8(vm, code, vm->pc); - vm->pc++; path_reg = read_u8(vm, code, vm->pc); vm->pc++; mode_reg = read_u8(vm, code, vm->pc); vm->pc++; + dest_reg = read_u8(vm, code, vm->pc); + vm->pc++; path_ptr = frame->locals[path_reg]; mode = frame->locals[mode_reg]; @@ -715,10 +752,12 @@ bool step_vm(VM *vm) { case OP_DIV_INT: MATH_OP(i32, /); case OP_ABS_INT: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1; + i32 value; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; value = frame->locals[src1]; if (value < 0) { @@ -729,10 +768,12 @@ bool step_vm(VM *vm) { return true; } case OP_NEG_INT: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1; + i32 value; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; value = frame->locals[src1]; frame->locals[dest] = -value; @@ -747,53 +788,59 @@ bool step_vm(VM *vm) { case OP_DIV_NAT: MATH_OP(u32, /); case OP_MUL_REAL: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1, src2; src1 = read_u8(vm, code, vm->pc); vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = fixed_mul(frame->locals[src1], frame->locals[src2]); return true; } case OP_DIV_REAL: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1, src2; src1 = read_u8(vm, code, vm->pc); vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = fixed_div(frame->locals[src1], frame->locals[src2]); return true; } case OP_ADD_REAL: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + 
u8 dest, src1, src2; src1 = read_u8(vm, code, vm->pc); vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = fixed_add(frame->locals[src1], frame->locals[src2]); return true; } case OP_SUB_REAL: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1, src2; src1 = read_u8(vm, code, vm->pc); vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = fixed_sub(frame->locals[src1], frame->locals[src2]); return true; } case OP_REAL_TO_INT: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1; + i32 value; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; value = frame->locals[src1]; frame->locals[dest] = fixed_to_int(value); @@ -801,27 +848,31 @@ bool step_vm(VM *vm) { return true; } case OP_INT_TO_REAL: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = int_to_fixed(frame->locals[src1]); return true; } case OP_REAL_TO_NAT: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1; + u32 value; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; value = frame->locals[src1]; frame->locals[dest] = fixed_to_int(value); return true; } case OP_NAT_TO_REAL: { - dest = read_u8(vm, code, vm->pc); - vm->pc++; + u8 dest, src1; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; frame->locals[dest] = int_to_fixed(frame->locals[src1]); return true; } @@ -880,11 +931,13 @@ bool step_vm(VM *vm) { COMPARE_AND_JUMP(i32, <=); } case OP_INT_TO_STRING: { + u32 ptr; + u8 dest, src1; char buffer[32]; - dest = read_u8(vm, code, vm->pc); - vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; int_to_string(AS_INT(frame->locals[src1]), 
buffer); ptr = str_alloc(vm, frame, buffer, strlength(buffer)); frame->locals[dest] = ptr; @@ -892,11 +945,13 @@ bool step_vm(VM *vm) { return true; } case OP_NAT_TO_STRING: { + u32 ptr; + u8 dest, src1; char buffer[32]; - dest = read_u8(vm, code, vm->pc); - vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; nat_to_string(frame->locals[src1], buffer); ptr = str_alloc(vm, frame, buffer, strlength(buffer)); frame->locals[dest] = ptr; @@ -904,11 +959,13 @@ bool step_vm(VM *vm) { return true; } case OP_REAL_TO_STRING: { + u32 ptr; + u8 dest, src1; char buffer[32]; - dest = read_u8(vm, code, vm->pc); - vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; fixed_to_string(AS_INT(frame->locals[src1]), buffer); ptr = str_alloc(vm, frame, buffer, strlength(buffer)); /* copy buffer to dest */ @@ -917,11 +974,12 @@ bool step_vm(VM *vm) { return true; } case OP_STRLEN: { + u8 dest, src1; u32 ptr, length; - dest = read_u8(vm, code, vm->pc); - vm->pc++; src1 = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; ptr = frame->locals[src1]; length = read_u32(vm, memory, ptr); diff --git a/test/add.ul.ir b/test/add.ul.ir index 64e269d..51816fa 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -4,14 +4,11 @@ global int x = 1; global int y = 1; function main () - int ans $2; - str ans_string $3; - load_absolute_32 x -> $0; load_absolute_32 y -> $1; - call add $0 $1 -> ans; - int_to_string ans -> ans_string; - call pln ans_string -> void; + call add $0 $1 -> $2; + int_to_string $2 -> $3; + call pln $3; exit 0; function add (int a $0, int b $1) @@ -19,17 +16,20 @@ function add (int a $0, int b $1) add_int a b -> result; return result; -function pln (str message $0) - str term $1; +function pln (str message $0) + plex term $1; int msg_length $2; str nl $3; int nl_length $4; int mode $5; + str term_ns $6; load_immediate 0 -> mode; - syscall OPEN 
terminal_namespace mode -> term; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode -> term; string_length message -> msg_length; syscall WRITE term message msg_length; - string_length new_line -> nl_length; + load_address new_line -> nl; + string_length nl -> nl_length; syscall WRITE term nl nl_length; return; diff --git a/test/fib.ul.ir b/test/fib.ul.ir index 3ceff47..d283508 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -30,20 +30,21 @@ function fib (int n $0) else base_case; return n; -function pln (str message $0) - str ts $1; - int mode $5; +function pln (str message $0) + plex term $1; int msg_length $2; str nl $3; int nl_length $4; + int mode $5; + str term_ns $6; - load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode -> term; string_length message -> msg_length; - syscall WRITE ts message msg_length; - load_immediate new_line -> nl; + syscall WRITE term message msg_length; + load_address new_line -> nl; string_length nl -> nl_length; - syscall WRITE ts nl nl_length; + syscall WRITE term nl nl_length; return; \ No newline at end of file diff --git a/test/hello.ul.ir b/test/hello.ul.ir index c8d2475..4f0a609 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -1,27 +1,28 @@ -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global str message = "nuqneH 'u'?" 
+global str terminal_namespace = "/dev/term/0"; +global str new_line = "\n"; +global str message = "nuqneH 'u'?"; function main () str hello $0; - load_immediate message -> hello; - call pln hello -> void; + load_address message -> hello; + call pln hello; exit 0; function pln (str message $0) - str ts $1; - int mode $5; + plex term $1; int msg_length $2; str nl $3; int nl_length $4; + int mode $5; + str term_ns $6; - load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode -> term; string_length message -> msg_length; - syscall WRITE ts message msg_length; - load_immediate new_line -> nl; + syscall WRITE term message msg_length; + load_address new_line -> nl; string_length nl -> nl_length; - syscall WRITE ts nl nl_length; - return; + syscall WRITE term nl nl_length; + return; \ No newline at end of file diff --git a/test/loop.ul.ir b/test/loop.ul.ir index a9a03b9..06db690 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -18,41 +18,41 @@ function main () add_int i $3 -> i; jump_ge_int loop_body i $2; - load_immediate terminal_namespace -> term; + load_address terminal_namespace -> term; load_immediate 0 -> mode; syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); nat b $1; real_to_nat a -> b; - load_immediate prompt -> $7; + load_address prompt -> $7; string_length $7 -> $8; syscall WRITE term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ term user_string; $8 // read in max 32 byte string + syscall READ term user_string $8; // read in max 32 byte string - call pln user_string -> void; + call pln user_string; nat_to_string b -> $4; - call pln $4 -> void; + call pln $4; real_to_string a -> $3; - call pln $3 -> void; + call pln $3; exit 0; -function pln (str message $0); - str ts $1; - int mode $5; +function pln (str message $0) + plex term $1; int msg_length $2; str 
nl $3; int nl_length $4; + int mode $5; - load_immediate terminal_namespace -> ts; + load_address terminal_namespace -> term; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; + syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); string_length message -> msg_length; - syscall WRITE ts message msg_length ; - load_immediate new_line -> nl; + syscall WRITE term message msg_length; + load_address new_line -> nl; string_length nl -> nl_length; - syscall WRITE ts nl nl_length; - return; \ No newline at end of file + syscall WRITE term nl nl_length; + return; diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 84a883d..87e4109 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -23,18 +23,19 @@ function main () exit 0; function pln (str message $0) - str ts $1; - int mode $5; - int msg_length $2; - str nl $3; + plex term $1; + int msg_length $2; + str nl $3; int nl_length $4; + int mode $5; + str term_ns $6; - load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode -> term; string_length message -> msg_length; - syscall WRITE ts message msg_length; - load_immediate new_line -> nl; + syscall WRITE term message msg_length; + load_address new_line -> nl; string_length nl -> nl_length; - syscall WRITE ts nl nl_length; - return; + syscall WRITE term nl nl_length; + return; \ No newline at end of file diff --git a/test/simple.ul b/test/simple.ul index 7f7b697..459abcd 100644 --- a/test/simple.ul +++ b/test/simple.ul @@ -2,8 +2,6 @@ * Constants */ const str nl = "\n"; -const real x = 1.0; -const real y = 1.0; plex Terminal { nat handle; @@ -13,7 +11,8 @@ plex Terminal { * Main function */ function main() { - pln((x + y).str); + pln((1.0 + 1.0) as str); + exit(0); } /** diff --git a/test/simple.ul.ir b/test/simple.ul.ir index 803bbde..7c90ce9 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -1,28 +1,25 @@ global str 
terminal_namespace = "/dev/term/0"; -global real x = 1.0; -global real y = 1.0; +global str new_line = "\n"; function main () - real a $0; - load_absolute_32 x -> a; - real b $1; - load_absolute_32 y -> b; - real result $2; - add_real a b -> result; - str result_str $3; - real_to_string result -> result_str; - call pln result_str -> void; + load_immediate 1.0 -> $0; + load_immediate 1.0 -> $1; + add_real $0 $1 -> $0; + real_to_string $0 -> $0; + call pln $0; exit 0; function pln (str message $0) - str term $1; + plex term $1; int msg_length $2; str nl $3; int nl_length $4; int mode $5; + str term_ns $6; load_immediate 0 -> mode; - syscall OPEN terminal_namespace mode -> term; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode -> term; string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; diff --git a/test/window.ul.ir b/test/window.ul.ir index d05a596..f8f43fd 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -72,18 +72,19 @@ function main () exit 0; function pln (str message $0) - str ts $1; - int mode $5; - int msg_length $2; - str nl $3; + plex term $1; + int msg_length $2; + str nl $3; int nl_length $4; + int mode $5; + str term_ns $6; - load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode -> term; string_length message -> msg_length; - syscall WRITE ts message msg_length ; - load_immediate new_line -> nl; + syscall WRITE term message msg_length; + load_address new_line -> nl; string_length nl -> nl_length; - syscall WRITE ts nl nl_length; - return; + syscall WRITE term nl nl_length; + return; \ No newline at end of file From 9218051b8707c34f863f0703c93ac510b6cc6eea Mon Sep 17 00:00:00 2001 From: zongor Date: Mon, 1 Dec 2025 23:20:36 -0800 Subject: [PATCH 20/27] Add better debugging for assembler. Simplify symbol table creation (somewhat). 
--- src/tools/assembler/assembler.c | 1218 +++++++++++-------------------- 1 file changed, 407 insertions(+), 811 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 3fdc9a9..244cdcd 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -8,10 +8,151 @@ #include #include -void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp] = byte; } +const char *opcode_to_string(Opcode op) { + static const char *names[] = { + [OP_EXIT] = "exit", + [OP_JMP] = "jump", + [OP_JMPF] = "jump-if-flag", + [OP_CALL] = "call", + [OP_RETURN] = "return", + + /* Immediate loads (only 32-bit variant needed) */ + [OP_LOAD_IMM] = "load-immediate", + + /* Register-indirect loads */ + [OP_LOAD_IND_8] = "load-indirect-8", + [OP_LOAD_IND_16] = "load-indirect-16", + [OP_LOAD_IND_32] = "load-indirect-32", + + /* Absolute address loads */ + [OP_LOAD_ABS_8] = "load-absolute-8", + [OP_LOAD_ABS_16] = "load-absolute-16", + [OP_LOAD_ABS_32] = "load-absolute-32", + + /* Base+offset loads */ + [OP_LOAD_OFF_8] = "load-offset-8", + [OP_LOAD_OFF_16] = "load-offset-16", + [OP_LOAD_OFF_32] = "load-offset-32", + + /* Absolute address stores */ + [OP_STORE_ABS_8] = "store-absolute-8", + [OP_STORE_ABS_16] = "store-absolute-16", + [OP_STORE_ABS_32] = "store-absolute-32", + + /* Register-indirect stores */ + [OP_STORE_IND_8] = "store-indirect-8", + [OP_STORE_IND_16] = "store-indirect-16", + [OP_STORE_IND_32] = "store-indirect-32", + + /* Base+offset stores */ + [OP_STORE_OFF_8] = "store-offset-8", + [OP_STORE_OFF_16] = "store-offset-16", + [OP_STORE_OFF_32] = "store-offset-32", + + /* Memory operations */ + [OP_MALLOC] = "malloc", + [OP_MEMSET_8] = "memset-8", + [OP_MEMSET_16] = "memset-16", + [OP_MEMSET_32] = "memset-32", + + /* Register operations */ + [OP_REG_MOV] = "register-move", + [OP_SYSCALL] = "syscall", + + /* Bit operations */ + [OP_BIT_SHIFT_LEFT] = "bit-shift-left", + [OP_BIT_SHIFT_RIGHT] = "bit-shift-right", + [OP_BIT_SHIFT_R_EXT] = 
"bit-shift-re", + [OP_BAND] = "bit-and", + [OP_BOR] = "bit-or", + [OP_BXOR] = "bit-xor", + + /* Integer arithmetic */ + [OP_ADD_INT] = "add-int", + [OP_SUB_INT] = "sub-int", + [OP_MUL_INT] = "mul-int", + [OP_DIV_INT] = "div-int", + + /* Natural number arithmetic */ + [OP_ADD_NAT] = "add-nat", + [OP_SUB_NAT] = "sub-nat", + [OP_MUL_NAT] = "mul-nat", + [OP_DIV_NAT] = "div-nat", + + /* Floating point operations */ + [OP_ADD_REAL] = "add-real", + [OP_SUB_REAL] = "sub-real", + [OP_MUL_REAL] = "mul-real", + [OP_DIV_REAL] = "div-real", + + /* Type conversions */ + [OP_INT_TO_REAL] = "int-to-real", + [OP_NAT_TO_REAL] = "nat-to-real", + [OP_REAL_TO_INT] = "real-to-int", + [OP_REAL_TO_NAT] = "real-to-nat", + + /* Integer comparisons */ + [OP_JEQ_INT] = "jump-eq-int", + [OP_JNEQ_INT] = "jump-neq-int", + [OP_JGT_INT] = "jump-gt-int", + [OP_JLT_INT] = "jump-lt-int", + [OP_JLE_INT] = "jump-le-int", + [OP_JGE_INT] = "jump-ge-int", + + /* Natural number comparisons */ + [OP_JEQ_NAT] = "jump-eq-nat", + [OP_JNEQ_NAT] = "jump-neq-nat", + [OP_JGT_NAT] = "jump-gt-nat", + [OP_JLT_NAT] = "jump-lt-nat", + [OP_JLE_NAT] = "jump-le-nat", + [OP_JGE_NAT] = "jump-ge-nat", + + /* Floating point comparisons */ + [OP_JEQ_REAL] = "jump-eq-real", + [OP_JNEQ_REAL] = "jump-neq-real", + [OP_JGE_REAL] = "jump-ge-real", + [OP_JGT_REAL] = "jump-gt-real", + [OP_JLT_REAL] = "jump-lt-real", + [OP_JLE_REAL] = "jump-le-real", + + /* String operations */ + [OP_STRLEN] = "string-length", + [OP_STREQ] = "string-eq", + [OP_STRCAT] = "string-concat", + [OP_STR_GET_CHAR] = "string-get-char", + [OP_STR_FIND_CHAR] = "string-find-char", + [OP_STR_SLICE] = "string-slice", + + /* String conversions */ + [OP_INT_TO_STRING] = "int-to-string", + [OP_NAT_TO_STRING] = "nat-to-string", + [OP_REAL_TO_STRING] = "real-to-string", + [OP_STRING_TO_INT] = "string-to-int", + [OP_STRING_TO_NAT] = "string-to-nat", + [OP_STRING_TO_REAL] = "string-to-real" + }; -void emit_u32(VM *vm, u32 value) { - write_u32(vm, code, vm->cp, value); + if 
(op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { + return ""; + } + + const char *name = names[op]; + return name ? name : ""; +} + +void emit_op(VM *vm, u8 byte) { + printf("vm->code[%d] = %s\n", vm->cp, opcode_to_string(byte)); + vm->code[vm->cp] = byte; +} + +void emit_byte(VM *vm, u8 byte) { + printf("vm->code[%d] = %d\n", vm->cp, byte); + vm->code[vm->cp] = byte; +} + +void emit_u32(VM *vm, u32 value) { + printf("vm->code[%d..%d] = %d\n", vm->cp, vm->cp+3, value); + write_u32(vm, code, vm->cp, value); } void symbol_table_init(SymbolTable *table) { @@ -26,6 +167,15 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); } + if (s.scope == VAR) { + // ignore for now + // printf("$%d = %s\n", s.ref, s.name); + } else if (s.scope == GLOBAL) { + printf("memory[%d] = %s\n", s.ref, s.name); + } else { + printf("code[%d] = %s\n", s.ref, s.name); + } + table->symbols[table->count] = s; u32 index = table->count; table->count++; @@ -66,13 +216,15 @@ u32 get_ptr(Token token, SymbolTable *st) { char *endptr; u32 out = (u32)strtoul(token.start, &endptr, 10); if (endptr == token.start || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: '%.*s'\n", token.length, token.start); + fprintf(stderr, "Invalid decimal literal: '%.*s'\n", token.length, + token.start); exit(1); } return out; } - fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, token.start); + fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, + token.start); exit(1); } @@ -86,7 +238,8 @@ u32 get_reg(Token token, SymbolTable *st) { return atoi(token.start); } - fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, token.start); + fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, + token.start); exit(1); } @@ -185,6 +338,7 @@ bool define_global(VM *vm, SymbolTable *st) { memcpy(s.name, name.start, name.length); s.name_length = 
name.length; + s.name[name.length] = '\0'; u32 addr = vm->mp; s.ref = addr; @@ -491,6 +645,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { continue; } + get_reg(next, st); vm->cp++; next_token_is(TOKEN_SEMICOLON); continue; @@ -501,35 +656,41 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { if (strleq(token.start, "exit", token.length)) { vm->cp++; - next_id_or_ptr(); + next_token(); vm->cp += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "call", token.length)) { - vm->cp++; + vm->cp++; - next_token_is(TOKEN_IDENTIFIER); + next_token_is(TOKEN_IDENTIFIER); + vm->cp += 4; - vm->cp += 4; + bool has_return = false; + vm->cp++; - Token next = next_token(); - while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { - vm->cp++; + Token next = next_token(); + while (next.type != TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + get_reg(next, st); + vm->cp++; + } else { + has_return = true; + } + next = next_token(); } - next = next_token(); - } - - vm->cp++; /* number of args (implied) */ + if (!has_return) { + vm->cp+=2; + continue; + } } else if (strleq(token.start, "syscall", token.length)) { vm->cp++; - next_token_is(TOKEN_IDENTIFIER); - + Token next = next_token(); vm->cp += 4; - Token next = next_token(); + next = next_token(); while (next.type != TOKEN_SEMICOLON) { if (next.type != TOKEN_ARROW_RIGHT) { vm->cp++; @@ -538,1006 +699,433 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { } } else if (strleq(token.start, "load_immediate", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_token(); // literal vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_address", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - 
next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "malloc", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - next_id_or_reg(); vm->cp++; - next_id_or_reg(); vm->cp++; - next_token_is(TOKEN_SEMICOLON); - } else if (strleq(token.start, "memset_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - next_id_or_reg(); vm->cp++; - next_id_or_reg(); vm->cp++; - next_token_is(TOKEN_SEMICOLON); - } else if (strleq(token.start, "memset_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - next_id_or_reg(); vm->cp++; - next_id_or_reg(); vm->cp++; - next_token_is(TOKEN_SEMICOLON); - } else if (strleq(token.start, "load_offset_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - 
next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - 
next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_ptr(); vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_8", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); /* src1 */ vm->cp++; - - next_token_is(TOKEN_LITERAL_NAT); /* offset */ vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); /* dest */ vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_16", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = 
next_token(); vm->cp++; - - next_id_or_reg(); /* src1 */ vm->cp++; - - next_token_is(TOKEN_LITERAL_NAT); /* offset */ vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); /* dest */ vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_32", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); /* src1 */ vm->cp++; - - next_token_is(TOKEN_LITERAL_NAT); /* offset */ vm->cp += 4; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); /* dest */ vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "register_move", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - 
next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); 
vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - 
next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_left", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_right", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - 
next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_and", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_or", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_xor", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); + vm->cp+=4; } else if (strleq(token.start, "jump_if_flag", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_token_is(TOKEN_SEMICOLON); + vm->cp+=4; } else if (strleq(token.start, "jump_eq_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; 
vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_int", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; 
vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_nat", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; 
vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_real", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_ptr(); - vm->cp += 4; - - next_id_or_reg(); + vm->cp+=4; vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_length", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_string", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_string", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_string", token.length)) { + while (token.type != TOKEN_SEMICOLON) token = next_token(); vm->cp++; - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_ARROW_RIGHT); - - next_id_or_reg(); vm->cp++; - - next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_eq", token.length)) { } else if (strleq(token.start, "string_concat", token.length)) { } else if (strleq(token.start, "string_get_char", token.length)) { @@ -1571,9 +1159,9 @@ void emit_bytecode(VM *vm, char 
*source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - printf("[Generate Bytecode cp=%d mp=%d ] Line %d [%s]: %.*s\n", vm->cp, - vm->mp, token.line, token_type_to_string(token.type), token.length, - token.start); + //printf("[Generate Bytecode cp=%d mp=%d ] Line %d [%s]: %.*s\n", vm->cp, + // vm->mp, token.line, token_type_to_string(token.type), token.length, + // token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed @@ -1615,7 +1203,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { } if (token.type == TOKEN_KEYWORD_RETURN) { - emit_byte(vm, OP_RETURN); + emit_op(vm, OP_RETURN); vm->cp++; Token next = next_token(); @@ -1637,7 +1225,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { // check to see if it is an opcode first if (strleq(token.start, "exit", token.length)) { - emit_byte(vm, OP_EXIT); + emit_op(vm, OP_EXIT); vm->cp++; Token next = next_token(); @@ -1648,7 +1236,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "call", token.length)) { - emit_byte(vm, OP_CALL); + emit_op(vm, OP_CALL); vm->cp++; Token id = next_token_is(TOKEN_IDENTIFIER); @@ -1659,6 +1247,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { bool has_return = false; u8 arg_count = 0; u32 arg_pos = vm->cp++; + printf("vm->code[%d] = ?\n", arg_pos); Token next = next_token(); while (next.type != TOKEN_SEMICOLON) { @@ -1669,6 +1258,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { arg_count++; } else { has_return = true; + arg_count--; // is a return not an arg } next = next_token(); } @@ -1676,14 +1266,16 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { /* patch number of args */ vm->code[arg_pos] = arg_count; - if (!has_return) { - vm->cp++; + printf("^vm->code[%d] = %d\n", arg_pos, arg_count); + + if (!has_return) { + vm->cp+=2; emit_byte(vm, 255); continue; } } else if (strleq(token.start, 
"syscall", token.length)) { - emit_byte(vm, OP_SYSCALL); + emit_op(vm, OP_SYSCALL); vm->cp++; Token next = next_token(); @@ -1720,43 +1312,45 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { } else if (strleq(token.start, "load_immediate", token.length)) { - emit_byte(vm, OP_LOAD_IMM); + emit_op(vm, OP_LOAD_IMM); vm->cp++; Token value = next_token(); switch (value.type) { - case TOKEN_KEYWORD_TRUE: { - emit_u32(vm, 1); - break; - } - case TOKEN_KEYWORD_FALSE: { - emit_u32(vm, 0); - break; - } - case TOKEN_LITERAL_INT: { - i32 out = atoi(value.start); - emit_u32(vm, out); - break; - } - case TOKEN_LITERAL_NAT: { - char *endptr; - u32 out = (u32)strtoul(value.start, &endptr, 10); - if (endptr == value.start || *endptr != '\0') { - fprintf(stderr, "Invalid 'real' number: '%.*s'\n", token.length, token.start); - exit(1); - } - emit_u32(vm, out); - break; - } - case TOKEN_LITERAL_REAL: { - fixed_t out = float_to_fixed(atof(value.start)); - emit_u32(vm, out); - break; - } - default:{ - fprintf(stderr, "Unknown immediate: '%.*s'\n", token.length, token.start); + case TOKEN_KEYWORD_TRUE: { + emit_u32(vm, 1); + break; + } + case TOKEN_KEYWORD_FALSE: { + emit_u32(vm, 0); + break; + } + case TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + emit_u32(vm, out); + break; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid 'real' number: '%.*s'\n", token.length, + token.start); exit(1); } + emit_u32(vm, out); + break; + } + case TOKEN_LITERAL_REAL: { + fixed_t out = float_to_fixed(atof(value.start)); + emit_u32(vm, out); + break; + } + default: { + fprintf(stderr, "Unknown immediate: '%.*s'\n", token.length, + token.start); + exit(1); + } } vm->cp += 4; @@ -1770,7 +1364,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_address", token.length)) { - 
emit_byte(vm, OP_LOAD_IMM); + emit_op(vm, OP_LOAD_IMM); vm->cp++; Token id = next_token(); @@ -1787,7 +1381,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "malloc", token.length)) { - emit_byte(vm, OP_MALLOC); + emit_op(vm, OP_MALLOC); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -1800,7 +1394,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_8", token.length)) { - emit_byte(vm, OP_MEMSET_8); + emit_op(vm, OP_MEMSET_8); vm->cp++; Token reg = next_token(); @@ -1820,7 +1414,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_16", token.length)) { - emit_byte(vm, OP_MEMSET_16); + emit_op(vm, OP_MEMSET_16); vm->cp++; Token reg = next_token(); @@ -1840,7 +1434,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_32", token.length)) { - emit_byte(vm, OP_MEMSET_32); + emit_op(vm, OP_MEMSET_32); vm->cp++; Token reg = next_token(); @@ -1860,7 +1454,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_8", token.length)) { - emit_byte(vm, OP_LOAD_OFF_8); + emit_op(vm, OP_LOAD_OFF_8); vm->cp++; Token reg = next_token(); @@ -1882,7 +1476,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_16", token.length)) { - emit_byte(vm, OP_LOAD_OFF_16); + emit_op(vm, OP_LOAD_OFF_16); vm->cp++; Token reg = next_token(); @@ -1904,7 +1498,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_32", token.length)) { - emit_byte(vm, OP_LOAD_OFF_32); + emit_op(vm, 
OP_LOAD_OFF_32); vm->cp++; Token reg = next_token(); @@ -1926,7 +1520,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_8", token.length)) { - emit_byte(vm, OP_LOAD_IND_8); + emit_op(vm, OP_LOAD_IND_8); vm->cp++; Token id = next_token(); @@ -1943,7 +1537,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_16", token.length)) { - emit_byte(vm, OP_LOAD_IND_16); + emit_op(vm, OP_LOAD_IND_16); vm->cp++; Token id = next_token(); @@ -1960,7 +1554,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_32", token.length)) { - emit_byte(vm, OP_LOAD_IND_32); + emit_op(vm, OP_LOAD_IND_32); vm->cp++; Token id = next_token(); @@ -1977,7 +1571,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_8", token.length)) { - emit_byte(vm, OP_LOAD_ABS_8); + emit_op(vm, OP_LOAD_ABS_8); vm->cp++; Token id = next_token(); @@ -1994,7 +1588,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_16", token.length)) { - emit_byte(vm, OP_LOAD_ABS_16); + emit_op(vm, OP_LOAD_ABS_16); vm->cp++; Token id = next_token(); @@ -2011,7 +1605,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_32", token.length)) { - emit_byte(vm, OP_LOAD_ABS_32); + emit_op(vm, OP_LOAD_ABS_32); vm->cp++; Token id = next_token(); @@ -2028,7 +1622,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_8", token.length)) { - emit_byte(vm, OP_STORE_ABS_8); + emit_op(vm, OP_STORE_ABS_8); vm->cp++; Token reg 
= next_token(); @@ -2045,7 +1639,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_16", token.length)) { - emit_byte(vm, OP_STORE_ABS_16); + emit_op(vm, OP_STORE_ABS_16); vm->cp++; Token reg = next_token(); @@ -2062,7 +1656,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_32", token.length)) { - emit_byte(vm, OP_STORE_ABS_32); + emit_op(vm, OP_STORE_ABS_32); vm->cp++; Token reg = next_token(); @@ -2079,7 +1673,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_8", token.length)) { - emit_byte(vm, OP_STORE_IND_8); + emit_op(vm, OP_STORE_IND_8); vm->cp++; Token reg = next_token(); @@ -2096,7 +1690,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_16", token.length)) { - emit_byte(vm, OP_STORE_IND_16); + emit_op(vm, OP_STORE_IND_16); vm->cp++; Token reg = next_token(); @@ -2113,7 +1707,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_32", token.length)) { - emit_byte(vm, OP_STORE_IND_32); + emit_op(vm, OP_STORE_IND_32); vm->cp++; Token reg = next_token(); @@ -2130,7 +1724,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_8", token.length)) { - emit_byte(vm, OP_STORE_OFF_8); + emit_op(vm, OP_STORE_OFF_8); vm->cp++; Token reg = next_token(); @@ -2152,7 +1746,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_16", token.length)) { - emit_byte(vm, OP_STORE_OFF_16); + emit_op(vm, OP_STORE_OFF_16); vm->cp++; Token reg = 
next_token(); @@ -2174,7 +1768,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_32", token.length)) { - emit_byte(vm, OP_STORE_OFF_32); + emit_op(vm, OP_STORE_OFF_32); vm->cp++; Token reg = next_token(); @@ -2196,7 +1790,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "register_move", token.length)) { - emit_byte(vm, OP_REG_MOV); + emit_op(vm, OP_REG_MOV); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2209,7 +1803,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_int", token.length)) { - emit_byte(vm, OP_ADD_INT); + emit_op(vm, OP_ADD_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2226,7 +1820,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_int", token.length)) { - emit_byte(vm, OP_SUB_INT); + emit_op(vm, OP_SUB_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2243,7 +1837,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_int", token.length)) { - emit_byte(vm, OP_MUL_INT); + emit_op(vm, OP_MUL_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2260,7 +1854,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_int", token.length)) { - emit_byte(vm, OP_DIV_INT); + emit_op(vm, OP_DIV_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2277,7 +1871,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_int", token.length)) { - 
emit_byte(vm, OP_ABS_INT); + emit_op(vm, OP_ABS_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2290,7 +1884,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_int", token.length)) { - emit_byte(vm, OP_NEG_INT); + emit_op(vm, OP_NEG_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2303,7 +1897,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_nat", token.length)) { - emit_byte(vm, OP_ADD_NAT); + emit_op(vm, OP_ADD_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2320,7 +1914,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_nat", token.length)) { - emit_byte(vm, OP_SUB_NAT); + emit_op(vm, OP_SUB_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2337,7 +1931,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_nat", token.length)) { - emit_byte(vm, OP_MUL_NAT); + emit_op(vm, OP_MUL_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2354,7 +1948,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_nat", token.length)) { - emit_byte(vm, OP_DIV_NAT); + emit_op(vm, OP_DIV_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2371,7 +1965,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_nat", token.length)) { - emit_byte(vm, OP_ABS_NAT); + emit_op(vm, OP_ABS_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2384,7 +1978,7 @@ void emit_bytecode(VM *vm, char *source, 
SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_nat", token.length)) { - emit_byte(vm, OP_NEG_NAT); + emit_op(vm, OP_NEG_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2397,7 +1991,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_real", token.length)) { - emit_byte(vm, OP_ADD_REAL); + emit_op(vm, OP_ADD_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2412,9 +2006,10 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { arg = get_reg(next, st); emit_byte(vm, arg); vm->cp++; - next_token_is(TOKEN_SEMICOLON);; + next_token_is(TOKEN_SEMICOLON); + ; } else if (strleq(token.start, "sub_real", token.length)) { - emit_byte(vm, OP_SUB_REAL); + emit_op(vm, OP_SUB_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2431,7 +2026,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_real", token.length)) { - emit_byte(vm, OP_MUL_REAL); + emit_op(vm, OP_MUL_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2448,7 +2043,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_real", token.length)) { - emit_byte(vm, OP_DIV_REAL); + emit_op(vm, OP_DIV_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2465,7 +2060,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_real", token.length)) { - emit_byte(vm, OP_ABS_REAL); + emit_op(vm, OP_ABS_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2478,7 +2073,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if 
(strleq(token.start, "neg_real", token.length)) { - emit_byte(vm, OP_NEG_REAL); + emit_op(vm, OP_NEG_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2491,7 +2086,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_real", token.length)) { - emit_byte(vm, OP_INT_TO_REAL); + emit_op(vm, OP_INT_TO_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2504,7 +2099,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_real", token.length)) { - emit_byte(vm, OP_NAT_TO_REAL); + emit_op(vm, OP_NAT_TO_REAL); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2517,7 +2112,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_int", token.length)) { - emit_byte(vm, OP_REAL_TO_INT); + emit_op(vm, OP_REAL_TO_INT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2530,7 +2125,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_nat", token.length)) { - emit_byte(vm, OP_REAL_TO_NAT); + emit_op(vm, OP_REAL_TO_NAT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2543,7 +2138,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_left", token.length)) { - emit_byte(vm, OP_BIT_SHIFT_LEFT); + emit_op(vm, OP_BIT_SHIFT_LEFT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2560,7 +2155,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_right", token.length)) { - emit_byte(vm, OP_BIT_SHIFT_RIGHT); + emit_op(vm, 
OP_BIT_SHIFT_RIGHT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2577,7 +2172,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { - emit_byte(vm, OP_BIT_SHIFT_R_EXT); + emit_op(vm, OP_BIT_SHIFT_R_EXT); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2594,7 +2189,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_and", token.length)) { - emit_byte(vm, OP_BAND); + emit_op(vm, OP_BAND); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2611,7 +2206,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_or", token.length)) { - emit_byte(vm, OP_BOR); + emit_op(vm, OP_BOR); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2628,7 +2223,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_xor", token.length)) { - emit_byte(vm, OP_BXOR); + emit_op(vm, OP_BXOR); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2645,7 +2240,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump", token.length)) { - emit_byte(vm, OP_JMP); + emit_op(vm, OP_JMP); vm->cp++; Token id = next_token(); @@ -2655,7 +2250,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_if_flag", token.length)) { - emit_byte(vm, OP_JMPF); + emit_op(vm, OP_JMPF); vm->cp++; Token id = next_token(); @@ -2665,7 +2260,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_int", token.length)) { - 
emit_byte(vm, OP_JEQ_INT); + emit_op(vm, OP_JEQ_INT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2681,7 +2276,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_int", token.length)) { - emit_byte(vm, OP_JNEQ_INT); + emit_op(vm, OP_JNEQ_INT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2697,7 +2292,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_int", token.length)) { - emit_byte(vm, OP_JGT_INT); + emit_op(vm, OP_JGT_INT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2713,7 +2308,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_int", token.length)) { - emit_byte(vm, OP_JLT_INT); + emit_op(vm, OP_JLT_INT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2729,7 +2324,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_int", token.length)) { - emit_byte(vm, OP_JLE_INT); + emit_op(vm, OP_JLE_INT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2745,7 +2340,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_int", token.length)) { - emit_byte(vm, OP_JGE_INT); + emit_op(vm, OP_JGE_INT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2761,7 +2356,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_nat", token.length)) { - emit_byte(vm, OP_JEQ_NAT); + emit_op(vm, OP_JEQ_NAT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2777,7 +2372,7 @@ void emit_bytecode(VM *vm, 
char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_nat", token.length)) { - emit_byte(vm, OP_JNEQ_NAT); + emit_op(vm, OP_JNEQ_NAT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2793,7 +2388,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_nat", token.length)) { - emit_byte(vm, OP_JGT_NAT); + emit_op(vm, OP_JGT_NAT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2809,7 +2404,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_nat", token.length)) { - emit_byte(vm, OP_JLT_NAT); + emit_op(vm, OP_JLT_NAT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2825,7 +2420,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_nat", token.length)) { - emit_byte(vm, OP_JLE_NAT); + emit_op(vm, OP_JLE_NAT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2841,7 +2436,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_nat", token.length)) { - emit_byte(vm, OP_JGE_NAT); + emit_op(vm, OP_JGE_NAT); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2857,7 +2452,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_real", token.length)) { - emit_byte(vm, OP_JEQ_REAL); + emit_op(vm, OP_JEQ_REAL); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2873,7 +2468,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_real", token.length)) { - emit_byte(vm, 
OP_JNEQ_REAL); + emit_op(vm, OP_JNEQ_REAL); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2889,7 +2484,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_real", token.length)) { - emit_byte(vm, OP_JGE_REAL); + emit_op(vm, OP_JGE_REAL); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2905,7 +2500,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_real", token.length)) { - emit_byte(vm, OP_JGT_REAL); + emit_op(vm, OP_JGT_REAL); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2921,7 +2516,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_real", token.length)) { - emit_byte(vm, OP_JLT_REAL); + emit_op(vm, OP_JLT_REAL); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2937,7 +2532,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_real", token.length)) { - emit_byte(vm, OP_JLE_REAL); + emit_op(vm, OP_JLE_REAL); vm->cp++; Token id = next_token(); u32 ptr = get_ptr(id, st); @@ -2953,7 +2548,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_length", token.length)) { - emit_byte(vm, OP_STRLEN); + emit_op(vm, OP_STRLEN); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2966,7 +2561,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_string", token.length)) { - emit_byte(vm, OP_INT_TO_STRING); + emit_op(vm, OP_INT_TO_STRING); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2979,7 +2574,7 @@ void 
emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_string", token.length)) { - emit_byte(vm, OP_NAT_TO_STRING); + emit_op(vm, OP_NAT_TO_STRING); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -2992,7 +2587,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_string", token.length)) { - emit_byte(vm, OP_REAL_TO_STRING); + emit_op(vm, OP_REAL_TO_STRING); vm->cp++; Token reg = next_token(); u8 arg = get_reg(reg, st); @@ -3003,7 +2598,8 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { arg = get_reg(reg, st); emit_byte(vm, arg); vm->cp++; - next_token_is(TOKEN_SEMICOLON);; + next_token_is(TOKEN_SEMICOLON); + ; } else if (strleq(token.start, "string_eq", token.length)) { } else if (strleq(token.start, "string_concat", token.length)) { } else if (strleq(token.start, "string_get_char", token.length)) { From 07528b1f3f341b24ed13a6128a1166cbbec91bfd Mon Sep 17 00:00:00 2001 From: zongor Date: Wed, 3 Dec 2025 21:45:03 -0800 Subject: [PATCH 21/27] fix silly missing `get_reg` error. 
all non gui ones are working now --- src/tools/assembler/assembler.c | 1053 ++++++++++++------------------- test/fib.ul.ir | 9 +- test/hello.ul.ir | 8 +- test/loop.ul.ir | 14 +- test/malloc.ul.ir | 18 +- 5 files changed, 431 insertions(+), 671 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 244cdcd..0ae0285 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -10,127 +10,126 @@ const char *opcode_to_string(Opcode op) { static const char *names[] = { - [OP_EXIT] = "exit", - [OP_JMP] = "jump", - [OP_JMPF] = "jump-if-flag", - [OP_CALL] = "call", - [OP_RETURN] = "return", - - /* Immediate loads (only 32-bit variant needed) */ - [OP_LOAD_IMM] = "load-immediate", - - /* Register-indirect loads */ - [OP_LOAD_IND_8] = "load-indirect-8", - [OP_LOAD_IND_16] = "load-indirect-16", - [OP_LOAD_IND_32] = "load-indirect-32", - - /* Absolute address loads */ - [OP_LOAD_ABS_8] = "load-absolute-8", - [OP_LOAD_ABS_16] = "load-absolute-16", - [OP_LOAD_ABS_32] = "load-absolute-32", - - /* Base+offset loads */ - [OP_LOAD_OFF_8] = "load-offset-8", - [OP_LOAD_OFF_16] = "load-offset-16", - [OP_LOAD_OFF_32] = "load-offset-32", - - /* Absolute address stores */ - [OP_STORE_ABS_8] = "store-absolute-8", - [OP_STORE_ABS_16] = "store-absolute-16", - [OP_STORE_ABS_32] = "store-absolute-32", - - /* Register-indirect stores */ - [OP_STORE_IND_8] = "store-indirect-8", - [OP_STORE_IND_16] = "store-indirect-16", - [OP_STORE_IND_32] = "store-indirect-32", - - /* Base+offset stores */ - [OP_STORE_OFF_8] = "store-offset-8", - [OP_STORE_OFF_16] = "store-offset-16", - [OP_STORE_OFF_32] = "store-offset-32", - - /* Memory operations */ - [OP_MALLOC] = "malloc", - [OP_MEMSET_8] = "memset-8", - [OP_MEMSET_16] = "memset-16", - [OP_MEMSET_32] = "memset-32", - - /* Register operations */ - [OP_REG_MOV] = "register-move", - [OP_SYSCALL] = "syscall", - - /* Bit operations */ - [OP_BIT_SHIFT_LEFT] = "bit-shift-left", - 
[OP_BIT_SHIFT_RIGHT] = "bit-shift-right", - [OP_BIT_SHIFT_R_EXT] = "bit-shift-re", - [OP_BAND] = "bit-and", - [OP_BOR] = "bit-or", - [OP_BXOR] = "bit-xor", - - /* Integer arithmetic */ - [OP_ADD_INT] = "add-int", - [OP_SUB_INT] = "sub-int", - [OP_MUL_INT] = "mul-int", - [OP_DIV_INT] = "div-int", - - /* Natural number arithmetic */ - [OP_ADD_NAT] = "add-nat", - [OP_SUB_NAT] = "sub-nat", - [OP_MUL_NAT] = "mul-nat", - [OP_DIV_NAT] = "div-nat", - - /* Floating point operations */ - [OP_ADD_REAL] = "add-real", - [OP_SUB_REAL] = "sub-real", - [OP_MUL_REAL] = "mul-real", - [OP_DIV_REAL] = "div-real", - - /* Type conversions */ - [OP_INT_TO_REAL] = "int-to-real", - [OP_NAT_TO_REAL] = "nat-to-real", - [OP_REAL_TO_INT] = "real-to-int", - [OP_REAL_TO_NAT] = "real-to-nat", - - /* Integer comparisons */ - [OP_JEQ_INT] = "jump-eq-int", - [OP_JNEQ_INT] = "jump-neq-int", - [OP_JGT_INT] = "jump-gt-int", - [OP_JLT_INT] = "jump-lt-int", - [OP_JLE_INT] = "jump-le-int", - [OP_JGE_INT] = "jump-ge-int", - - /* Natural number comparisons */ - [OP_JEQ_NAT] = "jump-eq-nat", - [OP_JNEQ_NAT] = "jump-neq-nat", - [OP_JGT_NAT] = "jump-gt-nat", - [OP_JLT_NAT] = "jump-lt-nat", - [OP_JLE_NAT] = "jump-le-nat", - [OP_JGE_NAT] = "jump-ge-nat", - - /* Floating point comparisons */ - [OP_JEQ_REAL] = "jump-eq-real", - [OP_JNEQ_REAL] = "jump-neq-real", - [OP_JGE_REAL] = "jump-ge-real", - [OP_JGT_REAL] = "jump-gt-real", - [OP_JLT_REAL] = "jump-lt-real", - [OP_JLE_REAL] = "jump-le-real", - - /* String operations */ - [OP_STRLEN] = "string-length", - [OP_STREQ] = "string-eq", - [OP_STRCAT] = "string-concat", - [OP_STR_GET_CHAR] = "string-get-char", - [OP_STR_FIND_CHAR] = "string-find-char", - [OP_STR_SLICE] = "string-slice", - - /* String conversions */ - [OP_INT_TO_STRING] = "int-to-string", - [OP_NAT_TO_STRING] = "nat-to-string", - [OP_REAL_TO_STRING] = "real-to-string", - [OP_STRING_TO_INT] = "string-to-int", - [OP_STRING_TO_NAT] = "string-to-nat", - [OP_STRING_TO_REAL] = "string-to-real" - }; + [OP_EXIT] 
= "exit", + [OP_JMP] = "jump", + [OP_JMPF] = "jump-if-flag", + [OP_CALL] = "call", + [OP_RETURN] = "return", + + /* Immediate loads (only 32-bit variant needed) */ + [OP_LOAD_IMM] = "load-immediate", + + /* Register-indirect loads */ + [OP_LOAD_IND_8] = "load-indirect-8", + [OP_LOAD_IND_16] = "load-indirect-16", + [OP_LOAD_IND_32] = "load-indirect-32", + + /* Absolute address loads */ + [OP_LOAD_ABS_8] = "load-absolute-8", + [OP_LOAD_ABS_16] = "load-absolute-16", + [OP_LOAD_ABS_32] = "load-absolute-32", + + /* Base+offset loads */ + [OP_LOAD_OFF_8] = "load-offset-8", + [OP_LOAD_OFF_16] = "load-offset-16", + [OP_LOAD_OFF_32] = "load-offset-32", + + /* Absolute address stores */ + [OP_STORE_ABS_8] = "store-absolute-8", + [OP_STORE_ABS_16] = "store-absolute-16", + [OP_STORE_ABS_32] = "store-absolute-32", + + /* Register-indirect stores */ + [OP_STORE_IND_8] = "store-indirect-8", + [OP_STORE_IND_16] = "store-indirect-16", + [OP_STORE_IND_32] = "store-indirect-32", + + /* Base+offset stores */ + [OP_STORE_OFF_8] = "store-offset-8", + [OP_STORE_OFF_16] = "store-offset-16", + [OP_STORE_OFF_32] = "store-offset-32", + + /* Memory operations */ + [OP_MALLOC] = "malloc", + [OP_MEMSET_8] = "memset-8", + [OP_MEMSET_16] = "memset-16", + [OP_MEMSET_32] = "memset-32", + + /* Register operations */ + [OP_REG_MOV] = "register-move", + [OP_SYSCALL] = "syscall", + + /* Bit operations */ + [OP_BIT_SHIFT_LEFT] = "bit-shift-left", + [OP_BIT_SHIFT_RIGHT] = "bit-shift-right", + [OP_BIT_SHIFT_R_EXT] = "bit-shift-re", + [OP_BAND] = "bit-and", + [OP_BOR] = "bit-or", + [OP_BXOR] = "bit-xor", + + /* Integer arithmetic */ + [OP_ADD_INT] = "add-int", + [OP_SUB_INT] = "sub-int", + [OP_MUL_INT] = "mul-int", + [OP_DIV_INT] = "div-int", + + /* Natural number arithmetic */ + [OP_ADD_NAT] = "add-nat", + [OP_SUB_NAT] = "sub-nat", + [OP_MUL_NAT] = "mul-nat", + [OP_DIV_NAT] = "div-nat", + + /* Floating point operations */ + [OP_ADD_REAL] = "add-real", + [OP_SUB_REAL] = "sub-real", + [OP_MUL_REAL] = 
"mul-real", + [OP_DIV_REAL] = "div-real", + + /* Type conversions */ + [OP_INT_TO_REAL] = "int-to-real", + [OP_NAT_TO_REAL] = "nat-to-real", + [OP_REAL_TO_INT] = "real-to-int", + [OP_REAL_TO_NAT] = "real-to-nat", + + /* Integer comparisons */ + [OP_JEQ_INT] = "jump-eq-int", + [OP_JNEQ_INT] = "jump-neq-int", + [OP_JGT_INT] = "jump-gt-int", + [OP_JLT_INT] = "jump-lt-int", + [OP_JLE_INT] = "jump-le-int", + [OP_JGE_INT] = "jump-ge-int", + + /* Natural number comparisons */ + [OP_JEQ_NAT] = "jump-eq-nat", + [OP_JNEQ_NAT] = "jump-neq-nat", + [OP_JGT_NAT] = "jump-gt-nat", + [OP_JLT_NAT] = "jump-lt-nat", + [OP_JLE_NAT] = "jump-le-nat", + [OP_JGE_NAT] = "jump-ge-nat", + + /* Floating point comparisons */ + [OP_JEQ_REAL] = "jump-eq-real", + [OP_JNEQ_REAL] = "jump-neq-real", + [OP_JGE_REAL] = "jump-ge-real", + [OP_JGT_REAL] = "jump-gt-real", + [OP_JLT_REAL] = "jump-lt-real", + [OP_JLE_REAL] = "jump-le-real", + + /* String operations */ + [OP_STRLEN] = "string-length", + [OP_STREQ] = "string-eq", + [OP_STRCAT] = "string-concat", + [OP_STR_GET_CHAR] = "string-get-char", + [OP_STR_FIND_CHAR] = "string-find-char", + [OP_STR_SLICE] = "string-slice", + + /* String conversions */ + [OP_INT_TO_STRING] = "int-to-string", + [OP_NAT_TO_STRING] = "nat-to-string", + [OP_REAL_TO_STRING] = "real-to-string", + [OP_STRING_TO_INT] = "string-to-int", + [OP_STRING_TO_NAT] = "string-to-nat", + [OP_STRING_TO_REAL] = "string-to-real"}; if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { return ""; @@ -140,19 +139,19 @@ const char *opcode_to_string(Opcode op) { return name ? 
name : ""; } -void emit_op(VM *vm, u8 byte) { +void emit_op(VM *vm, u8 byte) { printf("vm->code[%d] = %s\n", vm->cp, opcode_to_string(byte)); - vm->code[vm->cp] = byte; + vm->code[vm->cp] = byte; } -void emit_byte(VM *vm, u8 byte) { +void emit_byte(VM *vm, u8 byte) { printf("vm->code[%d] = %d\n", vm->cp, byte); - vm->code[vm->cp] = byte; + vm->code[vm->cp] = byte; } -void emit_u32(VM *vm, u32 value) { - printf("vm->code[%d..%d] = %d\n", vm->cp, vm->cp+3, value); - write_u32(vm, code, vm->cp, value); +void emit_u32(VM *vm, u32 value) { + printf("vm->code[%d..%d] = %d\n", vm->cp, vm->cp + 3, value); + write_u32(vm, code, vm->cp, value); } void symbol_table_init(SymbolTable *table) { @@ -161,7 +160,28 @@ void symbol_table_init(SymbolTable *table) { table->capacity = 16; } +Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { + for (u32 i = 0; i < table->count; i++) { + if (table->symbols[i].name_length == length) { + if (strleq(table->symbols[i].name, name, length)) { + return &table->symbols[i]; + } + } + } + return nil; +} + u32 symbol_table_add(SymbolTable *table, Symbol s) { + Symbol *sym = symbol_table_lookup(table, s.name, s.name_length); + if (sym != nil) { + fprintf(stderr, + "Error: Symbol '%.*s' already defined, the assembler is not smart " + "enough to do scope properly so please pick a different variable " + "name (hard I know)\n", + s.name_length, s.name); + exit(1); + } + if (table->count >= table->capacity) { table->capacity *= 2; table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); @@ -182,17 +202,6 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { return index; } -Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { - for (u32 i = 0; i < table->count; i++) { - if (table->symbols[i].name_length == length) { - if (strleq(table->symbols[i].name, name, length)) { - return &table->symbols[i]; - } - } - } - return nil; -} - u32 get_ref(SymbolTable *st, const char *name, u32 
length) { Symbol *sym = symbol_table_lookup(st, name, length); if (!sym) { @@ -591,6 +600,109 @@ void define_branch(VM *vm, SymbolTable *st) { symbol_table_add(st, s); } +int get_instruction_byte_size(const char *opname) { + + // Return (1 + 1) + if (strcmp(opname, "return") == 0) { + return 2; // 1 byte opcode + 1 byte return register + } + + if (strcmp(opname, "neg_int") == 0 || strcmp(opname, "abs_int") == 0 || + strcmp(opname, "neg_nat") == 0 || strcmp(opname, "abs_nat") == 0 || + strcmp(opname, "neg_real") == 0 || strcmp(opname, "abs_real") == 0 || + strcmp(opname, "int_to_string") == 0 || + strcmp(opname, "load_indirect_8") == 0 || + strcmp(opname, "nat_to_string") == 0 || + strcmp(opname, "load_indirect_16") == 0 || + strcmp(opname, "real_to_string") == 0 || + strcmp(opname, "load_indirect_32") == 0 || + strcmp(opname, "int_to_real") == 0 || + strcmp(opname, "store_indirect_8") == 0 || + strcmp(opname, "nat_to_real") == 0 || + strcmp(opname, "store_indirect_16") == 0 || + strcmp(opname, "real_to_int") == 0 || + strcmp(opname, "store_indirect_32") == 0 || + strcmp(opname, "real_to_nat") == 0 || strcmp(opname, "nat_to_int") == 0 || + strcmp(opname, "int_to_nat") == 0 || + strcmp(opname, "string_length") == 0 || + strcmp(opname, "store_absolute_32") == 0 || + strcmp(opname, "store_absolute_8") == 0 || + strcmp(opname, "store_absolute_16") == 0 || + strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || + strcmp(opname, "memset_8") == 0 || strcmp(opname, "memset_16") == 0 || + strcmp(opname, "register_move") == 0 || strcmp(opname, "malloc") == 0) { + return 3; + } + + // Register_register_register opcodes (4 bytes: 1 + 3) + if (strcmp(opname, "add_int") == 0 || strcmp(opname, "sub_int") == 0 || + strcmp(opname, "mul_int") == 0 || strcmp(opname, "div_int") == 0 || + strcmp(opname, "add_nat") == 0 || strcmp(opname, "sub_nat") == 0 || + strcmp(opname, "mul_nat") == 0 || strcmp(opname, "div_nat") == 0 || + strcmp(opname, "add_real") == 0 || strcmp(opname, 
"sub_real") == 0 || + strcmp(opname, "bit_shift_left") == 0 || + strcmp(opname, "bit_shift_right") == 0 || + strcmp(opname, "bit_shift_r_ext") == 0 || + strcmp(opname, "bit_and") == 0 || strcmp(opname, "bit_or") == 0 || + strcmp(opname, "bit_xor") == 0 || strcmp(opname, "mul_real") == 0 || + strcmp(opname, "div_real") == 0) { + return 4; + } + + // (5 bytes: 1 + 4) + if (strcmp(opname, "halt") == 0 || strcmp(opname, "jump_if_flag") == 0 || + strcmp(opname, "jump") == 0) { + return 5; + } + + // Load, Load_immediate (6 bytes: 1 + 1 + 4) + if (strcmp(opname, "load_absolute_32") == 0 || + strcmp(opname, "load_immediate") == 0 || + strcmp(opname, "load_address") == 0 || + strcmp(opname, "load_absolute_16") == 0 || + strcmp(opname, "load_absolute_8") == 0) { + return 6; + } + + // jump compare (7 bytes: 1 + 4 + 1 + 1) + if (strcmp(opname, "jump_eq_int") == 0 || + strcmp(opname, "jump_neq_int") == 0 || + strcmp(opname, "jump_gt_int") == 0 || + strcmp(opname, "jump_lt_int") == 0 || + strcmp(opname, "jump_le_int") == 0 || + strcmp(opname, "jump_ge_int") == 0 || + strcmp(opname, "jump_eq_nat") == 0 || + strcmp(opname, "jump_neq_nat") == 0 || + strcmp(opname, "jump_gt_nat") == 0 || + strcmp(opname, "jump_lt_nat") == 0 || + strcmp(opname, "jump_le_nat") == 0 || + strcmp(opname, "jump_ge_nat") == 0 || + strcmp(opname, "jump_eq_real") == 0 || + strcmp(opname, "jump_neq_real") == 0 || + strcmp(opname, "jump_gt_real") == 0 || + strcmp(opname, "jump_lt_real") == 0 || + strcmp(opname, "jump_le_real") == 0 || + strcmp(opname, "jump_ge_real") == 0 || + strcmp(opname, "store_offset_8") == 0 || + strcmp(opname, "store_offset_16") == 0 || + strcmp(opname, "store_offset_32") == 0 || + strcmp(opname, "load_offset_8") == 0 || + strcmp(opname, "load_offset_16") == 0 || + strcmp(opname, "load_offset_32") == 0) { + return 7; + } + + fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); + exit(-1); +} + +#define FAKE_OP(op) \ + } \ + else if (strleq(token.start, op, token.length)) { \ + 
while (token.type != TOKEN_SEMICOLON) \ + token = next_token(); \ + vm->cp += get_instruction_byte_size(op); + /** * Build the symbol table and calculate the types/size/offsets of all values. */ @@ -604,69 +716,69 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { exit(1); } - printf("Line %d [%s]: %.*s cp=%d mp=%d\n", token.line, - token_type_to_string(token.type), token.length, token.start, vm->cp, - vm->mp); - - if (token.type == TOKEN_KEYWORD_GLOBAL) { - define_global(vm, st); - continue; - } - - if (token.type == TOKEN_KEYWORD_FN) { - define_function(vm, st); - continue; - } - - if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || - token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || - token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || - token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || - token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { - define_var(st, token); - next_token_is(TOKEN_SEMICOLON); - continue; - } - - if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || - token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || - token.type == TOKEN_KEYWORD_FOR) { - define_branch(vm, st); - continue; - } - - if (token.type == TOKEN_KEYWORD_RETURN) { - vm->cp++; - - Token next = next_token(); - if (next.type == TOKEN_SEMICOLON) { - /* put 0xFF as return register */ - vm->cp++; + if (token.type != TOKEN_EOF) { + if (token.type == TOKEN_KEYWORD_GLOBAL) { + define_global(vm, st); continue; } - get_reg(next, st); - vm->cp++; - next_token_is(TOKEN_SEMICOLON); - continue; - } + if (token.type == TOKEN_KEYWORD_FN) { + define_function(vm, st); + continue; + } - if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first - if (strleq(token.start, "exit", token.length)) { + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == 
TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { + define_var(st, token); + next_token_is(TOKEN_SEMICOLON); + continue; + } + + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(vm, st); + continue; + } + + if (token.type == TOKEN_KEYWORD_RETURN) { vm->cp++; - next_token(); - vm->cp += 4; + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + vm->cp++; + continue; + } + get_reg(next, st); + vm->cp++; next_token_is(TOKEN_SEMICOLON); - } else if (strleq(token.start, "call", token.length)) { + continue; + } + + if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first + if (strleq(token.start, "exit", token.length)) { + + vm->cp++; + + next_token(); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "call", token.length)) { + vm->cp++; next_token_is(TOKEN_IDENTIFIER); vm->cp += 4; bool has_return = false; + u8 arg_count = 0; vm->cp++; Token next = next_token(); @@ -674,471 +786,125 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { if (next.type != TOKEN_ARROW_RIGHT) { get_reg(next, st); vm->cp++; + arg_count++; } else { has_return = true; + arg_count--; // is a return not an arg } next = next_token(); } if (!has_return) { - vm->cp+=2; + vm->cp++; continue; } - } else if (strleq(token.start, "syscall", token.length)) { - vm->cp++; + } else if (strleq(token.start, "syscall", token.length)) { - Token next = next_token(); - vm->cp += 4; + vm->cp++; + + Token next = next_token(); + vm->cp += 4; - next = next_token(); - while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { - vm->cp++; - } next = next_token(); - } + while (next.type != 
TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + get_reg(next, st); + vm->cp++; + } + next = next_token(); + } - } else if (strleq(token.start, "load_immediate", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_address", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "malloc", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "memset_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "memset_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "memset_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "load_offset_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_offset_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_offset_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_indirect_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - } else if (strleq(token.start, "load_indirect_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - 
vm->cp+=4; - vm->cp++; - } else if (strleq(token.start, "load_indirect_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_absolute_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_absolute_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_absolute_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "store_absolute_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_absolute_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_absolute_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_indirect_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_indirect_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_indirect_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_offset_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "store_offset_16", token.length)) { - while 
(token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "store_offset_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "register_move", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "add_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "sub_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "mul_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "div_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "abs_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "neg_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "add_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "sub_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "mul_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, 
"div_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "abs_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "neg_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "add_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "sub_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "mul_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "div_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "abs_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "neg_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "int_to_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "nat_to_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "real_to_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, 
"real_to_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_shift_left", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_shift_right", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_and", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_or", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_xor", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - } else if (strleq(token.start, "jump_if_flag", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - } else if (strleq(token.start, "jump_eq_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_neq_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_gt_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - 
vm->cp++; - } else if (strleq(token.start, "jump_lt_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_le_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_ge_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_eq_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_neq_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_gt_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_lt_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_le_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_ge_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_eq_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_neq_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_ge_real", 
token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_gt_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_lt_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_le_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "string_length", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "int_to_string", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "nat_to_string", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "real_to_string", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "string_eq", token.length)) { - } else if (strleq(token.start, "string_concat", token.length)) { - } else if (strleq(token.start, "string_get_char", token.length)) { - } else if (strleq(token.start, "string_find_char", token.length)) { - } else if (strleq(token.start, "string_slice", token.length)) { - } else if (strleq(token.start, "string_to_int", token.length)) { - } else if (strleq(token.start, "string_to_nat", token.length)) { - } else if (strleq(token.start, "string_to_real", token.length)) { - } else { - // some other identifier - printf("Unknown id at line %d: %.*s\n", token.line, token.length, - token.start); - exit(1); + 
FAKE_OP("load_immediate") + FAKE_OP("load_address") + FAKE_OP("malloc") + FAKE_OP("memset_8") + FAKE_OP("memset_16") + FAKE_OP("memset_32") + FAKE_OP("load_offset_8") + FAKE_OP("load_offset_16") + FAKE_OP("load_offset_32") + FAKE_OP("load_indirect_8") + FAKE_OP("load_indirect_16") + FAKE_OP("load_indirect_32") + FAKE_OP("load_absolute_8") + FAKE_OP("load_absolute_16") + FAKE_OP("load_absolute_32") + FAKE_OP("store_absolute_8") + FAKE_OP("store_absolute_16") + FAKE_OP("store_absolute_32") + FAKE_OP("store_indirect_8") + FAKE_OP("store_indirect_16") + FAKE_OP("store_indirect_32") + FAKE_OP("store_offset_8") + FAKE_OP("store_offset_16") + FAKE_OP("store_offset_32") + FAKE_OP("register_move") + FAKE_OP("add_int") + FAKE_OP("sub_int") + FAKE_OP("mul_int") + FAKE_OP("div_int") + FAKE_OP("abs_int") + FAKE_OP("neg_int") + FAKE_OP("add_nat") + FAKE_OP("sub_nat") + FAKE_OP("mul_nat") + FAKE_OP("div_nat") + FAKE_OP("abs_nat") + FAKE_OP("neg_nat") + FAKE_OP("add_real") + FAKE_OP("sub_real") + FAKE_OP("mul_real") + FAKE_OP("div_real") + FAKE_OP("abs_real") + FAKE_OP("neg_real") + FAKE_OP("int_to_real") + FAKE_OP("nat_to_real") + FAKE_OP("real_to_int") + FAKE_OP("real_to_nat") + FAKE_OP("bit_shift_left") + FAKE_OP("bit_shift_right") + FAKE_OP("bit_shift_r_ext") + FAKE_OP("bit_and") + FAKE_OP("bit_or") + FAKE_OP("bit_xor") + FAKE_OP("jump") + FAKE_OP("jump_if_flag") + FAKE_OP("jump_eq_int") + FAKE_OP("jump_neq_int") + FAKE_OP("jump_gt_int") + FAKE_OP("jump_lt_int") + FAKE_OP("jump_le_int") + FAKE_OP("jump_ge_int") + FAKE_OP("jump_eq_nat") + FAKE_OP("jump_neq_nat") + FAKE_OP("jump_gt_nat") + FAKE_OP("jump_lt_nat") + FAKE_OP("jump_le_nat") + FAKE_OP("jump_ge_nat") + FAKE_OP("jump_eq_real") + FAKE_OP("jump_neq_real") + FAKE_OP("jump_ge_real") + FAKE_OP("jump_gt_real") + FAKE_OP("jump_lt_real") + FAKE_OP("jump_le_real") + FAKE_OP("string_length") + FAKE_OP("int_to_string") + FAKE_OP("nat_to_string") + FAKE_OP("real_to_string") + FAKE_OP("string_eq") + FAKE_OP("string_concat") + 
FAKE_OP("string_get_char") + FAKE_OP("string_find_char") + FAKE_OP("string_slice") + FAKE_OP("string_to_int") + FAKE_OP("string_to_nat") + FAKE_OP("string_to_real") + } else { + // some other identifier + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } } } } while (token.type != TOKEN_EOF); @@ -1148,8 +914,6 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { * 2nd pass, emit the bytecode */ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { - USED(st); - Token token; init_lexer(source); do { @@ -1159,9 +923,6 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - //printf("[Generate Bytecode cp=%d mp=%d ] Line %d [%s]: %.*s\n", vm->cp, - // vm->mp, token.line, token_type_to_string(token.type), token.length, - // token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed @@ -1269,7 +1030,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { printf("^vm->code[%d] = %d\n", arg_pos, arg_count); if (!has_return) { - vm->cp+=2; + vm->cp++; emit_byte(vm, 255); continue; } diff --git a/test/fib.ul.ir b/test/fib.ul.ir index d283508..36f4de0 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -2,13 +2,12 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; function main () - int n $0; int str_n $1; - load_immediate 35 -> n; - call fib n -> n; - int_to_string n -> str_n; - call pln str_n -> void; + load_immediate 36 -> $0; + call fib $0 -> $0; + int_to_string $0 -> str_n; + call pln str_n; exit 0; function fib (int n $0) diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 4f0a609..8243a1e 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -1,12 +1,12 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; -global str message = "nuqneH 'u'?"; +global str hello = "nuqneH 'u'?"; function main () - str hello $0; + str msg $0; - load_address message -> hello; - call 
pln hello; + load_address hello -> msg; + call pln msg; exit 0; function pln (str message $0) diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 06db690..54dbede 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -5,8 +5,8 @@ global str new_line = "\n"; function main () real a $0; int i $1; - int mode $11; - str term $10; + int in_mode $11; + str in_term $10; load_immediate 5.0 -> a; load_immediate 5000 -> i; @@ -18,20 +18,20 @@ function main () add_int i $3 -> i; jump_ge_int loop_body i $2; - load_address terminal_namespace -> term; - load_immediate 0 -> mode; - syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> in_term; + load_immediate 0 -> in_mode; + syscall OPEN in_term in_mode -> in_term; // Terminal term = open("/dev/term/0", 0); nat b $1; real_to_nat a -> b; load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE term $7 $8; // print prompt + syscall WRITE in_term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ term user_string $8; // read in max 32 byte string + syscall READ in_term user_string $8; // read in max 32 byte string call pln user_string; nat_to_string b -> $4; diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 87e4109..8eb06fd 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -3,23 +3,23 @@ global str prompt = "Enter a string:"; global str new_line = "\n"; function main () - int mode $11; - str term $10; + int in_mode $11; + str in_term $10; - load_immediate terminal_namespace -> term; - load_immediate 0 -> mode; - syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> in_term; + load_immediate 0 -> in_mode; + syscall OPEN in_term in_mode -> in_term; // Terminal term = open("/dev/term/0", 0); - load_immediate prompt -> $7; + load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE term $7 $8; // print prompt + syscall WRITE 
in_term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ term user_string $8; // read in max 32 byte string + syscall READ in_term user_string $8; // read in max 32 byte string - call pln user_string -> void; + call pln user_string; exit 0; function pln (str message $0) From 0d30ea292a78ff87fde57e2c63e563ae2d2fdfa9 Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 7 Dec 2025 15:29:49 -0800 Subject: [PATCH 22/27] Fix assembler, update tests, update roms, add back nogui mode for speed. --- Makefile | 23 +- bench/fib.lua | 2 +- bench/fib.pl | 2 +- bench/fib.py | 2 +- bench/fib.zl | 2 +- bench/run.sh | 6 +- bench/simple.lua | 2 +- bench/simple.pl | 2 +- bench/simple.py | 2 +- bench/simple.zl | 2 +- src/arch/linux/devices.c | 5 + src/arch/linux/main.c | 48 ++++- src/tools/assembler/assembler.c | 367 +++++++++++++++++--------------- src/tools/assembler/assembler.h | 5 +- src/tools/compiler/compiler.h | 4 + src/vm/vm.c | 21 +- test/add.asm.lisp | 26 --- test/add.rom | Bin 0 -> 151 bytes test/add.ul.ir | 6 +- test/fib.asm.lisp | 33 --- test/fib.rom | Bin 0 -> 187 bytes test/fib.ul.ir | 12 +- test/hello.asm.lisp | 19 -- test/hello.rom | Bin 0 -> 135 bytes test/hello.ul.ir | 4 +- test/loop.asm.lisp | 44 ---- test/loop.rom | Bin 0 -> 257 bytes test/loop.ul.ir | 10 +- test/malloc.asm.lisp | 26 --- test/malloc.rom | Bin 0 -> 187 bytes test/malloc.ul.ir | 9 +- test/paint-bw.asm.lisp | 147 ------------- test/paint-bw.rom | Bin 0 -> 550 bytes test/paint-bw.ul.ir | 101 +++++---- test/paint.asm.lisp | 261 ----------------------- test/paint.rom | Bin 0 -> 1161 bytes test/paint.ul.ir | 203 ++++++++++++++---- test/simple.asm.lisp | 22 -- test/simple.rom | Bin 0 -> 132 bytes test/simple.ul.ir | 6 +- test/window.asm.lisp | 71 ------ test/window.rom | Bin 0 -> 332 bytes test/window.ul.ir | 36 ++-- 43 files changed, 538 insertions(+), 993 deletions(-) delete mode 100644 test/add.asm.lisp create mode 100644 test/add.rom delete mode 
100644 test/fib.asm.lisp create mode 100644 test/fib.rom delete mode 100644 test/hello.asm.lisp create mode 100644 test/hello.rom delete mode 100644 test/loop.asm.lisp create mode 100644 test/loop.rom delete mode 100644 test/malloc.asm.lisp create mode 100644 test/malloc.rom delete mode 100644 test/paint-bw.asm.lisp create mode 100644 test/paint-bw.rom delete mode 100644 test/paint.asm.lisp create mode 100644 test/paint.rom delete mode 100644 test/simple.asm.lisp create mode 100644 test/simple.rom delete mode 100644 test/window.asm.lisp create mode 100644 test/window.rom diff --git a/Makefile b/Makefile index 6aa77f9..359fd8e 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,9 @@ PLATFORM ?= linux BUILD_MODE ?= debug # 'debug' or 'release' +# Ensure BUILD_MODE is fixed before any conditionals +$(eval BUILD_MODE := $(or $(BUILD_MODE),debug)) + # --- DIRECTORIES --- SRC_DIR := src BUILD_DIR := build/$(PLATFORM) @@ -107,14 +110,12 @@ DEPS := $(VM_OBJS:.o=.d) $(PLATFORM_OBJ:.o=.d) # Default target all: $(TARGET) -# 'debug' target — just set BUILD_MODE and build -debug: BUILD_MODE=debug -debug: $(TARGET) - -# 'release' target — just set BUILD_MODE and build -release: BUILD_MODE=release -release: $(TARGET) +debug: + $(MAKE) BUILD_MODE=debug all +release: + $(MAKE) BUILD_MODE=release all + # --- COMPILE VM CORE (freestanding) --- $(BUILD_DIR)/vm/%.o: $(SRC_DIR)/vm/%.c @mkdir -p $(dir $@) @@ -155,11 +156,11 @@ clean-all: # --- TEST COMPILATION TARGET --- # Compiles all .asm.lisp test files to .rom using the debug VM executable -# Usage: make compile-tests PLATFORM=linux -compile-tests: $(BUILD_DIR)/undar-$(PLATFORM)$(TARGET_SUFFIX) +# Usage: make tests PLATFORM=linux +tests: $(BUILD_DIR)/undar-$(PLATFORM)$(TARGET_SUFFIX) @echo "Compiling test assembly files for $(PLATFORM)..." 
- @for f in ./test/*.asm.lisp; do \ - base=$$(basename "$$f" .asm.lisp); \ + @for f in ./test/*.ul.ir; do \ + base=$$(basename "$$f" .ul.ir); \ echo " [$$base] $$f -> ./test/$$base.rom"; \ $(BUILD_DIR)/undar-$(PLATFORM)$(TARGET_SUFFIX) "$$f" -o "./test/$$base.rom"; \ done diff --git a/bench/fib.lua b/bench/fib.lua index be357e9..d7ce19a 100644 --- a/bench/fib.lua +++ b/bench/fib.lua @@ -3,6 +3,6 @@ function fib(n) return fib(n-1) + fib(n-2) end -local result = fib(36) +local result = fib(35) print(result) diff --git a/bench/fib.pl b/bench/fib.pl index 776351e..e246993 100644 --- a/bench/fib.pl +++ b/bench/fib.pl @@ -7,6 +7,6 @@ sub fib { return fib($n-1) + fib($n-2); } -my $result = fib(36); +my $result = fib(35); print "$result\n"; diff --git a/bench/fib.py b/bench/fib.py index 4ab00e6..c4ea615 100644 --- a/bench/fib.py +++ b/bench/fib.py @@ -3,5 +3,5 @@ def fib(n): return n return fib(n-1) + fib(n-2) -result = fib(36) +result = fib(35) print(result) diff --git a/bench/fib.zl b/bench/fib.zl index dd93fc8..1b95c65 100644 --- a/bench/fib.zl +++ b/bench/fib.zl @@ -3,5 +3,5 @@ fn fib(n) { return fib(n - 2) + fib(n - 1); } -let result = fib(36); +let result = fib(35); print result; diff --git a/bench/run.sh b/bench/run.sh index ab75bcd..ac7deaf 100755 --- a/bench/run.sh +++ b/bench/run.sh @@ -41,9 +41,9 @@ print_section "zre ($FILENAME.t.ul)" echo "test input" | time ../build/old/zre -t "$FILENAME.ul" # Undâr Implementation (inline assembled) -print_section "undar ($FILENAME.asm.lisp)" -echo "test input" | time ../build/linux/undar-linux-release "../test/$FILENAME.asm.lisp" +print_section "undar ($FILENAME.ul.ir)" +echo "test input" | time ../build/linux/undar-linux-release -t "../test/$FILENAME.ul.ir" # Undâr Implementation (binary) print_section "undar ($FILENAME.rom)" -echo "test input" | time ../build/linux/undar-linux-release "../test/$FILENAME.rom" +echo "test input" | time ../build/linux/undar-linux-release -t "../test/$FILENAME.rom" diff --git 
a/bench/simple.lua b/bench/simple.lua index cb25114..55dc375 100644 --- a/bench/simple.lua +++ b/bench/simple.lua @@ -1 +1 @@ -print(tostring(1 + 2)) +print(tostring(1.0 + 2.0)) diff --git a/bench/simple.pl b/bench/simple.pl index 69a038c..2924ef9 100644 --- a/bench/simple.pl +++ b/bench/simple.pl @@ -1 +1 @@ -print((1 + 2) . "\n"); +print((1.0 + 2.0) . "\n"); diff --git a/bench/simple.py b/bench/simple.py index b02db97..07d8fb6 100644 --- a/bench/simple.py +++ b/bench/simple.py @@ -1 +1 @@ -print(str(1 + 2)) +print(str(1.0 + 2.0)) diff --git a/bench/simple.zl b/bench/simple.zl index e391eec..7549032 100644 --- a/bench/simple.zl +++ b/bench/simple.zl @@ -1,2 +1,2 @@ -let sum = 1 + 2; +let sum = 1.0 + 2.0; print sum; diff --git a/src/arch/linux/devices.c b/src/arch/linux/devices.c index 4d667b1..03aba8f 100644 --- a/src/arch/linux/devices.c +++ b/src/arch/linux/devices.c @@ -29,6 +29,11 @@ i32 console_read(void *data, u8 *buffer, u32 size) { i32 console_write(void *data, const u8 *buffer, u32 size) { USED(data); + + if (size > MEMORY_SIZE) { + return 0; + } + for (u32 i = 0; i < size; i++) { putchar(buffer[i]); } diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index eef3085..7a4453c 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -129,6 +129,38 @@ bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { return true; } +#ifdef STATIC + #define SYMBOLS_COUNT 2048 + Symbol symbols[SYMBOLS_COUNT]; +#endif + +void symbol_table_init(SymbolTable *t) { + #ifdef STATIC + memset(symbols, 0, SYMBOLS_COUNT*sizeof(Symbol)); + t->symbols = symbols; + t->count = 0; + t->capacity = SYMBOLS_COUNT; + #else + t->symbols = calloc(16, sizeof(Symbol)); + t->count = 0; + t->capacity = 16; + #endif +} + +bool resize_or_check_size(SymbolTable *table) { + #ifdef STATIC + if (table->count >= table->capacity) { + return false; + } + #else + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = 
realloc(table->symbols, table->capacity * sizeof(Symbol)); + } + #endif + return true; +} + // Function to assemble and optionally save bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { FILE *f = fopen(source_file, "rb"); @@ -151,7 +183,12 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { source[read] = '\0'; fclose(f); - assemble(vm, source); + SymbolTable table = {0}; + symbol_table_init(&table); + assemble(vm, &table, source); +#ifndef STATIC + free(table.symbols); +#endif if (output_file) { if (!saveVM(output_file, vm)) { @@ -167,6 +204,7 @@ i32 main(i32 argc, char *argv[]) { bool dump_rom = false; char *input_file = nil; char *output_file = nil; + bool terminal_only_mode = false; bool is_rom = false; bool is_ir = false; @@ -174,6 +212,8 @@ i32 main(i32 argc, char *argv[]) { for (i32 i = 1; i < argc; i++) { if (strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--dump-rom") == 0) { dump_rom = true; + } else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--term") == 0) { + terminal_only_mode = true; } else if (input_file == nil) { // This is the input file input_file = argv[i]; @@ -234,6 +274,11 @@ i32 main(i32 argc, char *argv[]) { vm_register_device(&vm, "/dev/term/0", "terminal", &console_data, &console_device_ops, 4); + if (terminal_only_mode) { + while (step_vm(&vm)); + return 0; + } + if (SDL_Init(SDL_INIT_VIDEO) < 0) { printf("SDL initialization failed: %s\n", SDL_GetError()); return 1; @@ -326,6 +371,7 @@ i32 main(i32 argc, char *argv[]) { i32 cycles_this_frame = 0; i32 max_cycles_per_frame = 100; // Adjust this value while (cycles_this_frame < max_cycles_per_frame) { + //printf("code[%d] = %s\n", vm.pc, opcode_to_string(vm.code[vm.pc])); if (!step_vm(&vm)) { running = false; break; diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 0ae0285..d69c183 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,9 +1,12 @@ -#include 
"assembler.h" #include "../../vm/common.h" #include "../../vm/fixed.h" #include "../../vm/libc.h" #include "../../vm/opcodes.h" + +#include "assembler.h" #include "lexer.h" + +/* FIXME: remove these and replace with libc.h instead */ #include #include #include @@ -12,124 +15,123 @@ const char *opcode_to_string(Opcode op) { static const char *names[] = { [OP_EXIT] = "exit", [OP_JMP] = "jump", - [OP_JMPF] = "jump-if-flag", + [OP_JMPF] = "jump_if_flag", [OP_CALL] = "call", [OP_RETURN] = "return", - /* Immediate loads (only 32-bit variant needed) */ - [OP_LOAD_IMM] = "load-immediate", + [OP_LOAD_IMM] = "load_immediate", - /* Register-indirect loads */ - [OP_LOAD_IND_8] = "load-indirect-8", - [OP_LOAD_IND_16] = "load-indirect-16", - [OP_LOAD_IND_32] = "load-indirect-32", + /* Register_indirect loads */ + [OP_LOAD_IND_8] = "load_indirect_8", + [OP_LOAD_IND_16] = "load_indirect_16", + [OP_LOAD_IND_32] = "load_indirect_32", /* Absolute address loads */ - [OP_LOAD_ABS_8] = "load-absolute-8", - [OP_LOAD_ABS_16] = "load-absolute-16", - [OP_LOAD_ABS_32] = "load-absolute-32", + [OP_LOAD_ABS_8] = "load_absolute_8", + [OP_LOAD_ABS_16] = "load_absolute_16", + [OP_LOAD_ABS_32] = "load_absolute_32", /* Base+offset loads */ - [OP_LOAD_OFF_8] = "load-offset-8", - [OP_LOAD_OFF_16] = "load-offset-16", - [OP_LOAD_OFF_32] = "load-offset-32", + [OP_LOAD_OFF_8] = "load_offset_8", + [OP_LOAD_OFF_16] = "load_offset_16", + [OP_LOAD_OFF_32] = "load_offset_32", /* Absolute address stores */ - [OP_STORE_ABS_8] = "store-absolute-8", - [OP_STORE_ABS_16] = "store-absolute-16", - [OP_STORE_ABS_32] = "store-absolute-32", + [OP_STORE_ABS_8] = "store_absolute_8", + [OP_STORE_ABS_16] = "store_absolute_16", + [OP_STORE_ABS_32] = "store_absolute_32", - /* Register-indirect stores */ - [OP_STORE_IND_8] = "store-indirect-8", - [OP_STORE_IND_16] = "store-indirect-16", - [OP_STORE_IND_32] = "store-indirect-32", + /* Register_indirect stores */ + [OP_STORE_IND_8] = "store_indirect_8", + [OP_STORE_IND_16] = 
"store_indirect_16", + [OP_STORE_IND_32] = "store_indirect_32", /* Base+offset stores */ - [OP_STORE_OFF_8] = "store-offset-8", - [OP_STORE_OFF_16] = "store-offset-16", - [OP_STORE_OFF_32] = "store-offset-32", + [OP_STORE_OFF_8] = "store_offset_8", + [OP_STORE_OFF_16] = "store_offset_16", + [OP_STORE_OFF_32] = "store_offset_32", /* Memory operations */ [OP_MALLOC] = "malloc", - [OP_MEMSET_8] = "memset-8", - [OP_MEMSET_16] = "memset-16", - [OP_MEMSET_32] = "memset-32", + [OP_MEMSET_8] = "memset_8", + [OP_MEMSET_16] = "memset_16", + [OP_MEMSET_32] = "memset_32", /* Register operations */ - [OP_REG_MOV] = "register-move", + [OP_REG_MOV] = "register_move", [OP_SYSCALL] = "syscall", /* Bit operations */ - [OP_BIT_SHIFT_LEFT] = "bit-shift-left", - [OP_BIT_SHIFT_RIGHT] = "bit-shift-right", - [OP_BIT_SHIFT_R_EXT] = "bit-shift-re", - [OP_BAND] = "bit-and", - [OP_BOR] = "bit-or", - [OP_BXOR] = "bit-xor", + [OP_BIT_SHIFT_LEFT] = "bit_shift_left", + [OP_BIT_SHIFT_RIGHT] = "bit_shift_right", + [OP_BIT_SHIFT_R_EXT] = "bit_shift_re", + [OP_BAND] = "bit_and", + [OP_BOR] = "bit_or", + [OP_BXOR] = "bit_xor", /* Integer arithmetic */ - [OP_ADD_INT] = "add-int", - [OP_SUB_INT] = "sub-int", - [OP_MUL_INT] = "mul-int", - [OP_DIV_INT] = "div-int", + [OP_ADD_INT] = "add_int", + [OP_SUB_INT] = "sub_int", + [OP_MUL_INT] = "mul_int", + [OP_DIV_INT] = "div_int", /* Natural number arithmetic */ - [OP_ADD_NAT] = "add-nat", - [OP_SUB_NAT] = "sub-nat", - [OP_MUL_NAT] = "mul-nat", - [OP_DIV_NAT] = "div-nat", + [OP_ADD_NAT] = "add_nat", + [OP_SUB_NAT] = "sub_nat", + [OP_MUL_NAT] = "mul_nat", + [OP_DIV_NAT] = "div_nat", /* Floating point operations */ - [OP_ADD_REAL] = "add-real", - [OP_SUB_REAL] = "sub-real", - [OP_MUL_REAL] = "mul-real", - [OP_DIV_REAL] = "div-real", + [OP_ADD_REAL] = "add_real", + [OP_SUB_REAL] = "sub_real", + [OP_MUL_REAL] = "mul_real", + [OP_DIV_REAL] = "div_real", /* Type conversions */ - [OP_INT_TO_REAL] = "int-to-real", - [OP_NAT_TO_REAL] = "nat-to-real", - [OP_REAL_TO_INT] 
= "real-to-int", - [OP_REAL_TO_NAT] = "real-to-nat", + [OP_INT_TO_REAL] = "int_to_real", + [OP_NAT_TO_REAL] = "nat_to_real", + [OP_REAL_TO_INT] = "real_to_int", + [OP_REAL_TO_NAT] = "real_to_nat", /* Integer comparisons */ - [OP_JEQ_INT] = "jump-eq-int", - [OP_JNEQ_INT] = "jump-neq-int", - [OP_JGT_INT] = "jump-gt-int", - [OP_JLT_INT] = "jump-lt-int", - [OP_JLE_INT] = "jump-le-int", - [OP_JGE_INT] = "jump-ge-int", + [OP_JEQ_INT] = "jump_eq_int", + [OP_JNEQ_INT] = "jump_neq_int", + [OP_JGT_INT] = "jump_gt_int", + [OP_JLT_INT] = "jump_lt_int", + [OP_JLE_INT] = "jump_le_int", + [OP_JGE_INT] = "jump_ge_int", /* Natural number comparisons */ - [OP_JEQ_NAT] = "jump-eq-nat", - [OP_JNEQ_NAT] = "jump-neq-nat", - [OP_JGT_NAT] = "jump-gt-nat", - [OP_JLT_NAT] = "jump-lt-nat", - [OP_JLE_NAT] = "jump-le-nat", - [OP_JGE_NAT] = "jump-ge-nat", + [OP_JEQ_NAT] = "jump_eq_nat", + [OP_JNEQ_NAT] = "jump_neq_nat", + [OP_JGT_NAT] = "jump_gt_nat", + [OP_JLT_NAT] = "jump_lt_nat", + [OP_JLE_NAT] = "jump_le_nat", + [OP_JGE_NAT] = "jump_ge_nat", /* Floating point comparisons */ - [OP_JEQ_REAL] = "jump-eq-real", - [OP_JNEQ_REAL] = "jump-neq-real", - [OP_JGE_REAL] = "jump-ge-real", - [OP_JGT_REAL] = "jump-gt-real", - [OP_JLT_REAL] = "jump-lt-real", - [OP_JLE_REAL] = "jump-le-real", + [OP_JEQ_REAL] = "jump_eq_real", + [OP_JNEQ_REAL] = "jump_neq_real", + [OP_JGE_REAL] = "jump_ge_real", + [OP_JGT_REAL] = "jump_gt_real", + [OP_JLT_REAL] = "jump_lt_real", + [OP_JLE_REAL] = "jump_le_real", /* String operations */ - [OP_STRLEN] = "string-length", - [OP_STREQ] = "string-eq", - [OP_STRCAT] = "string-concat", - [OP_STR_GET_CHAR] = "string-get-char", - [OP_STR_FIND_CHAR] = "string-find-char", - [OP_STR_SLICE] = "string-slice", + [OP_STRLEN] = "string_length", + [OP_STREQ] = "string_eq", + [OP_STRCAT] = "string_concat", + [OP_STR_GET_CHAR] = "string_get_char", + [OP_STR_FIND_CHAR] = "string_find_char", + [OP_STR_SLICE] = "string_slice", /* String conversions */ - [OP_INT_TO_STRING] = "int-to-string", - 
[OP_NAT_TO_STRING] = "nat-to-string", - [OP_REAL_TO_STRING] = "real-to-string", - [OP_STRING_TO_INT] = "string-to-int", - [OP_STRING_TO_NAT] = "string-to-nat", - [OP_STRING_TO_REAL] = "string-to-real"}; + [OP_INT_TO_STRING] = "int_to_string", + [OP_NAT_TO_STRING] = "nat_to_string", + [OP_REAL_TO_STRING] = "real_to_string", + [OP_STRING_TO_INT] = "string_to_int", + [OP_STRING_TO_NAT] = "string_to_nat", + [OP_STRING_TO_REAL] = "string_to_real"}; if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { return ""; @@ -139,27 +141,28 @@ const char *opcode_to_string(Opcode op) { return name ? name : ""; } + void emit_op(VM *vm, u8 byte) { - printf("vm->code[%d] = %s\n", vm->cp, opcode_to_string(byte)); +#ifdef DEBUG_PRINT + printf("code[%d] = %s\n", vm->cp, opcode_to_string(byte)); +#endif vm->code[vm->cp] = byte; } void emit_byte(VM *vm, u8 byte) { - printf("vm->code[%d] = %d\n", vm->cp, byte); +#ifdef DEBUG_PRINT + printf("code[%d] = %d\n", vm->cp, byte); +#endif vm->code[vm->cp] = byte; } void emit_u32(VM *vm, u32 value) { - printf("vm->code[%d..%d] = %d\n", vm->cp, vm->cp + 3, value); +#ifdef DEBUG_PRINT + printf("code[%d..%d] = %d\n", vm->cp, vm->cp + 3, value); +#endif write_u32(vm, code, vm->cp, value); } -void symbol_table_init(SymbolTable *table) { - table->symbols = calloc(16, sizeof(Symbol)); - table->count = 0; - table->capacity = 16; -} - Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { for (u32 i = 0; i < table->count; i++) { if (table->symbols[i].name_length == length) { @@ -182,19 +185,23 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { exit(1); } - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); + if (!resize_or_check_size(table)) { + fprintf(stderr, + "Error: Symbol table is out of memory! This is likely because you built this in static mode." + "if you built using malloc, that means your computer is out of memory. 
Close a few tabs in your web browser and try again." + "Count was %d, while capacity was %d\n", + table->count, table->capacity); + exit(1); } - +#ifdef DEBUG_PRINT if (s.scope == VAR) { - // ignore for now - // printf("$%d = %s\n", s.ref, s.name); + printf("$%d = %s\n", s.ref, s.name); } else if (s.scope == GLOBAL) { printf("memory[%d] = %s\n", s.ref, s.name); } else { printf("code[%d] = %s\n", s.ref, s.name); } +#endif table->symbols[table->count] = s; u32 index = table->count; @@ -232,7 +239,7 @@ u32 get_ptr(Token token, SymbolTable *st) { return out; } - fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, + fprintf(stderr, "Error: Not a pointer or symbol '%.*s'\n", token.length, token.start); exit(1); } @@ -595,6 +602,7 @@ void define_branch(VM *vm, SymbolTable *st) { } memcpy(s.name, name.start, name.length); s.name_length = name.length; + s.name[name.length] = '\0'; s.ref = vm->cp; symbol_table_add(st, s); @@ -625,9 +633,6 @@ int get_instruction_byte_size(const char *opname) { strcmp(opname, "real_to_nat") == 0 || strcmp(opname, "nat_to_int") == 0 || strcmp(opname, "int_to_nat") == 0 || strcmp(opname, "string_length") == 0 || - strcmp(opname, "store_absolute_32") == 0 || - strcmp(opname, "store_absolute_8") == 0 || - strcmp(opname, "store_absolute_16") == 0 || strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || strcmp(opname, "memset_8") == 0 || strcmp(opname, "memset_16") == 0 || strcmp(opname, "register_move") == 0 || strcmp(opname, "malloc") == 0) { @@ -660,7 +665,10 @@ int get_instruction_byte_size(const char *opname) { strcmp(opname, "load_immediate") == 0 || strcmp(opname, "load_address") == 0 || strcmp(opname, "load_absolute_16") == 0 || - strcmp(opname, "load_absolute_8") == 0) { + strcmp(opname, "load_absolute_8") == 0 || + strcmp(opname, "store_absolute_32") == 0 || + strcmp(opname, "store_absolute_8") == 0 || + strcmp(opname, "store_absolute_16") == 0) { return 6; } @@ -696,12 +704,15 @@ int 
get_instruction_byte_size(const char *opname) { exit(-1); } -#define FAKE_OP(op) \ - } \ - else if (strleq(token.start, op, token.length)) { \ - while (token.type != TOKEN_SEMICOLON) \ - token = next_token(); \ - vm->cp += get_instruction_byte_size(op); +#define FAKE_OP(op) \ + } else if (strleq(token.start, op, token.length)) { \ + do { \ + while (token.type != TOKEN_SEMICOLON) { \ + token = next_token(); \ + } \ + /*printf("code[%d]=%s\n %d + %d = %d\n", vm->cp, op, get_instruction_byte_size(op), vm->cp, vm->cp + get_instruction_byte_size(op)); */\ + vm->cp += get_instruction_byte_size(op); \ + } while(0); /** * Build the symbol table and calculate the types/size/offsets of all values. @@ -760,6 +771,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { continue; } + #ifdef DEBUG_PRINT + printf("-- %.*s --\n", token.length, token.start); + #endif if (token.type == TOKEN_IDENTIFIER) { // check to see if it is an opcode first if (strleq(token.start, "exit", token.length)) { @@ -769,6 +783,10 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next_token(); vm->cp += 4; + #ifdef DEBUG_PRINT + printf("code[%d] = exit\n", vm->cp); + #endif + next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "call", token.length)) { @@ -777,27 +795,27 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_IDENTIFIER); vm->cp += 4; - bool has_return = false; - u8 arg_count = 0; vm->cp++; - - Token next = next_token(); - while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { + Token next = next_token_is(TOKEN_LPAREN); + next = next_token(); + while (next.type != TOKEN_RPAREN) { get_reg(next, st); vm->cp++; - arg_count++; - } else { - has_return = true; - arg_count--; // is a return not an arg - } - next = next_token(); + next = next_token(); } - if (!has_return) { + next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + vm->cp++; + } else { + next = next_token(); + get_reg(next, 
st); vm->cp++; - continue; } + #ifdef DEBUG_PRINT + printf("code[%d] = call\n", vm->cp); + #endif + continue; } else if (strleq(token.start, "syscall", token.length)) { vm->cp++; @@ -807,13 +825,14 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next = next_token(); while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { - get_reg(next, st); - vm->cp++; - } + get_reg(next, st); + vm->cp++; next = next_token(); } - + #ifdef DEBUG_PRINT + printf("code[%d] = syscall\n", vm->cp); + #endif + continue; FAKE_OP("load_immediate") FAKE_OP("load_address") FAKE_OP("malloc") @@ -982,6 +1001,9 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { continue; } + #ifdef DEBUG_PRINT + printf("-- %.*s --\n", token.length, token.start); + #endif if (token.type == TOKEN_IDENTIFIER) { // check to see if it is an opcode first if (strleq(token.start, "exit", token.length)) { @@ -1005,35 +1027,36 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { emit_u32(vm, ptr); vm->cp += 4; - bool has_return = false; u8 arg_count = 0; u32 arg_pos = vm->cp++; - printf("vm->code[%d] = ?\n", arg_pos); - - Token next = next_token(); - while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { + Token next = next_token_is(TOKEN_LPAREN); + next = next_token(); + while (next.type != TOKEN_RPAREN) { u8 arg = get_reg(next, st); emit_byte(vm, arg); vm->cp++; arg_count++; - } else { - has_return = true; - arg_count--; // is a return not an arg - } - next = next_token(); + next = next_token(); } - /* patch number of args */ vm->code[arg_pos] = arg_count; - - printf("^vm->code[%d] = %d\n", arg_pos, arg_count); - - if (!has_return) { - vm->cp++; + + #ifdef DEBUG_PRINT + printf("^code[%d] = %d\n", arg_pos, arg_count); + #endif + + next = next_token(); + if (next.type == TOKEN_SEMICOLON) { emit_byte(vm, 255); - continue; + vm->cp++; + } else { + next = next_token(); + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; } + + 
continue; } else if (strleq(token.start, "syscall", token.length)) { emit_op(vm, OP_SYSCALL); @@ -1062,15 +1085,20 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp += 4; next = next_token(); - while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { - u8 arg = get_reg(next, st); - emit_byte(vm, arg); - vm->cp++; - } + while (next.type != TOKEN_SEMICOLON && next.type != TOKEN_ARROW_RIGHT) { + u8 arg =get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; next = next_token(); } + if (next.type == TOKEN_ARROW_RIGHT) { + next = next_token(); + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + } + } else if (strleq(token.start, "load_immediate", token.length)) { emit_op(vm, OP_LOAD_IMM); @@ -1168,6 +1196,8 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { emit_byte(vm, arg); vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); arg = get_reg(reg, st); emit_byte(vm, arg); @@ -1188,6 +1218,8 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { emit_byte(vm, arg); vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); arg = get_reg(reg, st); emit_byte(vm, arg); @@ -1208,6 +1240,8 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { emit_byte(vm, arg); vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); arg = get_reg(reg, st); emit_byte(vm, arg); @@ -1444,10 +1478,10 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_ARROW_RIGHT); - Token id = next_token(); - u32 ptr = get_ptr(id, st); - emit_u32(vm, ptr); - vm->cp += 4; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_16", token.length)) { @@ -1461,10 +1495,10 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_ARROW_RIGHT); - Token id = next_token(); - u32 ptr = get_ptr(id, st); - emit_u32(vm, ptr); - vm->cp += 4; + reg = 
next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_32", token.length)) { @@ -1478,10 +1512,10 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { next_token_is(TOKEN_ARROW_RIGHT); - Token id = next_token(); - u32 ptr = get_ptr(id, st); - emit_u32(vm, ptr); - vm->cp += 4; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_8", token.length)) { @@ -2383,11 +2417,8 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { /** * Emit bytecode to the VM from the source string. */ -void assemble(VM *vm, char *source) { - SymbolTable st = {0}; - symbol_table_init(&st); - build_symbol_table(vm, source, &st); - vm->cp = 0; /* actuall start emitting code */ - emit_bytecode(vm, source, &st); - free(st.symbols); +void assemble(VM *vm, SymbolTable *st, char *source) { + build_symbol_table(vm, source, st); + vm->cp = 0; /* actually start emitting code */ + emit_bytecode(vm, source, st); } diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 107f2be..106aee5 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -43,6 +43,9 @@ struct symbol_tab_s { u32 capacity; }; -void assemble(VM *vm, char *source); +void assemble(VM *vm, SymbolTable *st, char *source); +extern bool resize_or_check_size(SymbolTable *table);/* implement this in arch/ not here */ + +const char *opcode_to_string(Opcode op); #endif diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h index e223513..778a5e3 100644 --- a/src/tools/compiler/compiler.h +++ b/src/tools/compiler/compiler.h @@ -103,6 +103,10 @@ struct names_tab_s { u32 capacity; }; +/** + * FIXME: + * Symbols need to be inside a scope so we can have duplicates + */ struct symbol_tab_s { Symbol *symbols; u32 count; diff --git a/src/vm/vm.c 
b/src/vm/vm.c index 45c78af..9ae7ac0 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -349,37 +349,34 @@ bool step_vm(VM *vm) { } case OP_STORE_ABS_32: { u32 v, ptr; - u8 dest, src1; + u8 src1; src1 = read_u8(vm, code, vm->pc); vm->pc++; - dest = read_u8(vm, code, vm->pc); - vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; v = frame->locals[src1]; - ptr = frame->locals[dest]; write_u32(vm, memory, ptr, v); return true; } case OP_STORE_ABS_16: { u32 v, ptr; - u8 dest, src1; + u8 src1; src1 = read_u8(vm, code, vm->pc); vm->pc++; - dest = read_u8(vm, code, vm->pc); - vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; v = frame->locals[src1]; - ptr = frame->locals[dest]; write_u16(vm, memory, ptr, v); return true; } case OP_STORE_ABS_8: { u32 v, ptr; - u8 dest, src1; + u8 src1; src1 = read_u8(vm, code, vm->pc); vm->pc++; - dest = read_u8(vm, code, vm->pc); - vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; v = frame->locals[src1]; - ptr = frame->locals[dest]; write_u8(vm, memory, ptr, v); return true; } diff --git a/test/add.asm.lisp b/test/add.asm.lisp deleted file mode 100644 index 1c783c7..0000000 --- a/test/add.asm.lisp +++ /dev/null @@ -1,26 +0,0 @@ -((code - (label main - (load-immediate $0 1) - (load-immediate $1 1) - (call &add ($0 $1) $2) - (int-to-string $3 $2) - (call &pln ($3) nil) - (exit 0)) - - (label add - (add-int $2 $1 $0) - (return $2)) - - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (load-immediate $3 &new-line) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) -(data - (label terminal-namespace "/dev/term/0") - (label new-line "\n"))) diff --git a/test/add.rom b/test/add.rom new file mode 100644 index 0000000000000000000000000000000000000000..7ced01fc9fae8f7f892509e8edb8dd33468d3f47 GIT binary patch literal 151 
zcmXwyp$>pB3`DQ(h8PBgAOwd(j4zqO;y_{u;rHPNS(CoLUYZmDR(7eO2xn;X5;{U; zBo(5InoK#dhXi_NsxGVlSeei(F9~W!6{u@CEt&fRbNbXw; $0; load_absolute_32 y -> $1; - call add $0 $1 -> $2; + call add ($0 $1) -> $2; int_to_string $2 -> $3; - call pln $3; + call pln ($3); exit 0; function add (int a $0, int b $1) @@ -26,7 +26,7 @@ function pln (str message $0) load_immediate 0 -> mode; load_address terminal_namespace -> term_ns; - syscall OPEN term_ns mode -> term; + syscall OPEN term_ns mode term; string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; diff --git a/test/fib.asm.lisp b/test/fib.asm.lisp deleted file mode 100644 index 6004071..0000000 --- a/test/fib.asm.lisp +++ /dev/null @@ -1,33 +0,0 @@ -((code - (label main - (load-immediate $0 35) - (call &fib ($0) $0) - (int-to-string $1 $0) - (call &pln ($1) nil) - (exit 0)) - (label fib - (load-immediate $1 2) - (jump-lt-int &base-case $0 $1) - (load-immediate $3 2) - (sub-int $4 $0 $3) - (call &fib ($4) $5) - (load-immediate $3 1) - (sub-int $4 $0 $3) - (call &fib ($4) $6) - (add-int $7 $6 $5) - (return $7) - (label base-case - (return $0))) - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (load-immediate $3 &new-line) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) -(data - (label terminal-namespace "/dev/term/0") - (label new-line "\n"))) diff --git a/test/fib.rom b/test/fib.rom new file mode 100644 index 0000000000000000000000000000000000000000..f09af70d3e9f81ed2e30cdce403d2b250de2c6f6 GIT binary patch literal 187 zcmYjLI}XB74095{R|ys-CSc(Jj~j4-*{>v# zX9+v$05_ODL~WD}y2SM^ZnKCZf=ir*S;4^ UppV;h+H?4}&+vY=i@w1I6PA$)*8l(j literal 0 HcmV?d00001 diff --git a/test/fib.ul.ir b/test/fib.ul.ir index 36f4de0..caf8152 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -4,10 +4,10 @@ global str new_line = "\n"; function main () int str_n $1; 
- load_immediate 36 -> $0; - call fib $0 -> $0; + load_immediate 35 -> $0; + call fib ($0) -> $0; int_to_string $0 -> str_n; - call pln str_n; + call pln (str_n); exit 0; function fib (int n $0) @@ -17,11 +17,11 @@ function fib (int n $0) load_immediate 2 -> $3; sub_int n $3 -> $4; - call fib $4 -> $5; + call fib ($4) -> $5; load_immediate 1 -> $3; sub_int n $3 -> $4; - call fib $4 -> $6; + call fib ($4) -> $6; add_int $6 $5 -> $7; return $7; @@ -39,7 +39,7 @@ function pln (str message $0) load_immediate 0 -> mode; load_address terminal_namespace -> term_ns; - syscall OPEN term_ns mode -> term; + syscall OPEN term_ns mode term; string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; diff --git a/test/hello.asm.lisp b/test/hello.asm.lisp deleted file mode 100644 index f14933f..0000000 --- a/test/hello.asm.lisp +++ /dev/null @@ -1,19 +0,0 @@ -((code - (label main - (load-immediate $1 &hello-str) ; load hello string ptr - (call &pln ($1) nil) - (exit 0)) ; done - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (load-immediate $3 &new-line) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) -(data - (label terminal-namespace "/dev/term/0") - (label new-line "\n") - (label hello-str "nuqneH 'u'?"))) diff --git a/test/hello.rom b/test/hello.rom new file mode 100644 index 0000000000000000000000000000000000000000..74ed6378c0b6d7890e4365c4e21a0c4a89fdcff8 GIT binary patch literal 135 zcmZQzU|?_sVi-_^F@Q9S7>L6t3?dl*gV+o#Ac7T4vN1CP*=($gehf^^%s?h10~3n? 
xkY@H{W`PJWvoQVV28!sXq?YNIq!#7s8vs=?FmN$|MDt1u^HMz&)JxUv832-t3ikj2 literal 0 HcmV?d00001 diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 8243a1e..d43c9fa 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -6,7 +6,7 @@ function main () str msg $0; load_address hello -> msg; - call pln msg; + call pln (msg); exit 0; function pln (str message $0) @@ -19,7 +19,7 @@ function pln (str message $0) load_immediate 0 -> mode; load_address terminal_namespace -> term_ns; - syscall OPEN term_ns mode -> term; + syscall OPEN term_ns mode term; string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; diff --git a/test/loop.asm.lisp b/test/loop.asm.lisp deleted file mode 100644 index 7e2b95e..0000000 --- a/test/loop.asm.lisp +++ /dev/null @@ -1,44 +0,0 @@ -((code - (label main - (load-immediate $0 5.0) - (load-immediate $1 5000) - (load-immediate $2 0) - (load-immediate $3 -1) - (load-immediate $5 5.0) - (label loop-body - (add-real $0 $0 $5) - (add-int $1 $1 $3) - (jump-ge-int &loop-body $1 $2)) - (load-immediate $10 &terminal-namespace) - (load-immediate $11 0) - (syscall OPEN $10 $10 $11) ; Terminal term = open(namespace, flags) - - (real-to-nat $1 $0) - (load-immediate $7 &prompt) - (string-length $8 $7) - (syscall WRITE $10 $7 $8) ; print prompt - - (load-immediate $8 32) - (malloc $11 $8) - (syscall READ $10 $11 $8) ; read in max 32 byte string - - (call &pln ($11) nil) - (nat-to-string $4 $1) - (call &pln ($4) nil) - (real-to-string $3 $0) - (call &pln ($3) nil) - (exit 0)) - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (load-immediate $3 &new-line) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) -(data - (label terminal-namespace "/dev/term/0") - (label prompt "Enter a string: ") - (label new-line "\n"))) diff --git a/test/loop.rom b/test/loop.rom new file mode 
100644 index 0000000000000000000000000000000000000000..48f675eb1a74b105b694330d617f6620ec3e002e GIT binary patch literal 257 zcmXv|F$%&!5S+ceBbTI5u&@zHVIejV?CdOT(`hdxh=nO)@D2JVA9qeJWZ`yZXZDr> zfFIvfyNM7j;MhM4K*#}O152aswlr6wa$3_76Bn}Ba}`Qio*npaPMTDSR~@v(+k)tq zK2=zye&>Dn0Qi{YQr6^)Q*B$6LKnL-Mq`b>7L~t UGtTFhLvbr!efMbZN5qi$1H~B_fB*mh literal 0 HcmV?d00001 diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 54dbede..2edad6a 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -20,7 +20,7 @@ function main () load_address terminal_namespace -> in_term; load_immediate 0 -> in_mode; - syscall OPEN in_term in_mode -> in_term; // Terminal term = open("/dev/term/0", 0); + syscall OPEN in_term in_mode in_term; // Terminal term = open("/dev/term/0", 0); nat b $1; real_to_nat a -> b; @@ -33,11 +33,11 @@ function main () malloc $8 -> user_string; syscall READ in_term user_string $8; // read in max 32 byte string - call pln user_string; + call pln (user_string); nat_to_string b -> $4; - call pln $4; + call pln ($4); real_to_string a -> $3; - call pln $3; + call pln ($3); exit 0; function pln (str message $0) @@ -49,7 +49,7 @@ function pln (str message $0) load_address terminal_namespace -> term; load_immediate 0 -> mode; - syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); + syscall OPEN term mode term; // Terminal term = open("/dev/term/0", 0); string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; diff --git a/test/malloc.asm.lisp b/test/malloc.asm.lisp deleted file mode 100644 index 4f76cf6..0000000 --- a/test/malloc.asm.lisp +++ /dev/null @@ -1,26 +0,0 @@ -((code - (label main - (load-immediate $0 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $0 $0 $11) - - (load-immediate $1 &help) ; print help message - (call &pln ($0 $1) nil) - - (load-immediate $1 32) ; read in a string of max 32 char length - (malloc $4 $1) ; allocate memory for the string - (syscall READ $0 $4 
$1) ; read the string - - (call &pln ($0 $4) nil) ; print the string - (exit 0)) - (label pln - (load-immediate $3 &new-line) - (string-length $2 $1) - (syscall WRITE $0 $1 $2) - (string-length $4 $3) - (syscall WRITE $0 $3 $4) - (return nil))) -(data - (label terminal-namespace "/dev/term/0") - (label help "Enter a string: ") - (label new-line "\n"))) diff --git a/test/malloc.rom b/test/malloc.rom new file mode 100644 index 0000000000000000000000000000000000000000..a5fd5992e3528092d03acb9b3b8577a051917b73 GIT binary patch literal 187 zcmXv|I}XAy47Fpoahd@()(lKt!pw?Ta|a|U#6X8qxPI#xYRP^d*-r)l_-Jp{Z$yzR z?`%fG%45fSMPIxSRl^G>ML9?ySB)TeQN*z5{6{nv(ON{?U;$^x2#$}sLxuQf5W6L+ dlkdYf&%^Y}C)laJjw;eEz31sUKF(mjzzWsc4vYW* literal 0 HcmV?d00001 diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 8eb06fd..dca8380 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -8,7 +8,7 @@ function main () load_address terminal_namespace -> in_term; load_immediate 0 -> in_mode; - syscall OPEN in_term in_mode -> in_term; // Terminal term = open("/dev/term/0", 0); + syscall OPEN in_term in_mode in_term; // Terminal term = open("/dev/term/0", 0); load_address prompt -> $7; string_length $7 -> $8; @@ -19,7 +19,7 @@ function main () malloc $8 -> user_string; syscall READ in_term user_string $8; // read in max 32 byte string - call pln user_string; + call pln (user_string); exit 0; function pln (str message $0) @@ -32,10 +32,11 @@ function pln (str message $0) load_immediate 0 -> mode; load_address terminal_namespace -> term_ns; - syscall OPEN term_ns mode -> term; + syscall OPEN term_ns mode term; string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; string_length nl -> nl_length; syscall WRITE term nl nl_length; - return; \ No newline at end of file + return; + \ No newline at end of file diff --git a/test/paint-bw.asm.lisp b/test/paint-bw.asm.lisp deleted file mode 100644 index 81bf808..0000000 --- a/test/paint-bw.asm.lisp +++ /dev/null 
@@ -1,147 +0,0 @@ -((code - (label main - ; Open screen - ; use load immediate because it is a pointer to a string, not a value - (load-immediate $0 &screen-namespace) - (load-immediate $11 0) - (syscall OPEN $0 $18 $11) ; open(out Plex screen, in namespace, in flags) - - (load-offset-32 $20 $0 8) ; load width - (load-offset-32 $22 $0 12) ; load size - (load-immediate $1 16) ; offset for screen buffer - (add-nat $21 $0 $1) - - ; open mouse - (load-immediate $16 &mouse-namespace) - (syscall OPEN $15 $16 $11) ; open(out Plex mouse, in namespace, in flags) - - ; outline_swatch(screen, BLACK, 1, 1); - (load-absolute-32 $1 &BLACK) - (load-immediate $12 1) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - ; outline_swatch(screen, WHITE, 1, 1); - (load-absolute-32 $1 &WHITE) - (load-immediate $12 21) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - ; screen.draw(); - (syscall WRITE $0 $21 $22) - - (label draw-loop - ; load mouse click data - (syscall REFRESH $15) - (load-offset-8 $9 $15 16) ; load btn1 pressed - - (jump-eq-nat &draw-loop $9 $11) - - (load-offset-32 $7 $15 8) ; load x - (load-offset-32 $8 $15 12) ; load y - - (load-immediate $14 20) ; box size - - ; first row - (load-absolute-32 $1 &BLACK) - (load-immediate $12 1) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &WHITE) - (load-immediate $12 21) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (syscall WRITE $0 $21 $22) - - (load-absolute-32 $22 &SELECTED-COLOR) ; color - (load-immediate $1 5) ; size of brush - - (call &draw-box ($21 $20 $22 $7 $8 $1 $1) nil) - - (jump &draw-loop)) - - ; Flush and exit - (exit 0)) - - (label set-color-if-clicked - ; (click_x, click_y, box_x, box_y, color, box_size) - - ; Compute right = box_x + 
box_size - (add-int $6 $2 $5) ; $6 = right edge - - ; Compute bottom = box_y + box_size - (add-int $7 $3 $5) ; $7 = bottom edge - - ; Bounds check: x in [box_x, right] and y in [box_y, bottom] - (jump-lt-int &fail $0 $2) - (jump-gt-int &fail $0 $6) - (jump-lt-int &fail $1 $3) - (jump-gt-int &fail $1 $7) - - (load-immediate $10 &SELECTED-COLOR) - (store-absolute-8 $10 $4) - - (label fail) - (return nil)) - - (label draw-outlined-swatch - ; (base, color, x, y, width) - - ; Constants - (load-absolute-32 $5 &GRAY) - (load-absolute-32 $10 &SELECTED-COLOR) - (jump-eq-int &set-selected $10 $1) - (jump-eq-int &end-set-selected $5 $5) - (label set-selected) - (load-absolute-32 $5 &DARK-GRAY) - (label end-set-selected) - - (load-immediate $6 20) ; outline size - (load-immediate $7 17) ; fill size - (load-immediate $8 2) ; offset - - (call &draw-box ($0 $4 $5 $2 $3 $6 $6) nil) - - (add-int $9 $2 $8) ; x + 2 - (add-int $10 $3 $8) ; y + 2 - - (call &draw-box ($0 $4 $1 $9 $10 $7 $7) nil) - - (return nil)) - - (label draw-box - ; (base, screen_width, color, x_start, y_start, width, height) - - ; Compute start address: base + y*640 + x - (mul-int $15 $4 $1) ; $15 = y * 640 - (add-int $15 $15 $3) ; $15 += x - (add-nat $15 $0 $15) ; $15 = base + pixel_offset - (load-immediate $25 4) - (add-nat $15 $15 $25) ; need to add offset for fat pointer size - - ; Outer loop: height times - (load-immediate $30 1) ; increment - - (label draw-box-outer - (add-int $27 $15 $5) ; $27 = row end = current + width - (register-move $29 $15) ; $7 = pixel pointer - (memset-8 $29 $2 $5) ; draw row - (add-int $15 $15 $1) ; next row (+= 640) - (sub-int $6 $6 $30) ; decrement row count - (jump-gt-int &draw-box-outer $6 0)) - (return nil))) -(data - (label screen-namespace "/dev/screen/0") - (label mouse-namespace "/dev/mouse/0") - (label SELECTED-COLOR 255) - (label BLACK 0) - (label WHITE 255) - (label DARK-GRAY 73) - (label GRAY 146) - (label LIGHT-GRAY 182))) diff --git a/test/paint-bw.rom 
b/test/paint-bw.rom new file mode 100644 index 0000000000000000000000000000000000000000..1f22c521cbf5b225334371b4ac3c992fac5726ba GIT binary patch literal 550 zcmah`NlpVX5OjN=#c{~YL`*c~5Eg|~I7cW*+_)ls0CQuJ62Xy2@C+WnEBFL2G2IP0 zAR+Lfs=Hj%Wsijr;sX%A*YUPM)>Mxe3q_t$A%gH&)IpR;V?9KKnSe%*uo|*!SVX^1 zx~6YR#sF8?mK!lFm1>dIxK#C8Gbz6xv8-ZR(`YI_2}N3R&)4l4+_Tc2UJy6j9TI zwX@hp{_|6QzsasN=zzFqr*`#@fp#KOK screen_name; load_immediate 0 -> mode; - syscall OPEN screen_name mode -> screen; // Screen screen = open("/dev/screen/0", 0); + syscall OPEN screen_name mode screen; // Screen screen = open("/dev/screen/0", 0); nat width $20; nat size $22; @@ -30,26 +30,26 @@ function main () plex mouse $15; str mouse_name $16; load_address mouse_namespace -> mouse_name; - syscall OPEN mouse_name mode -> mouse; // Mouse mouse = open("/dev/mouse/0", 0); + syscall OPEN mouse_name mode mouse; // Mouse mouse = open("/dev/mouse/0", 0); byte color $1; nat x_pos $12; nat y_pos $13; - load_absolute_32 BLACK -> color; + load_absolute_8 BLACK -> color; load_immediate 1 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); - load_absolute_32 WHITE -> color; + load_absolute_8 WHITE -> color; load_immediate 21 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); // screen.draw syscall WRITE screen screen_buffer size; - nat zero $11; + nat m_zero $11; loop draw_loop // load mouse click data @@ -58,7 +58,7 @@ function main () byte left_down $9; load_offset_8 mouse 16 -> left_down; // load btn1 pressed - jump_eq_nat draw_loop left_down zero; + jump_eq_nat draw_loop left_down m_zero; nat mouse_x $7; nat mouse_y $8; @@ -69,28 +69,28 @@ function main () load_immediate 20 -> box_size; // first row - load_absolute_32 BLACK -> color; + load_absolute_8 BLACK -> color; 
load_immediate 1 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); - load_absolute_32 WHITE -> color; + load_absolute_8 WHITE -> color; load_immediate 21 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); syscall WRITE screen screen_buffer size; byte selected_color $25; - load_absolute_32 SELECTED_COLOR -> selected_color; + load_absolute_8 SELECTED_COLOR -> selected_color; nat brush_size $19; load_immediate 5 -> brush_size; - call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void; + call draw_box (screen_buffer width selected_color mouse_x mouse_y brush_size brush_size); jump draw_loop; @@ -98,41 +98,41 @@ function main () exit 0; function set_color_if_clicked (int click_x $0, int click_y $1, - int box_x $2, int box_y $3, byte color $4, int box_size $5) + int box_x $2, int box_y $3, byte check_color $4, int bsize $5) // Compute right int right_edge $6; - add_int box_x box_size -> right_edge; + add_int box_x bsize -> right_edge; - // Compute bottom = box_y + box_size + // Compute bottom = box_y + bsize int bottom_edge $7; - add_int box_y box_size -> bottom_edge; + add_int box_y bsize -> bottom_edge; // Bounds check: x in [box_x, right] and y in [box_y, bottom] jump_lt_int fail click_x box_x; - jump_ge_int fail click_x right_edge; + jump_gt_int fail click_x right_edge; jump_lt_int fail click_y box_y; - jump_ge_int fail click_y bottom_edge; + jump_gt_int fail click_y 
bottom_edge; - store_absolute_8 SELECTED_COLOR color; + store_absolute_8 check_color -> SELECTED_COLOR; else fail return; -function draw_outlined_swatch(nat base $0, - byte color $1, int x $2, int y $3, int width $4) +function draw_outlined_swatch(nat dos_base $0, + byte swatch_color $1, int x $2, int y $3, int dos_width $4) // Constants nat background_color $5; - load_absolute_32 GRAY -> background_color; + load_absolute_8 GRAY -> background_color; - byte selected_color $10; - load_absolute_32 SELECTED_COLOR -> selected_color; + byte dos_selected_color $10; + load_absolute_8 SELECTED_COLOR -> dos_selected_color; - jump_eq_int set_selected selected_color color; + jump_eq_int set_selected swatch_color dos_selected_color; jump end_set_selected; do set_selected - load_absolute_32 DARK_GRAY -> background_color; + load_absolute_8 DARK_GRAY -> background_color; else end_set_selected nat outline_size $6; @@ -141,27 +141,27 @@ function draw_outlined_swatch(nat base $0, nat fill_size $7; load_immediate 17 -> fill_size; - nat offset $8; - load_immediate 2 -> offset; + nat dos_offset $8; + load_immediate 2 -> dos_offset; - call draw_box base width background_color x y outline_size outline_size -> void; + call draw_box (dos_base dos_width background_color x y outline_size outline_size); - add_int x offset -> $9; // x + 2 - add_int y offset -> $10; // y + 2 + add_int x dos_offset -> $9; // x + 2 + add_int y dos_offset -> $10; // y + 2 - call draw_box base width color $9 $10 fill_size fill_size -> void; + call draw_box (dos_base dos_width swatch_color $9 $10 fill_size fill_size); return; -function draw_box (nat base $0, nat screen_width $1, - byte color $2, nat x_start $3, nat y_start $4, - nat width $5, nat height $6) +function draw_box (nat db_base $0, nat screen_width $1, + byte box_color $2, nat x_start $3, nat y_start $4, + nat db_width $5, nat height $6) // Compute start address: base + y*640 + x nat offset $15; mul_int y_start screen_width -> offset; add_int offset 
x_start -> offset; - add_nat offset base -> offset; + add_nat offset db_base -> offset; nat fat_ptr_size $25; load_immediate 4 -> fat_ptr_size; add_nat offset fat_ptr_size -> offset; // need to add offset for fat pointer size @@ -172,13 +172,8 @@ function draw_box (nat base $0, nat screen_width $1, int zero $26; load_immediate 0 -> zero; - int row_end $27; - nat pixel_ptr $29; - loop draw_box_outer - add_int offset width -> row_end; // current + width - register_move offset -> pixel_ptr; // set pixel point - memset_8 pixel_ptr color width; // draw row + memset_8 box_color db_width -> offset; // draw row add_int offset screen_width -> offset; // next row += 640 sub_int height i -> height; // decrement row count jump_gt_int draw_box_outer height zero; diff --git a/test/paint.asm.lisp b/test/paint.asm.lisp deleted file mode 100644 index 2e946d5..0000000 --- a/test/paint.asm.lisp +++ /dev/null @@ -1,261 +0,0 @@ -((code - (label main - ; Open screen - ; use load immediate because it is a pointer to a string, not a value - (load-immediate $0 &screen-namespace) - (load-immediate $11 0) - (syscall OPEN $0 $0 $11) ; Screen screen = open(namespace, flags) - - (load-offset-32 $20 $0 8) ; load width - (load-offset-32 $22 $0 12) ; load size - (load-immediate $1 16) ; pointer offset for screen buffer - (add-nat $21 $0 $1) - - ; open mouse - (load-immediate $16 &mouse-namespace) - (syscall OPEN $15 $16 $11) ; Mouse mouse = open(namespace, flags) - - ; outline_swatch(screen, BLACK, 1, 1); - (load-absolute-32 $1 &BLACK) - (load-immediate $12 1) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - ; outline_swatch(screen, WHITE, 1, 1); - (load-absolute-32 $1 &WHITE) - (load-immediate $12 21) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &CHARCOAL) - (load-immediate $12 1) - (load-immediate $13 21) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &DARK-GRAY) - 
(load-immediate $12 21) - (load-immediate $13 21) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &RED) - (load-immediate $12 1) - (load-immediate $13 41) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &ORANGE) - (load-immediate $12 21) - (load-immediate $13 41) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &YELLOW) - (load-immediate $12 1) - (load-immediate $13 61) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &GREEN) - (load-immediate $12 21) - (load-immediate $13 61) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &BLUE) - (load-immediate $12 1) - (load-immediate $13 81) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - (load-absolute-32 $1 &PURPLE) - (load-immediate $12 21) - (load-immediate $13 81) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - - ; screen.draw(); - (syscall WRITE $0 $21 $22) - - (label draw-loop - ; load mouse click data - (syscall REFRESH $15) - (load-offset-8 $9 $15 16) ; load btn1 pressed - - (jump-eq-nat &draw-loop $9 $11) - - (load-offset-32 $7 $15 8) ; load x - (load-offset-32 $8 $15 12) ; load y - - (load-immediate $14 20) ; box size - - ; outline_swatch(screen, BLACK, 1, 1); - (load-absolute-32 $1 &BLACK) - (load-immediate $12 1) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - ; outline_swatch(screen, WHITE, 1, 1); - (load-absolute-32 $1 &WHITE) - (load-immediate $12 21) - (load-immediate $13 1) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &CHARCOAL) - (load-immediate $12 1) - (load-immediate $13 21) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &DARK-GRAY) - 
(load-immediate $12 21) - (load-immediate $13 21) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &RED) - (load-immediate $12 1) - (load-immediate $13 41) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &ORANGE) - (load-immediate $12 21) - (load-immediate $13 41) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &YELLOW) - (load-immediate $12 1) - (load-immediate $13 61) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &GREEN) - (load-immediate $12 21) - (load-immediate $13 61) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &BLUE) - (load-immediate $12 1) - (load-immediate $13 81) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (load-absolute-32 $1 &PURPLE) - (load-immediate $12 21) - (load-immediate $13 81) - (call &draw-outlined-swatch ($21 $1 $12 $13 $20) nil) - (call &set-color-if-clicked ($7 $8 $12 $13 $1 $14) nil) - - (syscall WRITE $0 $21 $22) - - (load-absolute-32 $22 &SELECTED-COLOR) ; color - (load-immediate $1 5) ; size of brush - - (call &draw-box ($21 $20 $22 $7 $8 $1 $1) nil) - - (jump &draw-loop)) - - ; Flush and exit - (exit 0)) - - (label set-color-if-clicked - ; (click_x, click_y, box_x, box_y, color, box_size) - - ; Compute right = box_x + box_size - (add-int $6 $2 $5) ; $6 = right edge - - ; Compute bottom = box_y + box_size - (add-int $7 $3 $5) ; $7 = bottom edge - - ; Bounds check: x in [box_x, right] and y in [box_y, bottom] - (jump-lt-int &fail $0 $2) - (jump-gt-int &fail $0 $6) - (jump-lt-int &fail $1 $3) - (jump-gt-int &fail 
$1 $7) - - (load-immediate $10 &SELECTED-COLOR) - (store-absolute-8 $10 $4) - - (label fail) - (return nil)) - - (label draw-outlined-swatch - ; (base, color, x, y, width) - - ; Constants - (load-absolute-32 $5 &GRAY) - (load-absolute-32 $10 &SELECTED-COLOR) - (jump-eq-int &set-selected $10 $1) - (jump-eq-int &end-set-selected $5 $5) - (label set-selected) - (load-absolute-32 $5 &DARK-GRAY) - (label end-set-selected) - - (load-immediate $6 20) ; outline size - (load-immediate $7 17) ; fill size - (load-immediate $8 2) ; offset - - (call &draw-box ($0 $4 $5 $2 $3 $6 $6) nil) - - (add-int $9 $2 $8) ; x + 2 - (add-int $10 $3 $8) ; y + 2 - - (call &draw-box ($0 $4 $1 $9 $10 $7 $7) nil) - - (return nil)) - - (label draw-box - ; (base, screen_width, color, x_start, y_start, width, height) - - ; Compute start address: base + y*640 + x - (mul-int $15 $4 $1) ; $15 = y * 640 - (add-int $15 $15 $3) ; $15 += x - (add-nat $15 $0 $15) ; $15 = base + pixel_offset - (load-immediate $25 4) - (add-nat $15 $15 $25) ; need to add offset for fat pointer size - - ; Outer loop: height times - (load-immediate $30 1) ; increment - - (label draw-box-outer - (add-int $27 $15 $5) ; $27 = row end = current + width - (register-move $29 $15) ; $7 = pixel pointer - (memset-8 $29 $2 $5) ; draw row - (add-int $15 $15 $1) ; next row (+= 640) - (sub-int $6 $6 $30) ; decrement row count - (jump-gt-int &draw-box-outer $6 0)) - (return nil))) -(data - (label screen-namespace "/dev/screen/0") - (label mouse-namespace "/dev/mouse/0") - (label SELECTED-COLOR 255) - (label BLACK 0) - (label WHITE 255) - (label CHARCOAL 36) - (label DARK-GRAY 73) - (label GRAY 146) - (label LIGHT-GRAY 182) - (label DARK-RED 128) - (label RED 224) - (label DARK-YELLOW 144) - (label YELLOW 252) - (label DARK-TEAL 9) - (label TEAL 18) - (label DARK-GREEN 12) - (label GREEN 16) - (label LIME 28) - (label LIGHT-CYAN 159) - (label NAVY 2) - (label BLUE 3) - (label DEEP-SKY-BLUE 10) - (label LIGHT-BLUE 19) - (label PURPLE 131) - 
(label LIGHT-PURPLE 147) - (label DARK-MAGENTA 130) - (label MAGENTA 227) - (label PLUM 129) - (label PINK 226) - (label SADDLE-BROWN 72) - (label PERU 141) - (label SIENNA 136) - (label ORANGE 241) - (label DARK-ORANGE 208) - (label GOLD 244))) diff --git a/test/paint.rom b/test/paint.rom new file mode 100644 index 0000000000000000000000000000000000000000..062f0d9051f5c0690b216aef098c8a5a63655c79 GIT binary patch literal 1161 zcmah}yGjHx6wPar%xkmm&WJ1H7V85QVYd*3#TRO!rQe`i`asdzM`I(1b|Ux_V&PZ# z38G)1SPJnbx-emIOmS~=?>QOfoFoV#G{qqNjLmvr8tTcEyrv({i8dX;~I?19nW!#-RT^6s=!^& zaf{uV9Cx9>?c}({?re^`S>Vp)xP>kSVGv3ODTky&WhC`ic0oHojFnMa#LB2GSpu;myM}jQc#O(@GyTYQv%tl1Z2}qvZ zHJE<@JPx`~kfbrkvnPeK0hkL$Aj+R_BIXgwxz336Gb)3A75jk@n&>9ODpORo6jIDw z{6 screen_name; load_immediate 0 -> mode; - syscall OPEN screen_name mode -> screen; // Screen screen = open("/dev/screen/0", 0); + syscall OPEN screen_name mode screen; // Screen screen = open("/dev/screen/0", 0); nat width $20; nat size $22; @@ -30,26 +57,66 @@ function main () plex mouse $15; str mouse_name $16; load_address mouse_namespace -> mouse_name; - syscall OPEN mouse_name mode -> mouse; // Mouse mouse = open("/dev/mouse/0", 0); + syscall OPEN mouse_name mode mouse; // Mouse mouse = open("/dev/mouse/0", 0); byte color $1; nat x_pos $12; nat y_pos $13; - load_absolute_32 BLACK -> color; + load_absolute_8 BLACK -> color; load_immediate 1 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); - load_absolute_32 WHITE -> color; + load_absolute_8 WHITE -> color; load_immediate 21 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 CHARCOAL -> color; + load_immediate 1 -> x_pos; + load_immediate 21 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos 
width); + + load_absolute_8 DARK_GRAY -> color; + load_immediate 21 -> x_pos; + load_immediate 21 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 RED -> color; + load_immediate 1 -> x_pos; + load_immediate 41 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 ORANGE -> color; + load_immediate 21 -> x_pos; + load_immediate 41 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 YELLOW -> color; + load_immediate 1 -> x_pos; + load_immediate 61 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 GREEN -> color; + load_immediate 21 -> x_pos; + load_immediate 61 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 BLUE -> color; + load_immediate 1 -> x_pos; + load_immediate 81 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + + load_absolute_8 PURPLE -> color; + load_immediate 21 -> x_pos; + load_immediate 81 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); // screen.draw syscall WRITE screen screen_buffer size; - nat zero $11; + nat m_zero $11; loop draw_loop // load mouse click data @@ -58,7 +125,7 @@ function main () byte left_down $9; load_offset_8 mouse 16 -> left_down; // load btn1 pressed - jump_eq_nat draw_loop left_down zero; + jump_eq_nat draw_loop left_down m_zero; nat mouse_x $7; nat mouse_y $8; @@ -69,28 +136,75 @@ function main () load_immediate 20 -> box_size; // first row - load_absolute_32 BLACK -> color; + load_absolute_8 BLACK -> color; load_immediate 1 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y 
x_pos y_pos color box_size); - - load_absolute_32 WHITE -> color; + load_absolute_8 WHITE -> color; load_immediate 21 -> x_pos; load_immediate 1 -> y_pos; - call draw_outlined_swatch screen_buffer color x_pos y_pos width -> void; - call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size -> void; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 CHARCOAL -> color; + load_immediate 1 -> x_pos; + load_immediate 21 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 DARK_GRAY -> color; + load_immediate 21 -> x_pos; + load_immediate 21 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 RED -> color; + load_immediate 1 -> x_pos; + load_immediate 41 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 ORANGE -> color; + load_immediate 21 -> x_pos; + load_immediate 41 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 YELLOW -> color; + load_immediate 1 -> x_pos; + load_immediate 61 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 GREEN -> color; + load_immediate 21 -> x_pos; + load_immediate 61 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 BLUE -> color; + load_immediate 1 -> x_pos; + load_immediate 81 -> y_pos; + call 
draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); + + load_absolute_8 PURPLE -> color; + load_immediate 21 -> x_pos; + load_immediate 81 -> y_pos; + call draw_outlined_swatch (screen_buffer color x_pos y_pos width); + call set_color_if_clicked (mouse_x mouse_y x_pos y_pos color box_size); syscall WRITE screen screen_buffer size; byte selected_color $25; - load_absolute_32 SELECTED_COLOR -> selected_color; + load_absolute_8 SELECTED_COLOR -> selected_color; nat brush_size $19; load_immediate 5 -> brush_size; - call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size -> void; + call draw_box (screen_buffer width selected_color mouse_x mouse_y brush_size brush_size); jump draw_loop; @@ -98,41 +212,41 @@ function main () exit 0; function set_color_if_clicked (int click_x $0, int click_y $1, - int box_x $2, int box_y $3, byte color $4, int box_size $5) + int box_x $2, int box_y $3, byte check_color $4, int bsize $5) // Compute right int right_edge $6; - add_int box_x box_size -> right_edge; + add_int box_x bsize -> right_edge; - // Compute bottom = box_y + box_size + // Compute bottom = box_y + bsize int bottom_edge $7; - add_int box_y box_size -> bottom_edge; + add_int box_y bsize -> bottom_edge; // Bounds check: x in [box_x, right] and y in [box_y, bottom] jump_lt_int fail click_x box_x; - jump_ge_int fail click_x right_edge; + jump_gt_int fail click_x right_edge; jump_lt_int fail click_y box_y; - jump_ge_int fail click_y bottom_edge; + jump_gt_int fail click_y bottom_edge; - store_absolute_8 color -> SELECTED_COLOR; + store_absolute_8 check_color -> SELECTED_COLOR; else fail return; -function draw_outlined_swatch(nat base $0, - byte color $1, int x $2, int y $3, int width $4) +function draw_outlined_swatch(nat dos_base $0, + byte swatch_color $1, int x $2, int y $3, int dos_width $4) // Constants nat background_color $5; - load_absolute_32 GRAY -> 
background_color; + load_absolute_8 GRAY -> background_color; - byte selected_color $10; - load_absolute_32 SELECTED_COLOR -> selected_color; + byte dos_selected_color $10; + load_absolute_8 SELECTED_COLOR -> dos_selected_color; - jump_eq_int set_selected selected_color color; + jump_eq_int set_selected swatch_color dos_selected_color; jump end_set_selected; do set_selected - load_absolute_32 DARK_GRAY -> background_color; + load_absolute_8 DARK_GRAY -> background_color; else end_set_selected nat outline_size $6; @@ -141,27 +255,27 @@ function draw_outlined_swatch(nat base $0, nat fill_size $7; load_immediate 17 -> fill_size; - nat offset $8; - load_immediate 2 -> offset; + nat dos_offset $8; + load_immediate 2 -> dos_offset; - call draw_box base width background_color x y outline_size outline_size -> void; + call draw_box (dos_base dos_width background_color x y outline_size outline_size); - add_int x offset -> $9; // x + 2 - add_int y offset -> $10; // y + 2 + add_int x dos_offset -> $9; // x + 2 + add_int y dos_offset -> $10; // y + 2 - call draw_box base width color $9 $10 fill_size fill_size -> void; + call draw_box (dos_base dos_width swatch_color $9 $10 fill_size fill_size); return; -function draw_box (nat base $0, nat screen_width $1, - byte color $2, nat x_start $3, nat y_start $4, - nat width $5, nat height $6) +function draw_box (nat db_base $0, nat screen_width $1, + byte box_color $2, nat x_start $3, nat y_start $4, + nat db_width $5, nat height $6) // Compute start address: base + y*640 + x nat offset $15; mul_int y_start screen_width -> offset; add_int offset x_start -> offset; - add_nat offset base -> offset; + add_nat offset db_base -> offset; nat fat_ptr_size $25; load_immediate 4 -> fat_ptr_size; add_nat offset fat_ptr_size -> offset; // need to add offset for fat pointer size @@ -172,13 +286,8 @@ function draw_box (nat base $0, nat screen_width $1, int zero $26; load_immediate 0 -> zero; - int row_end $27; - nat pixel_ptr $29; - loop 
draw_box_outer - add_int offset width -> row_end; // current + width - register_move offset -> pixel_ptr; // set pixel point - memset_8 pixel_ptr color width; // draw row + memset_8 box_color db_width -> offset; // draw row add_int offset screen_width -> offset; // next row += 640 sub_int height i -> height; // decrement row count jump_gt_int draw_box_outer height zero; diff --git a/test/simple.asm.lisp b/test/simple.asm.lisp deleted file mode 100644 index 1cd4d01..0000000 --- a/test/simple.asm.lisp +++ /dev/null @@ -1,22 +0,0 @@ -((code - (label main - (load-absolute-32 $0 &x) - (load-absolute-32 $1 &y) - (add-real $2 $1 $0) - (real-to-string $3 $2) - (call &pln ($3) nil) - (exit 0)) - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (load-immediate $3 &new-line) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) -(data (label terminal-namespace "/dev/term/0") - (label new-line "\n") - (label x 1.0) - (label y 2.0))) diff --git a/test/simple.rom b/test/simple.rom new file mode 100644 index 0000000000000000000000000000000000000000..9ea8e30f3987b053c87f58ec31024966ea6a3e46 GIT binary patch literal 132 zcmXwxp$>pR3`5^;cL*>jG!l1w2?D_vfW#2O?;i!ilC@pi3}Dv&)BQpm!Cnm5Ai3!7 rZ literal 0 HcmV?d00001 diff --git a/test/simple.ul.ir b/test/simple.ul.ir index 7c90ce9..3c04c23 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -3,10 +3,10 @@ global str new_line = "\n"; function main () load_immediate 1.0 -> $0; - load_immediate 1.0 -> $1; + load_immediate 2.0 -> $1; add_real $0 $1 -> $0; real_to_string $0 -> $0; - call pln $0; + call pln ($0); exit 0; function pln (str message $0) @@ -19,7 +19,7 @@ function pln (str message $0) load_immediate 0 -> mode; load_address terminal_namespace -> term_ns; - syscall OPEN term_ns mode -> term; + syscall OPEN term_ns mode term; string_length message -> msg_length; syscall WRITE term 
message msg_length; load_address new_line -> nl; diff --git a/test/window.asm.lisp b/test/window.asm.lisp deleted file mode 100644 index b697ebe..0000000 --- a/test/window.asm.lisp +++ /dev/null @@ -1,71 +0,0 @@ -((code - (label main - ; Open screen - ; use load immediate because it is a pointer to a string, not a value - (load-immediate $18 &screen-namespace) - (syscall OPEN $0 $18 $11) ; open(out Plex screen, in namespace, in flags) - - (nat-to-string $5 $0) - (call &pln ($5) nil) - - (load-offset-32 $20 $0 8) ; load width - - (nat-to-string $5 $20) - (call &pln ($5) nil) - - (load-offset-32 $22 $0 12) ; load size - - (nat-to-string $5 $22) - (call &pln ($5) nil) - - (load-immediate $1 16) ; offset for screen buffer - (add-nat $21 $0 $1) - - (nat-to-string $5 $21) - (call &pln ($5) nil) - - ; open mouse - (load-immediate $16 &mouse-namespace) - (syscall OPEN $15 $16 $11) ; open(out Plex mouse, in namespace, in flags) - - (syscall WRITE $0 $21 $22) ; redraw - - (label draw-loop - ; load mouse click data - (syscall REFRESH $15) - (load-offset-8 $9 $15 16) ; load btn1 pressed - - (jump-eq-nat &draw-loop $9 $11) - - (load-offset-32 $7 $15 8) ; load x - (load-offset-32 $8 $15 12) ; load y - - ; Compute start address: y*width + x - (mul-nat $30 $8 $20) ; $15 = y * width - (add-nat $30 $30 $7) ; $15 += x - (add-nat $30 $30 $21) ; $15 += pixel_offset - (load-immediate $1 4) ; need to add offset for fat pointer size - (add-nat $30 $30 $1) - - (load-absolute-32 $3 &WHITE) ; color - (store-absolute-8 $30 $3) ; draw color at screen [x,y] - (syscall WRITE $0 $21 $22) ; redraw - - (jump &draw-loop)) - (exit 0)) - (label pln - (load-immediate $1 &terminal-namespace) ; get terminal device - (load-immediate $11 0) - (syscall OPEN $1 $1 $11) - (load-immediate $3 &new-line) - (string-length $2 $0) - (syscall WRITE $1 $0 $2) - (string-length $4 $3) - (syscall WRITE $1 $3 $4) - (return nil))) -(data - (label screen-namespace "/dev/screen/0") - (label mouse-namespace "/dev/mouse/0") - 
(label terminal-namespace "/dev/term/0") - (label new-line "\n") - (label WHITE 255))) diff --git a/test/window.rom b/test/window.rom new file mode 100644 index 0000000000000000000000000000000000000000..002b86542fbc13977ac473d9d95d6c72df310d58 GIT binary patch literal 332 zcmYk2Jr2S!42A7FN!$DRU6k(glQ0Nr{5$k-yazzI1MYq3)mh?L~{i~XDg0N~AY z8K*_?u98lxCX_%!3mLvxVKxOQSd?w~M?4ePx-U^{2BrdITl<3>F-VT6k%%L(j*L); zQlZYhtU92y$ODRRu`5bn<=$7;dnMNjl@Hz?S<#9lW$`*mc^dQW)s_R3IvR})WGN9L sQwKhvhSUs5$+H0JYkbz@Wr(q__aM%RbhpPC8Mp=9 screen; + load_address screen_namespace -> screen; load_immediate 0 -> mode; - syscall OPEN screen mode -> screen; + syscall OPEN screen mode screen; nat_to_string screen -> tmp_str; - call pln tmp_str -> void; + call pln (tmp_str); load_offset_32 screen 8 -> width; nat_to_string width -> tmp_str; - call pln tmp_str -> void; + call pln (tmp_str); load_offset_32 screen 12 -> buffer_size; nat_to_string buffer_size -> tmp_str; - call pln tmp_str -> void; + call pln (tmp_str); load_immediate 16 -> offset_temp; add_nat screen offset_temp -> screen_buffer; nat_to_string screen_buffer -> tmp_str; - call pln tmp_str -> void; + call pln (tmp_str); // open mouse - load_immediate mouse_namespace -> mouse; - syscall OPEN mouse mode -> mouse; + load_address mouse_namespace -> mouse; + syscall OPEN mouse mode mouse; syscall WRITE screen screen_buffer buffer_size; // redraw loop draw_loop // load mouse click data - syscall STAT mouse; + syscall REFRESH mouse; load_offset_8 mouse 16 -> left_down; @@ -64,11 +65,12 @@ function main () load_immediate 4 -> fat_ptr_size; add_nat pixel_pos fat_ptr_size -> pixel_pos; - load_absolute_32 white -> color; - store_absolute_8 pixel_pos color; // draw color at screen [x,y] + load_absolute_32 WHITE -> color; + store_absolute_8 color -> pixel_pos; // draw color at screen [x,y] + syscall WRITE screen screen_buffer buffer_size; // redraw - jump draw_loop; + jump draw_loop; exit 0; function pln (str message $0) @@ -76,15 +78,15 @@ function pln 
(str message $0) int msg_length $2; str nl $3; int nl_length $4; - int mode $5; + int pln_mode $5; str term_ns $6; - load_immediate 0 -> mode; + load_immediate 0 -> pln_mode; load_address terminal_namespace -> term_ns; - syscall OPEN term_ns mode -> term; + syscall OPEN term_ns pln_mode term; string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; string_length nl -> nl_length; syscall WRITE term nl nl_length; - return; \ No newline at end of file + return; From edb10db545f9dd171e9e9b4879978c4645362130 Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 7 Dec 2025 15:31:41 -0800 Subject: [PATCH 23/27] Fix documentation --- README.org | 70 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/README.org b/README.org index 76d5fff..94a4737 100644 --- a/README.org +++ b/README.org @@ -55,32 +55,33 @@ You can view some examples in the =.ul.ir= files in =/test= **Sample Program: =hello.ul.ir=** #+BEGIN_SRC sh -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global str message = "nuqneH 'u'?" 
+global str terminal_namespace = "/dev/term/0"; +global str new_line = "\n"; +global str hello = "nuqneH 'u'?"; function main () - str hello $0; + str msg $0; - load_immediate message -> hello; - call pln hello -> void; + load_address hello -> msg; + call pln (msg); exit 0; function pln (str message $0) - str ts $1; - int mode $5; + plex term $1; int msg_length $2; str nl $3; int nl_length $4; + int mode $5; + str term_ns $6; - load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; - strlen message -> msg_length; - syscall WRITE ts message msg_length; - load_immediate new_line -> nl; - strlen nl -> nl_length; - syscall WRITE ts nl nl_length; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode term; + string_length message -> msg_length; + syscall WRITE term message msg_length; + load_address new_line -> nl; + string_length nl -> nl_length; + syscall WRITE term nl nl_length; return; #+END_SRC @@ -102,42 +103,43 @@ global str prompt = "Enter a string:"; global str new_line = "\n"; function main () - int mode $11; - str term $10; + int in_mode $11; + str in_term $10; - load_immediate terminal_namespace -> term; - load_immediate 0 -> mode; - syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> in_term; + load_immediate 0 -> in_mode; + syscall OPEN in_term in_mode in_term; // Terminal term = open("/dev/term/0", 0); - load_immediate prompt -> $7; + load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE term $7 $8; // print prompt + syscall WRITE in_term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ term user_string $8; // read in max 32 byte string + syscall READ in_term user_string $8; // read in max 32 byte string - call pln user_string -> void; + call pln (user_string); exit 0; function pln (str message $0) - str ts $1; - int mode $5; + plex term $1; int msg_length $2; str 
nl $3; int nl_length $4; + int mode $5; + str term_ns $6; - load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; - syscall OPEN ts mode -> ts; - strlen message -> msg_length; - syscall WRITE ts message msg_length; - load_immediate new_line -> nl; - strlen nl -> nl_length; - syscall WRITE ts nl nl_length; + load_address terminal_namespace -> term_ns; + syscall OPEN term_ns mode term; + string_length message -> msg_length; + syscall WRITE term message msg_length; + load_address new_line -> nl; + string_length nl -> nl_length; + syscall WRITE term nl nl_length; return; - + #+END_SRC values passed to functions must be explicitly returned to propagate. heap values are copy on write, so if a value is modified in a child function it will change the parents value, unless the size of the structure changes then it will copy the parents value and append it to its own frame with the modification. this allows for the low resource usage of a C but the convenience of a Java/Go without the garbage collection. 
From 80d9c67b9ee739f62b246d52ab9ff3c8b688fc2c Mon Sep 17 00:00:00 2001 From: zongor Date: Mon, 8 Dec 2025 21:42:59 -0800 Subject: [PATCH 24/27] Code cleanup --- src/arch/emscripten/main.c | 13 +- src/tools/assembler/assembler.c | 534 +++++++++++------------ src/tools/compiler/compiler.c | 113 +---- src/tools/compiler/compiler.h | 18 +- src/tools/compiler/{lexer.c => parser.c} | 0 src/tools/compiler/{lexer.h => parser.h} | 0 src/vm/fixed.c | 128 +----- src/vm/fixed.h | 35 +- test/paint.ul | 25 +- 9 files changed, 321 insertions(+), 545 deletions(-) rename src/tools/compiler/{lexer.c => parser.c} (100%) rename src/tools/compiler/{lexer.h => parser.h} (100%) diff --git a/src/arch/emscripten/main.c b/src/arch/emscripten/main.c index 1a98d0e..8067a09 100644 --- a/src/arch/emscripten/main.c +++ b/src/arch/emscripten/main.c @@ -66,7 +66,6 @@ void mainloop() { mouse_data.btn4 = 0; break; - // Touch events (map to mouse_data as left-click equivalent) case SDL_FINGERMOTION: case SDL_FINGERDOWN: case SDL_FINGERUP: { @@ -77,8 +76,6 @@ void mainloop() { mouse_data.x = (int)x; mouse_data.y = (int)y; - // Only treat the first finger as mouse input (ignore multi-touch beyond 1 - // finger) if (event.tfinger.fingerId == 0) { if (event.type == SDL_FINGERDOWN || event.type == SDL_FINGERMOTION) { mouse_data.btn1 = 1; @@ -91,9 +88,8 @@ void mainloop() { } } - // Run VM for a fixed number of cycles or a time slice int cycles_this_frame = 0; - int max_cycles_per_frame = 2000; // Adjust this value + int max_cycles_per_frame = 2000; while (cycles_this_frame < max_cycles_per_frame) { if (!step_vm(&vm)) { emscripten_cancel_main_loop(); @@ -102,13 +98,12 @@ void mainloop() { cycles_this_frame++; } - // Render only if the screen buffer was updated AND at a reasonable rate if (screen_data.update) { if (screen_data.renderer && screen_data.texture) { SDL_RenderCopy(screen_data.renderer, screen_data.texture, NULL, NULL); SDL_RenderPresent(screen_data.renderer); } - screen_data.update = false; // 
Reset flag after rendering + screen_data.update = false; } } @@ -119,7 +114,6 @@ bool loadVM(const char *filename, VM *vm) { return false; } - // Read VM state if (fread(&vm->pc, sizeof(u32), 1, file) != 1 || fread(&vm->cp, sizeof(u32), 1, file) != 1 || fread(&vm->fp, sizeof(u32), 1, file) != 1 || @@ -132,14 +126,12 @@ bool loadVM(const char *filename, VM *vm) { return false; } - // Read code section if (fread(vm->code, 1, vm->cp, file) != vm->cp) { printf("Failed to read code section\n"); fclose(file); return false; } - // Read memory section if (fread(vm->memory, 1, vm->mp, file) != vm->mp) { printf("Failed to read memory section\n"); fclose(file); @@ -187,7 +179,6 @@ int main(int argc, char **argv) { vm_register_device(&vm, "/dev/term/0", "terminal", &console_data, &console_device_ops, 4); - // Set up main loop emscripten_set_main_loop(mainloop, 0, 1); return 0; } \ No newline at end of file diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index d69c183..00ed047 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -11,127 +11,127 @@ #include #include +/* FIXME: technically this is not allowed in C89 find another way */ const char *opcode_to_string(Opcode op) { - static const char *names[] = { - [OP_EXIT] = "exit", - [OP_JMP] = "jump", - [OP_JMPF] = "jump_if_flag", - [OP_CALL] = "call", - [OP_RETURN] = "return", + static const char *names[] = {[OP_EXIT] = "exit", + [OP_JMP] = "jump", + [OP_JMPF] = "jump_if_flag", + [OP_CALL] = "call", + [OP_RETURN] = "return", - [OP_LOAD_IMM] = "load_immediate", + [OP_LOAD_IMM] = "load_immediate", - /* Register_indirect loads */ - [OP_LOAD_IND_8] = "load_indirect_8", - [OP_LOAD_IND_16] = "load_indirect_16", - [OP_LOAD_IND_32] = "load_indirect_32", + /* Register_indirect loads */ + [OP_LOAD_IND_8] = "load_indirect_8", + [OP_LOAD_IND_16] = "load_indirect_16", + [OP_LOAD_IND_32] = "load_indirect_32", - /* Absolute address loads */ - [OP_LOAD_ABS_8] = "load_absolute_8", - 
[OP_LOAD_ABS_16] = "load_absolute_16", - [OP_LOAD_ABS_32] = "load_absolute_32", + /* Absolute address loads */ + [OP_LOAD_ABS_8] = "load_absolute_8", + [OP_LOAD_ABS_16] = "load_absolute_16", + [OP_LOAD_ABS_32] = "load_absolute_32", - /* Base+offset loads */ - [OP_LOAD_OFF_8] = "load_offset_8", - [OP_LOAD_OFF_16] = "load_offset_16", - [OP_LOAD_OFF_32] = "load_offset_32", + /* Base+offset loads */ + [OP_LOAD_OFF_8] = "load_offset_8", + [OP_LOAD_OFF_16] = "load_offset_16", + [OP_LOAD_OFF_32] = "load_offset_32", - /* Absolute address stores */ - [OP_STORE_ABS_8] = "store_absolute_8", - [OP_STORE_ABS_16] = "store_absolute_16", - [OP_STORE_ABS_32] = "store_absolute_32", + /* Absolute address stores */ + [OP_STORE_ABS_8] = "store_absolute_8", + [OP_STORE_ABS_16] = "store_absolute_16", + [OP_STORE_ABS_32] = "store_absolute_32", - /* Register_indirect stores */ - [OP_STORE_IND_8] = "store_indirect_8", - [OP_STORE_IND_16] = "store_indirect_16", - [OP_STORE_IND_32] = "store_indirect_32", + /* Register_indirect stores */ + [OP_STORE_IND_8] = "store_indirect_8", + [OP_STORE_IND_16] = "store_indirect_16", + [OP_STORE_IND_32] = "store_indirect_32", - /* Base+offset stores */ - [OP_STORE_OFF_8] = "store_offset_8", - [OP_STORE_OFF_16] = "store_offset_16", - [OP_STORE_OFF_32] = "store_offset_32", + /* Base+offset stores */ + [OP_STORE_OFF_8] = "store_offset_8", + [OP_STORE_OFF_16] = "store_offset_16", + [OP_STORE_OFF_32] = "store_offset_32", - /* Memory operations */ - [OP_MALLOC] = "malloc", - [OP_MEMSET_8] = "memset_8", - [OP_MEMSET_16] = "memset_16", - [OP_MEMSET_32] = "memset_32", + /* Memory operations */ + [OP_MALLOC] = "malloc", + [OP_MEMSET_8] = "memset_8", + [OP_MEMSET_16] = "memset_16", + [OP_MEMSET_32] = "memset_32", - /* Register operations */ - [OP_REG_MOV] = "register_move", - [OP_SYSCALL] = "syscall", + /* Register operations */ + [OP_REG_MOV] = "register_move", + [OP_SYSCALL] = "syscall", - /* Bit operations */ - [OP_BIT_SHIFT_LEFT] = "bit_shift_left", - 
[OP_BIT_SHIFT_RIGHT] = "bit_shift_right", - [OP_BIT_SHIFT_R_EXT] = "bit_shift_re", - [OP_BAND] = "bit_and", - [OP_BOR] = "bit_or", - [OP_BXOR] = "bit_xor", + /* Bit operations */ + [OP_BIT_SHIFT_LEFT] = "bit_shift_left", + [OP_BIT_SHIFT_RIGHT] = "bit_shift_right", + [OP_BIT_SHIFT_R_EXT] = "bit_shift_re", + [OP_BAND] = "bit_and", + [OP_BOR] = "bit_or", + [OP_BXOR] = "bit_xor", - /* Integer arithmetic */ - [OP_ADD_INT] = "add_int", - [OP_SUB_INT] = "sub_int", - [OP_MUL_INT] = "mul_int", - [OP_DIV_INT] = "div_int", + /* Integer arithmetic */ + [OP_ADD_INT] = "add_int", + [OP_SUB_INT] = "sub_int", + [OP_MUL_INT] = "mul_int", + [OP_DIV_INT] = "div_int", - /* Natural number arithmetic */ - [OP_ADD_NAT] = "add_nat", - [OP_SUB_NAT] = "sub_nat", - [OP_MUL_NAT] = "mul_nat", - [OP_DIV_NAT] = "div_nat", + /* Natural number arithmetic */ + [OP_ADD_NAT] = "add_nat", + [OP_SUB_NAT] = "sub_nat", + [OP_MUL_NAT] = "mul_nat", + [OP_DIV_NAT] = "div_nat", - /* Floating point operations */ - [OP_ADD_REAL] = "add_real", - [OP_SUB_REAL] = "sub_real", - [OP_MUL_REAL] = "mul_real", - [OP_DIV_REAL] = "div_real", + /* Floating point operations */ + [OP_ADD_REAL] = "add_real", + [OP_SUB_REAL] = "sub_real", + [OP_MUL_REAL] = "mul_real", + [OP_DIV_REAL] = "div_real", - /* Type conversions */ - [OP_INT_TO_REAL] = "int_to_real", - [OP_NAT_TO_REAL] = "nat_to_real", - [OP_REAL_TO_INT] = "real_to_int", - [OP_REAL_TO_NAT] = "real_to_nat", + /* Type conversions */ + [OP_INT_TO_REAL] = "int_to_real", + [OP_NAT_TO_REAL] = "nat_to_real", + [OP_REAL_TO_INT] = "real_to_int", + [OP_REAL_TO_NAT] = "real_to_nat", - /* Integer comparisons */ - [OP_JEQ_INT] = "jump_eq_int", - [OP_JNEQ_INT] = "jump_neq_int", - [OP_JGT_INT] = "jump_gt_int", - [OP_JLT_INT] = "jump_lt_int", - [OP_JLE_INT] = "jump_le_int", - [OP_JGE_INT] = "jump_ge_int", + /* Integer comparisons */ + [OP_JEQ_INT] = "jump_eq_int", + [OP_JNEQ_INT] = "jump_neq_int", + [OP_JGT_INT] = "jump_gt_int", + [OP_JLT_INT] = "jump_lt_int", + [OP_JLE_INT] = 
"jump_le_int", + [OP_JGE_INT] = "jump_ge_int", - /* Natural number comparisons */ - [OP_JEQ_NAT] = "jump_eq_nat", - [OP_JNEQ_NAT] = "jump_neq_nat", - [OP_JGT_NAT] = "jump_gt_nat", - [OP_JLT_NAT] = "jump_lt_nat", - [OP_JLE_NAT] = "jump_le_nat", - [OP_JGE_NAT] = "jump_ge_nat", + /* Natural number comparisons */ + [OP_JEQ_NAT] = "jump_eq_nat", + [OP_JNEQ_NAT] = "jump_neq_nat", + [OP_JGT_NAT] = "jump_gt_nat", + [OP_JLT_NAT] = "jump_lt_nat", + [OP_JLE_NAT] = "jump_le_nat", + [OP_JGE_NAT] = "jump_ge_nat", - /* Floating point comparisons */ - [OP_JEQ_REAL] = "jump_eq_real", - [OP_JNEQ_REAL] = "jump_neq_real", - [OP_JGE_REAL] = "jump_ge_real", - [OP_JGT_REAL] = "jump_gt_real", - [OP_JLT_REAL] = "jump_lt_real", - [OP_JLE_REAL] = "jump_le_real", + /* Floating point comparisons */ + [OP_JEQ_REAL] = "jump_eq_real", + [OP_JNEQ_REAL] = "jump_neq_real", + [OP_JGE_REAL] = "jump_ge_real", + [OP_JGT_REAL] = "jump_gt_real", + [OP_JLT_REAL] = "jump_lt_real", + [OP_JLE_REAL] = "jump_le_real", - /* String operations */ - [OP_STRLEN] = "string_length", - [OP_STREQ] = "string_eq", - [OP_STRCAT] = "string_concat", - [OP_STR_GET_CHAR] = "string_get_char", - [OP_STR_FIND_CHAR] = "string_find_char", - [OP_STR_SLICE] = "string_slice", + /* String operations */ + [OP_STRLEN] = "string_length", + [OP_STREQ] = "string_eq", + [OP_STRCAT] = "string_concat", + [OP_STR_GET_CHAR] = "string_get_char", + [OP_STR_FIND_CHAR] = "string_find_char", + [OP_STR_SLICE] = "string_slice", - /* String conversions */ - [OP_INT_TO_STRING] = "int_to_string", - [OP_NAT_TO_STRING] = "nat_to_string", - [OP_REAL_TO_STRING] = "real_to_string", - [OP_STRING_TO_INT] = "string_to_int", - [OP_STRING_TO_NAT] = "string_to_nat", - [OP_STRING_TO_REAL] = "string_to_real"}; + /* String conversions */ + [OP_INT_TO_STRING] = "int_to_string", + [OP_NAT_TO_STRING] = "nat_to_string", + [OP_REAL_TO_STRING] = "real_to_string", + [OP_STRING_TO_INT] = "string_to_int", + [OP_STRING_TO_NAT] = "string_to_nat", + [OP_STRING_TO_REAL] = 
"string_to_real"}; if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { return ""; @@ -141,7 +141,6 @@ const char *opcode_to_string(Opcode op) { return name ? name : ""; } - void emit_op(VM *vm, u8 byte) { #ifdef DEBUG_PRINT printf("code[%d] = %s\n", vm->cp, opcode_to_string(byte)); @@ -152,7 +151,7 @@ void emit_op(VM *vm, u8 byte) { void emit_byte(VM *vm, u8 byte) { #ifdef DEBUG_PRINT printf("code[%d] = %d\n", vm->cp, byte); -#endif +#endif vm->code[vm->cp] = byte; } @@ -186,9 +185,11 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { } if (!resize_or_check_size(table)) { - fprintf(stderr, - "Error: Symbol table is out of memory! This is likely because you built this in static mode." - "if you built using malloc, that means your computer is out of memory. Close a few tabs in your web browser and try again." + fprintf(stderr, + "Error: Symbol table is out of memory! This is likely because you " + "built the assembler in static mode, increase the static size." + "if you built using malloc, that means your computer is out of " + "memory. Close a few tabs in your web browser and try again." 
"Count was %d, while capacity was %d\n", table->count, table->capacity); exit(1); @@ -212,7 +213,8 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { u32 get_ref(SymbolTable *st, const char *name, u32 length) { Symbol *sym = symbol_table_lookup(st, name, length); if (!sym) { - fprintf(stderr, "Error: Undefined Symbol '%.*s'\n", length, name); + fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n", + length, name); exit(1); return 0; } @@ -232,15 +234,15 @@ u32 get_ptr(Token token, SymbolTable *st) { char *endptr; u32 out = (u32)strtoul(token.start, &endptr, 10); if (endptr == token.start || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: '%.*s'\n", token.length, - token.start); + fprintf(stderr, "Invalid decimal literal at line %d: %.*s\n", token.line, + token.length, token.start); exit(1); } return out; } - fprintf(stderr, "Error: Not a pointer or symbol '%.*s'\n", token.length, - token.start); + fprintf(stderr, "Error: Not a pointer or symbol at line %d: %.*s\n", + token.line, token.length, token.start); exit(1); } @@ -254,8 +256,8 @@ u32 get_reg(Token token, SymbolTable *st) { return atoi(token.start); } - fprintf(stderr, "Error: Not a register or symbol '%.*s'\n", token.length, - token.start); + fprintf(stderr, "Error: Not a register or symbol at line %d: %.*s\n", + token.line, token.length, token.start); exit(1); } @@ -439,16 +441,16 @@ bool define_global(VM *vm, SymbolTable *st) { case '\\': case '"': case '\'': - break; // Keep as-is + break; default: - i--; // Rewind for unknown escapes + i--; /* Rewind for unknown escapes */ } } write_u8(vm, memory, addr + 4 + len, c); len++; } - u32 size = len + 5; // 4 (len) + dst_len + 1 (null) + u32 size = len + 5; /* 4 (len) + dst_len + 1 (null) */ s.size = size; vm->mp += size; @@ -610,9 +612,8 @@ void define_branch(VM *vm, SymbolTable *st) { int get_instruction_byte_size(const char *opname) { - // Return (1 + 1) if (strcmp(opname, "return") == 0) { - return 2; // 1 byte 
opcode + 1 byte return register + return 2; } if (strcmp(opname, "neg_int") == 0 || strcmp(opname, "abs_int") == 0 || @@ -632,14 +633,13 @@ int get_instruction_byte_size(const char *opname) { strcmp(opname, "store_indirect_32") == 0 || strcmp(opname, "real_to_nat") == 0 || strcmp(opname, "nat_to_int") == 0 || strcmp(opname, "int_to_nat") == 0 || - strcmp(opname, "string_length") == 0 || - strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || - strcmp(opname, "memset_8") == 0 || strcmp(opname, "memset_16") == 0 || + strcmp(opname, "string_length") == 0 || strcmp(opname, "memset") == 0 || + strcmp(opname, "memset") == 0 || strcmp(opname, "memset_8") == 0 || + strcmp(opname, "memset_16") == 0 || strcmp(opname, "register_move") == 0 || strcmp(opname, "malloc") == 0) { return 3; } - // Register_register_register opcodes (4 bytes: 1 + 3) if (strcmp(opname, "add_int") == 0 || strcmp(opname, "sub_int") == 0 || strcmp(opname, "mul_int") == 0 || strcmp(opname, "div_int") == 0 || strcmp(opname, "add_nat") == 0 || strcmp(opname, "sub_nat") == 0 || @@ -654,13 +654,11 @@ int get_instruction_byte_size(const char *opname) { return 4; } - // (5 bytes: 1 + 4) if (strcmp(opname, "halt") == 0 || strcmp(opname, "jump_if_flag") == 0 || strcmp(opname, "jump") == 0) { return 5; } - // Load, Load_immediate (6 bytes: 1 + 1 + 4) if (strcmp(opname, "load_absolute_32") == 0 || strcmp(opname, "load_immediate") == 0 || strcmp(opname, "load_address") == 0 || @@ -672,7 +670,6 @@ int get_instruction_byte_size(const char *opname) { return 6; } - // jump compare (7 bytes: 1 + 4 + 1 + 1) if (strcmp(opname, "jump_eq_int") == 0 || strcmp(opname, "jump_neq_int") == 0 || strcmp(opname, "jump_gt_int") == 0 || @@ -704,15 +701,17 @@ int get_instruction_byte_size(const char *opname) { exit(-1); } -#define FAKE_OP(op) \ - } else if (strleq(token.start, op, token.length)) { \ - do { \ - while (token.type != TOKEN_SEMICOLON) { \ - token = next_token(); \ - } \ - /*printf("code[%d]=%s\n %d + %d = 
%d\n", vm->cp, op, get_instruction_byte_size(op), vm->cp, vm->cp + get_instruction_byte_size(op)); */\ - vm->cp += get_instruction_byte_size(op); \ - } while(0); +#define FAKE_OP(op) \ + } else if (strleq(token.start, op, token.length)) { \ + do { \ + while (token.type != TOKEN_SEMICOLON) { \ + token = next_token(); \ + } \ + /*printf("code[%d]=%s\n %d + %d = %d\n", vm->cp, op, \ + * get_instruction_byte_size(op), vm->cp, vm->cp + \ + * get_instruction_byte_size(op)); */ \ + vm->cp += get_instruction_byte_size(op); \ + } while (0); /** * Build the symbol table and calculate the types/size/offsets of all values. @@ -771,11 +770,11 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { continue; } - #ifdef DEBUG_PRINT - printf("-- %.*s --\n", token.length, token.start); - #endif +#ifdef DEBUG_PRINT + printf("-- %.*s --\n", token.length, token.start); +#endif if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first + /* check to see if it is an opcode first */ if (strleq(token.start, "exit", token.length)) { vm->cp++; @@ -783,9 +782,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next_token(); vm->cp += 4; - #ifdef DEBUG_PRINT - printf("code[%d] = exit\n", vm->cp); - #endif +#ifdef DEBUG_PRINT + printf("code[%d] = exit\n", vm->cp); +#endif next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "call", token.length)) { @@ -799,9 +798,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token next = next_token_is(TOKEN_LPAREN); next = next_token(); while (next.type != TOKEN_RPAREN) { - get_reg(next, st); - vm->cp++; - next = next_token(); + get_reg(next, st); + vm->cp++; + next = next_token(); } next = next_token(); @@ -812,9 +811,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { get_reg(next, st); vm->cp++; } - #ifdef DEBUG_PRINT - printf("code[%d] = call\n", vm->cp); - #endif +#ifdef DEBUG_PRINT + printf("code[%d] = call\n", vm->cp); +#endif continue; } else if 
(strleq(token.start, "syscall", token.length)) { @@ -829,97 +828,97 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->cp++; next = next_token(); } - #ifdef DEBUG_PRINT - printf("code[%d] = syscall\n", vm->cp); - #endif +#ifdef DEBUG_PRINT + printf("code[%d] = syscall\n", vm->cp); +#endif continue; - FAKE_OP("load_immediate") - FAKE_OP("load_address") - FAKE_OP("malloc") - FAKE_OP("memset_8") - FAKE_OP("memset_16") - FAKE_OP("memset_32") - FAKE_OP("load_offset_8") - FAKE_OP("load_offset_16") - FAKE_OP("load_offset_32") - FAKE_OP("load_indirect_8") - FAKE_OP("load_indirect_16") - FAKE_OP("load_indirect_32") - FAKE_OP("load_absolute_8") - FAKE_OP("load_absolute_16") - FAKE_OP("load_absolute_32") - FAKE_OP("store_absolute_8") - FAKE_OP("store_absolute_16") - FAKE_OP("store_absolute_32") - FAKE_OP("store_indirect_8") - FAKE_OP("store_indirect_16") - FAKE_OP("store_indirect_32") - FAKE_OP("store_offset_8") - FAKE_OP("store_offset_16") - FAKE_OP("store_offset_32") - FAKE_OP("register_move") - FAKE_OP("add_int") - FAKE_OP("sub_int") - FAKE_OP("mul_int") - FAKE_OP("div_int") - FAKE_OP("abs_int") - FAKE_OP("neg_int") - FAKE_OP("add_nat") - FAKE_OP("sub_nat") - FAKE_OP("mul_nat") - FAKE_OP("div_nat") - FAKE_OP("abs_nat") - FAKE_OP("neg_nat") - FAKE_OP("add_real") - FAKE_OP("sub_real") - FAKE_OP("mul_real") - FAKE_OP("div_real") - FAKE_OP("abs_real") - FAKE_OP("neg_real") - FAKE_OP("int_to_real") - FAKE_OP("nat_to_real") - FAKE_OP("real_to_int") - FAKE_OP("real_to_nat") - FAKE_OP("bit_shift_left") - FAKE_OP("bit_shift_right") - FAKE_OP("bit_shift_r_ext") - FAKE_OP("bit_and") - FAKE_OP("bit_or") - FAKE_OP("bit_xor") - FAKE_OP("jump") - FAKE_OP("jump_if_flag") - FAKE_OP("jump_eq_int") - FAKE_OP("jump_neq_int") - FAKE_OP("jump_gt_int") - FAKE_OP("jump_lt_int") - FAKE_OP("jump_le_int") - FAKE_OP("jump_ge_int") - FAKE_OP("jump_eq_nat") - FAKE_OP("jump_neq_nat") - FAKE_OP("jump_gt_nat") - FAKE_OP("jump_lt_nat") - FAKE_OP("jump_le_nat") - FAKE_OP("jump_ge_nat") 
- FAKE_OP("jump_eq_real") - FAKE_OP("jump_neq_real") - FAKE_OP("jump_ge_real") - FAKE_OP("jump_gt_real") - FAKE_OP("jump_lt_real") - FAKE_OP("jump_le_real") - FAKE_OP("string_length") - FAKE_OP("int_to_string") - FAKE_OP("nat_to_string") - FAKE_OP("real_to_string") - FAKE_OP("string_eq") - FAKE_OP("string_concat") - FAKE_OP("string_get_char") - FAKE_OP("string_find_char") - FAKE_OP("string_slice") - FAKE_OP("string_to_int") - FAKE_OP("string_to_nat") - FAKE_OP("string_to_real") + FAKE_OP("load_immediate") + FAKE_OP("load_address") + FAKE_OP("malloc") + FAKE_OP("memset_8") + FAKE_OP("memset_16") + FAKE_OP("memset_32") + FAKE_OP("load_offset_8") + FAKE_OP("load_offset_16") + FAKE_OP("load_offset_32") + FAKE_OP("load_indirect_8") + FAKE_OP("load_indirect_16") + FAKE_OP("load_indirect_32") + FAKE_OP("load_absolute_8") + FAKE_OP("load_absolute_16") + FAKE_OP("load_absolute_32") + FAKE_OP("store_absolute_8") + FAKE_OP("store_absolute_16") + FAKE_OP("store_absolute_32") + FAKE_OP("store_indirect_8") + FAKE_OP("store_indirect_16") + FAKE_OP("store_indirect_32") + FAKE_OP("store_offset_8") + FAKE_OP("store_offset_16") + FAKE_OP("store_offset_32") + FAKE_OP("register_move") + FAKE_OP("add_int") + FAKE_OP("sub_int") + FAKE_OP("mul_int") + FAKE_OP("div_int") + FAKE_OP("abs_int") + FAKE_OP("neg_int") + FAKE_OP("add_nat") + FAKE_OP("sub_nat") + FAKE_OP("mul_nat") + FAKE_OP("div_nat") + FAKE_OP("abs_nat") + FAKE_OP("neg_nat") + FAKE_OP("add_real") + FAKE_OP("sub_real") + FAKE_OP("mul_real") + FAKE_OP("div_real") + FAKE_OP("abs_real") + FAKE_OP("neg_real") + FAKE_OP("int_to_real") + FAKE_OP("nat_to_real") + FAKE_OP("real_to_int") + FAKE_OP("real_to_nat") + FAKE_OP("bit_shift_left") + FAKE_OP("bit_shift_right") + FAKE_OP("bit_shift_r_ext") + FAKE_OP("bit_and") + FAKE_OP("bit_or") + FAKE_OP("bit_xor") + FAKE_OP("jump") + FAKE_OP("jump_if_flag") + FAKE_OP("jump_eq_int") + FAKE_OP("jump_neq_int") + FAKE_OP("jump_gt_int") + FAKE_OP("jump_lt_int") + FAKE_OP("jump_le_int") + 
FAKE_OP("jump_ge_int") + FAKE_OP("jump_eq_nat") + FAKE_OP("jump_neq_nat") + FAKE_OP("jump_gt_nat") + FAKE_OP("jump_lt_nat") + FAKE_OP("jump_le_nat") + FAKE_OP("jump_ge_nat") + FAKE_OP("jump_eq_real") + FAKE_OP("jump_neq_real") + FAKE_OP("jump_ge_real") + FAKE_OP("jump_gt_real") + FAKE_OP("jump_lt_real") + FAKE_OP("jump_le_real") + FAKE_OP("string_length") + FAKE_OP("int_to_string") + FAKE_OP("nat_to_string") + FAKE_OP("real_to_string") + FAKE_OP("string_eq") + FAKE_OP("string_concat") + FAKE_OP("string_get_char") + FAKE_OP("string_find_char") + FAKE_OP("string_slice") + FAKE_OP("string_to_int") + FAKE_OP("string_to_nat") + FAKE_OP("string_to_real") } else { - // some other identifier + /* some other identifier */ printf("Unknown id at line %d: %.*s\n", token.line, token.length, token.start); exit(1); @@ -944,17 +943,17 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { if (token.type != TOKEN_EOF) { if (token.type == TOKEN_KEYWORD_GLOBAL) { - // ignore, already processed - next_token(); // type - next_token(); // var - next_token(); // eq - next_token(); // value - next_token(); // ; + /* ignore, already processed */ + next_token(); /* type */ + next_token(); /* var */ + next_token(); /* eq */ + next_token(); /* value */ + next_token(); /* ; */ continue; } if (token.type == TOKEN_KEYWORD_FN) { - // ignore, already processed + /* ignore, already processed */ Token next = next_token(); while (next.type != TOKEN_RPAREN) { next = next_token(); @@ -967,19 +966,19 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { - // ignore, already processed - next_token(); // type - next_token(); // var - next_token(); // reg - next_token(); // ; + /* ignore, already processed */ + next_token(); /* type */ + next_token(); /* var */ + next_token(); /* reg */ + next_token(); /* ; */ continue; } if 
(token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || token.type == TOKEN_KEYWORD_FOR) { - // ignore, already processed - next_token(); // id + /* ignore, already processed */ + next_token(); /* id */ } if (token.type == TOKEN_KEYWORD_RETURN) { @@ -1001,11 +1000,11 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { continue; } - #ifdef DEBUG_PRINT - printf("-- %.*s --\n", token.length, token.start); - #endif +#ifdef DEBUG_PRINT + printf("-- %.*s --\n", token.length, token.start); +#endif if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first + /* check to see if it is an opcode first */ if (strleq(token.start, "exit", token.length)) { emit_op(vm, OP_EXIT); @@ -1032,19 +1031,19 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { Token next = next_token_is(TOKEN_LPAREN); next = next_token(); while (next.type != TOKEN_RPAREN) { - u8 arg = get_reg(next, st); - emit_byte(vm, arg); - vm->cp++; - arg_count++; - next = next_token(); + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + arg_count++; + next = next_token(); } vm->code[arg_pos] = arg_count; - - #ifdef DEBUG_PRINT + +#ifdef DEBUG_PRINT printf("^code[%d] = %d\n", arg_pos, arg_count); - #endif - +#endif + next = next_token(); if (next.type == TOKEN_SEMICOLON) { emit_byte(vm, 255); @@ -1085,8 +1084,9 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp += 4; next = next_token(); - while (next.type != TOKEN_SEMICOLON && next.type != TOKEN_ARROW_RIGHT) { - u8 arg =get_reg(next, st); + while (next.type != TOKEN_SEMICOLON && + next.type != TOKEN_ARROW_RIGHT) { + u8 arg = get_reg(next, st); emit_byte(vm, arg); vm->cp++; next = next_token(); @@ -1241,7 +1241,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { vm->cp++; next_token_is(TOKEN_ARROW_RIGHT); - + reg = next_token(); arg = get_reg(reg, st); emit_byte(vm, arg); @@ -2404,7 +2404,7 @@ void 
emit_bytecode(VM *vm, char *source, SymbolTable *st) { } else if (strleq(token.start, "string_to_nat", token.length)) { } else if (strleq(token.start, "string_to_real", token.length)) { } else { - // some other identifier + /* some other identifier */ printf("Unknown id at line %d: %.*s\n", token.line, token.length, token.start); exit(1); diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c index 889c277..056219e 100644 --- a/src/tools/compiler/compiler.c +++ b/src/tools/compiler/compiler.c @@ -1,5 +1,7 @@ +#include "lexer.h" #include "compiler.h" #include "../../vm/common.h" +#include "../../vm/opcodes.h" #include "../../vm/libc.h" #include #include @@ -178,101 +180,6 @@ ValueType *plex_get_field_by_name(PlexTable *plex_table, return plex_get_field(plex_table, fields_table, plex_index, (u32)field_index); } -Symbol *global(VM *vm) { - Symbol s; - ValueType t; - - s.ref.global = vm->mp; - - Token token_type = next_token(); - Token array_or_eq = next_token(); - if (array_or_eq.type == TOKEN_LBRACKET) { - Token rb = next_token(); - if (rb.type != TOKEN_RBRACKET) - return nil; - - Token eq = next_token(); - if (eq.type != TOKEN_EQ) - return nil; - - t.type = ARRAY; - ValueType array_type; - - switch (token_type.type) { - case TOKEN_TYPE_I8: - array_type.type = I8; - break; - case TOKEN_TYPE_I16: - array_type.type = I16; - break; - case TOKEN_TYPE_INT: - array_type.type = I32; - break; - case TOKEN_TYPE_U8: - array_type.type = U8; - break; - case TOKEN_TYPE_U16: - array_type.type = U16; - break; - case TOKEN_TYPE_NAT: - array_type.type = U32; - break; - case TOKEN_TYPE_REAL: - array_type.type = F32; - break; - case TOKEN_TYPE_STR: - array_type.type = STR; - break; - case TOKEN_IDENTIFIER: - break; - default: - return nil; - } - - } else { - // its not an array, so should be = - if (array_or_eq.type != TOKEN_EQ) - return nil; - - switch (token_type.type) { - case TOKEN_TYPE_I8: - t.type = I8; - break; - case TOKEN_TYPE_I16: - t.type = I16; - break; 
- case TOKEN_TYPE_INT: - t.type = I32; - break; - case TOKEN_TYPE_U8: - t.type = U8; - break; - case TOKEN_TYPE_U16: - t.type = U16; - break; - case TOKEN_TYPE_NAT: - t.type = U32; - break; - case TOKEN_TYPE_REAL: - t.type = F32; - break; - case TOKEN_TYPE_STR: - t.type = STR; - break; - case TOKEN_IDENTIFIER: - break; - default: - return nil; - } - } - - s.type = t; - - Token value = next_token(); - - return nil; -} - typedef struct { Token current; Token previous; @@ -420,18 +327,18 @@ void number(Compiler *c, VM *vm) { c->last = Symbol{ .type=parser.previous.type }; switch (parser.previous.type) { - case TOKEN_INT_LITERAL: { + case TOKEN_LITERAL_INT: { char *endptr; i32 value = (i32)strtol(parser.previous.start, &endptr, 10); emit_u32(vm, value); return; } - case TOKEN_UINT_LITERAL: { + case TOKEN_LITERAL_NAT: { long value = atol(parser.previous.start); emit_u32(vm, value); return; } - case TOKEN_FLOAT_LITERAL: { + case TOKEN_LITERAL_REAL: { float value = atof(parser.previous.start); fixed_t fvalue = float_to_fixed(value); emit_u32(vm, fvalue); @@ -454,10 +361,10 @@ static void unary(Compiler *c, VM *vm) { switch (operatorType) { case TOKEN_MINUS: { switch (c->last.type) { - case TOKEN_UINT_LITERAL: - emit_opcode(vm, OP_NEG_UINT); - case TOKEN_FLOAT_LITERAL: - emit_opcode(vm, OP_NEG_FLOAT); + case TOKEN_LITERAL_NAT: + emit_opcode(vm, OP_NEG_NAT); + case TOKEN_LITERAL_REAL: + emit_opcode(vm, OP_NEG_REAL); default: emit_opcode(vm, OP_NEG_INT); } @@ -472,7 +379,7 @@ static void unary(Compiler *c, VM *vm) { } static void emitHalt(Compiler *c, VM *vm) { - emit_opcode(vm, OP_HALT); + emit_opcode(vm, OP_EXIT); advance(); number(c, vm); } diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h index 778a5e3..59e420b 100644 --- a/src/tools/compiler/compiler.h +++ b/src/tools/compiler/compiler.h @@ -3,7 +3,6 @@ #import "../../vm/common.h" -typedef enum { GLOBAL, LOCAL } ScopeType; typedef enum { VOID, BOOL, @@ -33,6 +32,8 @@ typedef struct symbol_s 
Symbol; typedef struct symbol_tab_s SymbolTable; typedef struct names_tab_s NamesTable; typedef struct plex_fields_tab_s PlexFieldsTable; +typedef struct scope_s Scope; +typedef struct scope_tab_s ScopeTable; struct value_type_s { SymbolType type; @@ -65,7 +66,6 @@ struct array_def_s { struct symbol_s { u32 name; ValueType type; - ScopeType scope; union { u32 local; // register u32 global; // address @@ -103,14 +103,20 @@ struct names_tab_s { u32 capacity; }; -/** - * FIXME: - * Symbols need to be inside a scope so we can have duplicates - */ struct symbol_tab_s { Symbol *symbols; u32 count; u32 capacity; }; +struct scope_s { + SymbolTable table; +}; + +struct scope_tab_s { + Scope *scopes; + u32 count; + u32 capacity; +}; + #endif diff --git a/src/tools/compiler/lexer.c b/src/tools/compiler/parser.c similarity index 100% rename from src/tools/compiler/lexer.c rename to src/tools/compiler/parser.c diff --git a/src/tools/compiler/lexer.h b/src/tools/compiler/parser.h similarity index 100% rename from src/tools/compiler/lexer.h rename to src/tools/compiler/parser.h diff --git a/src/vm/fixed.c b/src/vm/fixed.c index bbbdbc2..f75aae0 100644 --- a/src/vm/fixed.c +++ b/src/vm/fixed.c @@ -1,8 +1,5 @@ -/* fixed.c - Q16.16 Fixed-Point Math Implementation */ - #include "fixed.h" -/* Conversion functions */ fixed_t int_to_fixed(i32 i) { return i << 16; } i32 fixed_to_int(fixed_t f) { return f >> 16; } @@ -16,38 +13,32 @@ fixed_t fixed_add(fixed_t a, fixed_t b) { return a + b; } fixed_t fixed_sub(fixed_t a, fixed_t b) { return a - b; } fixed_t fixed_mul(fixed_t a, fixed_t b) { - /* Extract high and low parts */ i32 a_hi = a >> 16; u32 a_lo = (u32)a & 0xFFFFU; i32 b_hi = b >> 16; u32 b_lo = (u32)b & 0xFFFFU; - /* Compute partial products */ - i32 p0 = (i32)(a_lo * b_lo) >> 16; /* Low * Low */ - i32 p1 = a_hi * (i32)b_lo; /* High * Low */ - i32 p2 = (i32)a_lo * b_hi; /* Low * High */ - i32 p3 = (a_hi * b_hi) << 16; /* High * High */ + i32 p0 = (i32)(a_lo * b_lo) >> 16; + i32 p1 
= a_hi * (i32)b_lo; + i32 p2 = (i32)a_lo * b_hi; + i32 p3 = (a_hi * b_hi) << 16; - /* Combine results */ return p0 + p1 + p2 + p3; } fixed_t fixed_div(fixed_t a, fixed_t b) { - int negative; + i32 negative; u32 ua, ub, quotient, remainder; - int i; + i32 i; if (b == 0) - return 0; /* Handle division by zero */ + return 0; - /* Determine sign */ negative = ((a < 0) ^ (b < 0)); - /* Work with absolute values */ ua = (a < 0) ? -a : a; ub = (b < 0) ? -b : b; - /* Perform division using long division in base 2^16 */ quotient = 0; remainder = 0; @@ -71,111 +62,18 @@ fixed_t fixed_div(fixed_t a, fixed_t b) { return negative ? -(i32)quotient : (i32)quotient; } -int fixed_eq(fixed_t a, fixed_t b) { return a == b; } +i32 fixed_eq(fixed_t a, fixed_t b) { return a == b; } -int fixed_ne(fixed_t a, fixed_t b) { return a != b; } +i32 fixed_ne(fixed_t a, fixed_t b) { return a != b; } -int fixed_lt(fixed_t a, fixed_t b) { return a < b; } +i32 fixed_lt(fixed_t a, fixed_t b) { return a < b; } -int fixed_le(fixed_t a, fixed_t b) { return a <= b; } +i32 fixed_le(fixed_t a, fixed_t b) { return a <= b; } -int fixed_gt(fixed_t a, fixed_t b) { return a > b; } +i32 fixed_gt(fixed_t a, fixed_t b) { return a > b; } -int fixed_ge(fixed_t a, fixed_t b) { return a >= b; } +i32 fixed_ge(fixed_t a, fixed_t b) { return a >= b; } -/* Unary operations */ fixed_t fixed_neg(fixed_t f) { return -f; } fixed_t fixed_abs(fixed_t f) { return (f < 0) ? 
-f : f; } - -/* Square root using Newton-Raphson method */ -fixed_t fixed_sqrt(fixed_t f) { - fixed_t x, prev; - - if (f <= 0) - return 0; - - x = f; - /* Newton-Raphson iteration: x = (x + f/x) / 2 */ - do { - prev = x; - x = fixed_div(fixed_add(x, fixed_div(f, x)), int_to_fixed(2)); - } while ( - fixed_gt(fixed_abs(fixed_sub(x, prev)), 1)); /* Precision to 1/65536 */ - - return x; -} - -/* Sine function using Taylor series */ -fixed_t fixed_sin(fixed_t f) { - fixed_t result, term, f_squared; - int i; - /* Normalize angle to [-π, π] */ - fixed_t pi2 = fixed_mul(FIXED_PI, int_to_fixed(2)); - while (fixed_gt(f, FIXED_PI)) - f = fixed_sub(f, pi2); - while (fixed_lt(f, fixed_neg(FIXED_PI))) - f = fixed_add(f, pi2); - - /* Taylor series: sin(x) = x - x³/3! + x⁵/5! - x⁷/7! + ... */ - result = f; - term = f; - f_squared = fixed_mul(f, f); - - /* Calculate first few terms for reasonable precision */ - for (i = 3; i <= 11; i += 2) { - term = fixed_mul(term, f_squared); - term = fixed_div(term, int_to_fixed(i * (i - 1))); - - if ((i / 2) % 2 == 0) { - result = fixed_add(result, term); - } else { - result = fixed_sub(result, term); - } - } - - return result; -} - -/* Cosine function using Taylor series */ -fixed_t fixed_cos(fixed_t f) { - /* cos(x) = 1 - x²/2! + x⁴/4! - x⁶/6! + ... */ - fixed_t result = FIXED_ONE; - fixed_t term = FIXED_ONE; - fixed_t f_squared = fixed_mul(f, f); - - int i; - for (i = 2; i <= 12; i += 2) { - term = fixed_mul(term, f_squared); - term = fixed_div(term, int_to_fixed(i * (i - 1))); - - if ((i / 2) % 2 == 0) { - result = fixed_add(result, term); - } else { - result = fixed_sub(result, term); - } - } - - return result; -} - -/* Tangent function */ -fixed_t fixed_tan(fixed_t f) { - fixed_t cos_val = fixed_cos(f); - if (cos_val == 0) - return 0; /* Handle undefined case */ - return fixed_div(fixed_sin(f), cos_val); -} - -/* Utility functions */ -fixed_t fixed_min(fixed_t a, fixed_t b) { return (a < b) ? 
a : b; } - -fixed_t fixed_max(fixed_t a, fixed_t b) { return (a > b) ? a : b; } - -fixed_t fixed_clamp(fixed_t f, fixed_t min_val, fixed_t max_val) { - if (f < min_val) - return min_val; - if (f > max_val) - return max_val; - return f; -} diff --git a/src/vm/fixed.h b/src/vm/fixed.h index 717705c..c48f99a 100644 --- a/src/vm/fixed.h +++ b/src/vm/fixed.h @@ -3,51 +3,34 @@ #include "common.h" -/* Q16.16 fixed-point type */ typedef i32 fixed_t; -/* Constants */ #define FIXED_ONE 0x00010000L /* 1.0 in Q16.16 */ #define FIXED_ZERO 0x00000000L /* 0.0 in Q16.16 */ #define FIXED_HALF 0x00008000L /* 0.5 in Q16.16 */ -#define FIXED_PI 0x0003243FL /* π ≈ 3.14159 */ -#define FIXED_E 0x0002B7E1L /* e ≈ 2.71828 */ +#define FIXED_PI 0x0003243FL /* 3.14159 */ +#define FIXED_E 0x0002B7E1L /* 2.71828 */ #define FIXED_MAX 0x7FFFFFFFL /* Maximum positive value */ #define FIXED_MIN 0x80000000L /* Minimum negative value */ -/* Conversion functions */ fixed_t int_to_fixed(i32 i); i32 fixed_to_int(fixed_t f); fixed_t float_to_fixed(f32 f); f32 fixed_to_float(fixed_t f); -/* Basic arithmetic operations */ fixed_t fixed_add(fixed_t a, fixed_t b); fixed_t fixed_sub(fixed_t a, fixed_t b); fixed_t fixed_mul(fixed_t a, fixed_t b); fixed_t fixed_div(fixed_t a, fixed_t b); -/* Comparison functions */ -int fixed_eq(fixed_t a, fixed_t b); -int fixed_ne(fixed_t a, fixed_t b); -int fixed_lt(fixed_t a, fixed_t b); -int fixed_le(fixed_t a, fixed_t b); -int fixed_gt(fixed_t a, fixed_t b); -int fixed_ge(fixed_t a, fixed_t b); +i32 fixed_eq(fixed_t a, fixed_t b); +i32 fixed_ne(fixed_t a, fixed_t b); +i32 fixed_lt(fixed_t a, fixed_t b); +i32 fixed_le(fixed_t a, fixed_t b); +i32 fixed_gt(fixed_t a, fixed_t b); +i32 fixed_ge(fixed_t a, fixed_t b); -/* Unary operations */ fixed_t fixed_neg(fixed_t f); fixed_t fixed_abs(fixed_t f); -/* Advanced math functions */ -fixed_t fixed_sqrt(fixed_t f); -fixed_t fixed_sin(fixed_t f); /* f in radians */ -fixed_t fixed_cos(fixed_t f); /* f in radians */ -fixed_t 
fixed_tan(fixed_t f); /* f in radians */ - -/* Utility functions */ -fixed_t fixed_min(fixed_t a, fixed_t b); -fixed_t fixed_max(fixed_t a, fixed_t b); -fixed_t fixed_clamp(fixed_t f, fixed_t min, fixed_t max); - -#endif /* FIXED_H */ +#endif diff --git a/test/paint.ul b/test/paint.ul index 1ae3af2..d6cc410 100644 --- a/test/paint.ul +++ b/test/paint.ul @@ -20,9 +20,7 @@ plex Screen implements Device { byte[] buffer; draw() { - unsafe { - write(this, this.buffer, this.buffer.length); - } + write(this, this.buffer, this.buffer.length); } } @@ -82,8 +80,6 @@ function set_color(int box_size, int bx, int by, int mx, int my, byte color) { if (my > bottom) return; selected_color = color; - - return; } /** @@ -97,22 +93,17 @@ function outline_swatch(Device screen, byte color, int x, int y) { rectangle(screen, bg_color, x, y, 20, 20); rectangle(screen, color, x + 2, y + 2, 17, 17); - return; } /** * Draw a rectangle */ function rectangle(Device screen, byte color, int x, int y, int width, int height) { - // we need unsafe because we are using pointers `.ptr` and `memset` directly - // unsafe takes the guardrails off and allows you to access/modify memory directly - unsafe { - int base = y * screen.width + x + screen.buffer.ptr + 4; - do (int i = height; i > 0; i--) { - int row = base + width; - memset(screen.buffer, row, color, width); - base += screen.width; - } - } - return; + int base = y * screen.width + x + screen.buffer.ptr + 4; + + do (int i = height; i > 0; i--) { + int row = base + width; + memset(screen.buffer, row, color, width); + base += screen.width; + } } From 0fd26567046420978d4002c1e5401e3990fb2c23 Mon Sep 17 00:00:00 2001 From: zongor Date: Mon, 8 Dec 2025 21:42:59 -0800 Subject: [PATCH 25/27] Code cleanup --- src/tools/compiler/compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c index 056219e..5899df6 100644 --- a/src/tools/compiler/compiler.c +++ 
b/src/tools/compiler/compiler.c @@ -1,4 +1,4 @@ -#include "lexer.h" +#include "parser.h" #include "compiler.h" #include "../../vm/common.h" #include "../../vm/opcodes.h" From 411481214671de97a08d18245e505dd6f75d8d1a Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 20 Dec 2025 00:42:28 -0800 Subject: [PATCH 26/27] Update assembler to add scopes --- src/arch/emscripten/main.c | 4 +- src/arch/linux/main.c | 32 +++-- src/tools/assembler/assembler.c | 117 +++++++++++----- src/tools/assembler/assembler.h | 19 ++- src/tools/compiler/compiler.c | 235 ++++---------------------------- src/tools/compiler/parser.c | 2 +- src/tools/compiler/parser.h | 4 + test/add.ul.ir | 9 +- test/fib.ul.ir | 10 +- test/hello.ul.ir | 8 +- test/loop.ul.ir | 26 ++-- test/malloc.ul.ir | 21 +-- test/paint-bw.ul.ir | 21 +-- test/paint.ul.ir | 24 ++-- test/simple.ul.ir | 6 +- test/window.ul.ir | 16 ++- 16 files changed, 236 insertions(+), 318 deletions(-) diff --git a/src/arch/emscripten/main.c b/src/arch/emscripten/main.c index 8067a09..e22622c 100644 --- a/src/arch/emscripten/main.c +++ b/src/arch/emscripten/main.c @@ -89,7 +89,7 @@ void mainloop() { } int cycles_this_frame = 0; - int max_cycles_per_frame = 2000; + int max_cycles_per_frame = 1000; while (cycles_this_frame < max_cycles_per_frame) { if (!step_vm(&vm)) { emscripten_cancel_main_loop(); @@ -181,4 +181,4 @@ int main(int argc, char **argv) { emscripten_set_main_loop(mainloop, 0, 1); return 0; -} \ No newline at end of file +} diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 7a4453c..27ec9a9 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -130,24 +130,29 @@ bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { } #ifdef STATIC - #define SYMBOLS_COUNT 2048 - Symbol symbols[SYMBOLS_COUNT]; + #define SCOPES_COUNT 2048 + SymbolTable scopes[SCOPES_COUNT]; #endif -void symbol_table_init(SymbolTable *t) { +void symbol_table_init(ScopeTable *t) { #ifdef STATIC - memset(symbols, 0, 
SYMBOLS_COUNT*sizeof(Symbol)); - t->symbols = symbols; + memset(scopes, 0, SCOPES_COUNT*sizeof(SymbolTable)); + t->scopes = scopes; t->count = 0; - t->capacity = SYMBOLS_COUNT; + t->capacity = SCOPES_COUNT; #else - t->symbols = calloc(16, sizeof(Symbol)); + t->scopes = calloc(16, sizeof(SymbolTable)); t->count = 0; t->capacity = 16; #endif + + // Make sure that all the parents are the 'global' namespace. + for (u32 i = 0; i < t->capacity; i++) { + t->scopes[i].parent = -1; + } } -bool resize_or_check_size(SymbolTable *table) { +bool table_realloc(ScopeTable *table) { #ifdef STATIC if (table->count >= table->capacity) { return false; @@ -155,7 +160,12 @@ bool resize_or_check_size(SymbolTable *table) { #else if (table->count >= table->capacity) { table->capacity *= 2; - table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); + table->scopes = realloc(table->scopes, table->capacity * sizeof(SymbolTable)); + + // Make sure that all the parents are the 'global' namespace. 
+ for (u32 i = table->count; i < table->capacity; i++) { + table->scopes[i].parent = -1; + } } #endif return true; @@ -183,11 +193,11 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { source[read] = '\0'; fclose(f); - SymbolTable table = {0}; + ScopeTable table = {0}; symbol_table_init(&table); assemble(vm, &table, source); #ifndef STATIC - free(table.symbols); + free(table.scopes); #endif if (output_file) { diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 00ed047..5d73fde 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -162,35 +162,46 @@ void emit_u32(VM *vm, u32 value) { write_u32(vm, code, vm->cp, value); } -Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { - for (u32 i = 0; i < table->count; i++) { - if (table->symbols[i].name_length == length) { - if (strleq(table->symbols[i].name, name, length)) { - return &table->symbols[i]; +Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length, + i32 scope_ref) { + SymbolTable st = table->scopes[scope_ref]; + for (u32 i = 0; i < st.count; i++) { + if (st.symbols[i].name_length == length) { + if (strleq(st.symbols[i].name, name, length)) { + return &table->scopes[scope_ref].symbols[i]; } } } - return nil; + if (st.parent < 0) + return nil; + return symbol_table_lookup(table, name, length, st.parent); } -u32 symbol_table_add(SymbolTable *table, Symbol s) { - Symbol *sym = symbol_table_lookup(table, s.name, s.name_length); +u8 symbol_table_add(ScopeTable *table, Symbol s) { + Symbol *sym = + symbol_table_lookup(table, s.name, s.name_length, table->scope_ref); if (sym != nil) { fprintf(stderr, - "Error: Symbol '%.*s' already defined, the assembler is not smart " - "enough to do scope properly so please pick a different variable " - "name (hard I know)\n", + "Error: Symbol '%.*s' already defined, in this scope" + " please pick a different variable name or create a new 
scope.\n", s.name_length, s.name); exit(1); } - if (!resize_or_check_size(table)) { + if (table->scopes[table->scope_ref].count + 1 > 255) { + fprintf(stderr, "Error: Only 255 symbols are allowed per scope" + " first off: impressive; secondly:" + " just create a new scope and keep going.\n"); + exit(1); + } + + if (!table_realloc(table)) { fprintf(stderr, "Error: Symbol table is out of memory! This is likely because you " - "built the assembler in static mode, increase the static size." - "if you built using malloc, that means your computer is out of " - "memory. Close a few tabs in your web browser and try again." - "Count was %d, while capacity was %d\n", + " built the assembler in static mode, increase the static size." + " if you built using malloc, that means your computer is out of" + " memory. Close a few tabs in your web browser and try again." + " Count was %d, while capacity was %d\n", table->count, table->capacity); exit(1); } @@ -203,15 +214,14 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { printf("code[%d] = %s\n", s.ref, s.name); } #endif - - table->symbols[table->count] = s; - u32 index = table->count; - table->count++; + table->scopes[table->scope_ref].symbols[table->scopes[table->scope_ref].count] = s; + u8 index = table->scopes[table->scope_ref].count; + table->scopes[table->scope_ref].count++; return index; } -u32 get_ref(SymbolTable *st, const char *name, u32 length) { - Symbol *sym = symbol_table_lookup(st, name, length); +u32 get_ref(ScopeTable *st, const char *name, u32 length) { + Symbol *sym = symbol_table_lookup(st, name, length, st->scope_ref); if (!sym) { fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n", length, name); @@ -221,7 +231,7 @@ u32 get_ref(SymbolTable *st, const char *name, u32 length) { return sym->ref; } -u32 get_ptr(Token token, SymbolTable *st) { +u32 get_ptr(Token token, ScopeTable *st) { if (token.type == TOKEN_IDENTIFIER) { return get_ref(st, token.start, token.length); } @@ -246,7 
+256,7 @@ u32 get_ptr(Token token, SymbolTable *st) { exit(1); } -u32 get_reg(Token token, SymbolTable *st) { +u32 get_reg(Token token, ScopeTable *st) { if (token.type == TOKEN_IDENTIFIER) { return get_ref(st, token.start, token.length); } @@ -303,7 +313,7 @@ Token next_token_is(TokenType type) { /** * Global . */ -bool define_global(VM *vm, SymbolTable *st) { +bool define_global(VM *vm, ScopeTable *st) { Symbol s; Token token_type = next_token(); @@ -472,7 +482,7 @@ bool define_global(VM *vm, SymbolTable *st) { /** * Var . */ -void define_var(SymbolTable *st, Token regType) { +void define_var(ScopeTable *st, Token regType) { Symbol s; s.scope = VAR; switch (regType.type) { @@ -553,7 +563,7 @@ void define_var(SymbolTable *st, Token regType) { /** * function . */ -void define_function(VM *vm, SymbolTable *st) { +void define_function(VM *vm, ScopeTable *st) { Symbol s; s.scope = LOCAL; s.type = FUNCTION; @@ -570,6 +580,12 @@ void define_function(VM *vm, SymbolTable *st) { next_token_is(TOKEN_LPAREN); + i32 temp = st->scope_ref; + + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + Token next = next_token(); while (next.type != TOKEN_RPAREN) { define_var(st, next); @@ -585,13 +601,17 @@ void define_function(VM *vm, SymbolTable *st) { } } s.ref = vm->cp; + next = next_token_is(TOKEN_LBRACE); + + st->scope_ref = temp; // need to add to the parents scope symbol_table_add(st, s); + st->scope_ref = (i32)st->count; } /** * Branch. 
*/ -void define_branch(VM *vm, SymbolTable *st) { +void define_branch(VM *vm, ScopeTable *st) { Symbol s; s.scope = LOCAL; s.type = VOID; @@ -702,7 +722,8 @@ int get_instruction_byte_size(const char *opname) { } #define FAKE_OP(op) \ - } else if (strleq(token.start, op, token.length)) { \ + } \ + else if (strleq(token.start, op, token.length)) { \ do { \ while (token.type != TOKEN_SEMICOLON) { \ token = next_token(); \ @@ -716,7 +737,7 @@ int get_instruction_byte_size(const char *opname) { /** * Build the symbol table and calculate the types/size/offsets of all values. */ -void build_symbol_table(VM *vm, char *source, SymbolTable *st) { +void build_symbol_table(VM *vm, char *source, ScopeTable *st) { Token token; init_lexer(source); do { @@ -727,6 +748,22 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { } if (token.type != TOKEN_EOF) { + + if (token.type == TOKEN_LBRACE) { + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + continue; + } + + if (token.type == TOKEN_RBRACE) { + i32 current_scope = st->scope_ref; + i32 parent = st->scopes[current_scope].parent; + if (parent < 0) parent = 0; + st->scope_ref = parent; + continue; + } + if (token.type == TOKEN_KEYWORD_GLOBAL) { define_global(vm, st); continue; @@ -931,7 +968,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { /** * 2nd pass, emit the bytecode */ -void emit_bytecode(VM *vm, char *source, SymbolTable *st) { +void emit_bytecode(VM *vm, char *source, ScopeTable *st) { Token token; init_lexer(source); do { @@ -941,6 +978,21 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { + + if (token.type == TOKEN_LBRACE) { + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + continue; + } + + if (token.type == TOKEN_RBRACE) { + i32 current_scope = st->scope_ref; + i32 parent = st->scopes[current_scope].parent; + if (parent < 0) parent = 0; + 
st->scope_ref = parent; + continue; + } if (token.type == TOKEN_KEYWORD_GLOBAL) { /* ignore, already processed */ @@ -2417,8 +2469,9 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { /** * Emit bytecode to the VM from the source string. */ -void assemble(VM *vm, SymbolTable *st, char *source) { +void assemble(VM *vm, ScopeTable *st, char *source) { build_symbol_table(vm, source, st); vm->cp = 0; /* actually start emitting code */ + st->count = 0; emit_bytecode(vm, source, st); } diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 106aee5..95edd43 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -26,6 +26,8 @@ typedef enum { typedef struct symbol_s Symbol; typedef struct symbol_tab_s SymbolTable; +typedef struct scope_tab_s ScopeTable; +typedef struct assembler_s Assembler; #define MAX_SYMBOL_NAME_LENGTH 64 struct symbol_s { @@ -38,13 +40,20 @@ struct symbol_s { }; struct symbol_tab_s { - Symbol *symbols; - u32 count; - u32 capacity; + Symbol symbols[256]; + u8 count; + i32 parent; }; -void assemble(VM *vm, SymbolTable *st, char *source); -extern bool resize_or_check_size(SymbolTable *table);/* implement this in arch/ not here */ +struct scope_tab_s { + SymbolTable *scopes; + u32 count; + u32 capacity; + i32 scope_ref; +}; + +void assemble(VM *vm, ScopeTable *st, char *source); +extern bool table_realloc(ScopeTable *table);/* implement this in arch/ not here */ const char *opcode_to_string(Opcode op); diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c index 5899df6..04e6ae0 100644 --- a/src/tools/compiler/compiler.c +++ b/src/tools/compiler/compiler.c @@ -3,188 +3,18 @@ #include "../../vm/common.h" #include "../../vm/opcodes.h" #include "../../vm/libc.h" +#include "../../vm/fixed.h" #include #include #include -NamesTable *names_table_init() { - NamesTable *table = malloc(sizeof(NamesTable)); - table->names = malloc(16 * sizeof(char *)); - table->count = 0; - 
table->capacity = 16; - return table; -} - -FunctionTable *function_table_init() { - FunctionTable *table = malloc(sizeof(FunctionTable)); - table->symbols = malloc(16 * sizeof(FunctionDef)); - table->count = 0; - table->capacity = 16; - return table; -} - -ArrayTable *array_table_init() { - ArrayTable *table = malloc(sizeof(ArrayTable)); - table->symbols = malloc(16 * sizeof(ArrayDef)); - table->count = 0; - table->capacity = 16; - return table; -} - -PlexTable *plex_table_init() { - PlexTable *table = malloc(sizeof(PlexTable)); - table->symbols = malloc(16 * sizeof(PlexDef)); - table->count = 0; - table->capacity = 16; - return table; -} - -PlexFieldsTable *plex_fields_table_init() { - PlexFieldsTable *table = malloc(sizeof(PlexFieldsTable)); - table->plex_refs = malloc(64 * sizeof(u32)); - table->fields = malloc(64 * sizeof(ValueType)); - table->count = 0; - table->capacity = 64; - return table; -} - -u32 names_table_add(NamesTable *table, const char *name) { - for (u32 i = 0; i < table->count; i++) { - if (strcmp(table->names[i], name) == 0) { - return (u32)i; - } - } - - if (table->count >= table->capacity) { - table->capacity *= 2; - table->names = realloc(table->names, table->capacity * sizeof(char *)); - } - - table->names[table->count] = malloc(strlen(name) + 1); - strcpy(table->names[table->count], name); - u32 index = (u32)table->count; - table->count++; - return index; -} - -u32 function_table_add(FunctionTable *table, FunctionDef def) { - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = - realloc(table->symbols, table->capacity * sizeof(FunctionDef)); - } - - table->symbols[table->count] = def; - u32 index = (u32)table->count; - table->count++; - return index; -} - -u32 array_table_add(ArrayTable *table, ArrayDef def) { - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = realloc(table->symbols, table->capacity * sizeof(ArrayDef)); - } - - table->symbols[table->count] = def; - u32 index = 
(u32)table->count; - table->count++; - return index; -} - -u32 plex_add(PlexTable *plex_table, u32 name, u32 size, u32 field_start, - u32 field_count) { - if (plex_table->count >= plex_table->capacity) { - plex_table->capacity *= 2; - plex_table->symbols = - realloc(plex_table->symbols, plex_table->capacity * sizeof(PlexDef)); - } - - plex_table->symbols[plex_table->count].name = name; - plex_table->symbols[plex_table->count].size = size; - plex_table->symbols[plex_table->count].field_ref_start = field_start; - plex_table->symbols[plex_table->count].field_count = field_count; - - u32 index = (u32)plex_table->count; - plex_table->count++; - return index; -} - -u32 plex_fields_add(PlexFieldsTable *fields_table, u32 plex_ref, - ValueType field) { - if (fields_table->count + 1 > fields_table->capacity) { - u32 new_capacity = fields_table->capacity * 2; - if (new_capacity < fields_table->count + 1) { - new_capacity = fields_table->count + 1; - } - fields_table->plex_refs = - realloc(fields_table->plex_refs, new_capacity * sizeof(u32)); - fields_table->fields = - realloc(fields_table->fields, new_capacity * sizeof(ValueType)); - fields_table->capacity = new_capacity; - } - - u32 start_index = fields_table->count; - fields_table->plex_refs[start_index] = plex_ref; - fields_table->fields[start_index] = field; - fields_table->count++; - return start_index; -} - -int plex_get_field_index_by_name(PlexTable *plex_table, - PlexFieldsTable *fields_table, - NamesTable *names_table, u32 plex_index, - const char *field_name) { - if (plex_index >= plex_table->count) - return -1; - - PlexDef *plex_def = &plex_table->symbols[plex_index]; - u32 field_start = plex_def->field_ref_start; - u32 field_count = plex_def->field_count; - - for (u32 i = 0; i < field_count; i++) { - u32 field_table_index = field_start + i; - ValueType *field = &fields_table->fields[field_table_index]; - - if (field->name < names_table->count) { - if (strcmp(names_table->names[field->name], field_name) == 0) { - 
return (int)i; // Return field index within the plex - } - } - } - return -1; // Not found -} - -ValueType *plex_get_field(PlexTable *plex_table, PlexFieldsTable *fields_table, - u32 plex_index, u32 field_in_plex_index) { - if (plex_index >= plex_table->count) - return nil; - - PlexDef *plex_def = &plex_table->symbols[plex_index]; - if (field_in_plex_index >= plex_def->field_count) - return nil; - - u32 field_table_index = plex_def->field_ref_start + field_in_plex_index; - return &fields_table->fields[field_table_index]; -} - -ValueType *plex_get_field_by_name(PlexTable *plex_table, - PlexFieldsTable *fields_table, - NamesTable *names_table, u32 plex_index, - const char *field_name) { - int field_index = plex_get_field_index_by_name( - plex_table, fields_table, names_table, plex_index, field_name); - if (field_index == -1) - return nil; - - return plex_get_field(plex_table, fields_table, plex_index, (u32)field_index); -} - typedef struct { Token current; Token previous; + Token before; bool hadError; bool panicMode; + i8 rp; } Parser; typedef enum { @@ -209,13 +39,6 @@ typedef struct { Precedence precedence; } ParseRule; -typedef struct { - SymbolTable table; - Symbol current; - Symbol last; - i8 rp; // Next free register -} Compiler; - Parser parser; const char *internalErrorMsg = @@ -269,30 +92,24 @@ void consume(TokenType type, const char *message) { errorAtCurrent(message); } -static int allocateRegister(Compiler *c) { +static int allocateRegister() { char buffer[38]; - if (c->rp + 1 > 31) { - sprintf(buffer, "Out of registers (used %d, max 32)", c->rp + 1); + if (parser.rp + 1 > 31) { + sprintf(buffer, "Out of registers (used %d, max 32)", parser.rp + 1); error(buffer); return -1; } - return c->rp++; + return parser.rp++; } -static void popRegister(Compiler *c) { - if (c->rp - 1 > 0) { - c->rp--; +static void popRegister() { + if (parser.rp - 1 > 0) { + parser.rp--; } } -static void freeRegister(Compiler *c, u8 reg) { - if (reg == c->rp - 1) { - c->rp--; - } 
-} - -static void clearRegisters(Compiler *c, u8 reg) { c->rp = 0; } +static void clearRegisters(u8 reg) { parser.rp = 0; } void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } @@ -312,20 +129,17 @@ static bool match(TokenType type) { return true; } -static void expression(Compiler *c, VM *vm) { - USED(c); +static void expression(VM *vm) { USED(vm); } -void number(Compiler *c, VM *vm) { +void number(VM *vm) { emit_opcode(vm, OP_LOAD_IMM); - int reg = allocateRegister(c); + int reg = allocateRegister(); if (reg < 0) return; emit_byte(vm, reg); - c->last = Symbol{ .type=parser.previous.type }; - switch (parser.previous.type) { case TOKEN_LITERAL_INT: { char *endptr; @@ -351,16 +165,16 @@ void number(Compiler *c, VM *vm) { errorAtCurrent("Invalid number format"); } -static void unary(Compiler *c, VM *vm) { +static void unary(VM *vm) { TokenType operatorType = parser.previous.type; // Compile the operand. - expression(c, vm); + expression(vm); // Emit the operator instruction. switch (operatorType) { case TOKEN_MINUS: { - switch (c->last.type) { + switch (parser.previous.type) { case TOKEN_LITERAL_NAT: emit_opcode(vm, OP_NEG_NAT); case TOKEN_LITERAL_REAL: @@ -378,16 +192,16 @@ static void unary(Compiler *c, VM *vm) { } } -static void emitHalt(Compiler *c, VM *vm) { +static void emitHalt(VM *vm) { emit_opcode(vm, OP_EXIT); advance(); - number(c, vm); + number(vm); } -static void endCompiler(Compiler *c, VM *vm) { emitHalt(c, vm); } +static void endCompiler(VM *vm) { emitHalt(vm); } -static void grouping(Compiler *c, VM *vm) { - expression(c, vm); +static void grouping(VM *vm) { + expression(vm); consume(TOKEN_RPAREN, "Expect ')' after expression."); } @@ -399,11 +213,10 @@ bool compile(const char *source, VM *vm) { parser.hadError = false; parser.panicMode = false; - Compiler compiler; advance(); - expression(&compiler, vm); + expression(vm); consume(TOKEN_EOF, "Expect end of expression."); - endCompiler(&compiler, vm); + endCompiler(vm); return 
parser.hadError; } diff --git a/src/tools/compiler/parser.c b/src/tools/compiler/parser.c index 291a3a3..375222c 100644 --- a/src/tools/compiler/parser.c +++ b/src/tools/compiler/parser.c @@ -1,7 +1,7 @@ #include #include "../../vm/common.h" -#include "lexer.h" +#include "parser.h" typedef struct { const char *start; diff --git a/src/tools/compiler/parser.h b/src/tools/compiler/parser.h index 5f83f08..714cf38 100644 --- a/src/tools/compiler/parser.h +++ b/src/tools/compiler/parser.h @@ -16,6 +16,8 @@ typedef enum { TOKEN_TYPE_NAT, TOKEN_TYPE_REAL, TOKEN_TYPE_STR, + TOKEN_TYPE_BOOL, + TOKEN_TYPE_VOID, TOKEN_KEYWORD_PLEX, TOKEN_KEYWORD_FN, TOKEN_KEYWORD_CONST, @@ -35,6 +37,8 @@ typedef enum { TOKEN_KEYWORD_WRITE, TOKEN_KEYWORD_REFRESH, TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_LOOP, + TOKEN_KEYWORD_DO, TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_FALSE, diff --git a/test/add.ul.ir b/test/add.ul.ir index 8073af3..0ca47a4 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -3,20 +3,22 @@ global str new_line = "\n"; global int x = 1; global int y = 1; -function main () +function main () { load_absolute_32 x -> $0; load_absolute_32 y -> $1; call add ($0 $1) -> $2; int_to_string $2 -> $3; call pln ($3); exit 0; +} -function add (int a $0, int b $1) +function add (int a $0, int b $1) { int result $2; add_int a b -> result; return result; +} -function pln (str message $0) +function pln (str message $0) { plex term $1; int msg_length $2; str nl $3; @@ -33,3 +35,4 @@ function pln (str message $0) string_length nl -> nl_length; syscall WRITE term nl nl_length; return; +} diff --git a/test/fib.ul.ir b/test/fib.ul.ir index caf8152..50b2cb5 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -1,7 +1,7 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; -function main () +function main () { int str_n $1; load_immediate 35 -> $0; @@ -9,8 +9,9 @@ function main () int_to_string $0 -> str_n; call pln (str_n); exit 0; +} -function fib (int n $0) +function 
fib (int n $0) { load_immediate 2 -> $1; jump_lt_int base_case n $1; @@ -28,8 +29,9 @@ function fib (int n $0) else base_case; return n; +} -function pln (str message $0) +function pln (str message $0) { plex term $1; int msg_length $2; str nl $3; @@ -46,4 +48,4 @@ function pln (str message $0) string_length nl -> nl_length; syscall WRITE term nl nl_length; return; - \ No newline at end of file +} diff --git a/test/hello.ul.ir b/test/hello.ul.ir index d43c9fa..9789e11 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -2,14 +2,15 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; global str hello = "nuqneH 'u'?"; -function main () +function main () { str msg $0; load_address hello -> msg; call pln (msg); exit 0; +} -function pln (str message $0) +function pln (str message $0) { plex term $1; int msg_length $2; str nl $3; @@ -25,4 +26,5 @@ function pln (str message $0) load_address new_line -> nl; string_length nl -> nl_length; syscall WRITE term nl nl_length; - return; \ No newline at end of file + return; +} diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 2edad6a..278c338 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -2,36 +2,38 @@ global str terminal_namespace = "/dev/term/0"; global str prompt = "Enter a string:"; global str new_line = "\n"; -function main () +function main () { real a $0; int i $1; - int in_mode $11; - str in_term $10; + int mode $11; + str term $10; + // do (i = 5000; i >= 0, i = i - 1) load_immediate 5.0 -> a; load_immediate 5000 -> i; load_immediate 0 -> $2; load_immediate -1 -> $3; load_immediate 5.0 -> $5; - loop loop_body + loop loop_body { add_real a $5 -> a; add_int i $3 -> i; - jump_ge_int loop_body i $2; + jump_ge_int loop_body i $2; + } - load_address terminal_namespace -> in_term; - load_immediate 0 -> in_mode; - syscall OPEN in_term in_mode in_term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> term; + load_immediate 0 -> mode; + syscall OPEN term mode 
term; // Terminal term = open("/dev/term/0", 0); nat b $1; real_to_nat a -> b; load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE in_term $7 $8; // print prompt + syscall WRITE term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ in_term user_string $8; // read in max 32 byte string + syscall READ term user_string $8; // read in max 32 byte string call pln (user_string); nat_to_string b -> $4; @@ -39,8 +41,9 @@ function main () real_to_string a -> $3; call pln ($3); exit 0; +} -function pln (str message $0) +function pln (str message $0) { plex term $1; int msg_length $2; str nl $3; @@ -56,3 +59,4 @@ function pln (str message $0) string_length nl -> nl_length; syscall WRITE term nl nl_length; return; +} diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index dca8380..d67eb54 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -2,27 +2,28 @@ global str terminal_namespace = "/dev/term/0"; global str prompt = "Enter a string:"; global str new_line = "\n"; -function main () - int in_mode $11; - str in_term $10; +function main () { + int mode $11; + str term $10; - load_address terminal_namespace -> in_term; - load_immediate 0 -> in_mode; - syscall OPEN in_term in_mode in_term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> term; + load_immediate 0 -> mode; + syscall OPEN term mode term; // Terminal term = open("/dev/term/0", 0); load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE in_term $7 $8; // print prompt + syscall WRITE term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ in_term user_string $8; // read in max 32 byte string + syscall READ term user_string $8; // read in max 32 byte string call pln (user_string); exit 0; +} -function pln (str message $0) +function pln (str message $0) { plex term $1; int msg_length $2; str nl $3; @@ -39,4 +40,4 @@ function pln (str message 
$0) string_length nl -> nl_length; syscall WRITE term nl nl_length; return; - \ No newline at end of file +} diff --git a/test/paint-bw.ul.ir b/test/paint-bw.ul.ir index 3fd24c5..9c3d77d 100644 --- a/test/paint-bw.ul.ir +++ b/test/paint-bw.ul.ir @@ -7,7 +7,7 @@ global byte GRAY = 146; global byte LIGHT_GRAY = 182; global byte SELECTED_COLOR = 255; -function main () +function main () { // Open screen plex screen $0; str screen_name $18; @@ -51,7 +51,7 @@ function main () nat m_zero $11; - loop draw_loop + loop draw_loop { // load mouse click data syscall REFRESH mouse; @@ -92,13 +92,14 @@ function main () call draw_box (screen_buffer width selected_color mouse_x mouse_y brush_size brush_size); - jump draw_loop; + jump draw_loop; + } // Flush and exit exit 0; function set_color_if_clicked (int click_x $0, int click_y $1, - int box_x $2, int box_y $3, byte check_color $4, int bsize $5) + int box_x $2, int box_y $3, byte check_color $4, int bsize $5) { // Compute right int right_edge $6; @@ -118,9 +119,10 @@ function set_color_if_clicked (int click_x $0, int click_y $1, else fail return; +} function draw_outlined_swatch(nat dos_base $0, - byte swatch_color $1, int x $2, int y $3, int dos_width $4) + byte swatch_color $1, int x $2, int y $3, int dos_width $4) { // Constants nat background_color $5; @@ -152,10 +154,11 @@ function draw_outlined_swatch(nat dos_base $0, call draw_box (dos_base dos_width swatch_color $9 $10 fill_size fill_size); return; +} function draw_box (nat db_base $0, nat screen_width $1, byte box_color $2, nat x_start $3, nat y_start $4, - nat db_width $5, nat height $6) + nat db_width $5, nat height $6) { // Compute start address: base + y*640 + x nat offset $15; @@ -172,9 +175,11 @@ function draw_box (nat db_base $0, nat screen_width $1, int zero $26; load_immediate 0 -> zero; - loop draw_box_outer + loop draw_box_outer { memset_8 box_color db_width -> offset; // draw row add_int offset screen_width -> offset; // next row += 640 sub_int height i -> 
height; // decrement row count - jump_gt_int draw_box_outer height zero; + jump_gt_int draw_box_outer height zero; + } return; +} \ No newline at end of file diff --git a/test/paint.ul.ir b/test/paint.ul.ir index 5a1c18c..98d9e77 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -34,7 +34,7 @@ global byte DARK_ORANGE = 208; global byte GOLD = 244; global byte SELECTED_COLOR = 255; -function main () +function main () { // Open screen plex screen $0; str screen_name $18; @@ -118,14 +118,14 @@ function main () nat m_zero $11; - loop draw_loop + loop draw_loop { // load mouse click data syscall REFRESH mouse; byte left_down $9; load_offset_8 mouse 16 -> left_down; // load btn1 pressed - jump_eq_nat draw_loop left_down m_zero; + jump_eq_nat draw_loop left_down m_zero; // if (!btn1.left) continue; nat mouse_x $7; nat mouse_y $8; @@ -206,13 +206,15 @@ function main () call draw_box (screen_buffer width selected_color mouse_x mouse_y brush_size brush_size); - jump draw_loop; + jump draw_loop; + } // Flush and exit exit 0; +} function set_color_if_clicked (int click_x $0, int click_y $1, - int box_x $2, int box_y $3, byte check_color $4, int bsize $5) + int box_x $2, int box_y $3, byte check_color $4, int bsize $5) { // Compute right int right_edge $6; @@ -232,9 +234,10 @@ function set_color_if_clicked (int click_x $0, int click_y $1, else fail return; +} function draw_outlined_swatch(nat dos_base $0, - byte swatch_color $1, int x $2, int y $3, int dos_width $4) + byte swatch_color $1, int x $2, int y $3, int dos_width $4) { // Constants nat background_color $5; @@ -266,10 +269,11 @@ function draw_outlined_swatch(nat dos_base $0, call draw_box (dos_base dos_width swatch_color $9 $10 fill_size fill_size); return; +} function draw_box (nat db_base $0, nat screen_width $1, byte box_color $2, nat x_start $3, nat y_start $4, - nat db_width $5, nat height $6) + nat db_width $5, nat height $6) { // Compute start address: base + y*640 + x nat offset $15; @@ -286,9 +290,11 @@ 
function draw_box (nat db_base $0, nat screen_width $1, int zero $26; load_immediate 0 -> zero; - loop draw_box_outer + loop draw_box_outer { memset_8 box_color db_width -> offset; // draw row add_int offset screen_width -> offset; // next row += 640 sub_int height i -> height; // decrement row count - jump_gt_int draw_box_outer height zero; + jump_gt_int draw_box_outer height zero; + } return; +} \ No newline at end of file diff --git a/test/simple.ul.ir b/test/simple.ul.ir index 3c04c23..11fc1d1 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -1,15 +1,16 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; -function main () +function main () { load_immediate 1.0 -> $0; load_immediate 2.0 -> $1; add_real $0 $1 -> $0; real_to_string $0 -> $0; call pln ($0); exit 0; +} -function pln (str message $0) +function pln (str message $0) { plex term $1; int msg_length $2; str nl $3; @@ -26,3 +27,4 @@ function pln (str message $0) string_length nl -> nl_length; syscall WRITE term nl nl_length; return; +} diff --git a/test/window.ul.ir b/test/window.ul.ir index 185cfd0..877bf3d 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -4,7 +4,7 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; global byte WHITE = 255; -function main () +function main () { plex screen $0; plex mouse $1; str tmp_str $2; @@ -47,13 +47,13 @@ function main () syscall WRITE screen screen_buffer buffer_size; // redraw - loop draw_loop + loop draw_loop { // load mouse click data syscall REFRESH mouse; load_offset_8 mouse 16 -> left_down; - jump_eq_nat draw_loop left_down mode; // mode = 0 / false + jump_eq_nat draw_loop left_down mode; // if (!left_down) continue; load_offset_32 mouse 8 -> x; load_offset_32 mouse 12 -> y; @@ -70,10 +70,13 @@ function main () syscall WRITE screen screen_buffer buffer_size; // redraw - jump draw_loop; - exit 0; + jump draw_loop; + } -function pln (str message $0) + exit 0; +} + +function pln (str message 
$0) { plex term $1; int msg_length $2; str nl $3; @@ -90,3 +93,4 @@ function pln (str message $0) string_length nl -> nl_length; syscall WRITE term nl nl_length; return; +} From 2bb1166085e1b6f326c1e9e3fb18e81d50c72f8d Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 20 Dec 2025 12:24:52 -0800 Subject: [PATCH 27/27] Make compiler as iterations on the assembler --- src/tools/assembler/assembler.h | 1 - src/tools/compiler/compiler.c | 2485 ++++++++++++++++++++++++++++--- src/tools/compiler/compiler.h | 76 +- test/add.uir.ul | 47 + test/fib.uir.ul | 60 + test/hello.uir.ul | 32 + test/hello.ul | 5 +- test/loop.uir.ul | 65 + test/malloc.uir.ul | 26 + test/paint.uir.ul | 278 ++++ 10 files changed, 2832 insertions(+), 243 deletions(-) create mode 100644 test/add.uir.ul create mode 100644 test/fib.uir.ul create mode 100644 test/hello.uir.ul create mode 100644 test/loop.uir.ul create mode 100644 test/malloc.uir.ul create mode 100644 test/paint.uir.ul diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 95edd43..ba2492d 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -27,7 +27,6 @@ typedef enum { typedef struct symbol_s Symbol; typedef struct symbol_tab_s SymbolTable; typedef struct scope_tab_s ScopeTable; -typedef struct assembler_s Assembler; #define MAX_SYMBOL_NAME_LENGTH 64 struct symbol_s { diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c index 04e6ae0..cc1890a 100644 --- a/src/tools/compiler/compiler.c +++ b/src/tools/compiler/compiler.c @@ -1,222 +1,2339 @@ +#include "../../vm/common.h" +#include "../../vm/fixed.h" +#include "../../vm/libc.h" +#include "../../vm/opcodes.h" + #include "parser.h" #include "compiler.h" -#include "../../vm/common.h" -#include "../../vm/opcodes.h" -#include "../../vm/libc.h" -#include "../../vm/fixed.h" + +/* FIXME: remove these and replace with libc.h instead */ #include #include #include -typedef struct { - Token current; - Token previous; - 
Token before; - bool hadError; - bool panicMode; - i8 rp; -} Parser; -typedef enum { - PREC_NONE, - PREC_ASSIGNMENT, /* = */ - PREC_OR, /* or */ - PREC_AND, /* and */ - PREC_EQUALITY, /* == != */ - PREC_COMPARISON, /* < > <= >= */ - PREC_TERM, /* + - */ - PREC_FACTOR, /* * / */ - PREC_UNARY, /* not */ - PREC_CALL, /* . () */ - PREC_PRIMARY -} Precedence; - -typedef void (*ParseFn)(char *program); - -typedef struct { - ParseFn prefix; - ParseFn infix; - Precedence precedence; -} ParseRule; - -Parser parser; - -const char *internalErrorMsg = - "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes."; - -bool isType(TokenType type) { - return type == TOKEN_TYPE_INT || type == TOKEN_TYPE_NAT || - type == TOKEN_TYPE_REAL || type == TOKEN_TYPE_STR || - type == TOKEN_TYPE_BOOL; +void emit_op(VM *vm, u8 byte) { + vm->code[vm->cp] = byte; } -void errorAt(Token *token, const char *message) { - if (parser.panicMode) - return; - parser.panicMode = true; - fprintf(stderr, "[line %d] Error", token->line); - - if (token->type == TOKEN_EOF) { - fprintf(stderr, " at end"); - } else if (token->type == TOKEN_ERROR) { - } else { - fprintf(stderr, " at '%.*s'", token->length, token->start); - } - - fprintf(stderr, ": %s\n", message); - parser.hadError = true; +void emit_byte(VM *vm, u8 byte) { + vm->code[vm->cp] = byte; } -void error(const char *message) { errorAt(&parser.previous, message); } - -void errorAtCurrent(const char *message) { errorAt(&parser.current, message); } - -void advance() { - parser.previous = parser.current; - - for (;;) { - parser.current = next_token(); - if (parser.current.type != TOKEN_ERROR) - break; - - errorAtCurrent(parser.current.start); - } -} - -void consume(TokenType type, const char *message) { - if (parser.current.type == type) { - advance(); - return; - } - - errorAtCurrent(message); -} - -static int allocateRegister() { - char buffer[38]; - if (parser.rp + 1 > 31) { - sprintf(buffer, "Out of registers (used %d, max 32)", parser.rp + 1); - 
error(buffer); - return -1; - } - - return parser.rp++; -} - -static void popRegister() { - if (parser.rp - 1 > 0) { - parser.rp--; - } -} - -static void clearRegisters(u8 reg) { parser.rp = 0; } - -void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } - void emit_u32(VM *vm, u32 value) { write_u32(vm, code, vm->cp, value); - vm->cp += 4; } -void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } +Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length, + i32 scope_ref) { + SymbolTable st = table->scopes[scope_ref]; + for (u32 i = 0; i < st.count; i++) { + if (st.symbols[i].name_length == length) { + if (strleq(st.symbols[i].name, name, length)) { + return &table->scopes[scope_ref].symbols[i]; + } + } + } + if (st.parent < 0) + return nil; + return symbol_table_lookup(table, name, length, st.parent); +} -static bool check(TokenType type) { return parser.current.type == type; } +u8 symbol_table_add(ScopeTable *table, Symbol s) { + Symbol *sym = + symbol_table_lookup(table, s.name, s.name_length, table->scope_ref); + if (sym != nil) { + fprintf(stderr, + "Error: Symbol '%.*s' already defined, in this scope" + " please pick a different variable name or create a new scope.\n", + s.name_length, s.name); + exit(1); + } -static bool match(TokenType type) { - if (!check(type)) + if (table->scopes[table->scope_ref].count + 1 > 255) { + fprintf(stderr, "Error: Only 255 symbols are allowed per scope" + " first off: impressive; secondly:" + " just create a new scope and keep going.\n"); + exit(1); + } + + if (!table_realloc(table)) { + fprintf(stderr, + "Error: Symbol table is out of memory! This is likely because you " + " built the assembler in static mode, increase the static size." + " if you built using malloc, that means your computer is out of" + " memory. Close a few tabs in your web browser and try again." 
+ " Count was %d, while capacity was %d\n", + table->count, table->capacity); + exit(1); + } +#ifdef DEBUG_PRINT + if (s.scope == VAR) { + printf("$%d = %s\n", s.ref, s.name); + } else if (s.scope == GLOBAL) { + printf("memory[%d] = %s\n", s.ref, s.name); + } else { + printf("code[%d] = %s\n", s.ref, s.name); + } +#endif + table->scopes[table->scope_ref].symbols[table->scopes[table->scope_ref].count] = s; + u8 index = table->scopes[table->scope_ref].count; + table->scopes[table->scope_ref].count++; + return index; +} + +u32 get_ref(ScopeTable *st, const char *name, u32 length) { + Symbol *sym = symbol_table_lookup(st, name, length, st->scope_ref); + if (!sym) { + fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n", + length, name); + exit(1); + return 0; + } + return sym->ref; +} + +u32 get_ptr(Token token, ScopeTable *st) { + if (token.type == TOKEN_IDENTIFIER) { + return get_ref(st, token.start, token.length); + } + + if (token.type == TOKEN_LITERAL_INT) { + return atoi(token.start); + } + + if (token.type == TOKEN_LITERAL_NAT) { + char *endptr; + u32 out = (u32)strtoul(token.start, &endptr, 10); + if (endptr == token.start || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal at line %d: %.*s\n", token.line, + token.length, token.start); + exit(1); + } + return out; + } + + fprintf(stderr, "Error: Not a pointer or symbol at line %d: %.*s\n", + token.line, token.length, token.start); + exit(1); +} + +u32 get_reg(Token token, ScopeTable *st) { + if (token.type == TOKEN_IDENTIFIER) { + return get_ref(st, token.start, token.length); + } + + if (token.type == TOKEN_BIG_MONEY) { + token = next_token(); + return atoi(token.start); + } + + fprintf(stderr, "Error: Not a register or symbol at line %d: %.*s\n", + token.line, token.length, token.start); + exit(1); +} + +Token next_id_or_reg() { + Token token = next_token(); + if (token.type == TOKEN_IDENTIFIER) { + return token; + } + + if (token.type == TOKEN_BIG_MONEY) { + token = 
next_token(); + return token; + } + + printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + + return token; +} + +Token next_id_or_ptr() { + Token token = next_token(); + + if (token.type != TOKEN_IDENTIFIER && token.type != TOKEN_LITERAL_NAT && + token.type != TOKEN_LITERAL_INT && token.type != TOKEN_LITERAL_REAL) { + printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } + return token; +} + +Token next_token_is(TokenType type) { + Token token = next_token(); + if (token.type != type) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + exit(1); + } + return token; +} + +/** + * Global . + */ +bool define_global(VM *vm, ScopeTable *st) { + Symbol s; + + Token token_type = next_token(); + switch (token_type.type) { + case TOKEN_TYPE_BOOL: + s.type = BOOL; + s.size = 1; + break; + case TOKEN_TYPE_I8: + s.type = I8; + s.size = 1; + break; + case TOKEN_TYPE_U8: + s.type = U8; + s.size = 1; + break; + case TOKEN_TYPE_I16: + s.type = I16; + s.size = 2; + break; + case TOKEN_TYPE_U16: + s.type = U16; + s.size = 2; + break; + case TOKEN_TYPE_INT: + s.type = I32; + s.size = 4; + break; + case TOKEN_TYPE_NAT: + s.type = U32; + s.size = 4; + break; + case TOKEN_TYPE_REAL: + s.type = F32; + s.size = 4; + break; + case TOKEN_TYPE_STR: + s.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: return false; - advance(); + } + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + return false; + } + + memcpy(s.name, name.start, name.length); + s.name_length = name.length; + s.name[name.length] = '\0'; + + u32 addr = vm->mp; + s.ref = addr; + s.scope = GLOBAL; + + next_token_is(TOKEN_EQ); + + Token value = next_token(); + switch (value.type) { + case TOKEN_KEYWORD_TRUE: { + u32 addr = vm->mp; + write_u8(vm, memory, addr, 1); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + break; + } + 
case TOKEN_KEYWORD_FALSE: { + u32 addr = vm->mp; + write_u8(vm, memory, addr, 0); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + break; + } + case TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + break; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", value.start); + exit(1); + } + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + break; + } + case TOKEN_LITERAL_REAL: { + fixed_t out = float_to_fixed(atof(value.start)); + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + break; + } + case TOKEN_LITERAL_STR: { + const char *src = value.start; + i32 len = 0; + i32 i = 0; + + while (i < value.length) { + char c = src[i++]; + if (c == '"') { + continue; + } + if (c == '\\' && i < value.length) { + switch (src[i++]) { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + case '\\': + case '"': + case '\'': + break; + default: + i--; /* Rewind for unknown escapes */ + } + } + write_u8(vm, memory, addr + 4 + len, c); + len++; + } + + u32 size = len + 5; /* 4 (len) + dst_len + 1 (null) */ + s.size = size; + + vm->mp += size; + vm->frames[vm->fp].end += size; + + write_u32(vm, memory, addr, len); + write_u8(vm, memory, addr + 4 + len, '\0'); + break; + } + default: + return false; + } + next_token_is(TOKEN_SEMICOLON); + + symbol_table_add(st, s); return true; } -static void expression(VM *vm) { - USED(vm); -} - -void number(VM *vm) { - emit_opcode(vm, OP_LOAD_IMM); - int reg = allocateRegister(); - if (reg < 0) - return; - emit_byte(vm, reg); - - switch (parser.previous.type) { - case TOKEN_LITERAL_INT: { - char 
*endptr; - i32 value = (i32)strtol(parser.previous.start, &endptr, 10); - emit_u32(vm, value); - return; +/** + * Var . + */ +void define_var(ScopeTable *st, Token regType) { + Symbol s; + s.scope = VAR; + switch (regType.type) { + case TOKEN_KEYWORD_PLEX: { + s.type = PLEX; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; } - case TOKEN_LITERAL_NAT: { - long value = atol(parser.previous.start); - emit_u32(vm, value); - return; + case TOKEN_TYPE_I8: { + s.type = I8; + s.size = 1; + break; } - case TOKEN_LITERAL_REAL: { - float value = atof(parser.previous.start); - fixed_t fvalue = float_to_fixed(value); - emit_u32(vm, fvalue); - return; + case TOKEN_TYPE_I16: { + s.type = I16; + s.size = 2; + break; + } + case TOKEN_TYPE_INT: { + s.type = I32; + s.size = 4; + break; + } + case TOKEN_TYPE_U8: { + s.type = U8; + s.size = 1; + break; + } + case TOKEN_TYPE_U16: { + s.type = U16; + s.size = 2; + break; + } + case TOKEN_TYPE_NAT: { + s.type = U32; + s.size = 4; + break; + } + case TOKEN_TYPE_REAL: { + s.type = F32; + s.size = 4; + break; + } + case TOKEN_TYPE_BOOL: { + s.type = BOOL; + s.size = 1; + break; + } + case TOKEN_TYPE_STR: { + s.type = STR; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; } default: - return; // Unreachable. 
+ printf("ERROR at line %d: %.*s\n", regType.line, regType.length, + regType.start); + exit(1); } - errorAtCurrent("Invalid number format"); + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + + memcpy(s.name, name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; + + next_token_is(TOKEN_BIG_MONEY); + + Token reg_num = next_token_is(TOKEN_LITERAL_INT); + s.ref = atoi(reg_num.start); + symbol_table_add(st, s); } -static void unary(VM *vm) { - TokenType operatorType = parser.previous.type; +/** + * function . + */ +void define_function(VM *vm, ScopeTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = FUNCTION; - // Compile the operand. - expression(vm); + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); + exit(1); + } + memcpy(s.name, name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; - // Emit the operator instruction. 
- switch (operatorType) { - case TOKEN_MINUS: { - switch (parser.previous.type) { - case TOKEN_LITERAL_NAT: - emit_opcode(vm, OP_NEG_NAT); - case TOKEN_LITERAL_REAL: - emit_opcode(vm, OP_NEG_REAL); - default: - emit_opcode(vm, OP_NEG_INT); + next_token_is(TOKEN_LPAREN); + + i32 temp = st->scope_ref; + + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + define_var(st, next); + next = next_token(); + if (next.type == TOKEN_COMMA) { + next = next_token(); + continue; + } else if (next.type == TOKEN_RPAREN) { + break; + } else { + printf("ERROR at line %d: %.*s\n", next.line, next.length, next.start); + exit(1); + } + } + s.ref = vm->cp; + next = next_token_is(TOKEN_LBRACE); + + st->scope_ref = temp; // need to add to the parents scope + symbol_table_add(st, s); + st->scope_ref = (i32)st->count; +} + +/** + * Branch. + */ +void define_branch(VM *vm, ScopeTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = VOID; + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); + exit(1); + } + memcpy(s.name, name.start, name.length); + s.name_length = name.length; + s.name[name.length] = '\0'; + + s.ref = vm->cp; + symbol_table_add(st, s); +} + +int get_instruction_byte_size(const char *opname) { + + if (strcmp(opname, "return") == 0) { + return 2; + } + + if (strcmp(opname, "neg_int") == 0 || strcmp(opname, "abs_int") == 0 || + strcmp(opname, "neg_nat") == 0 || strcmp(opname, "abs_nat") == 0 || + strcmp(opname, "neg_real") == 0 || strcmp(opname, "abs_real") == 0 || + strcmp(opname, "int_to_string") == 0 || + strcmp(opname, "load_indirect_8") == 0 || + strcmp(opname, "nat_to_string") == 0 || + strcmp(opname, "load_indirect_16") == 0 || + strcmp(opname, "real_to_string") == 0 || + strcmp(opname, "load_indirect_32") == 0 || + 
strcmp(opname, "int_to_real") == 0 || + strcmp(opname, "store_indirect_8") == 0 || + strcmp(opname, "nat_to_real") == 0 || + strcmp(opname, "store_indirect_16") == 0 || + strcmp(opname, "real_to_int") == 0 || + strcmp(opname, "store_indirect_32") == 0 || + strcmp(opname, "real_to_nat") == 0 || strcmp(opname, "nat_to_int") == 0 || + strcmp(opname, "int_to_nat") == 0 || + strcmp(opname, "string_length") == 0 || strcmp(opname, "memset") == 0 || + strcmp(opname, "memset") == 0 || strcmp(opname, "memset_8") == 0 || + strcmp(opname, "memset_16") == 0 || + strcmp(opname, "register_move") == 0 || strcmp(opname, "malloc") == 0) { + return 3; + } + + if (strcmp(opname, "add_int") == 0 || strcmp(opname, "sub_int") == 0 || + strcmp(opname, "mul_int") == 0 || strcmp(opname, "div_int") == 0 || + strcmp(opname, "add_nat") == 0 || strcmp(opname, "sub_nat") == 0 || + strcmp(opname, "mul_nat") == 0 || strcmp(opname, "div_nat") == 0 || + strcmp(opname, "add_real") == 0 || strcmp(opname, "sub_real") == 0 || + strcmp(opname, "bit_shift_left") == 0 || + strcmp(opname, "bit_shift_right") == 0 || + strcmp(opname, "bit_shift_r_ext") == 0 || + strcmp(opname, "bit_and") == 0 || strcmp(opname, "bit_or") == 0 || + strcmp(opname, "bit_xor") == 0 || strcmp(opname, "mul_real") == 0 || + strcmp(opname, "div_real") == 0) { + return 4; + } + + if (strcmp(opname, "halt") == 0 || strcmp(opname, "jump_if_flag") == 0 || + strcmp(opname, "jump") == 0) { + return 5; + } + + if (strcmp(opname, "load_absolute_32") == 0 || + strcmp(opname, "load_immediate") == 0 || + strcmp(opname, "load_address") == 0 || + strcmp(opname, "load_absolute_16") == 0 || + strcmp(opname, "load_absolute_8") == 0 || + strcmp(opname, "store_absolute_32") == 0 || + strcmp(opname, "store_absolute_8") == 0 || + strcmp(opname, "store_absolute_16") == 0) { + return 6; + } + + if (strcmp(opname, "jump_eq_int") == 0 || + strcmp(opname, "jump_neq_int") == 0 || + strcmp(opname, "jump_gt_int") == 0 || + strcmp(opname, "jump_lt_int") == 0 || + 
strcmp(opname, "jump_le_int") == 0 || + strcmp(opname, "jump_ge_int") == 0 || + strcmp(opname, "jump_eq_nat") == 0 || + strcmp(opname, "jump_neq_nat") == 0 || + strcmp(opname, "jump_gt_nat") == 0 || + strcmp(opname, "jump_lt_nat") == 0 || + strcmp(opname, "jump_le_nat") == 0 || + strcmp(opname, "jump_ge_nat") == 0 || + strcmp(opname, "jump_eq_real") == 0 || + strcmp(opname, "jump_neq_real") == 0 || + strcmp(opname, "jump_gt_real") == 0 || + strcmp(opname, "jump_lt_real") == 0 || + strcmp(opname, "jump_le_real") == 0 || + strcmp(opname, "jump_ge_real") == 0 || + strcmp(opname, "store_offset_8") == 0 || + strcmp(opname, "store_offset_16") == 0 || + strcmp(opname, "store_offset_32") == 0 || + strcmp(opname, "load_offset_8") == 0 || + strcmp(opname, "load_offset_16") == 0 || + strcmp(opname, "load_offset_32") == 0) { + return 7; + } + + fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); + exit(-1); +} + +#define FAKE_OP(op) \ + } \ + else if (strleq(token.start, op, token.length)) { \ + do { \ + while (token.type != TOKEN_SEMICOLON) { \ + token = next_token(); \ + } \ + /*printf("code[%d]=%s\n %d + %d = %d\n", vm->cp, op, \ + * get_instruction_byte_size(op), vm->cp, vm->cp + \ + * get_instruction_byte_size(op)); */ \ + vm->cp += get_instruction_byte_size(op); \ + } while (0); + +/** + * Build the symbol table and calculate the types/size/offsets of all values. + */ +void build_symbol_table(VM *vm, char *source, ScopeTable *st) { + Token token; + init_lexer(source); + do { + token = next_token(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + exit(1); } - int dest = allocateRegister(); - emit_byte(vm, dest); - emit_byte(vm, dest); - } - default: - return; // Unreachable. 
- } + if (token.type != TOKEN_EOF) { + + if (token.type == TOKEN_LBRACE) { + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + continue; + } + + if (token.type == TOKEN_RBRACE) { + i32 current_scope = st->scope_ref; + i32 parent = st->scopes[current_scope].parent; + if (parent < 0) parent = 0; + st->scope_ref = parent; + continue; + } + + if (token.type == TOKEN_KEYWORD_GLOBAL) { + define_global(vm, st); + continue; + } + + if (token.type == TOKEN_KEYWORD_FN) { + define_function(vm, st); + continue; + } + + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { + define_var(st, token); + next_token_is(TOKEN_SEMICOLON); + continue; + } + + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(vm, st); + continue; + } + + if (token.type == TOKEN_KEYWORD_RETURN) { + vm->cp++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + vm->cp++; + continue; + } + + get_reg(next, st); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + continue; + } + +#ifdef DEBUG_PRINT + printf("-- %.*s --\n", token.length, token.start); +#endif + if (token.type == TOKEN_IDENTIFIER) { + /* check to see if it is an opcode first */ + if (strleq(token.start, "exit", token.length)) { + + vm->cp++; + + next_token(); + vm->cp += 4; + +#ifdef DEBUG_PRINT + printf("code[%d] = exit\n", vm->cp); +#endif + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "call", token.length)) { + + vm->cp++; + + next_token_is(TOKEN_IDENTIFIER); + vm->cp += 4; + + vm->cp++; + Token next = 
next_token_is(TOKEN_LPAREN); + next = next_token(); + while (next.type != TOKEN_RPAREN) { + get_reg(next, st); + vm->cp++; + next = next_token(); + } + + next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + vm->cp++; + } else { + next = next_token(); + get_reg(next, st); + vm->cp++; + } +#ifdef DEBUG_PRINT + printf("code[%d] = call\n", vm->cp); +#endif + continue; + } else if (strleq(token.start, "syscall", token.length)) { + + vm->cp++; + + Token next = next_token(); + vm->cp += 4; + + next = next_token(); + while (next.type != TOKEN_SEMICOLON) { + get_reg(next, st); + vm->cp++; + next = next_token(); + } +#ifdef DEBUG_PRINT + printf("code[%d] = syscall\n", vm->cp); +#endif + continue; + FAKE_OP("load_immediate") + FAKE_OP("load_address") + FAKE_OP("malloc") + FAKE_OP("memset_8") + FAKE_OP("memset_16") + FAKE_OP("memset_32") + FAKE_OP("load_offset_8") + FAKE_OP("load_offset_16") + FAKE_OP("load_offset_32") + FAKE_OP("load_indirect_8") + FAKE_OP("load_indirect_16") + FAKE_OP("load_indirect_32") + FAKE_OP("load_absolute_8") + FAKE_OP("load_absolute_16") + FAKE_OP("load_absolute_32") + FAKE_OP("store_absolute_8") + FAKE_OP("store_absolute_16") + FAKE_OP("store_absolute_32") + FAKE_OP("store_indirect_8") + FAKE_OP("store_indirect_16") + FAKE_OP("store_indirect_32") + FAKE_OP("store_offset_8") + FAKE_OP("store_offset_16") + FAKE_OP("store_offset_32") + FAKE_OP("register_move") + FAKE_OP("add_int") + FAKE_OP("sub_int") + FAKE_OP("mul_int") + FAKE_OP("div_int") + FAKE_OP("abs_int") + FAKE_OP("neg_int") + FAKE_OP("add_nat") + FAKE_OP("sub_nat") + FAKE_OP("mul_nat") + FAKE_OP("div_nat") + FAKE_OP("abs_nat") + FAKE_OP("neg_nat") + FAKE_OP("add_real") + FAKE_OP("sub_real") + FAKE_OP("mul_real") + FAKE_OP("div_real") + FAKE_OP("abs_real") + FAKE_OP("neg_real") + FAKE_OP("int_to_real") + FAKE_OP("nat_to_real") + FAKE_OP("real_to_int") + FAKE_OP("real_to_nat") + FAKE_OP("bit_shift_left") + FAKE_OP("bit_shift_right") + FAKE_OP("bit_shift_r_ext") + FAKE_OP("bit_and") + 
FAKE_OP("bit_or") + FAKE_OP("bit_xor") + FAKE_OP("jump") + FAKE_OP("jump_if_flag") + FAKE_OP("jump_eq_int") + FAKE_OP("jump_neq_int") + FAKE_OP("jump_gt_int") + FAKE_OP("jump_lt_int") + FAKE_OP("jump_le_int") + FAKE_OP("jump_ge_int") + FAKE_OP("jump_eq_nat") + FAKE_OP("jump_neq_nat") + FAKE_OP("jump_gt_nat") + FAKE_OP("jump_lt_nat") + FAKE_OP("jump_le_nat") + FAKE_OP("jump_ge_nat") + FAKE_OP("jump_eq_real") + FAKE_OP("jump_neq_real") + FAKE_OP("jump_ge_real") + FAKE_OP("jump_gt_real") + FAKE_OP("jump_lt_real") + FAKE_OP("jump_le_real") + FAKE_OP("string_length") + FAKE_OP("int_to_string") + FAKE_OP("nat_to_string") + FAKE_OP("real_to_string") + FAKE_OP("string_eq") + FAKE_OP("string_concat") + FAKE_OP("string_get_char") + FAKE_OP("string_find_char") + FAKE_OP("string_slice") + FAKE_OP("string_to_int") + FAKE_OP("string_to_nat") + FAKE_OP("string_to_real") + } else { + /* some other identifier */ + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } + } + } + } while (token.type != TOKEN_EOF); } -static void emitHalt(VM *vm) { - emit_opcode(vm, OP_EXIT); - advance(); - number(vm); +/** + * 2nd pass, emit the bytecode + */ +void emit_bytecode(VM *vm, char *source, ScopeTable *st) { + Token token; + init_lexer(source); + do { + token = next_token(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + break; + } + if (token.type != TOKEN_EOF) { + + if (token.type == TOKEN_LBRACE) { + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + continue; + } + + if (token.type == TOKEN_RBRACE) { + i32 current_scope = st->scope_ref; + i32 parent = st->scopes[current_scope].parent; + if (parent < 0) parent = 0; + st->scope_ref = parent; + continue; + } + + if (token.type == TOKEN_KEYWORD_GLOBAL) { + /* ignore, already processed */ + next_token(); /* type */ + next_token(); /* var */ + next_token(); /* eq */ + next_token(); /* value 
*/ + next_token(); /* ; */ + continue; + } + + if (token.type == TOKEN_KEYWORD_FN) { + /* ignore, already processed */ + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + next = next_token(); + } + continue; + } + + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR) { + /* ignore, already processed */ + next_token(); /* type */ + next_token(); /* var */ + next_token(); /* reg */ + next_token(); /* ; */ + continue; + } + + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + /* ignore, already processed */ + next_token(); /* id */ + } + + if (token.type == TOKEN_KEYWORD_RETURN) { + emit_op(vm, OP_RETURN); + vm->cp++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + emit_byte(vm, 0xFF); + vm->cp++; + continue; + } + + u32 reg = get_reg(next, st); + emit_byte(vm, reg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + continue; + } + +#ifdef DEBUG_PRINT + printf("-- %.*s --\n", token.length, token.start); +#endif + if (token.type == TOKEN_IDENTIFIER) { + /* check to see if it is an opcode first */ + if (strleq(token.start, "exit", token.length)) { + + emit_op(vm, OP_EXIT); + vm->cp++; + + Token next = next_token(); + u32 ptr = get_ptr(next, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "call", token.length)) { + + emit_op(vm, OP_CALL); + vm->cp++; + + Token id = next_token_is(TOKEN_IDENTIFIER); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + u8 arg_count = 0; + u32 arg_pos = vm->cp++; + Token next = next_token_is(TOKEN_LPAREN); + next = 
next_token(); + while (next.type != TOKEN_RPAREN) { + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + arg_count++; + next = next_token(); + } + + vm->code[arg_pos] = arg_count; + +#ifdef DEBUG_PRINT + printf("^code[%d] = %d\n", arg_pos, arg_count); +#endif + + next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + emit_byte(vm, 255); + vm->cp++; + } else { + next = next_token(); + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + } + + continue; + } else if (strleq(token.start, "syscall", token.length)) { + + emit_op(vm, OP_SYSCALL); + vm->cp++; + + Token next = next_token(); + + u32 syscall_id = 0; + const char *syscall_name = next.start; + if (strleq(syscall_name, "EXIT", next.length)) + syscall_id = SYSCALL_EXIT; + else if (strleq(syscall_name, "OPEN", next.length)) + syscall_id = SYSCALL_DEVICE_OPEN; + else if (strleq(syscall_name, "READ", next.length)) + syscall_id = SYSCALL_DEVICE_READ; + else if (strleq(syscall_name, "WRITE", next.length)) + syscall_id = SYSCALL_DEVICE_WRITE; + else if (strleq(syscall_name, "CLOSE", next.length)) + syscall_id = SYSCALL_DEVICE_CLOSE; + else if (strleq(syscall_name, "IOCTL", next.length)) + syscall_id = SYSCALL_DEVICE_IOCTL; + else if (strleq(syscall_name, "REFRESH", next.length)) + syscall_id = SYSCALL_DEVICE_REFRESH; + + emit_u32(vm, syscall_id); + vm->cp += 4; + + next = next_token(); + while (next.type != TOKEN_SEMICOLON && + next.type != TOKEN_ARROW_RIGHT) { + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next = next_token(); + } + + if (next.type == TOKEN_ARROW_RIGHT) { + next = next_token(); + u8 arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + } + + } else if (strleq(token.start, "load_immediate", token.length)) { + + emit_op(vm, OP_LOAD_IMM); + vm->cp++; + + Token value = next_token(); + switch (value.type) { + case TOKEN_KEYWORD_TRUE: { + emit_u32(vm, 1); + break; + } + case TOKEN_KEYWORD_FALSE: { + emit_u32(vm, 0); + break; + } + case 
TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + emit_u32(vm, out); + break; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid 'real' number: '%.*s'\n", token.length, + token.start); + exit(1); + } + emit_u32(vm, out); + break; + } + case TOKEN_LITERAL_REAL: { + fixed_t out = float_to_fixed(atof(value.start)); + emit_u32(vm, out); + break; + } + default: { + fprintf(stderr, "Unknown immediate: '%.*s'\n", token.length, + token.start); + exit(1); + } + } + + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_address", token.length)) { + emit_op(vm, OP_LOAD_IMM); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "malloc", token.length)) { + emit_op(vm, OP_MALLOC); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_8", token.length)) { + emit_op(vm, OP_MEMSET_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_16", token.length)) { + emit_op(vm, 
OP_MEMSET_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_32", token.length)) { + emit_op(vm, OP_MEMSET_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_offset_8", token.length)) { + emit_op(vm, OP_LOAD_OFF_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_offset_16", token.length)) { + emit_op(vm, OP_LOAD_OFF_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_offset_32", token.length)) { + emit_op(vm, OP_LOAD_OFF_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + 
vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_indirect_8", token.length)) { + emit_op(vm, OP_LOAD_IND_8); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_indirect_16", token.length)) { + emit_op(vm, OP_LOAD_IND_16); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_indirect_32", token.length)) { + emit_op(vm, OP_LOAD_IND_32); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_absolute_8", token.length)) { + emit_op(vm, OP_LOAD_ABS_8); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_absolute_16", token.length)) { + emit_op(vm, OP_LOAD_ABS_16); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + 
vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_absolute_32", token.length)) { + emit_op(vm, OP_LOAD_ABS_32); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_absolute_8", token.length)) { + emit_op(vm, OP_STORE_ABS_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_absolute_16", token.length)) { + emit_op(vm, OP_STORE_ABS_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_absolute_32", token.length)) { + emit_op(vm, OP_STORE_ABS_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_indirect_8", token.length)) { + emit_op(vm, OP_STORE_IND_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_indirect_16", token.length)) { + emit_op(vm, 
OP_STORE_IND_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_indirect_32", token.length)) { + emit_op(vm, OP_STORE_IND_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_offset_8", token.length)) { + emit_op(vm, OP_STORE_OFF_8); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_offset_16", token.length)) { + emit_op(vm, OP_STORE_OFF_16); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "store_offset_32", token.length)) { + emit_op(vm, OP_STORE_OFF_32); + vm->cp++; + + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + + 
next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "register_move", token.length)) { + emit_op(vm, OP_REG_MOV); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "add_int", token.length)) { + emit_op(vm, OP_ADD_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "sub_int", token.length)) { + emit_op(vm, OP_SUB_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "mul_int", token.length)) { + emit_op(vm, OP_MUL_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "div_int", token.length)) { + emit_op(vm, OP_DIV_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + 
next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "abs_int", token.length)) { + emit_op(vm, OP_ABS_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "neg_int", token.length)) { + emit_op(vm, OP_NEG_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "add_nat", token.length)) { + emit_op(vm, OP_ADD_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "sub_nat", token.length)) { + emit_op(vm, OP_SUB_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "mul_nat", token.length)) { + emit_op(vm, OP_MUL_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + 
next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "div_nat", token.length)) { + emit_op(vm, OP_DIV_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "abs_nat", token.length)) { + emit_op(vm, OP_ABS_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "neg_nat", token.length)) { + emit_op(vm, OP_NEG_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "add_real", token.length)) { + emit_op(vm, OP_ADD_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + ; + } else if (strleq(token.start, "sub_real", token.length)) { + emit_op(vm, OP_SUB_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + 
next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "mul_real", token.length)) { + emit_op(vm, OP_MUL_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "div_real", token.length)) { + emit_op(vm, OP_DIV_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "abs_real", token.length)) { + emit_op(vm, OP_ABS_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "neg_real", token.length)) { + emit_op(vm, OP_NEG_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "int_to_real", token.length)) { + emit_op(vm, OP_INT_TO_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); 
+ emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "nat_to_real", token.length)) { + emit_op(vm, OP_NAT_TO_REAL); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "real_to_int", token.length)) { + emit_op(vm, OP_REAL_TO_INT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "real_to_nat", token.length)) { + emit_op(vm, OP_REAL_TO_NAT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_shift_left", token.length)) { + emit_op(vm, OP_BIT_SHIFT_LEFT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_shift_right", token.length)) { + emit_op(vm, OP_BIT_SHIFT_RIGHT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + 
next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { + emit_op(vm, OP_BIT_SHIFT_R_EXT); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_and", token.length)) { + emit_op(vm, OP_BAND); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_or", token.length)) { + emit_op(vm, OP_BOR); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "bit_xor", token.length)) { + emit_op(vm, OP_BXOR); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + Token next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + next = next_token(); + arg = get_reg(next, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump", token.length)) { + emit_op(vm, OP_JMP); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); 
+ } else if (strleq(token.start, "jump_if_flag", token.length)) { + emit_op(vm, OP_JMPF); + vm->cp++; + + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_eq_int", token.length)) { + emit_op(vm, OP_JEQ_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_neq_int", token.length)) { + emit_op(vm, OP_JNEQ_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_gt_int", token.length)) { + emit_op(vm, OP_JGT_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_lt_int", token.length)) { + emit_op(vm, OP_JLT_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_le_int", token.length)) { + emit_op(vm, OP_JLE_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = 
get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_ge_int", token.length)) { + emit_op(vm, OP_JGE_INT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_eq_nat", token.length)) { + emit_op(vm, OP_JEQ_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_neq_nat", token.length)) { + emit_op(vm, OP_JNEQ_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_gt_nat", token.length)) { + emit_op(vm, OP_JGT_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_lt_nat", token.length)) { + emit_op(vm, OP_JLT_NAT); + vm->cp++; + Token id = 
next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_le_nat", token.length)) { + emit_op(vm, OP_JLE_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_ge_nat", token.length)) { + emit_op(vm, OP_JGE_NAT); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_eq_real", token.length)) { + emit_op(vm, OP_JEQ_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_neq_real", token.length)) { + emit_op(vm, OP_JNEQ_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_ge_real", token.length)) { + emit_op(vm, OP_JGE_REAL); + 
vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_gt_real", token.length)) { + emit_op(vm, OP_JGT_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_lt_real", token.length)) { + emit_op(vm, OP_JLT_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "jump_le_real", token.length)) { + emit_op(vm, OP_JLE_REAL); + vm->cp++; + Token id = next_token(); + u32 ptr = get_ptr(id, st); + emit_u32(vm, ptr); + vm->cp += 4; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "string_length", token.length)) { + emit_op(vm, OP_STRLEN); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "int_to_string", token.length)) { + emit_op(vm, OP_INT_TO_STRING); + vm->cp++; + Token reg = 
next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "nat_to_string", token.length)) { + emit_op(vm, OP_NAT_TO_STRING); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "real_to_string", token.length)) { + emit_op(vm, OP_REAL_TO_STRING); + vm->cp++; + Token reg = next_token(); + u8 arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_ARROW_RIGHT); + reg = next_token(); + arg = get_reg(reg, st); + emit_byte(vm, arg); + vm->cp++; + next_token_is(TOKEN_SEMICOLON); + ; + } else if (strleq(token.start, "string_eq", token.length)) { + } else if (strleq(token.start, "string_concat", token.length)) { + } else if (strleq(token.start, "string_get_char", token.length)) { + } else if (strleq(token.start, "string_find_char", token.length)) { + } else if (strleq(token.start, "string_slice", token.length)) { + } else if (strleq(token.start, "string_to_int", token.length)) { + } else if (strleq(token.start, "string_to_nat", token.length)) { + } else if (strleq(token.start, "string_to_real", token.length)) { + } else { + /* some other identifier */ + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } + } + } + } while (token.type != TOKEN_EOF); } -static void endCompiler(VM *vm) { emitHalt(vm); } - -static void grouping(VM *vm) { - expression(vm); - consume(TOKEN_RPAREN, "Expect ')' after expression."); -} - -bool compile(const char *source, VM *vm) { - USED(source); - USED(vm); - initLexer(source); - - parser.hadError = false; - parser.panicMode = false; - - advance(); 
-  expression(vm);
-  consume(TOKEN_EOF, "Expect end of expression.");
-  endCompiler(vm);
-
-  return parser.hadError;
+/** Compile `source` into `vm`: runs build_symbol_table, then emit_bytecode, over the same text. */
+bool compile(VM *vm, ScopeTable *st, char *source) {
+  build_symbol_table(vm, source, st);
+  vm->cp = 0; /* actually start emitting code */
+  st->count = 0; /* reset the scope count before the emit pass */
+  emit_bytecode(vm, source, st);
+  /* NOTE(review): true means "finished"; the old compile() returned parser.hadError - confirm callers. */
+  return true; /* previously fell off the end of a non-void function */
 }
diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h
index 59e420b..c921029 100644
--- a/src/tools/compiler/compiler.h
+++ b/src/tools/compiler/compiler.h
@@ -2,7 +2,9 @@
 #define UNDAR_COMPILER_H
 
-#import "../../vm/common.h"
+#include "../../vm/common.h"
+#include "../../vm/opcodes.h"
 
+typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
 typedef enum {
   VOID,
   BOOL,
@@ -21,17 +23,12 @@ typedef enum {
   FUNCTION
 } SymbolType;
 
-typedef struct value_type_s ValueType;
-typedef struct function_def_s FunctionDef;
-typedef struct function_tab_s FunctionTable;
-typedef struct plex_def_s PlexDef;
-typedef struct plex_tab_s PlexTable;
-typedef struct array_def_s ArrayDef;
-typedef struct array_tab_s ArrayTable;
 typedef struct symbol_s Symbol;
 typedef struct symbol_tab_s SymbolTable;
-typedef struct names_tab_s NamesTable;
+typedef struct value_type_s ValueType;
 typedef struct plex_fields_tab_s PlexFieldsTable;
+typedef struct plex_def_s PlexDef;
+typedef struct plex_tab_s PlexTable;
 typedef struct scope_s Scope;
 typedef struct scope_tab_s ScopeTable;
 
@@ -42,13 +39,6 @@ struct value_type_s {
   u32 table_ref; // if it is a heap object
 };
 
-struct function_def_s {
-  u32 name;
-  ValueType args[8];
-  u8 arg_count;
-  ValueType return_type;
-};
-
 struct plex_def_s {
   u32 name;
   u32 size;
@@ -56,22 +46,6 @@ struct plex_def_s {
   u32 field_count;
 };
 
-struct array_def_s {
-  ValueType type;
-  u32 length;
-  u32 logical_size; // length of the array
-  u32 physical_size; // logical_size * type_size + fat pointer
-};
-
-struct symbol_s {
-  u32 name;
-  ValueType type;
-  union {
-    u32 local; // register
-    u32 global; // address
-  } ref;
-};
-
 struct plex_fields_tab_s {
   u32 *plex_refs;
   ValueType *fields;
@@ -85,38 +59,30 @@ struct plex_tab_s {
   u32 capacity;
 };
 
-struct array_tab_s {
-  ArrayDef *symbols;
-  u32 count;
-  u32 capacity;
-};
-
-struct function_tab_s {
-  FunctionDef *symbols;
-  u32 count;
-  u32 capacity;
-};
-
-struct names_tab_s {
-  char **names;
-  u32 count;
-  u32 capacity;
+#define MAX_SYMBOL_NAME_LENGTH 64
+struct symbol_s {
+  char name[MAX_SYMBOL_NAME_LENGTH];
+  u8 name_length;
+  SymbolType type;
+  ScopeType scope;
+  u32 ref; // vm->mp if global, vm->pc local, register if var
+  u32 size; // size of symbol
 };
 
 struct symbol_tab_s {
-  Symbol *symbols;
-  u32 count;
-  u32 capacity;
-};
-
-struct scope_s {
-  SymbolTable table;
+  Symbol symbols[256];
+  u8 count; // NOTE(review): u8 cannot represent 256, the capacity of symbols[] - confirm the table is never filled
+  i32 parent;
 };
 
 struct scope_tab_s {
-  Scope *scopes;
+  SymbolTable *scopes;
   u32 count;
   u32 capacity;
+  i32 scope_ref;
 };
 
+bool compile(VM *vm, ScopeTable *st, char *source);
+extern bool table_realloc(ScopeTable *table);/* implement this in arch/ not here */
+
 #endif
diff --git a/test/add.uir.ul b/test/add.uir.ul
new file mode 100644
index 0000000..4e33824
--- /dev/null
+++ b/test/add.uir.ul
@@ -0,0 +1,47 @@
+/**
+ * Constants
+ */
+str term_namespace = "/dev/term/0";
+str nl = "\n";
+int x = 0;
+int y = 1;
+
+plex Terminal {
+  nat handle;
+}
+
+/**
+ * Main function
+ */
+function main() {
+  int tmp0 = x;
+  int tmp1 = y;
+  int tmp2 = add(tmp0, tmp1);
+  str tmp3 = tmp2 as str;
+  pln(tmp3);
+}
+
+/**
+ * Add two numbers together
+ */
+function add(int a, int b) int {
+  int tmp0 = a + b;
+  return tmp0;
+}
+
+/**
+ * Print with a newline
+ */
+function pln(str message) {
+  str term_ns = term_namespace;
+  int mode = 0;
+
+  Terminal term = open(term_ns, mode);
+
+  int msg_len = message.length;
+  write(term, message, msg_len);
+
+  str nl_local = nl;
+  int nl_len = nl.length;
+  write(term, nl_local, nl_len);
+}
diff --git a/test/fib.uir.ul b/test/fib.uir.ul
new file mode 100644
index 0000000..4cb6d6e
--- /dev/null
+++ b/test/fib.uir.ul
@@ -0,0 +1,60 @@
+/**
+ * Constants
+ */
+str term_namespace = "/dev/term/0";
+str nl = "\n";
+
+plex Terminal {
+  nat handle;
+}
+
+/**
+ * Main function
+ */
+function main() {
+  int n = 35;
+  int ans = fib(n);
+  str ans_s = ans as str;
+  pln(ans_s);
+}
+
+/**
+ * Recursively calculate fibonacci
+ */
+function fib(int n) int {
+  int base_check = 2;
+
+  jump_lt_int base_case n base_check;
+  jump base_case_end;
+  do base_case;
+  return n;
+  else base_case_end;
+
+  int tmp_c2 = 2;
+  int tmp_c2_n = n - tmp_c2;
+  int ans_c2 = fib(tmp_c2_n);
+
+  int tmp_c1 = 1;
+  int tmp_c1_n = n - tmp_c1;
+  int ans_c1 = fib(tmp_c1_n);
+
+  int ans = ans_c1 + ans_c2;
+  return ans;
+}
+
+/**
+ * Print with a newline
+ */
+function pln(str message) {
+  str term_ns = term_namespace;
+  int mode = 0;
+
+  Terminal term = open(term_ns, mode);
+
+  int msg_len = message.length;
+  write(term, message, msg_len);
+
+  str nl_local = nl;
+  int nl_len = nl.length;
+  write(term, nl_local, nl_len);
+}
diff --git a/test/hello.uir.ul b/test/hello.uir.ul
new file mode 100644
index 0000000..e54ea7e
--- /dev/null
+++ b/test/hello.uir.ul
@@ -0,0 +1,32 @@
+str term_namespace = "/dev/term/0";
+str hello = "nuqneH 'u'?";
+str nl = "\n";
+
+plex Terminal {
+  nat handle;
+}
+
+/**
+ * Main function
+ */
+function main() {
+  str msg = hello;
+  pln(msg);
+}
+
+/**
+ * Print with a newline
+ */
+function pln(str message) {
+  str term_ns = term_namespace;
+  int mode = 0;
+
+  Terminal term = open(term_ns, mode);
+
+  int msg_len = message.length;
+  write(term, message, msg_len);
+
+  str nl_local = nl;
+  int nl_len = nl.length;
+  write(term, nl_local, nl_len);
+}
diff --git a/test/hello.ul b/test/hello.ul
index 396daf5..31553f8 100644
--- a/test/hello.ul
+++ b/test/hello.ul
@@ -1,8 +1,6 @@
 /**
- * Constants
+ * Plexes
  */
-const str nl = "\n";
-
 plex Terminal {
   nat handle;
 }
@@ -20,5 +18,6 @@ function main() {
 function pln(str message) {
   Terminal term = open("/dev/term/0", 0);
   write(term, message, message.length);
+  const str nl = "\n";
   write(term, nl, nl.length);
 }
diff --git a/test/loop.uir.ul b/test/loop.uir.ul
new file mode 100644
index 0000000..8a37d99
--- /dev/null
+++ b/test/loop.uir.ul
@@ -0,0 +1,65 @@
+str term_namespace = "/dev/term/0";
+str prompt = "Enter a string:";
+str nl = "\n";
+
+
+plex Terminal {
+  nat handle;
+}
+
+/**
+ * Main function
+ */
+function main() {
+  str term_ns = term_namespace;
+  int mode = 0;
+
+  Terminal term = open(term_ns, mode);
+
+  real a = 5.0;
+
+  // do (int i = 5000; i >= 0; i = i - 1)
+  int i = 5000;
+  int tmp0 = 0;
+  int tmp1 = 1;
+  real tmp2 = 5.0; // real, not int: it holds a real literal and is added to `a`
+  loop loop_body {
+    a = a + tmp2;
+    i = i - tmp1;
+    jump_ge_int loop_body i tmp0;
+  }
+
+  nat b = a as nat;
+  str local_prompt = prompt;
+  pln(local_prompt);
+
+  nat size = 32;
+  str user_string = malloc(size);
+  read(term, user_string, size);
+
+  str a_str = a as str;
+  pln(a_str);
+
+  str b_str = b as str;
+  pln(b_str);
+
+  pln(user_string);
+}
+
+/**
+ * Print with a newline
+ */
+function pln(str message) {
+  str term_ns = term_namespace;
+  int mode = 0;
+
+  Terminal term = open(term_ns, mode);
+
+  int msg_len = message.length;
+  write(term, message, msg_len);
+
+  str nl_local = nl;
+  int nl_len = nl.length;
+  write(term, nl_local, nl_len);
+}
+
diff --git a/test/malloc.uir.ul b/test/malloc.uir.ul
new file mode 100644
index 0000000..6c81ea3
--- /dev/null
+++ b/test/malloc.uir.ul
@@ -0,0 +1,26 @@
+/**
+ * Constants
+ */
+const str nl = "\n";
+
+plex Terminal {
+  nat handle;
+}
+
+/**
+ * Main function
+ */
+function main() {
+  Terminal term = open("/dev/term/0", 0);
+  pln(term, "Enter a string: ");
+  pln(term, term.read(32));
+  return 0;
+}
+
+/**
+ * Print with a newline
+ */
+function pln(Terminal term, str message) {
+  write(term, message, message.length);
+  write(term, nl, nl.length);
+}
diff --git a/test/paint.uir.ul b/test/paint.uir.ul
new file mode 100644
index 0000000..55a017d
--- /dev/null
+++ b/test/paint.uir.ul
@@ -0,0 +1,278 @@
+str screen_namespace = "/dev/screen/0";
+str mouse_namespace = "/dev/mouse/0";
+byte BLACK = 0;
+byte WHITE = 255;
+byte DARK_GRAY = 73;
+byte GRAY = 146;
+byte LIGHT_GRAY = 182;
+byte CHARCOAL = 36;
+byte DARK_RED = 128;
+byte RED = 224;
+byte DARK_YELLOW = 144;
+byte YELLOW = 252;
+byte DARK_TEAL = 9;
+byte TEAL = 18;
+byte DARK_GREEN = 12;
+byte GREEN = 16;
+byte LIME = 28;
+byte LIGHT_CYAN = 159;
+byte NAVY = 2;
+byte BLUE = 3;
+byte DEEP_SKY_BLUE = 10;
+byte LIGHT_BLUE = 19;
+byte PURPLE = 131;
+byte LIGHT_PURPLE = 147;
+byte DARK_MAGENTA = 130;
+byte MAGENTA = 227;
+byte PLUM = 129;
+byte PINK = 226;
+byte SADDLE_BROWN = 72;
+byte PERU = 141;
+byte SIENNA = 136;
+byte ORANGE = 241;
+byte DARK_ORANGE = 208;
+byte GOLD = 244;
+byte SELECTED_COLOR = 255;
+
+plex Screen {
+  nat handle;
+  nat width;
+  nat height;
+  byte[] buffer;
+}
+
+plex Mouse {
+  nat handle;
+  nat x;
+  nat y;
+  bool left;
+  bool right;
+  bool middle;
+  bool btn4;
+}
+
+function main () {
+  str screen_name = screen_namespace;
+  int mode = 0;
+  Screen screen = open(screen_name, mode);
+
+  nat width = screen.width;
+  nat size = screen.size; // NOTE(review): plex Screen declares no `size` field - confirm this resolves
+  nat screen_offset = 16;
+  nat screen_buffer = screen_buffer + screen_offset; // NOTE(review): reads screen_buffer in its own initializer - presumably meant the Screen's buffer; confirm
+
+  // open mouse
+  str mouse_name = mouse_namespace;
+  Mouse mouse = open(mouse_name, mode);
+
+  byte color = BLACK;
+  nat x_pos = 1;
+  nat y_pos = 1;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = WHITE;
+  x_pos = 21;
+  y_pos = 1;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = CHARCOAL;
+  x_pos = 1;
+  y_pos = 21;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = DARK_GRAY;
+  x_pos = 21;
+  y_pos = 21;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = RED;
+  x_pos = 1;
+  y_pos = 41;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = ORANGE;
+  x_pos = 21;
+  y_pos = 41;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = YELLOW;
+  x_pos = 1;
+  y_pos = 61;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = GREEN;
+  x_pos = 21;
+  y_pos = 61;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = BLUE;
+  x_pos = 1;
+  y_pos = 81;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  color = PURPLE;
+  x_pos = 21;
+  y_pos = 81;
+  draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+
+  // screen.draw
+  write(screen, screen_buffer, size);
+
+  nat zero = 0;
+
+  loop draw {
+    // load mouse click data
+    refresh(mouse);
+
+    byte left_down = mouse.down; // NOTE(review): plex Mouse declares `left`, not `down` - confirm
+
+    jump_eq_nat draw left_down zero; // if (!btn1.left) continue;
+
+    nat mouse_x = mouse.x;
+    nat mouse_y = mouse.y;
+
+    nat box_size = 20;
+
+    // first row
+    color = BLACK;
+    x_pos = 1;
+    y_pos = 1;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = WHITE;
+    x_pos = 21;
+    y_pos = 1;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = CHARCOAL;
+    x_pos = 1;
+    y_pos = 21;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = DARK_GRAY;
+    x_pos = 21;
+    y_pos = 21;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = RED;
+    x_pos = 1;
+    y_pos = 41;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = ORANGE;
+    x_pos = 21;
+    y_pos = 41;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = YELLOW;
+    x_pos = 1;
+    y_pos = 61;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = GREEN;
+    x_pos = 21;
+    y_pos = 61;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = BLUE;
+    x_pos = 1;
+    y_pos = 81;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    color = PURPLE;
+    x_pos = 21;
+    y_pos = 81;
+    draw_outlined_swatch(screen_buffer, color, x_pos, y_pos, width);
+    set_color(mouse_x, mouse_y, x_pos, y_pos, color, box_size);
+
+    write(screen, screen_buffer, size);
+
+    byte selected_color = SELECTED_COLOR;
+
+    nat brush_size = 5;
+
+    draw_box(screen_buffer, width, selected_color, mouse_x, mouse_y, brush_size, brush_size);
+
+    jump draw;
+  }
+
+  // Flush and exit
+  exit 0;
+}
+
+function set_color (int click_x, int click_y, int box_x, int box_y, byte check_color, int size) {
+
+  // Compute right = box_x + size
+  int right_edge = box_x + size;
+
+  // Compute bottom = box_y + size
+  int bottom_edge = box_y + size;
+
+  // Bounds check: x in [box_x, right] and y in [box_y, bottom]
+  jump_lt_int fail click_x box_x;
+  jump_gt_int fail click_x right_edge;
+  jump_lt_int fail click_y box_y;
+  jump_gt_int fail click_y bottom_edge;
+
+  SELECTED_COLOR = check_color;
+
+  else fail;
+  return;
+}
+
+function draw_outlined_swatch(nat base, byte swatch_color, int x, int y, int width) {
+
+  nat background_color = GRAY;
+  byte selected_color = SELECTED_COLOR;
+
+  jump_eq_int set_selected swatch_color selected_color;
+  jump end_set_selected;
+  do set_selected;
+  background_color = DARK_GRAY;
+  else end_set_selected;
+
+  nat outline_size = 20;
+  nat fill_size = 17;
+
+  draw_box(base, width, background_color, x, y, outline_size, outline_size);
+
+  nat offset = 2;
+  int xO = x + offset; // x + 2
+  int yO = y + offset; // y + 2
+
+  draw_box(base, width, swatch_color, xO, yO, fill_size, fill_size);
+
+  return;
+}
+
+function draw_box (nat base, nat screen_width, byte box_color,
+                   nat x, nat y, nat width, nat height) {
+
+  nat fat_ptr_size = 4;
+
+  // Compute start address: base + y*screen_width + x
+  nat offset = y * screen_width;
+  offset = offset + x;
+  offset = offset + base;
+  offset = offset + fat_ptr_size; // need to add offset for fat pointer size
+
+  int i = 1;
+  int zero = 0;
+
+  loop draw_box_outer {
+    memset(offset, width, box_color); // draw row
+    offset = offset + screen_width; // next row += screen_width
+    height = height - i; // decrement row count
+    jump_gt_int draw_box_outer height zero;
+  }
+  return;
+}