From 2e5eb0322793fd86ee8680333469e7b28d9dc995 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 19:44:54 -0800 Subject: [PATCH] remove old assembler, finish symbol table pass, start working on code gen --- Makefile | 4 - src/arch/linux/main.c | 66 +- src/tools/assembler/assembler.c | 405 +++++---- src/tools/assembler/lexer.c | 26 +- src/tools/old_assembler/assembler.c | 1211 --------------------------- src/tools/old_assembler/assembler.h | 20 - src/tools/old_assembler/parser.c | 244 ------ src/tools/old_assembler/parser.h | 25 - src/vm/libc.c | 17 +- src/vm/libc.h | 1 + test/add.rom | Bin 143 -> 0 bytes test/add.ul.ir | 4 +- test/fib.ul.ir | 4 +- test/hello.rom | Bin 135 -> 0 bytes test/hello.ul.ir | 4 +- test/loop.rom | Bin 258 -> 0 bytes test/loop.ul.ir | 8 +- test/malloc.rom | Bin 167 -> 0 bytes test/malloc.ul.ir | 4 +- test/paint-bw.rom | Bin 574 -> 0 bytes test/paint.rom | Bin 1266 -> 0 bytes test/simple.rom | Bin 140 -> 0 bytes test/simple.ul.ir | 14 +- test/window.rom | Bin 326 -> 0 bytes test/window.ul.ir | 4 +- 25 files changed, 262 insertions(+), 1799 deletions(-) delete mode 100644 src/tools/old_assembler/assembler.c delete mode 100644 src/tools/old_assembler/assembler.h delete mode 100644 src/tools/old_assembler/parser.c delete mode 100644 src/tools/old_assembler/parser.h delete mode 100644 test/add.rom delete mode 100644 test/hello.rom delete mode 100644 test/loop.rom delete mode 100644 test/malloc.rom delete mode 100644 test/paint-bw.rom delete mode 100644 test/paint.rom delete mode 100644 test/simple.rom delete mode 100644 test/window.rom diff --git a/Makefile b/Makefile index 88100b7..6aa77f9 100644 --- a/Makefile +++ b/Makefile @@ -86,15 +86,11 @@ VM_SOURCES := \ ifeq ($(BUILD_MODE), release) PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c\ - $(SRC_DIR)/tools/old_assembler/parser.c \ - $(SRC_DIR)/tools/old_assembler/assembler.c \ $(SRC_DIR)/tools/assembler/lexer.c \ $(SRC_DIR)/tools/assembler/assembler.c else PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c \ - $(SRC_DIR)/tools/old_assembler/parser.c \ - $(SRC_DIR)/tools/old_assembler/assembler.c\ $(SRC_DIR)/tools/assembler/lexer.c \ $(SRC_DIR)/tools/assembler/assembler.c endif diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index cebdef0..94a4e2a 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,5 +1,3 @@ -#include "../../tools/old_assembler/assembler.h" -#include "../../tools/old_assembler/parser.h" #include "../../tools/assembler/assembler.h" #include "../../vm/vm.h" #include "devices.h" @@ -126,27 +124,7 @@ bool loadVM(const char *filename, VM *vm) { bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { USED(vm); USED(output_file); - FILE *f = fopen(source_file, "rb"); - if (!f) { - perror("fopen"); - return false; - } - - static char source[MAX_SRC_SIZE + 1]; - - fseek(f, 0, SEEK_END); - long len = ftell(f); - fseek(f, 0, SEEK_SET); - if (len >= MAX_SRC_SIZE) { - fprintf(stderr, "Source is larger than buffer\n"); - fclose(f); - return false; - } - size_t read = fread(source, 1, len, f); - source[read] = '\0'; - fclose(f); - - assemble(vm, source); + USED(source_file); return true; } @@ -173,42 +151,34 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) { source[read] = '\0'; fclose(f); - ExprNode *ast = expr_parse(source, strlen(source)); - if (!ast) { - printf("Parse failed.\n"); - return false; - } else { - old_assemble(vm, ast); - expr_free(ast); + assemble(vm, source); - // If output file specified, save the VM - if (output_file) { - if (!saveVM(output_file, vm)) { - printf("Failed to save VM to %s\n", output_file); - return false; - } - printf("VM saved to %s\n", output_file); + if (output_file) { + if (!saveVM(output_file, vm)) { + printf("Failed to save VM to %s\n", output_file); + return false; } - return true; + printf("VM saved to %s\n", output_file); } + return true; } bool init_vm(VM *vm) { - vm->memory = (u8*)malloc(MEMORY_SIZE * sizeof(u8)); + vm->memory = (u8 *)malloc(MEMORY_SIZE * sizeof(u8)); vm->memory_size = MEMORY_SIZE; - vm->code = (u8*)malloc(CODE_SIZE * sizeof(u8)); + vm->code = (u8 *)malloc(CODE_SIZE * sizeof(u8)); vm->code_size = CODE_SIZE; - vm->frames = (Frame*)malloc(FRAMES_SIZE * sizeof(Frame)); + vm->frames = (Frame *)malloc(FRAMES_SIZE * sizeof(Frame)); vm->frames_size = FRAMES_SIZE; - vm->stack = (u32*)malloc(STACK_SIZE * sizeof(u32)) + vm->stack = (u32 *)malloc(STACK_SIZE * sizeof(u32)); vm->stack_size = STACK_SIZE; - vm->devices = (Device*)malloc(DEVICES_SIZE * sizeof(Device)); - vm->devices_size = DEVICES_SIZE; - + vm->devices = (Device *)malloc(DEVICES_SIZE * sizeof(Device)); + vm->device_size = DEVICES_SIZE; + return true; } @@ -217,7 +187,6 @@ i32 main(i32 argc, char *argv[]) { char *input_file = nil; char *output_file = nil; bool is_rom = false; - bool is_assembly = false; bool is_ir = false; // Parse command line arguments @@ -233,9 +202,6 @@ i32 main(i32 argc, char *argv[]) { if (ext && (strcmp(ext, ".rom") == 0)) { is_rom = true; } - if (ext && (strcmp(ext, ".lisp") == 0)) { - is_assembly = true; - } if (ext && (strcmp(ext, ".ir") == 0)) { is_ir = true; } @@ -256,7 +222,7 @@ i32 main(i32 argc, char *argv[]) { if (is_rom) { // Load ROM file directly compilation_success = loadVM(input_file, &vm); - } else if (is_assembly) { + } else if (is_ir) { // Compile Lisp file if (dump_rom && output_file) { compilation_success = assembleAndSave(input_file, output_file, &vm); diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 7c3bf11..b45d6f9 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -44,7 +44,7 @@ Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { return nil; } -u32 get_ref(VM *vm, SymbolTable *st, const char *name, ScopeType scope) { +u32 get_ref(SymbolTable *st, const char *name) { Symbol *sym = symbol_table_lookup(st, name); if (!sym) { fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); @@ -74,7 +74,11 @@ Token next_id_or_reg() { Token next_id_or_ptr() { Token token = next_token(); - if (token.type != TOKEN_IDENTIFIER || token.type != TOKEN_LITERAL_NAT) { + + if (token.type != TOKEN_IDENTIFIER && + token.type != TOKEN_LITERAL_NAT && + token.type != TOKEN_LITERAL_INT && + token.type != TOKEN_LITERAL_REAL) { printf("Not an ID or register at line %d: %.*s\n", token.line, token.length, token.start); exit(1); @@ -91,6 +95,15 @@ Token next_token_is(TokenType type) { return token; } +Token next_token_is_either(TokenType type, TokenType type2) { + Token token = next_token(); + if (token.type != type && token.type != type2) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + exit(1); + } + return token; +} + /** * Global . */ @@ -140,9 +153,7 @@ bool define_global(VM *vm, SymbolTable *st) { return false; } - Token eq = next_token_is(TOKEN_EQ); Token name = next_token_is(TOKEN_IDENTIFIER); - if (name.length > MAX_SYMBOL_NAME_LENGTH) { return false; } @@ -153,6 +164,8 @@ bool define_global(VM *vm, SymbolTable *st) { s.ref = addr; s.scope = GLOBAL; + next_token_is(TOKEN_EQ); + Token value = next_token(); switch (value.type) { case TOKEN_KEYWORD_TRUE: { @@ -161,6 +174,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_KEYWORD_FALSE: { u32 addr = vm->mp; @@ -168,6 +182,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_LITERAL_INT: { i32 out = atoi(value.start); @@ -177,6 +192,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_LITERAL_NAT: { char *endptr; @@ -191,6 +207,7 @@ bool define_global(VM *vm, SymbolTable *st) { vm->mp += s.size; vm->frames[vm->fp].end += s.size; + break; } case TOKEN_LITERAL_REAL: { fixed_t out = float_to_fixed(atof(value.start)); @@ -204,8 +221,8 @@ bool define_global(VM *vm, SymbolTable *st) { } case TOKEN_LITERAL_STR: { const char *src = value.start; - u32 len = 0; - u32 i = 0; + i32 len = 0; + i32 i = 0; while (i < value.length) { char c = src[i++]; @@ -253,7 +270,7 @@ bool define_global(VM *vm, SymbolTable *st) { /** * Var . */ -void define_var(VM *vm, SymbolTable *st, Token regType) { +void define_var(SymbolTable *st, Token regType) { Symbol s; s.scope = VAR; switch (regType.type) { @@ -326,8 +343,6 @@ void define_var(VM *vm, SymbolTable *st, Token regType) { Token reg_num = next_token_is(TOKEN_LITERAL_INT); s.ref = atoi(reg_num.start); - - next_token_is(TOKEN_SEMICOLON); symbol_table_add(st, s); } @@ -351,11 +366,11 @@ void define_function(VM *vm, SymbolTable *st) { Token next = next_token(); while (next.type != TOKEN_RPAREN) { - Token regType = next_token(); - define_var(vm, st, regType); + define_var(st, next); - Token next = next_token(); + next = next_token(); if (next.type == TOKEN_COMMA) { + next = next_token(); continue; } else if (next.type == TOKEN_RPAREN) { break; @@ -401,6 +416,9 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { exit(1); } + printf("Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), + token.length, token.start); + if (token.type == TOKEN_KEYWORD_GLOBAL) { define_global(vm, st); continue; @@ -416,7 +434,8 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { - define_var(vm, st, token); + define_var(st, token); + next_token_is(TOKEN_SEMICOLON); continue; } @@ -427,16 +446,31 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { continue; } + if (token.type == TOKEN_KEYWORD_RETURN) { + vm->pc++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + vm->pc++; + continue; + } + + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + continue; + } + if (token.type == TOKEN_IDENTIFIER) { // check to see if it is an opcode first - if (streq(token.start, "exit")) { + if (strleq(token.start, "exit", token.length)) { vm->pc++; - next_token_is(TOKEN_LITERAL_NAT); + next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "call")) { + } else if (strleq(token.start, "call", token.length)) { vm->pc++; next_token_is(TOKEN_IDENTIFIER); @@ -447,7 +481,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token next = next_token(); while (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; - Token next = next_token(); + next = next_token(); } /* return type */ next = next_token(); @@ -458,10 +492,10 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { } /* if it is not void, then it was the value */ next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "syscall")) { + } else if (strleq(token.start, "syscall", token.length)) { vm->pc++; - Token id_or_ptr = next_id_or_ptr(); + next_id_or_ptr(); vm->pc += 4; Token next = next_token(); @@ -469,33 +503,34 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { if (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; } - Token next = next_token(); - } - } else if (streq(token.start, "return")) { - vm->pc++; - - Token next = next_token(); - if (next.type == TOKEN_SEMICOLON) { - /* put 0xFF as return register */ - vm->pc++; - continue; + next = next_token(); } - next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_immediate", token.length)) { vm->pc++; - } else if (streq(token.start, "load_immediate")) { - vm->pc++; - - Token id_or_ptr = next_id_or_ptr(); + next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); - Token output = next_id_or_reg(); + next_id_or_reg(); vm->pc++; - } else if (streq(token.start, "malloc")) { + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "load_address", token.length)) { + vm->pc++; + + next_id_or_ptr(); + vm->pc += 4; + + next_token_is(TOKEN_ARROW_RIGHT); + + next_id_or_reg(); + vm->pc++; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "malloc", token.length)) { vm->pc++; next_id_or_reg(); @@ -506,7 +541,8 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { next_id_or_reg(); vm->pc++; - } else if (streq(token.start, "memset_8")) { + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "memset_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -517,7 +553,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "memset_16")) { + } else if (strleq(token.start, "memset_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -528,7 +564,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "memset_32")) { + } else if (strleq(token.start, "memset_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -539,7 +575,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_offset_8")) { + } else if (strleq(token.start, "load_offset_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -554,7 +590,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_offset_16")) { + } else if (strleq(token.start, "load_offset_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -569,7 +605,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_offset_32")) { + } else if (strleq(token.start, "load_offset_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -584,7 +620,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_indirect_8")) { + } else if (strleq(token.start, "load_indirect_8", token.length)) { vm->pc++; next_id_or_ptr(); @@ -596,7 +632,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_indirect_16")) { + } else if (strleq(token.start, "load_indirect_16", token.length)) { vm->pc++; next_id_or_ptr(); @@ -608,7 +644,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_indirect_32")) { + } else if (strleq(token.start, "load_indirect_32", token.length)) { vm->pc++; next_id_or_ptr(); @@ -620,7 +656,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_absolute_8")) { + } else if (strleq(token.start, "load_absolute_8", token.length)) { vm->pc++; next_id_or_ptr(); @@ -632,7 +668,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_absolute_16")) { + } else if (strleq(token.start, "load_absolute_16", token.length)) { vm->pc++; next_id_or_ptr(); @@ -644,7 +680,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "load_absolute_32")) { + } else if (strleq(token.start, "load_absolute_32", token.length)) { vm->pc++; next_id_or_ptr(); @@ -656,7 +692,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_absolute_8")) { + } else if (strleq(token.start, "store_absolute_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -668,7 +704,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_absolute_16")) { + } else if (strleq(token.start, "store_absolute_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -680,7 +716,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_absolute_32")) { + } else if (strleq(token.start, "store_absolute_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -692,7 +728,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_indirect_8")) { + } else if (strleq(token.start, "store_indirect_8", token.length)) { vm->pc++; next_id_or_reg(); @@ -704,7 +740,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_indirect_16")) { + } else if (strleq(token.start, "store_indirect_16", token.length)) { vm->pc++; next_id_or_reg(); @@ -716,7 +752,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_indirect_32")) { + } else if (strleq(token.start, "store_indirect_32", token.length)) { vm->pc++; next_id_or_reg(); @@ -728,7 +764,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_offset_8")) { + } else if (strleq(token.start, "store_offset_8", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ @@ -743,7 +779,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_offset_16")) { + } else if (strleq(token.start, "store_offset_16", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ @@ -758,7 +794,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "store_offset_32")) { + } else if (strleq(token.start, "store_offset_32", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ @@ -773,7 +809,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "register_move")) { + } else if (strleq(token.start, "register_move", token.length)) { vm->pc++; next_id_or_reg(); @@ -785,7 +821,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "add_int")) { + } else if (strleq(token.start, "add_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -800,7 +836,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "sub_int")) { + } else if (strleq(token.start, "sub_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -815,7 +851,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "mul_int")) { + } else if (strleq(token.start, "mul_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -830,7 +866,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "div_int")) { + } else if (strleq(token.start, "div_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -845,7 +881,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "abs_int")) { + } else if (strleq(token.start, "abs_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -857,7 +893,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "neg_int")) { + } else if (strleq(token.start, "neg_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -869,7 +905,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "add_nat")) { + } else if (strleq(token.start, "add_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -884,7 +920,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "sub_nat")) { + } else if (strleq(token.start, "sub_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -899,7 +935,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "mul_nat")) { + } else if (strleq(token.start, "mul_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -914,7 +950,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "div_nat")) { + } else if (strleq(token.start, "div_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -929,7 +965,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "abs_nat")) { + } else if (strleq(token.start, "abs_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -941,7 +977,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "neg_nat")) { + } else if (strleq(token.start, "neg_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -953,7 +989,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "add_real")) { + } else if (strleq(token.start, "add_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -968,7 +1004,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "sub_real")) { + } else if (strleq(token.start, "sub_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -983,7 +1019,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "mul_real")) { + } else if (strleq(token.start, "mul_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -998,7 +1034,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "div_real")) { + } else if (strleq(token.start, "div_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1013,7 +1049,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "abs_real")) { + } else if (strleq(token.start, "abs_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1025,7 +1061,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "neg_real")) { + } else if (strleq(token.start, "neg_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1037,7 +1073,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "int_to_real")) { + } else if (strleq(token.start, "int_to_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1049,7 +1085,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "nat_to_real")) { + } else if (strleq(token.start, "nat_to_real", token.length)) { vm->pc++; next_id_or_reg(); @@ -1061,7 +1097,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "real_to_int")) { + } else if (strleq(token.start, "real_to_int", token.length)) { vm->pc++; next_id_or_reg(); @@ -1073,7 +1109,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "real_to_nat")) { + } else if (strleq(token.start, "real_to_nat", token.length)) { vm->pc++; next_id_or_reg(); @@ -1085,7 +1121,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_shift_left")) { + } else if (strleq(token.start, "bit_shift_left", token.length)) { vm->pc++; next_id_or_reg(); @@ -1100,7 +1136,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_shift_right")) { + } else if (strleq(token.start, "bit_shift_right", token.length)) { vm->pc++; next_id_or_reg(); @@ -1115,7 +1151,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { vm->pc++; next_id_or_reg(); @@ -1130,7 +1166,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_and")) { + } else if (strleq(token.start, "bit_and", token.length)) { vm->pc++; next_id_or_reg(); @@ -1145,7 +1181,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_or")) { + } else if (strleq(token.start, "bit_or", token.length)) { vm->pc++; next_id_or_reg(); @@ -1160,7 +1196,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "bit_xor")) { + } else if (strleq(token.start, "bit_xor", token.length)) { vm->pc++; next_id_or_reg(); @@ -1175,21 +1211,21 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump")) { + } else if (strleq(token.start, "jump", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_if_flag")) { + } else if (strleq(token.start, "jump_if_flag", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_eq_int")) { + } else if (strleq(token.start, "jump_eq_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1202,7 +1238,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_neq_int")) { + } else if (strleq(token.start, "jump_neq_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1215,7 +1251,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_gt_int")) { + } else if (strleq(token.start, "jump_gt_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1228,7 +1264,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_lt_int")) { + } else if (strleq(token.start, "jump_lt_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1241,7 +1277,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_le_int")) { + } else if (strleq(token.start, "jump_le_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1254,7 +1290,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_ge_int")) { + } else if (strleq(token.start, "jump_ge_int", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1267,7 +1303,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_eq_nat")) { + } else if (strleq(token.start, "jump_eq_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1280,7 +1316,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_neq_nat")) { + } else if (strleq(token.start, "jump_neq_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1293,7 +1329,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_gt_nat")) { + } else if (strleq(token.start, "jump_gt_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1306,7 +1342,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_lt_nat")) { + } else if (strleq(token.start, "jump_lt_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1319,7 +1355,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_le_nat")) { + } else if (strleq(token.start, "jump_le_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1332,7 +1368,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_ge_nat")) { + } else if (strleq(token.start, "jump_ge_nat", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1345,7 +1381,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_eq_real")) { + } else if (strleq(token.start, "jump_eq_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1358,7 +1394,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_neq_real")) { + } else if (strleq(token.start, "jump_neq_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1371,7 +1407,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_ge_real")) { + } else if (strleq(token.start, "jump_ge_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1384,7 +1420,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_gt_real")) { + } else if (strleq(token.start, "jump_gt_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1397,7 +1433,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_lt_real")) { + } else if (strleq(token.start, "jump_lt_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1410,7 +1446,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "jump_le_real")) { + } else if (strleq(token.start, "jump_le_real", token.length)) { vm->pc++; next_id_or_ptr(); @@ -1423,7 +1459,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "string_length")) { + } else if (strleq(token.start, "string_length", token.length)) { vm->pc++; next_id_or_reg(); @@ -1435,7 +1471,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "int_to_string")) { + } else if (strleq(token.start, "int_to_string", token.length)) { vm->pc++; next_id_or_reg(); @@ -1447,7 +1483,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "nat_to_string")) { + } else if (strleq(token.start, "nat_to_string", token.length)) { vm->pc++; next_id_or_reg(); @@ -1459,7 +1495,7 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "real_to_string")) { + } else if (strleq(token.start, "real_to_string", token.length)) { vm->pc++; next_id_or_reg(); @@ -1471,19 +1507,19 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { vm->pc++; next_token_is(TOKEN_SEMICOLON); - } else if (streq(token.start, "string_eq")) { - } else if (streq(token.start, "string_concat")) { - } else if (streq(token.start, "string_get_char")) { - } else if (streq(token.start, "string_find_char")) { - } else if (streq(token.start, "string_slice")) { - } else if (streq(token.start, "string_to_int")) { - } else if (streq(token.start, "string_to_nat")) { - } else if (streq(token.start, "string_to_real")) { + } else if (strleq(token.start, "string_eq", token.length)) { + } else if (strleq(token.start, "string_concat", token.length)) { + } else if (strleq(token.start, "string_get_char", token.length)) { + } else if (strleq(token.start, "string_find_char", token.length)) { + } else if (strleq(token.start, "string_slice", token.length)) { + } else if (strleq(token.start, "string_to_int", token.length)) { + } else if (strleq(token.start, "string_to_nat", token.length)) { + } else if (strleq(token.start, "string_to_real", token.length)) { } else { // some other identifier printf("Unknown id at line %d: %.*s\n", token.line, token.length, token.start); - exit(1); + exit(1); } } } while (token.type != TOKEN_EOF); @@ -1493,6 +1529,8 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { * 2nd pass, emit the bytecode */ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { + USED(st); + Token token; init_lexer(source); do { @@ -1502,15 +1540,26 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - printf("Line %d [%s]: %.*s\n", token.line, + printf("[Generate Bytecode] Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed + next_token(); // type + next_token(); // var + next_token(); // eq + next_token(); // value + next_token(); // ; + continue; } if (token.type == TOKEN_KEYWORD_FN) { // ignore, already processed + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + next = next_token(); + } + continue; } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || @@ -1519,106 +1568,34 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { // ignore, already processed + next_token(); // type + next_token(); // var + next_token(); // reg + next_token(); // ; + continue; } - if (token.type == TOKEN_KEYWORD_LOOP || - token.type == TOKEN_KEYWORD_ELSE) { + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { // ignore, already processed + next_token(); // id } - if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first - if (streq(token.start, "exit")) { - } else if (streq(token.start, "call")) { - } else if (streq(token.start, "return")) { - } else if (streq(token.start, "syscall")) { - } else if (streq(token.start, "load_immediate")) { - } else if (streq(token.start, "load_indirect_8")) { - } else if (streq(token.start, "load_indirect_16")) { - } else if (streq(token.start, "load_indirect_32")) { - } else if (streq(token.start, "load_absolute_8")) { - } else if (streq(token.start, "load_absolute_16")) { - } else if (streq(token.start, "load_absolute_32")) { - } else if (streq(token.start, "load_offset_8")) { - } else if (streq(token.start, "load_offset_16")) { - } else if (streq(token.start, "load_offset_32")) { - } else if (streq(token.start, "store_absolute_8")) { - } else if (streq(token.start, "store_absolute_16")) { - } else if (streq(token.start, "store_absolute_32")) { - } else if (streq(token.start, "store_indirect_8")) { - } else if (streq(token.start, "store_indirect_16")) { - } else if (streq(token.start, "store_indirect_32")) { - } else if (streq(token.start, "store_offset_8")) { - } else if (streq(token.start, "store_offset_16")) { - } else if (streq(token.start, "store_offset_32")) { - } else if (streq(token.start, "malloc")) { - } else if (streq(token.start, "memset_8")) { - } else if (streq(token.start, "memset_16")) { - } else if (streq(token.start, "memset_32")) { - } else if (streq(token.start, "register_move")) { - } else if (streq(token.start, "add_int")) { - } else if (streq(token.start, "sub_int")) { - } else if (streq(token.start, "mul_int")) { - } else if (streq(token.start, "div_int")) { - } else if (streq(token.start, "abs_int")) { - } else if (streq(token.start, "neg_int")) { - } else if (streq(token.start, "add_nat")) { - } else if (streq(token.start, "sub_nat")) { - } else if (streq(token.start, "mul_nat")) { - } else if (streq(token.start, "div_nat")) { - } else if (streq(token.start, "abs_nat")) { - } else if (streq(token.start, "neg_nat")) { - } else if (streq(token.start, "add_real")) { - } else if (streq(token.start, "sub_real")) { - } else if (streq(token.start, "mul_real")) { - } else if (streq(token.start, "div_real")) { - } else if (streq(token.start, "abs_real")) { - } else if (streq(token.start, "neg_real")) { - } else if (streq(token.start, "int_to_real")) { - } else if (streq(token.start, "nat_to_real")) { - } else if (streq(token.start, "real_to_int")) { - } else if (streq(token.start, "real_to_nat")) { - } else if (streq(token.start, "bit_shift_left")) { - } else if (streq(token.start, "bit_shift_right")) { - } else if (streq(token.start, "bit_shift_r_ext")) { - } else if (streq(token.start, "bit_and")) { - } else if (streq(token.start, "bit_or")) { - } else if (streq(token.start, "bit_xor")) { - } else if (streq(token.start, "jump")) { - } else if (streq(token.start, "jump_if_flag")) { - } else if (streq(token.start, "jump_eq_int")) { - } else if (streq(token.start, "jump_neq_int")) { - } else if (streq(token.start, "jump_gt_int")) { - } else if (streq(token.start, "jump_lt_int")) { - } else if (streq(token.start, "jump_le_int")) { - } else if (streq(token.start, "jump_ge_int")) { - } else if (streq(token.start, "jump_eq_nat")) { - } else if (streq(token.start, "jump_neq_nat")) { - } else if (streq(token.start, "jump_gt_nat")) { - } else if (streq(token.start, "jump_lt_nat")) { - } else if (streq(token.start, "jump_le_nat")) { - } else if (streq(token.start, "jump_ge_nat")) { - } else if (streq(token.start, "jump_eq_real")) { - } else if (streq(token.start, "jump_neq_real")) { - } else if (streq(token.start, "jump_ge_real")) { - } else if (streq(token.start, "jump_gt_real")) { - } else if (streq(token.start, "jump_lt_real")) { - } else if (streq(token.start, "jump_le_real")) { - } else if (streq(token.start, "string_length")) { - } else if (streq(token.start, "string_eq")) { - } else if (streq(token.start, "string_concat")) { - } else if (streq(token.start, "string_get_char")) { - } else if (streq(token.start, "string_find_char")) { - } else if (streq(token.start, "string_slice")) { - } else if (streq(token.start, "int_to_string")) { - } else if (streq(token.start, "nat_to_string")) { - } else if (streq(token.start, "real_to_string")) { - } else if (streq(token.start, "string_to_int")) { - } else if (streq(token.start, "string_to_nat")) { - } else if (streq(token.start, "string_to_real")) { - } else { - // some other identifier + if (token.type == TOKEN_KEYWORD_RETURN) { + vm->pc++; + + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + emit_u8(vm, 0xFF); + vm->pc++; + continue; } + + vm->pc++; + next_token_is(TOKEN_SEMICOLON); + continue; } } } while (token.type != TOKEN_EOF); diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index 371bed0..d926380 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -215,12 +215,19 @@ static TokenType identifierType() { case 'e': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { - case 'a': - return check_keyword(3, 1, "d", TOKEN_KEYWORD_READ); case 'f': return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); case 't': return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); + case 'a': + if (lexer.current - lexer.start > 3) { + switch(lexer.start[3]) { + case 'd': + return check_keyword(4, 0, "", TOKEN_KEYWORD_READ); + case 'l': + return check_keyword(4, 0, "", TOKEN_TYPE_REAL); + } + } } } break; @@ -272,7 +279,7 @@ static TokenType identifierType() { case 'g': return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); case 'l': - return check_keyword(1, 4, "oop", TOKEN_KEYWORD_LOOP); + return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP); case 'd': return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); case 'v': @@ -331,7 +338,8 @@ Token next_token() { char c = advance(); if (is_alpha(c)) return identifier(); - if (is_digit(c)) + char next = peek(); + if ((c == '-' && is_digit(next)) || is_digit(c)) return number(); switch (c) { @@ -354,7 +362,7 @@ Token next_token() { case '.': return make_token(TOKEN_DOT); case '-': - return make_token(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); + return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS); case '+': return make_token(TOKEN_PLUS); case '/': @@ -389,7 +397,7 @@ const char *token_type_to_string(TokenType type) { case TOKEN_IDENTIFIER: return "IDENTIFIER"; case TOKEN_LITERAL_INT: - return "LITERAL_i32"; + return "LITERAL_INT"; case TOKEN_LITERAL_NAT: return "LITERAL_NAT"; case TOKEN_LITERAL_REAL: @@ -397,7 +405,7 @@ const char *token_type_to_string(TokenType type) { case TOKEN_LITERAL_STR: return "LITERAL_STR"; case TOKEN_TYPE_INT: - return "TYPE_i32"; + return "TYPE_INT"; case TOKEN_TYPE_NAT: return "TYPE_NAT"; case TOKEN_TYPE_REAL: @@ -498,8 +506,8 @@ const char *token_type_to_string(TokenType type) { return "LBRACKET"; case TOKEN_RBRACKET: return "RBRACKET"; - case TOKEN_ARROW_LEFT: - return "ARROW_LEFT"; + case TOKEN_ARROW_RIGHT: + return "ARROW_RIGHT"; case TOKEN_MESH: return "MESH"; case TOKEN_BIG_MONEY: diff --git a/src/tools/old_assembler/assembler.c b/src/tools/old_assembler/assembler.c deleted file mode 100644 index e66be6b..0000000 --- a/src/tools/old_assembler/assembler.c +++ /dev/null @@ -1,1211 +0,0 @@ -#include "assembler.h" -#include "parser.h" -typedef enum { SYMBOL_CODE, SYMBOL_DATA } SymbolType; - -typedef struct { - char *name; - u32 address; - SymbolType type; - int size; // How much memory this symbol occupies - int is_constant; // 1 = constant, 0 = variable -} Symbol; - -typedef struct { - Symbol *symbols; - int count; - int capacity; -} SymbolTable; - -void symbol_table_init(SymbolTable *table) { - table->capacity = 32; - table->count = 0; - table->symbols = malloc(table->capacity * sizeof(Symbol)); -} - -void symbol_table_add(SymbolTable *table, const char *name, u32 address, - SymbolType type) { - // Check for duplicates - for (int i = 0; i < table->count; i++) { - if (strcmp(table->symbols[i].name, name) == 0) { - fprintf(stderr, "Error: Duplicate label '%s'\n", name); - exit(1); - } - } - - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); - } - - Symbol *sym = &table->symbols[table->count++]; - sym->name = strdup(name); - sym->address = address; - sym->type = type; - sym->size = 4; // Default size - sym->is_constant = 0; -} - -Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { - for (int i = 0; i < table->count; i++) { - if (strcmp(table->symbols[i].name, name) == 0) { - return &table->symbols[i]; - } - } - return NULL; -} - -u32 find_label_in_table(SymbolTable *table, const char *name) { - Symbol *sym = symbol_table_lookup(table, name); - if (!sym) { - fprintf(stderr, "Error: Undefined label '%s'\n", name); - exit(1); - } - return sym->address; -} - -int get_instruction_byte_size(ExprNode *node) { - const char *opname = node->token; - - // Return (1 + 1) - if (strcmp(opname, "return") == 0) { - return 2; // 1 byte opcode + 1 byte return register - } - - if (strcmp(opname, "neg-int") == 0 || - strcmp(opname, "abs-int") == 0 || - strcmp(opname, "neg-nat") == 0 || - strcmp(opname, "abs-nat") == 0 || - strcmp(opname, "neg-real") == 0 || - strcmp(opname, "abs-real") == 0 || - strcmp(opname, "int-to-string") == 0 || - strcmp(opname, "load-indirect-8") == 0 || - strcmp(opname, "nat-to-string") == 0 || - strcmp(opname, "load-indirect-16") == 0 || - strcmp(opname, "real-to-string") == 0 || - strcmp(opname, "load-indirect-32") == 0 || - strcmp(opname, "int-to-real") == 0 || - strcmp(opname, "store-indirect-8") == 0 || - strcmp(opname, "nat-to-real") == 0 || - strcmp(opname, "store-indirect-16") == 0 || - strcmp(opname, "real-to-int") == 0 || - strcmp(opname, "store-indirect-32") == 0 || - strcmp(opname, "real-to-nat") == 0 || strcmp(opname, "nat-to-int") == 0 || - strcmp(opname, "int-to-nat") == 0 || - strcmp(opname, "string-length") == 0 || - strcmp(opname, "store-absolute-32") == 0 || - strcmp(opname, "store-absolute-8") == 0 || - strcmp(opname, "store-absolute-16") == 0 || - strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || - strcmp(opname, "memset-8") == 0 || strcmp(opname, "memset-16") == 0 || - strcmp(opname, "register-move") == 0 || strcmp(opname, "malloc") == 0) { - return 3; - } - - // Register-register-register opcodes (4 bytes: 1 + 3) - if (strcmp(opname, "add-int") == 0 || strcmp(opname, "sub-int") == 0 || - strcmp(opname, "mul-int") == 0 || strcmp(opname, "div-int") == 0 || - strcmp(opname, "add-nat") == 0 || strcmp(opname, "sub-nat") == 0 || - strcmp(opname, "mul-nat") == 0 || strcmp(opname, "div-nat") == 0 || - strcmp(opname, "add-real") == 0 || strcmp(opname, "sub-real") == 0 || - strcmp(opname, "bit-shift-left") == 0 || - strcmp(opname, "bit-shift-right") == 0 || - strcmp(opname, "bit-shift-r-ext") == 0 || - strcmp(opname, "bit-and") == 0 || strcmp(opname, "bit-or") == 0 || - strcmp(opname, "bit-xor") == 0 || strcmp(opname, "mul-real") == 0 || - strcmp(opname, "div-real") == 0) { - return 4; - } - - // (5 bytes: 1 + 4) - if (strcmp(opname, "exit") == 0 || strcmp(opname, "jump-if-flag") == 0 || - strcmp(opname, "jump") == 0) { - return 5; - } - - // Load, Load-immediate (6 bytes: 1 + 1 + 4) - if (strcmp(opname, "load-absolute-32") == 0 || - strcmp(opname, "load-immediate") == 0 || - strcmp(opname, "load-absolute-16") == 0 || - strcmp(opname, "load-absolute-8") == 0) { - return 6; - } - - // jump compare (7 bytes: 1 + 4 + 1 + 1) - if (strcmp(opname, "jump-eq-int") == 0 || - strcmp(opname, "jump-neq-int") == 0 || - strcmp(opname, "jump-gt-int") == 0 || - strcmp(opname, "jump-lt-int") == 0 || - strcmp(opname, "jump-le-int") == 0 || - strcmp(opname, "jump-ge-int") == 0 || - strcmp(opname, "jump-eq-nat") == 0 || - strcmp(opname, "jump-neq-nat") == 0 || - strcmp(opname, "jump-gt-nat") == 0 || - strcmp(opname, "jump-lt-nat") == 0 || - strcmp(opname, "jump-le-nat") == 0 || - strcmp(opname, "jump-ge-nat") == 0 || - strcmp(opname, "jump-eq-real") == 0 || - strcmp(opname, "jump-neq-real") == 0 || - strcmp(opname, "jump-gt-real") == 0 || - strcmp(opname, "jump-lt-real") == 0 || - strcmp(opname, "jump-le-real") == 0 || - strcmp(opname, "jump-ge-real") == 0 || - strcmp(opname, "store-offset-8") == 0 || - strcmp(opname, "store-offset-16") == 0 || - strcmp(opname, "store-offset-32") == 0 || - strcmp(opname, "load-offset-8") == 0 || - strcmp(opname, "load-offset-16") == 0 || - strcmp(opname, "load-offset-32") == 0) { - return 7; - } - - // Call (1 + 4 + 1 + args + 1) - if (strcmp(opname, "call") == 0) { - ExprNode *args_node = node->children[1]; - u32 args_count; - - if (strcmp(args_node->token, "nil") == 0) { - args_count = 0; - } else { - args_count = 1 + args_node->child_count; - } - - return 1 + 1 + 1 + 4 + args_count; - } - - // Syscall (1 + syscall_id (4) + args) - if (strcmp(opname, "syscall") == 0) { - return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0); - } - - fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); - exit(-1); -} - -int calculate_instruction_size(ExprNode *node) { - if (node->child_count == 0) - return 0; - - return get_instruction_byte_size(node); -} - -void collect_symbols_in_node(SymbolTable *table, ExprNode *node, - u32 *current_addr, int depth) { - char indent[32] = ""; - for (int i = 0; i < depth; i++) - strcat(indent, " "); - -#ifdef ASM_DEBUG - printf("%s%d %s ", indent, *current_addr, node->token); -#endif - - if (strcmp(node->token, "label") == 0) { - if (node->child_count >= 1) { - const char *name = node->children[0]->token; -#ifdef ASM_DEBUG - printf(" %s -> %d\n", name, *current_addr); -#endif - symbol_table_add(table, name, *current_addr, SYMBOL_CODE); - } - - for (size_t i = 1; i < node->child_count; i++) { - collect_symbols_in_node(table, node->children[i], current_addr, - depth + 1); - } - } else { - int size = get_instruction_byte_size(node); - *current_addr += size; -#ifdef ASM_DEBUG - printf(" +%d bytes -> %d\n", size, *current_addr); -#endif - } -} - -void collect_symbols(SymbolTable *table, ExprNode *program) { - // First, collect all data labels (with placeholder address) - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "data") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - ExprNode *item = section->children[j]; - if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { - const char *name = item->children[0]->token; - symbol_table_add(table, name, 0, SYMBOL_DATA); - } - } - } - } - - // Second, collect all code labels with proper nesting - u32 code_addr = 0; - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "code") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - collect_symbols_in_node(table, section->children[j], &code_addr, 0); - } - } - } -} - -u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) { - u32 addr = vm->mp; - vm->mp += size; - vm->frames[vm->fp].end += size; - - // Update the symbol's address - Symbol *sym = symbol_table_lookup(table, name); - if (sym && sym->type == SYMBOL_DATA) { - sym->address = addr; - sym->size = size; - } - - return addr; -} - -void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } - -void emit_u32(VM *vm, u32 value) { - write_u32(vm, code, vm->cp, value); - vm->cp += 4; -} - -void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } - -int parse_register(const char *reg_str) { - if (reg_str[0] != '$') - return -1; - return atoi(reg_str + 1); -} - -u32 resolve_symbol(SymbolTable *table, const char *ref) { - // Handle symbol references (e.g., &label) - if (ref[0] == '&') { - return find_label_in_table(table, ref + 1); - } - - // Handle fixed-point numbers (e.g., 0.5) - if (strchr(ref, '.')) { - return TO_FIXED(atof(ref)); - } - - // Handle hexadecimal literals (e.g., 0x7) - if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) { - char *endptr; - u32 value = (u32)strtoul(ref + 2, &endptr, 16); // Skip "0x" - - if (endptr == ref + 2 || *endptr != '\0') { - fprintf(stderr, "Invalid hex literal: %s\n", ref); - exit(1); - } - return value; - } - - // Handle decimal literals (e.g., 7) - char *endptr; - u32 value = (u32)strtoul(ref, &endptr, 10); - - if (endptr == ref || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: %s\n", ref); - exit(1); - } - return value; -} - -static char *unwrap_string(const char *quoted_str) { - if (!quoted_str) - return NULL; - - size_t len = strlen(quoted_str); - if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { - // Remove quotes and process escape sequences - const char *src = quoted_str + 1; - size_t src_len = len - 2; - - // First pass: calculate the actual length needed after escape processing - size_t actual_len = 0; - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Escape sequence - actual_len++; - i++; // Skip the next character - } else { - actual_len++; - } - } - - char *unwrapped = (char *)malloc(actual_len + 1); - size_t dst_idx = 0; - - // Second pass: process escape sequences - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Handle escape sequences - switch (src[i + 1]) { - case 'n': - unwrapped[dst_idx++] = '\n'; - break; - case 't': - unwrapped[dst_idx++] = '\t'; - break; - case 'r': - unwrapped[dst_idx++] = '\r'; - break; - case '\\': - unwrapped[dst_idx++] = '\\'; - break; - case '"': - unwrapped[dst_idx++] = '"'; - break; - case '\'': - unwrapped[dst_idx++] = '\''; - break; - default: - // Unknown escape, keep both characters - unwrapped[dst_idx++] = src[i]; - unwrapped[dst_idx++] = src[i + 1]; - break; - } - i++; // Skip the next character - } else { - unwrapped[dst_idx++] = src[i]; - } - } - unwrapped[dst_idx] = '\0'; - return unwrapped; - } - // Not quoted, return copy - return strdup(quoted_str); -} - -void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) { - for (size_t i = 0; i < block->child_count; ++i) { - ExprNode *item = block->children[i]; - if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { - const char *name = item->children[0]->token; - ExprNode *val = item->children[1]; - - if (val->child_count == 0) { - const char *token = val->token; - - // Case 1: String literal (enclosed in quotes) - if (token[0] == '"' && token[strlen(token) - 1] == '"') { - char *unwrapped = unwrap_string(token); - int len = strlen(unwrapped); - u32 addr = allocate_data(vm, table, name, len + 1 + 4); - - write_u32(vm, memory, addr, len); - for (int i = 0; i < len; i++) { - write_u8(vm, memory, addr + 4 + i, unwrapped[i]); - } - write_u8(vm, memory, addr + 4 + len, '\0'); - free(unwrapped); - } - // Case 2: Hexadecimal integer (0x...) - else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) { - char *endptr; - u32 value = (u32)strtoul(token + 2, &endptr, 16); - - if (endptr != token + strlen(token)) { - fprintf(stderr, "Invalid hex in data block: %s\n", token); - exit(1); - } - - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, value); - } - // Case 3: Floating-point (has decimal point) - else if (strchr(token, '.')) { - float f = atof(token); - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, TO_FIXED(f)); - } - // Case 4: Decimal integer - else { - char *endptr; - u32 value = (u32)strtoul(token, &endptr, 10); - - if (endptr != token + strlen(token)) { - fprintf(stderr, "Invalid decimal in data block: %s\n", token); - exit(1); - } - - u32 addr = allocate_data(vm, table, name, 4); - write_u32(vm, memory, addr, value); - //vm->mp += 4; - } - } else { - fprintf(stderr, "Unsupported data item\n"); - exit(1); - } - } - } -} - -void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) { - const char *opname = node->token; - if (strcmp(opname, "label") == 0) { - for (size_t i = 1; i < node->child_count; i++) { - process_code_expr(vm, table, node->children[i]); - } - } else if (strcmp(opname, "exit") == 0) { - emit_opcode(vm, OP_EXIT); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "jump") == 0) { - emit_opcode(vm, OP_JMP); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "jump-if-flag") == 0) { - emit_opcode(vm, OP_JMPF); - u32 addr = resolve_symbol(table, node->children[0]->token); - emit_u32(vm, addr); - } else if (strcmp(opname, "call") == 0) { - emit_opcode(vm, OP_CALL); - - if (node->child_count < 3) { - fprintf(stderr, "Error: call requires (args) and return register\n"); - return; - } - - // Parse function address (first child) - u32 addr = resolve_symbol(table, node->children[0]->token); - if (addr == (u32)-1) { - fprintf(stderr, "Error: undefined symbol '%s'\n", - node->children[0]->token); - return; - } - emit_u32(vm, addr); - - // Parse argument list (second child) - ExprNode *args_node = node->children[1]; - u8 arg_count = 0; - - if (args_node->child_count > 0) { - // Multiple arguments case - arg_count = args_node->child_count + 1; // +1 for the token - } else { - // Single argument case - token is the argument - arg_count = (args_node->token[0] != '\0') ? 1 : 0; - } - emit_byte(vm, arg_count); - - // Emit arguments based on representation - if (arg_count > 0) { - // First argument is always the token - const char *reg_str = args_node->token; - int reg = parse_register(reg_str); - if (reg < 0) { - fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); - return; - } - emit_byte(vm, (u8)reg); - - // Emit children if present - for (size_t i = 0; i < args_node->child_count; i++) { - reg_str = args_node->children[i]->token; - reg = parse_register(reg_str); - if (reg < 0) { - fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); - return; - } - emit_byte(vm, (u8)reg); - } - } - // Parse return register (third child) - const char *return_reg_str = node->children[2]->token; - int return_reg = parse_register(return_reg_str); - - if (return_reg < 0) { - if (strcmp(return_reg_str, "nil") == 0) { - return_reg = 0xFF; - } else { - fprintf(stderr, "Error: invalid return register '%s'\n", - return_reg_str); - return; - } - } - emit_byte(vm, (u8)return_reg); - -} else if (strcmp(opname, "return") == 0) { - emit_opcode(vm, OP_RETURN); - - if (node->child_count != 1) { - fprintf(stderr, "Error: return requires exactly one argument\n"); - return; - } - - const char *reg_str = node->children[0]->token; - int reg = parse_register(reg_str); - - // Handle "nil" as special case (no return value) - if (reg < 0) { - if (strcmp(reg_str, "nil") == 0) { - reg = 0xFF; // Special value for "no return" - } else { - fprintf(stderr, "Error: invalid return register '%s'\n", reg_str); - return; - } - } - emit_byte(vm, (u8)reg); - } else if (strcmp(opname, "load-immediate") == 0) { - emit_opcode(vm, OP_LOAD_IMM); - int reg = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, reg); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-8") == 0) { - emit_opcode(vm, OP_LOAD_ABS_8); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-16") == 0) { - emit_opcode(vm, OP_LOAD_ABS_16); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-absolute-32") == 0) { - emit_opcode(vm, OP_LOAD_ABS_32); - int dest = parse_register(node->children[0]->token); - u32 addr = resolve_symbol(table, node->children[1]->token); - emit_byte(vm, dest); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-indirect-8") == 0) { - emit_opcode(vm, OP_LOAD_IND_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "load-indirect-16") == 0) { - emit_opcode(vm, OP_LOAD_IND_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "load-indirect-32") == 0) { - emit_opcode(vm, OP_LOAD_IND_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "malloc") == 0) { - emit_opcode(vm, OP_MALLOC); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "memset-8") == 0) { - emit_opcode(vm, OP_MEMSET_8); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "memset-16") == 0) { - emit_opcode(vm, OP_MEMSET_16); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "memset") == 0) { - emit_opcode(vm, OP_MEMSET_32); - int dest = parse_register(node->children[0]->token); - int value = parse_register(node->children[1]->token); - int count = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, value); - emit_byte(vm, count); - } else if (strcmp(opname, "store-absolute-8") == 0) { - emit_opcode(vm, OP_STORE_ABS_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-absolute-16") == 0) { - emit_opcode(vm, OP_STORE_ABS_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-absolute-32") == 0) { - emit_opcode(vm, OP_STORE_ABS_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-8") == 0) { - emit_opcode(vm, OP_STORE_IND_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-16") == 0) { - emit_opcode(vm, OP_STORE_IND_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-indirect-32") == 0) { - emit_opcode(vm, OP_STORE_IND_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "store-offset-8") == 0) { - emit_opcode(vm, OP_STORE_OFF_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "store-offset-16") == 0) { - emit_opcode(vm, OP_STORE_OFF_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "store-offset-32") == 0) { - emit_opcode(vm, OP_STORE_OFF_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-8") == 0) { - emit_opcode(vm, OP_LOAD_OFF_8); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-16") == 0) { - emit_opcode(vm, OP_LOAD_OFF_16); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "load-offset-32") == 0) { - emit_opcode(vm, OP_LOAD_OFF_32); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - u32 addr = resolve_symbol(table, node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_u32(vm, addr); - } else if (strcmp(opname, "register-move") == 0) { - emit_opcode(vm, OP_REG_MOV); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "syscall") == 0) { - emit_opcode(vm, OP_SYSCALL); - - // Parse syscall ID - u32 syscall_id = 0; - const char *syscall_name = node->children[0]->token; - if (strcmp(syscall_name, "EXIT") == 0) - syscall_id = SYSCALL_EXIT; - else if (strcmp(syscall_name, "OPEN") == 0) - syscall_id = SYSCALL_DEVICE_OPEN; - else if (strcmp(syscall_name, "READ") == 0) - syscall_id = SYSCALL_DEVICE_READ; - else if (strcmp(syscall_name, "WRITE") == 0) - syscall_id = SYSCALL_DEVICE_WRITE; - else if (strcmp(syscall_name, "CLOSE") == 0) - syscall_id = SYSCALL_DEVICE_CLOSE; - else if (strcmp(syscall_name, "IOCTL") == 0) - syscall_id = SYSCALL_DEVICE_IOCTL; - else if (strcmp(syscall_name, "REFRESH") == 0) - syscall_id = SYSCALL_DEVICE_REFRESH; - - emit_u32(vm, syscall_id); - - // Emit register arguments - for (size_t i = 1; i < node->child_count; ++i) { - int reg = parse_register(node->children[i]->token); - emit_byte(vm, reg); - } - } else if (strcmp(opname, "bit-shift-left") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_LEFT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-shift-right") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_RIGHT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-shift-r-ext") == 0) { - emit_opcode(vm, OP_BIT_SHIFT_R_EXT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-and") == 0) { - emit_opcode(vm, OP_BAND); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-or") == 0) { - emit_opcode(vm, OP_BOR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "bit-xor") == 0) { - emit_opcode(vm, OP_BXOR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "add-int") == 0) { - emit_opcode(vm, OP_ADD_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-int") == 0) { - emit_opcode(vm, OP_SUB_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-int") == 0) { - emit_opcode(vm, OP_MUL_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-int") == 0) { - emit_opcode(vm, OP_DIV_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-int") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-int") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "add-nat") == 0) { - emit_opcode(vm, OP_ADD_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-nat") == 0) { - emit_opcode(vm, OP_SUB_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-nat") == 0) { - emit_opcode(vm, OP_MUL_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-nat") == 0) { - emit_opcode(vm, OP_DIV_NAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-nat") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-nat") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "add-real") == 0) { - emit_opcode(vm, OP_ADD_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "sub-real") == 0) { - emit_opcode(vm, OP_SUB_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "mul-real") == 0) { - emit_opcode(vm, OP_MUL_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "div-real") == 0) { - emit_opcode(vm, OP_DIV_REAL); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "abs-real") == 0) { - emit_opcode(vm, OP_ABS_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "neg-real") == 0) { - emit_opcode(vm, OP_NEG_INT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - } else if (strcmp(opname, "int-to-real") == 0) { - emit_opcode(vm, OP_INT_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "nat-to-real") == 0) { - emit_opcode(vm, OP_NAT_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-int") == 0) { - emit_opcode(vm, OP_REAL_TO_INT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-nat") == 0) { - emit_opcode(vm, OP_REAL_TO_NAT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "jump-eq-int") == 0) { - emit_opcode(vm, OP_JEQ_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-int") == 0) { - emit_opcode(vm, OP_JNEQ_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-int") == 0) { - emit_opcode(vm, OP_JGT_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-int") == 0) { - emit_opcode(vm, OP_JLT_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-int") == 0) { - emit_opcode(vm, OP_JLE_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-int") == 0) { - emit_opcode(vm, OP_JGE_INT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-eq-nat") == 0) { - emit_opcode(vm, OP_JEQ_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-nat") == 0) { - emit_opcode(vm, OP_JNEQ_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-nat") == 0) { - emit_opcode(vm, OP_JGT_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-nat") == 0) { - emit_opcode(vm, OP_JLT_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-nat") == 0) { - emit_opcode(vm, OP_JLE_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-nat") == 0) { - emit_opcode(vm, OP_JGE_NAT); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-eq-real") == 0) { - emit_opcode(vm, OP_JEQ_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-neq-real") == 0) { - emit_opcode(vm, OP_JNEQ_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-gt-real") == 0) { - emit_opcode(vm, OP_JGT_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-lt-real") == 0) { - emit_opcode(vm, OP_JLT_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-le-real") == 0) { - emit_opcode(vm, OP_JLE_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "jump-ge-real") == 0) { - emit_opcode(vm, OP_JGE_REAL); - u32 addr = resolve_symbol(table, node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_u32(vm, addr); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-length") == 0) { - emit_opcode(vm, OP_STRLEN); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-eq") == 0) { - emit_opcode(vm, OP_STREQ); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-concat") == 0) { - emit_opcode(vm, OP_STRCAT); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-get-char") == 0) { - emit_opcode(vm, OP_STR_GET_CHAR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-find-char") == 0) { - emit_opcode(vm, OP_STR_FIND_CHAR); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - } else if (strcmp(opname, "string-slice") == 0) { - emit_opcode(vm, OP_STR_SLICE); - int dest = parse_register(node->children[0]->token); - int src1 = parse_register(node->children[1]->token); - int src2 = parse_register(node->children[2]->token); - int src3 = parse_register(node->children[3]->token); - emit_byte(vm, dest); - emit_byte(vm, src1); - emit_byte(vm, src2); - emit_byte(vm, src3); - } else if (strcmp(opname, "int-to-string") == 0) { - emit_opcode(vm, OP_INT_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "nat-to-string") == 0) { - emit_opcode(vm, OP_NAT_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "real-to-string") == 0) { - emit_opcode(vm, OP_REAL_TO_STRING); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-int") == 0) { - emit_opcode(vm, OP_STRING_TO_INT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-nat") == 0) { - emit_opcode(vm, OP_STRING_TO_NAT); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else if (strcmp(opname, "string-to-real") == 0) { - emit_opcode(vm, OP_STRING_TO_REAL); - int dest = parse_register(node->children[0]->token); - int src = parse_register(node->children[1]->token); - emit_byte(vm, dest); - emit_byte(vm, src); - } else { - fprintf(stderr, "Unknown opcode: %s\n", opname); - } -} - -void old_assemble(VM *vm, ExprNode *program) { - SymbolTable table; - symbol_table_init(&table); - - // PASS 1: Collect all symbols (both code and data) - collect_symbols(&table, program); - - // PASS 2: Process data section using symbol table - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "data") == 0) { - process_data_block(vm, &table, section); - } - } - - // PASS 3: Process code section using complete symbol table - for (size_t i = 0; i < program->child_count; ++i) { - ExprNode *section = program->children[i]; - if (strcmp(section->token, "code") == 0) { - for (size_t j = 0; j < section->child_count; ++j) { - process_code_expr(vm, &table, section->children[j]); - } - } - } - - // Cleanup symbol table - for (int i = 0; i < table.count; i++) { -#ifdef ASM_DEBUG - Symbol s = table.symbols[i]; - printf("%s[%d]\n", s.name, s.address); -#endif - free(table.symbols[i].name); - } - free(table.symbols); -} diff --git a/src/tools/old_assembler/assembler.h b/src/tools/old_assembler/assembler.h deleted file mode 100644 index 76b9326..0000000 --- a/src/tools/old_assembler/assembler.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef ASSEMBLER_H -#define ASSEMBLER_H - -#include "../../vm/common.h" -#include "../../vm/vm.h" -#include "parser.h" - -#include -#include -#include -#include - -#define AS_FIXED(v) ((float)(i32)(v) / 65536.0f) -#define TO_FIXED(f) ((i32)( \ - ((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \ -)) - -void old_assemble(VM *vm, ExprNode *program); - -#endif diff --git a/src/tools/old_assembler/parser.c b/src/tools/old_assembler/parser.c deleted file mode 100644 index 69610d9..0000000 --- a/src/tools/old_assembler/parser.c +++ /dev/null @@ -1,244 +0,0 @@ -#include "parser.h" -#include -#include -#include -#include -#include -#include - -// Helper function to create a new node -static ExprNode *expr_node_create(const char *token, int line) { - ExprNode *node = (ExprNode *)malloc(sizeof(ExprNode)); - node->token = strdup(token ? token : ""); - node->children = NULL; - node->child_count = 0; - node->line = line; - return node; -} - -// Forward declaration -static ExprNode *parse_expression(const char **ptr, int line); - -// Skip whitespace characters and comments -static const char *skip_whitespace(const char *ptr) { - while (*ptr) { - // Skip regular whitespace - if (isspace(*ptr)) { - ptr++; - continue; - } - - // Check for comment start - if (*ptr == ';') { - // Skip everything until end of line - while (*ptr && *ptr != '\n') { - ptr++; - } - continue; - } - break; - } - return ptr; -} - -// Parse a token (atom) -static char *parse_token(const char **ptr, int line) { - const char *start = *ptr; - - // Skip leading whitespace and comments - start = skip_whitespace(start); - if (!*start) { - printf("Error at line:%d\n", line); - return NULL; - } - - const char *end = start; - - // Handle quoted strings - if (*start == '"') { - end++; // Skip opening quote - // Read until closing quote or end of string - while (*end && *end != '"') { - if (*end == '\\' && *(end + 1)) { - end += 2; // Skip escaped character - } else { - end++; - } - } - if (*end == '"') { - end++; // Include closing quote - } - } - // Handle parentheses as separate tokens - else if (*end == '(' || *end == ')') { - end++; - } else { - // Read until whitespace, parentheses, or comment - while (*end && !isspace(*end) && *end != '(' && *end != ')' && - *end != ';') { - end++; - } - } - - if (end == start) { - printf("Error at line:%d\n", line); - return NULL; - } - - size_t len = end - start; - char *token = (char *)malloc(len + 1); - memcpy(token, start, len); - token[len] = '\0'; - - *ptr = end; - return token; -} - -// Parse a list (expression starting with '(') -static ExprNode *parse_list(const char **ptr, int line) { - // Skip the opening parenthesis - (*ptr)++; - - *ptr = skip_whitespace(*ptr); - if (**ptr == ')') { - // Empty list - (*ptr)++; - return expr_node_create("\0", line); - } - - // Parse all children first - ExprNode **temp_children = NULL; - size_t temp_count = 0; - - while (**ptr && **ptr != ')') { - ExprNode *child = parse_expression(ptr, line); - if (child) { - // Resize temp children array - ExprNode **new_temp = - (ExprNode **)malloc(sizeof(ExprNode *) * (temp_count + 1)); - - // Copy existing children - for (size_t i = 0; i < temp_count; i++) { - new_temp[i] = temp_children[i]; - } - - // Add new child - new_temp[temp_count] = child; - temp_count++; - - // Free old array and update - free(temp_children); - temp_children = new_temp; - } - - *ptr = skip_whitespace(*ptr); - } - - if (**ptr == ')') { - (*ptr)++; // Skip closing parenthesis - } else { - fprintf(stderr, "Error: Missing closing parenthesis at line %d\n", line); - } - - // Create the actual node - ExprNode *node; - if (temp_count > 0 && temp_children[0]->child_count == 0) { - // First child is an atom, use it as the operator - node = expr_node_create(temp_children[0]->token, line); - // Move remaining children - node->child_count = temp_count - 1; - if (node->child_count > 0) { - node->children = - (ExprNode **)malloc(sizeof(ExprNode *) * node->child_count); - for (size_t i = 0; i < node->child_count; i++) { - node->children[i] = temp_children[i + 1]; - } - } - // Free the first child since we used its token - expr_free(temp_children[0]); - } else { - // No operator or first child is a list - node = expr_node_create("list", line); - node->children = temp_children; - node->child_count = temp_count; - } - - if (temp_count == 0) { - free(temp_children); - } - - return node; -} - -// Parse an expression (either atom or list) -static ExprNode *parse_expression(const char **ptr, int line) { - *ptr = skip_whitespace(*ptr); - - if (!**ptr) - return NULL; - - if (**ptr == '(') { - return parse_list(ptr, line); - } else { - // Parse atom - char *token = parse_token(ptr, line); - if (token) { - ExprNode *node = expr_node_create(token, line); - free(token); - return node; - } - return NULL; - } -} - -// Main parsing function -ExprNode *expr_parse(const char *source, size_t source_len) { - if (!source || source_len == 0) - return NULL; - - const char *ptr = source; - int line = 1; - - ptr = skip_whitespace(ptr); - if (!*ptr) - return NULL; - - return parse_expression(&ptr, line); -} - -// Free an Expr AST (and all children) -void expr_free(ExprNode *node) { - if (!node) - return; - - free(node->token); - - for (size_t i = 0; i < node->child_count; i++) { - expr_free(node->children[i]); - } - free(node->children); - free(node); -} - -// Debug: print AST (for dev) -void expr_print(ExprNode *node, int indent) { - if (!node) - return; - - for (int i = 0; i < indent; i++) { - printf(" "); - } - - if (node->child_count == 0) { - // Atom - printf("Atom: '%s' (line %d)\n", node->token, node->line); - } else { - // List - printf("List: '%s' (line %d) [%zu children]\n", node->token, node->line, - node->child_count); - - for (size_t i = 0; i < node->child_count; i++) { - expr_print(node->children[i], indent + 1); - } - } -} diff --git a/src/tools/old_assembler/parser.h b/src/tools/old_assembler/parser.h deleted file mode 100644 index 53ac41b..0000000 --- a/src/tools/old_assembler/parser.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef PARSER_H -#define PARSER_H - -#include // for size_t - -// Forward declare -typedef struct ExprNode ExprNode; - -// Node type: atom or list -struct ExprNode { - char *token; // For atoms: the value ("123", "$0", "add") - // For lists: the operator (first token) - ExprNode **children; // Array of child nodes (NULL if atom) - size_t child_count; // 0 if atom - int line; // Source line number (for errors) -}; - -ExprNode *expr_parse(const char *source, size_t source_len); -ExprNode* expand_macros(ExprNode* node); -ExprNode* expand_lambda(ExprNode* lambda_node); -void expr_free(ExprNode *node); -void expr_print(ExprNode *node, int indent); -void *safe_malloc(size_t size); - -#endif diff --git a/src/vm/libc.c b/src/vm/libc.c index 05dcf3f..3a85ffe 100644 --- a/src/vm/libc.c +++ b/src/vm/libc.c @@ -95,6 +95,21 @@ bool streq(const char *s1, const char *s2) { return (*s1 == '\0' && *s2 == '\0'); } +bool strleq(const char *s1, const char *s2, u32 length) { + u32 i; + if (s1 == nil && s2 == nil) return true; + if (s1 == nil || s2 == nil) return false; + + i = 0; + while (i < length && *s1 && *s2) { + if (*s1 != *s2) return false; + s1++; + s2++; + i++; + } + if (i == length) return true; + return (*s1 == '\0' && *s2 == '\0'); +} u32 strlength(const char *str) { u32 i; @@ -238,4 +253,4 @@ void fixed_to_string(i32 value, char *buffer) { } strcopy(buffer, end, temp + sizeof(temp) - end); -} \ No newline at end of file +} diff --git a/src/vm/libc.h b/src/vm/libc.h index f9ffc45..4479f9a 100644 --- a/src/vm/libc.h +++ b/src/vm/libc.h @@ -4,6 +4,7 @@ #include "common.h" bool streq(const char *s1, const char *s2); +bool strleq(const char *s1, const char *s2, u32 length); i32 strcopy(char* to, const char *from, u32 length); u32 strlength(const char *str); u32 strnlength(const char *str, u32 max_len); diff --git a/test/add.rom b/test/add.rom deleted file mode 100644 index 0f1dd4cf7279a0013513ad42e030f59d47f9742f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 zcmXwyp$>pB3`DQjDrQh9EDA9`WD1W1i6MmFA1!1}`g*-yQUKVzTMbS4G)Pc_GC2t0 us+2hltxjO1fV9ZKQc%CVq(Z3r?swDPMKS#iHP(pc{dn?yTxVXsR~=qfTnG{X diff --git a/test/add.ul.ir b/test/add.ul.ir index 5ed166d..64e269d 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -28,8 +28,8 @@ function pln (str message $0) load_immediate 0 -> mode; syscall OPEN terminal_namespace mode -> term; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE term message msg_length; - strlen new_line -> nl_length; + string_length new_line -> nl_length; syscall WRITE term nl nl_length; return; diff --git a/test/fib.ul.ir b/test/fib.ul.ir index e073416..3ceff47 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -40,10 +40,10 @@ function pln (str message $0) load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; \ No newline at end of file diff --git a/test/hello.rom b/test/hello.rom deleted file mode 100644 index 9632b4b38a5231fefdfc691030dca7459f1c33f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 135 zcmXwvu?m1N3`8$YCS3%<*~yOe51a&teuf<_DAwQCL@|)N$K3(ItpDHegSt?sO05aq vZy{tRwJ94D)xwQJbYQjsE((9h;%=LUHC^FM#}-EIfeM!9vgfq)zW50bk=_dT diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 7373ad9..c8d2475 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -19,9 +19,9 @@ function pln (str message $0) load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; diff --git a/test/loop.rom b/test/loop.rom deleted file mode 100644 index 1ae871122caf2b3379b7de38bba2408806ef034d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 258 zcmYL@u?oUK42F|SN{_omaBva1MGzdStBaFcH+=y?5eGZe;xp(o`Eci-mSP~uclpDA zAtL%B$5@YmuE>Iv-&3$NK_w%!u=xsQ&Mf-{1a=zwg=ke6C4{(T(sBZ>eJNtI(yLE^ zS@dF#A$tCxbIeEO0X3h529yj9-OVnRUKiirZrvLx| diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 0cc95f3..a9a03b9 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -2,7 +2,7 @@ global str terminal_namespace = "/dev/term/0"; global str prompt = "Enter a string:"; global str new_line = "\n"; -function main (); +function main () real a $0; int i $1; int mode $11; @@ -13,7 +13,7 @@ function main (); load_immediate 0 -> $2; load_immediate -1 -> $3; load_immediate 5.0 -> $5; - loop loop_body; + loop loop_body add_real a $5 -> a; add_int i $3 -> i; jump_ge_int loop_body i $2; @@ -50,9 +50,9 @@ function pln (str message $0); load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length ; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; \ No newline at end of file diff --git a/test/malloc.rom b/test/malloc.rom deleted file mode 100644 index f03a1bf9bfedf082c61fa5e08d8b8dc7fb61a933..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 167 zcmXwyu@1sO3`6b26J6L?Z((9MW=15oegsHVh=C5J;`fiF&`7bL70CgBYwS9Q6~eW! zx-{n!I&mAobBKXWEI~WbEga(iqLvZzeQ0lV&q(X#KXh5k;(8xGbsnZyJtCVgV}x=m O?|FKTk5fVZ-QWwl{|@Q^ diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 9cb5155..84a883d 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -32,9 +32,9 @@ function pln (str message $0) load_immediate terminal_namespace -> ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return; diff --git a/test/paint-bw.rom b/test/paint-bw.rom deleted file mode 100644 index 197ed3d0d5278c5530b9e1bcff1dacfb6ecc8b1d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 574 zcmah`OHRWu5cT*UC+U>H1EyVZ!8NT#0L<3pX0$H*2&j7lS)04ysU($uxvy@q+v|BtOe-tWaz?i9$=TN z1rO7qnjO;6UwM=+7uDy)-KlXRIoHxQoXzCAG^)vec#`72kC25=KA(w(^Dc+ N3m@h)vKO*9vQGwOFP{Ja diff --git a/test/paint.rom b/test/paint.rom deleted file mode 100644 index 329532c926885b2cd01e1c11d741dd26b236c765..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1266 zcmai!&5G1O6ou=i{*p?kt93e!!=!uB3L-8dxKM@}5ZoxPT)Wg>iK95ETYolg1kt6) zEPVsPm9O9F@ZkSs<>rRmoeDLZYmu zBFCzli4lUL3Fc)jNKH;+6;yswIy%eQ5t~%SGaX4GbWNjb+BrHOvh_c*ZOE>@tU2zz z*z*pvS9{)b1Mj<@cbL7=^X?D4w|d@T_HNI6bKrf_^Tt_;!U$2%q(Sgy;kl$&4)FR# zSy>;do%>Mj@f4|aZRr7SXKX&IX#Af-N77W)BWbDyku=qhNSbO+Bu#ZHlBU`gNmD(H zq^X7;lWw}}W~b3RMH(~xLXT`co!OkIyUeW)qcNFotknDOkr&xs2qs*S3|C~8One*N3f%dh}W-)(m-Aw$%g>IX5DIPVza~Gl+n1T uV5iD#F04i%$j9iUyGRV|O|pXY1vCf#%AnPw=_pUS%XOyJ?*7GdfZ_#>D+nL} diff --git a/test/simple.ul.ir b/test/simple.ul.ir index c973ff5..803bbde 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -3,12 +3,12 @@ global real x = 1.0; global real y = 1.0; function main () - real x $0; - load_absolute_32 x -> x; - real y $1; - load_absolute_32 y -> y; + real a $0; + load_absolute_32 x -> a; + real b $1; + load_absolute_32 y -> b; real result $2; - add_real x y -> result; + add_real a b -> result; str result_str $3; real_to_string result -> result_str; call pln result_str -> void; @@ -23,9 +23,9 @@ function pln (str message $0) load_immediate 0 -> mode; syscall OPEN terminal_namespace mode -> term; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE term message msg_length; load_address new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE term nl nl_length; return; diff --git a/test/window.rom b/test/window.rom deleted file mode 100644 index 1721b46b94eb2976d719953e36238f8e8f969092..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 326 zcmX|-O%8%E5QS&xplHPs11wpndraKu!o&lx;|SWsjhgs(InU%7@J$Ui>CAf{%xhXA zq7QbzcD+ECnHs}dXSR)r-%uQPX-)}L8|Qyg(j<_wC&C%PydZAl;tw;842dqY#=uZf zngN=#Bc~Jl6UxY@L#IciiPPL=mE`lP8e$j10s_TjvxS;grVrZaY*@kzbg^o}am-fC z8m@^b{LU0L@|$EMQJ3Odee|#Tez ts; load_immediate 0 -> mode; syscall OPEN ts mode -> ts; - strlen message -> msg_length; + string_length message -> msg_length; syscall WRITE ts message msg_length ; load_immediate new_line -> nl; - strlen nl -> nl_length; + string_length nl -> nl_length; syscall WRITE ts nl nl_length; return;