From 197b8ee0eff81d79d1ab721e1b990a83387aecdc Mon Sep 17 00:00:00 2001 From: zongor Date: Thu, 26 Feb 2026 16:45:48 -0800 Subject: [PATCH] Optimizations, have return ask if it is a heap value, move lexer up --- .clocignore | 2 + .gitignore | 4 +- Makefile | 12 +++--- src/arch/linux/main.c | 10 +++-- src/tools/assembler/assembler.c | 63 +++++++++++++++++++++++------- src/tools/codegen.h | 2 + src/tools/lexer.c | 2 +- src/tools/lexer.h | 1 + src/vm/opcodes.h | 1 - src/vm/vm.c | 67 ++++++++------------------------ test/add.rom | Bin 151 -> 153 bytes test/fib.rom | Bin 187 -> 190 bytes test/hello.rom | Bin 135 -> 136 bytes test/loop.rom | Bin 257 -> 258 bytes test/malloc.rom | Bin 187 -> 188 bytes test/paint-bw.rom | Bin 550 -> 553 bytes test/paint.rom | Bin 1161 -> 1164 bytes test/simple.rom | Bin 132 -> 133 bytes test/window.rom | Bin 332 -> 333 bytes 19 files changed, 88 insertions(+), 76 deletions(-) create mode 100644 .clocignore diff --git a/.clocignore b/.clocignore new file mode 100644 index 0000000..b6d0ea4 --- /dev/null +++ b/.clocignore @@ -0,0 +1,2 @@ +.ccls-cache/ +build/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9a1d2a2..1cce5a5 100644 --- a/.gitignore +++ b/.gitignore @@ -107,4 +107,6 @@ memory_dump.bin build/ .gdb_history .vscode -.clangd \ No newline at end of file +.clangd +.directory +perf.* \ No newline at end of file diff --git a/Makefile b/Makefile index aacc8de..108bfcd 100644 --- a/Makefile +++ b/Makefile @@ -44,8 +44,8 @@ ifeq ($(BUILD_MODE), release) LDFLAGS += -s TARGET_SUFFIX := -release else - CORE_CFLAGS += -g -DDEBUG - PLATFORM_CFLAGS += -g -DDEBUG + CORE_CFLAGS += -O0 -ggdb3 -fno-omit-frame-pointer -DDEBUG + PLATFORM_CFLAGS += -O0 -ggdb3 -fno-omit-frame-pointer -DDEBUG TARGET_SUFFIX := -debug endif @@ -91,14 +91,14 @@ ifeq ($(BUILD_MODE), release) PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c\ $(SRC_DIR)/tools/lexer.c \ - $(SRC_DIR)/tools/assembler/assembler.c \ - $(SRC_DIR)/tools/compiler/compiler.c + $(SRC_DIR)/tools/assembler/assembler.c + # $(SRC_DIR)/tools/compiler/compiler.c else PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ $(ARCH_DIR)/devices.c \ $(SRC_DIR)/tools/lexer.c \ - $(SRC_DIR)/tools/assembler/assembler.c \ - $(SRC_DIR)/tools/compiler/compiler.c + $(SRC_DIR)/tools/assembler/assembler.c + # $(SRC_DIR)/tools/compiler/compiler.c endif # --- OBJECT FILES --- diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index ac9f5c0..a8e310f 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,4 +1,4 @@ -#include "../../tools/compiler/compiler.h" +//#include "../../tools/compiler/compiler.h" #include "../../tools/assembler/assembler.h" #include "../../vm/vm.h" #include "devices.h" @@ -225,8 +225,8 @@ bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { fclose(f); ScopeTable table = {0}; - symbol_table_init(&table); - compile(vm, &table, source); + /* symbol_table_init(&table); */ + /* compile(vm, &table, source); */ #ifndef STATIC free(table.scopes); #endif @@ -340,7 +340,9 @@ i32 main(i32 argc, char *argv[]) { &console_device_ops, 4); if (terminal_only_mode) { - while (step_vm(&vm)); + while (step_vm(&vm)) { + //printf("code[%d] = %s\n", vm.pc, opcode_to_string(vm.code[vm.pc])); + } return 0; } diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 6c1c109..c981d66 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -163,7 +163,7 @@ void asm_emit_u32(VM *vm, u32 value) { } Symbol *asm_symbol_table_lookup(ScopeTable *table, const char *name, u32 length, - i32 scope_ref) { + i32 scope_ref) { SymbolTable st = table->scopes[scope_ref]; for (u32 i = 0; i < st.count; i++) { if (st.symbols[i].name_length == length) { @@ -214,7 +214,8 @@ u8 asm_symbol_table_add(ScopeTable *table, Symbol s) { printf("code[%d] = %s\n", s.ref, s.name); } #endif - table->scopes[table->scope_ref].symbols[table->scopes[table->scope_ref].count] = s; + table->scopes[table->scope_ref] + .symbols[table->scopes[table->scope_ref].count] = s; u8 index = table->scopes[table->scope_ref].count; table->scopes[table->scope_ref].count++; return index; @@ -602,11 +603,8 @@ void asm_define_branch(VM *vm, ScopeTable *st) { int asm_get_instruction_byte_size(const char *opname) { - if (strcmp(opname, "return") == 0) { - return 2; - } - - if (strcmp(opname, "neg_int") == 0 || strcmp(opname, "abs_int") == 0 || + if (strcmp(opname, "return") == 0 || + strcmp(opname, "neg_int") == 0 || strcmp(opname, "abs_int") == 0 || strcmp(opname, "neg_nat") == 0 || strcmp(opname, "abs_nat") == 0 || strcmp(opname, "neg_real") == 0 || strcmp(opname, "abs_real") == 0 || strcmp(opname, "int_to_string") == 0 || @@ -699,9 +697,9 @@ int asm_get_instruction_byte_size(const char *opname) { token = next_token(); \ } \ /*printf("code[%d]=%s\n %d + %d = %d\n", vm->cp, op, \ - * asm_get_instruction_byte_size(op), vm->cp, vm->cp + \ - * asm_get_instruction_byte_size(op)); */ \ - vm->cp += asm_get_instruction_byte_size(op); \ + * asm_get_instruction_byte_size(op), vm->cp, vm->cp + \ + * asm_get_instruction_byte_size(op)); */ \ + vm->cp += asm_get_instruction_byte_size(op); \ } while (0); /** @@ -729,7 +727,8 @@ void asm_build_symbol_table(VM *vm, char *source, ScopeTable *st) { if (token.type == TOKEN_RBRACE) { i32 current_scope = st->scope_ref; i32 parent = st->scopes[current_scope].parent; - if (parent < 0) parent = 0; + if (parent < 0) + parent = 0; st->scope_ref = parent; continue; } @@ -773,6 +772,15 @@ void asm_build_symbol_table(VM *vm, char *source, ScopeTable *st) { asm_get_reg(next, st); vm->cp++; + + next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* assume it is not a heap value */ + vm->cp++; + continue; + } + + vm->cp++; asm_next_token_is(TOKEN_SEMICOLON); continue; } @@ -948,7 +956,7 @@ void asm_emit_bytecode(VM *vm, char *source, ScopeTable *st) { break; } if (token.type != TOKEN_EOF) { - + if (token.type == TOKEN_LBRACE) { st->count++; st->scopes[st->count].parent = st->scope_ref; @@ -959,7 +967,8 @@ void asm_emit_bytecode(VM *vm, char *source, ScopeTable *st) { if (token.type == TOKEN_RBRACE) { i32 current_scope = st->scope_ref; i32 parent = st->scopes[current_scope].parent; - if (parent < 0) parent = 0; + if (parent < 0) + parent = 0; st->scope_ref = parent; continue; } @@ -1012,12 +1021,40 @@ void asm_emit_bytecode(VM *vm, char *source, ScopeTable *st) { /* put 0xFF as return register */ asm_emit_byte(vm, 0xFF); vm->cp++; + + /* if no return then also no heap */ + asm_emit_byte(vm, false); + vm->cp++; continue; } u32 reg = asm_get_reg(next, st); asm_emit_byte(vm, reg); vm->cp++; + + next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* assume it is not a heap value */ + asm_emit_byte(vm, false); + vm->cp++; + continue; + } + + switch (next.type) { + case TOKEN_KEYWORD_TRUE: { + asm_emit_byte(vm, true); + break; + } + case TOKEN_KEYWORD_FALSE: { + asm_emit_byte(vm, false); + break; + } + default: + printf("Error: must be true or false for if it is a heap value"); + exit(1); + } + vm->cp++; + asm_next_token_is(TOKEN_SEMICOLON); continue; } diff --git a/src/tools/codegen.h b/src/tools/codegen.h index e5b2ccd..9ff76ee 100644 --- a/src/tools/codegen.h +++ b/src/tools/codegen.h @@ -2,6 +2,7 @@ #define UNDAR_CODEGEN_H #include "../vm/common.h" +#include "lexer.h" typedef enum { GLOBAL, LOCAL, VAR } ScopeType; typedef enum { @@ -66,6 +67,7 @@ struct symbol_s { char name[MAX_SYMBOL_NAME_LENGTH]; u8 name_length; SymbolType type; + TokenType return_type; ScopeType scope; u32 ref; // vm->mp if global, vm->pc local, register if var u32 size; // size of symbol diff --git a/src/tools/lexer.c b/src/tools/lexer.c index 977a437..847175a 100644 --- a/src/tools/lexer.c +++ b/src/tools/lexer.c @@ -30,7 +30,7 @@ static char advance() { return lexer.current[-1]; } -static char peek() { return *lexer.current; } +char peek() { return *lexer.current; } static char peek_next() { if (is_at_end()) diff --git a/src/tools/lexer.h b/src/tools/lexer.h index fb4448f..413d4da 100644 --- a/src/tools/lexer.h +++ b/src/tools/lexer.h @@ -86,5 +86,6 @@ typedef struct { void init_lexer(const char *source); Token next_token(); const char* token_type_to_string(TokenType type); +char peek(); #endif diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index 66c10f7..28b2aec 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -99,7 +99,6 @@ typedef enum { #define MAX_LOCALS 256 typedef struct frame_s { - u32 heap_mask[8]; u32 locals[MAX_LOCALS]; /* $0-$255 */ u32 start; /* start of memory block */ u32 end; /* end of memory block */ diff --git a/src/vm/vm.c b/src/vm/vm.c index fbc8e2a..d1d1da4 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -16,7 +16,7 @@ const char radix_set[11] = "0123456789"; u8 src1, src2; \ type value; \ type value2; \ - target = read_u32(vm, code, vm->pc); \ + target = *code_u32; \ vm->pc += 4; \ src1 = read_u8(vm, code, vm->pc); \ vm->pc++; \ @@ -58,23 +58,6 @@ const char radix_set[11] = "0123456789"; return true; \ } while (0) -/* Set heap status for a register in current frame */ -void set_heap_status(VM *vm, u8 reg, bool is_heap) { - u32 index = reg / 32; - u32 mask = 1 << (reg % 32); - - if (is_heap) { - vm->frames[vm->fp].heap_mask[index] |= mask; - } else { - vm->frames[vm->fp].heap_mask[index] &= ~mask; - } -} - -/* Check if register contains heap pointer */ -bool is_heap_value(VM *vm, u8 reg) { - u32 index = reg / 32; - return (vm->frames[vm->fp].heap_mask[index] >> (reg % 32)) & 1; -} u32 str_alloc(VM *vm, Frame *frame, const char *str, u32 length) { u32 str_addr = vm->mp; @@ -95,20 +78,20 @@ u32 str_alloc(VM *vm, Frame *frame, const char *str, u32 length) { */ bool step_vm(VM *vm) { /* Get current instruction & Advance to next instruction */ - u8 opcode = vm->code[vm->pc++]; Frame *frame = &vm->frames[vm->fp]; + u8 opcode = vm->code[vm->pc++]; + u32 *code_u32 = (u32*)(&vm->code[vm->pc]); switch (opcode) { case OP_EXIT: { - vm->flag = read_u32(vm, code, vm->pc); + vm->flag = *code_u32; return false; } case OP_CALL: { u8 N, return_reg, src_reg, args[MAX_LOCALS]; Frame *child; - u32 jmp, mask, i; - - jmp = read_u32(vm, code, vm->pc); + u32 jmp, i; + jmp = *code_u32; vm->pc += 4; N = vm->code[vm->pc++]; @@ -135,16 +118,13 @@ bool step_vm(VM *vm) { for (i = 0; i < N; i++) { src_reg = args[i]; child->locals[i] = frame->locals[src_reg]; - mask = 1 << (src_reg % 32); - if (frame->heap_mask[src_reg / 32] & mask) { - child->heap_mask[i / 32] |= 1 << (i % 32); - } } vm->pc = jmp; return true; } case OP_RETURN: { + bool is_heap_value; u8 child_return_reg; u32 value; u32 ptr; @@ -154,13 +134,14 @@ bool step_vm(VM *vm) { Frame *parent; child_return_reg = vm->code[vm->pc++]; + is_heap_value = vm->code[vm->pc++]; child = frame; parent = &vm->frames[vm->fp - 1]; - if (child_return_reg != 0xFF && parent->return_reg != 0xFF) { + if (child_return_reg != 0xFF) { value = child->locals[child_return_reg]; - if (is_heap_value(vm, child_return_reg)) { + if (is_heap_value) { ptr = value; size = *(u32 *)(vm->memory + ptr - 4); new_ptr = parent->end; @@ -171,11 +152,8 @@ bool step_vm(VM *vm) { memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); parent->end += size + 4; parent->locals[parent->return_reg] = new_ptr; - parent->heap_mask[parent->return_reg / 32] |= (1 << parent->return_reg); } else { parent->locals[parent->return_reg] = value; - parent->heap_mask[parent->return_reg / 32] &= - ~(1 << parent->return_reg); } } @@ -187,7 +165,7 @@ bool step_vm(VM *vm) { case OP_LOAD_IMM: { u32 v; u8 dest; - v = read_u32(vm, code, vm->pc); + v = *code_u32; vm->pc += 4; dest = read_u8(vm, code, vm->pc); vm->pc++; @@ -205,13 +183,12 @@ bool step_vm(VM *vm) { size = frame->locals[src1]; write_u32(vm, memory, vm->mp, size); vm->mp += (size + 4); - set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } case OP_LOAD_ABS_32: { u32 v, ptr; u8 dest; - ptr = read_u32(vm, code, vm->pc); + ptr = *code_u32; vm->pc += 4; v = read_u32(vm, memory, ptr); dest = read_u8(vm, code, vm->pc); @@ -222,7 +199,7 @@ bool step_vm(VM *vm) { case OP_LOAD_ABS_16: { u32 v, ptr; u8 dest; - ptr = read_u32(vm, code, vm->pc); + ptr = *code_u32; vm->pc += 4; v = read_u16(vm, memory, ptr); dest = read_u8(vm, code, vm->pc); @@ -233,7 +210,7 @@ bool step_vm(VM *vm) { case OP_LOAD_ABS_8: { u32 v, ptr; u8 dest; - ptr = read_u32(vm, code, vm->pc); + ptr = *code_u32; vm->pc += 4; v = read_u8(vm, memory, ptr); dest = read_u8(vm, code, vm->pc); @@ -544,23 +521,16 @@ bool step_vm(VM *vm) { dest = read_u8(vm, code, vm->pc); vm->pc++; frame->locals[dest] = frame->locals[src1]; - - if (is_heap_value(vm, src1)) { - set_heap_status(vm, dest, true); - } else { - set_heap_status(vm, dest, false); - } - return true; } case OP_JMP: { - u32 jmp = read_u32(vm, code, vm->pc); + u32 jmp = *code_u32; vm->pc = jmp; /* Jump to address */ return true; } case OP_JMPF: { /* error handling for syscall, jump if flag == 0 */ u32 mask; - u32 jmp = read_u32(vm, code, vm->pc); + u32 jmp = *code_u32; mask = -(u32)(vm->flag == 0); vm->pc = (jmp & mask) | (vm->pc & ~mask); return true; @@ -568,7 +538,7 @@ bool step_vm(VM *vm) { case OP_SYSCALL: { u32 syscall_id; - syscall_id = read_u32(vm, code, vm->pc); + syscall_id = *code_u32; vm->pc += 4; switch (syscall_id) { @@ -966,7 +936,6 @@ bool step_vm(VM *vm) { buffer[--i] = '0'; frame->locals[dest] = str_alloc(vm, frame, buffer + i, MAX_LEN_INT32 - i); - set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } case OP_NAT_TO_STRING: { @@ -991,7 +960,6 @@ bool step_vm(VM *vm) { buffer[--i] = '0'; /* Copy from buffer[i] to buffer + MAX_LEN_INT32 */ frame->locals[dest] = str_alloc(vm, frame, buffer + i, MAX_LEN_INT32 - i); - set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } case OP_REAL_TO_STRING: { @@ -1036,7 +1004,6 @@ bool step_vm(VM *vm) { buffer[--i] = '-'; frame->locals[dest] = str_alloc(vm, frame, buffer + i, MAX_LEN_REAL32 - i); - set_heap_status(vm, dest, true); /* Mark as heap pointer */ return true; } case OP_STRLEN: { diff --git a/test/add.rom b/test/add.rom index 7ced01fc9fae8f7f892509e8edb8dd33468d3f47..4375032ead0fd08cb6d9a17a30411c7588112537 100644 GIT binary patch delta 41 rcmbQvIFpf;0SMwJvKk2M02z$T|3Q2P83slsCMJf7?naCZ6D#Eae18S? delta 38 ocmbQqIGvG|0SICzvKk0!0~w6W|3Q2P83slsCZ>rVMiZ;$0B*ts^8f$< diff --git a/test/fib.rom b/test/fib.rom index f09af70d3e9f81ed2e30cdce403d2b250de2c6f6..2706bbb4e309c65ce250896e22eb3c87024041b2 100644 GIT binary patch delta 68 zcmdnZxQ~&Q0SI~~vTBM%0vU{q|3Q2P7A6qKK5U|sJ~IOo!^9$EaRzRnkbX*PnSM!X OQLerL10#^n#Q*?;>I(D# delta 65 zcmdnTxSNrc0SG!LvTBNi0~w5r|3Q2P7A6qKK6Ij!J|ok_5@RuLAYVTvwM@SxwJ2BL LfPoRn=VAZ=Zo>-g diff --git a/test/hello.rom b/test/hello.rom index 74ed6378c0b6d7890e4365c4e21a0c4a89fdcff8..8a7603ba5c4b859c725d0be1c8daac604f58e3f8 100644 GIT binary patch delta 18 XcmZo?>|kVN00Os(tTBuX6BE?{9$y1! delta 16 VcmeBRY-eO;00P&EtT7Xl)BqW|17`pL diff --git a/test/loop.rom b/test/loop.rom index 48f675eb1a74b105b694330d617f6620ec3e002e..f0e932bc2df858584b509d8e88eb933c4db472cc 100644 GIT binary patch delta 18 YcmZopF delta 16 WcmdnPxSNrc0SHPbvi3}zqy+#XMFe92 diff --git a/test/paint-bw.rom b/test/paint-bw.rom index 1f22c521cbf5b225334371b4ac3c992fac5726ba..a453d0a6c1ef65f0989f206fb2ca10a102cc0635 100644 GIT binary patch delta 32 jcmZ3+vXX_B0SKOKWX)z|WSCsQ7zQMdF`5I(zl@pyey<1% delta 25 fcmZ3WEapJcmBj@BY5)dk delta 26 gcmeC-?Brx+00Pa8tecr9?_>^}EWlzu*@MLe08r}&XaE2J diff --git a/test/simple.rom b/test/simple.rom index 9ea8e30f3987b053c87f58ec31024966ea6a3e46..34f5e59338fdf1403f12adbbbe36c1ac89a37259 100644 GIT binary patch delta 18 XcmZo+Y-MC+0D|C&teK1q6Z6CX9>xQC delta 16 VcmZo=Y++<&0D_>2teF$@#Q+)j19$)c diff --git a/test/window.rom b/test/window.rom index 002b86542fbc13977ac473d9d95d6c72df310d58..398202575b0e4e686597322fedb28e68c76a6cd9 100644 GIT binary patch delta 20 acmX@Zbe4&g0SLZtWaVXKWSA_(XaxW>?F2sn delta 18 YcmX@hbcTtQ0SLZrWaVX?EX-&H04*m3J^%m!