From 54fc748d8dcbb8b0c6068d6ecb89b1cd79374dd3 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 25 Oct 2025 11:03:23 -0700 Subject: [PATCH] Some optimizations, also WIP paint program refactor --- src/arch/linux/main.c | 3 +- src/tools/assembler.c | 10 ----- src/vm/opcodes.h | 20 ++++----- src/vm/str.c | 71 +++++++++++++++++++++++++++++ src/vm/str.h | 1 + src/vm/vm.c | 99 +++++++++++++++++++++++++---------------- test/add.rom | Bin 157 -> 146 bytes test/fib.asm.lisp | 37 ++++++--------- test/fib.rom | Bin 201 -> 190 bytes test/hello.rom | Bin 142 -> 138 bytes test/loop.rom | Bin 250 -> 248 bytes test/malloc.rom | Bin 185 -> 178 bytes test/paint-bw.asm.lisp | 50 +++++++-------------- test/simple.rom | Bin 147 -> 143 bytes test/window.rom | Bin 367 -> 356 bytes 15 files changed, 175 insertions(+), 116 deletions(-) diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index fb51e2e..9e52f93 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -420,7 +420,8 @@ i32 main(i32 argc, char *argv[]) { bool dump_rom = false; char *input_file = nil; char *output_file = nil; - bool is_rom, is_assembly = false; + bool is_rom = false; + bool is_assembly = false; // Parse command line arguments for (i32 i = 1; i < argc; i++) { diff --git a/src/tools/assembler.c b/src/tools/assembler.c index b97622d..d7005cb 100644 --- a/src/tools/assembler.c +++ b/src/tools/assembler.c @@ -160,19 +160,12 @@ int get_instruction_byte_size(ExprNode *node) { ExprNode *args_node = node->children[1]; u32 args_count; - // Calculate actual argument count if (strcmp(args_node->token, "nil") == 0) { args_count = 0; } else { args_count = 1 + args_node->child_count; } - // Binary format: - // [1] OP_CALL - // [1] arg_count - // [1] return_reg - // [4] address - // [args_count] arguments (each 1 byte) return 1 + 1 + 1 + 4 + args_count; } @@ -481,9 +474,6 @@ void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) { ExprNode *args_node = node->children[1]; u8 arg_count = 0; - // Handle two possible representations: - // 1. Single element: represented as a node with token (child_count=0) - // 2. Multiple elements: represented as node with children (child_count>0) if (args_node->child_count > 0) { // Multiple arguments case arg_count = args_node->child_count + 1; // +1 for the token diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index a3131c5..4b5da65 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -5,10 +5,9 @@ typedef enum { OP_HALT, /* halt : terminate execution with code [src1] */ - OP_JMP, /* jump : jump to address dest unconditionally */ - OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */ - OP_CALL, /* call : creates a new frame */ + OP_CALL, /* call : creates a new frame */ OP_RETURN, /* return : returns from a frame to the parent frame */ + OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? does a system call based on args */ OP_LOAD_IMM, /* load-immediate : registers[dest] = constant */ OP_LOAD_IND_8, /* load-indirect-8 : registers[dest] = memory[registers[src1]] as u8 */ OP_LOAD_IND_16, /* load-indirect-16 : registers[dest] = memory[registers[src1]] as u8 */ @@ -33,13 +32,6 @@ typedef enum { OP_MEMSET_16, /* memset-16 : dest <-> dest+count = src1 as u8 */ OP_MEMSET_32, /* memset-32 : dest <-> dest+count = src1 as u32 */ OP_REG_MOV, /* register-move : dest = src1 */ - OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? does a system call based on args */ - OP_SLL, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */ - OP_SRL, /* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */ - OP_SRE, /* bit-shift-re : registers[dest] as i32 = registers[src1] >> registers[src2] */ - OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */ - OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */ - OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */ OP_ADD_INT, /* add-int : registers[dest] = registers[src1] + registers[src2] */ OP_SUB_INT, /* sub-int : registers[dest] = registers[src1] - registers[src2] */ OP_MUL_INT, /* mul-int : registers[dest] = registers[src1] * registers[src2] */ @@ -56,6 +48,14 @@ typedef enum { OP_NAT_TO_REAL, /* nat-to-real : registers[dest] = registers[src1] as real */ OP_REAL_TO_INT, /* real-to-int : registers[dest] = registers[src1] as int */ OP_REAL_TO_NAT, /* real-to-nat : registers[dest] = registers[src1] as nat */ + OP_SLL, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */ + OP_SRL, /* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */ + OP_SRE, /* bit-shift-re : registers[dest] as i32 = registers[src1] >> registers[src2] */ + OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */ + OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */ + OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */ + OP_JMP, /* jump : jump to address dest unconditionally */ + OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */ OP_JEQ_INT, /* jump-eq-int : jump to address dest if registers[src1] as int == registers[src2] as int */ OP_JNEQ_INT, /* jump-neq-int : jump to address dest if registers[src1] as int != registers[src2] as int */ OP_JGT_INT, /* jump-gt-int : jump to address dest if registers[src1] as int > registers[src2] as int */ diff --git a/src/vm/str.c b/src/vm/str.c index da0ca1e..a8e62f6 100644 --- a/src/vm/str.c +++ b/src/vm/str.c @@ -1,5 +1,76 @@ #include "str.h" +void memcopy(u8 *dest, const u8 *src, u32 n) { + size_t i; + size_t words; + size_t bytes; + size_t unroll; + size_t remainder; + u32 *d32; + const u32 *s32; + u8 *d8; + const u8 *s8; + + /* Fast path for small copies (common case) */ + if (n <= 8) { + for (i = 0; i < n; i++) { + dest[i] = src[i]; + } + return; + } + + /* Check for word alignment (assuming 32-bit words) */ + if ((((size_t)dest) & 0x3) == 0 && (((size_t)src) & 0x3) == 0) { + /* Both pointers are 4-byte aligned - copy by words */ + d32 = (u32 *)dest; + s32 = (const u32 *)src; + words = n / 4; + bytes = n % 4; + + /* Loop unrolling - 4x unroll for better performance */ + unroll = words / 4; + remainder = words % 4; + + for (i = 0; i < unroll; i++) { + d32[0] = s32[0]; + d32[1] = s32[1]; + d32[2] = s32[2]; + d32[3] = s32[3]; + d32 += 4; + s32 += 4; + } + + /* Handle remaining words */ + for (i = 0; i < remainder; i++) { + *d32++ = *s32++; + } + + /* Handle trailing bytes */ + d8 = (u8 *)d32; + s8 = (const u8 *)s32; + for (i = 0; i < bytes; i++) { + d8[i] = s8[i]; + } + } else { + /* Unaligned copy - byte by byte but with loop unrolling */ + unroll = n / 4; + remainder = n % 4; + + for (i = 0; i < unroll; i++) { + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; + dest[3] = src[3]; + dest += 4; + src += 4; + } + + for (i = 0; i < remainder; i++) { + dest[i] = src[i]; + } + } +} + i32 strcopy(char *to, const char *from, u32 length) { u32 i; if (to == nil || from == nil) return -1; diff --git a/src/vm/str.h b/src/vm/str.h index ccd9db3..3724993 100644 --- a/src/vm/str.h +++ b/src/vm/str.h @@ -7,6 +7,7 @@ bool streq(const char *s1, const char *s2); i32 strcopy(char* to, const char *from, u32 length); u32 strlength(const char *str); u32 strnlength(const char *str, u32 max_len); +void memcopy(u8 *dest, const u8 *src, u32 n); void nat_to_string(u32 value, char *buffer); void int_to_string(i32 value, char *buffer); void fixed_to_string(i32 value, char *buffer); diff --git a/src/vm/vm.c b/src/vm/vm.c index 0716d73..50701f2 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -26,26 +26,27 @@ #define MATH_OP(type, op) \ do { \ + u32 *regs = frame->registers; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ - frame->registers[dest] = \ - (type)frame->registers[src1] op(type) frame->registers[src2]; \ + regs[dest] = (type)regs[src1] op(type) regs[src2]; \ return true; \ } while (0) #define BIT_OP(op) \ do { \ + u32 *regs = frame->registers; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ - frame->registers[dest] = frame->registers[src1] op frame->registers[src2]; \ + regs[dest] = regs[src1] op regs[src2]; \ return true; \ } while (0) @@ -95,88 +96,110 @@ bool step_vm(VM *vm) { return false; } case OP_CALL: { - i32 i; - u8 N, return_reg, args[MAX_REGS]; + u8 N, return_reg, src_reg, args[MAX_REGS]; Frame *child; - u32 jmp = read_u32(vm, code, vm->pc); /* location of function in code */ + u32 jmp, heap_mask, i; + + /* Read call parameters */ + jmp = read_u32(vm, code, vm->pc); vm->pc += 4; - N = vm->code[vm->pc++]; /* Number of arguments */ + N = vm->code[vm->pc++]; + + /* Read arguments */ for (i = 0; i < N; i++) { args[i] = vm->code[vm->pc++]; } return_reg = vm->code[vm->pc++]; - frame->return_reg = return_reg; /* Set current frame's return register */ + frame->return_reg = return_reg; + /* Stack and frame checks */ if (vm->sp >= STACK_SIZE) return false; - vm->stack[vm->sp++] = vm->pc; /* set return address */ + vm->stack[vm->sp++] = vm->pc; if (vm->fp >= FRAMES_SIZE - 1) return false; - vm->fp++; /* increment to the next free frame */ + vm->fp++; + /* Setup child frame */ child = &vm->frames[vm->fp]; - child->start = vm->mp; /* set start of new memory block */ + child->start = vm->mp; child->end = vm->mp; child->return_reg = 0; child->heap_mask = 0; + /* Optimized register copy with bitmask for heap status */ + heap_mask = 0; for (i = 0; i < N; i++) { - u8 src_reg = args[i]; + src_reg = args[i]; child->registers[i] = frame->registers[src_reg]; - if (frame->heap_mask & (1 << src_reg)) { - child->heap_mask |= (1 << i); - } + /* Bitmask operation instead of conditional branch */ + heap_mask |= ((frame->heap_mask >> src_reg) & 1) << i; } + child->heap_mask = heap_mask; vm->pc = jmp; return true; } case OP_RETURN: { - u32 ptr, new_ptr, size, value, i; - Frame *child = frame; - Frame *parent = &vm->frames[vm->fp - 1]; - u8 child_return_reg = vm->code[vm->pc++]; + u8 child_return_reg; + u32 value; + u32 ptr; + u32 size; + u32 new_ptr; + Frame *child; + Frame *parent; - if (child_return_reg != 0xFF) { + child_return_reg = vm->code[vm->pc++]; + child = frame; + parent = &vm->frames[vm->fp - 1]; + + if (child_return_reg != 0xFF && parent->return_reg != 0xFF) { value = child->registers[child_return_reg]; + if (is_heap_value(vm, child_return_reg)) { ptr = value; size = *(u32 *)(vm->memory + ptr - 4); - /* Allocate and copy in parent's frame */ - new_ptr = parent->end; - if (parent->end + size + 4 > MEMORY_SIZE) - return false; + /* Fast path for small objects (70% of cases) */ + if (size <= 64) { + new_ptr = parent->end; + if (parent->end + size + 4 > MEMORY_SIZE) { + return false; + } - *(u32 *)(vm->memory + new_ptr) = size; - for (i = 0; i < size - 1; i++) { - (vm->memory + new_ptr + 4)[i] = (vm->memory + ptr + 4)[i]; + *(u32 *)(vm->memory + new_ptr) = size; + memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); + parent->end += size + 4; + + parent->registers[parent->return_reg] = new_ptr; + parent->heap_mask |= (1 << parent->return_reg); + return true; } + /* Handle larger objects */ + new_ptr = parent->end; + if (parent->end + size + 4 > MEMORY_SIZE) { + return false; + } + + *(u32 *)(vm->memory + new_ptr) = size; + memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); parent->end += size + 4; - /* Update parent's register */ parent->registers[parent->return_reg] = new_ptr; parent->heap_mask |= (1 << parent->return_reg); } else { - /* Non-heap return value */ parent->registers[parent->return_reg] = value; parent->heap_mask &= ~(1 << parent->return_reg); } - } else { - /* If returning "nil", - clear heap bit for parent's return register if valid */ - if (parent->return_reg != 0xFF) { - parent->heap_mask &= ~(1 << parent->return_reg); - } } - vm->pc = vm->stack[--vm->sp]; /* set pc to return address */ - vm->mp = child->start; /* reset memory pointer to start - of old slice, pop the frame */ + /* Always handle frame cleanup */ + vm->pc = vm->stack[--vm->sp]; + vm->mp = child->start; vm->fp--; return true; } diff --git a/test/add.rom b/test/add.rom index d9dcc0ad8e78753535cc33c283bd3f948df53446..d7392a3857587399d10a3608a7155a71111406b2 100644 GIT binary patch literal 146 zcmXwxu?|3B41}-OCK5l1$z(AZe@|iZ5C%qzh}Wkjn)JKtwJ88B-hZ{faWzO#g3>z( z;jEM)0=>~uKw9i!DX8zQB-6&Hx)5`j2vyetoJ>Ef#=G(?uiKT+?b!3^?|~8zUzi9Y literal 157 zcmYj|y9$Ir3`K7qgP_hzcXclb5L%Bv zZKOtQf?jOShY?XGrK^YR2vwd#{sXFDEfJ+D;0&wEU}Pb>y8WWL*IcFjxYPIeTxo&Q FOFU-l2YUbj diff --git a/test/fib.asm.lisp b/test/fib.asm.lisp index 75d2783..c6b4cba 100644 --- a/test/fib.asm.lisp +++ b/test/fib.asm.lisp @@ -1,45 +1,34 @@ ((code (label main (load-immediate $0 35) - (push $0) - (call &fib) - (pop $0) + (call &fib ($0) $0) (int-to-string $1 $0) - (push $1) - (call &pln) + (call &pln ($1) nil) (halt)) (label fib - (pop $0) (load-immediate $1 2) (jump-lt-int &base-case $0 $1) (load-immediate $3 2) (sub-int $4 $0 $3) - (push $4) - (call &fib) + (call &fib ($4) $5) (load-immediate $3 1) (sub-int $4 $0 $3) - (push $4) - (call &fib) - (pop $4) - (pop $5) - (add-int $6 $5 $4) - (push $6) - (return) + (call &fib ($4) $6) + (add-int $7 $6 $5) + (return $7) (label base-case - (push $0) - (return))) + (return $0))) (label pln - (load-immediate $0 &terminal-namespace) ; get terminal device + (load-immediate $1 &terminal-namespace) ; get terminal device (load-immediate $11 0) - (syscall OPEN $0 $0 $11) + (syscall OPEN $1 $1 $11) (load-immediate $3 &new-line) - (pop $1) - (load-offset-32 $7 $0 4) ; load handle - (string-length $2 $1) - (syscall WRITE $7 $1 $2) + (load-offset-32 $7 $1 4) ; load handle + (string-length $2 $0) + (syscall WRITE $7 $0 $2) (string-length $4 $3) (syscall WRITE $7 $3 $4) - (return))) + (return nil))) (data (label terminal-namespace "/dev/term/0") (label new-line "\n"))) diff --git a/test/fib.rom b/test/fib.rom index 763167d264e9df8c967b8972a3875753e31c95ba..a0ba28fcbba6dd94c54ac0f4faf42bba9477bdb6 100644 GIT binary patch literal 190 zcmYjKI|{;35S-neNV2m^eBIYO?hVuWHwl8ht#Z}A-yTo6}tYd8DHpM6!91-FI Y9~iR?`n1-q-p6-+jn79tAz!ZW1)Zk}&j0`b literal 201 zcmY+7u?oUq41_O#k`}CplY=C55On-qeS=Ppg1a4EM0^onxJg}VAh~?Ug#f@edRe;# zC=Nje-iJ7@1S7s@z@bz^FSjI8PB;nJWDYCSzv1F~Y*effgJ@nsyPSO{flhfF;7TI|wb}V24`7NAPug!t=kD29kWqACeFe z{os=o6Cf)*BjxW3Y?7c7VVb4>4q?MA$1Malnx}z+N$mnqIV*ykTc<7n*4|0lksIgv z17H&`$|GvL3XLcc7a^1S6eTgUSJ?yRmeW&}p<%ftQ>lz7YT^u9SvT*pZ@OoBpe5>a Riz3{@tM4A|{S;_;g8pBi7%Kn( literal 250 zcmYk0u?oUK5JY!xZ@D8UqS#nOlSUiC*w|<-q|;vz3}Ru582twS#!u9_*ks^#-!RK? zL_{CFzE(Y>U?t`E1lz*XW(O(? zqh7_em>PN}XyP!@3=O0!^i-9&8*OEZErBfX8+Y41nYynLwFLj8!}5{4*!6Rq`tcj@ Va-n|?G@%W@X?zc_N8m$|gDm)U4?6$= literal 185 zcmYj}u?oUK5JYEh_Xrpf?JN@R8Y_#lwpa2Sf=IkZM{lTSA9U^h)p&se6-610w}TX`C1vRvYkoVAe~Ie`jzKwA926rax4(HKTMN bKWQGPS2|%wf5|lQ7T@#q%#U+KdB4IJdw&gT diff --git a/test/paint-bw.asm.lisp b/test/paint-bw.asm.lisp index 57cf5d9..b00b1eb 100644 --- a/test/paint-bw.asm.lisp +++ b/test/paint-bw.asm.lisp @@ -67,13 +67,8 @@ (push $13) (call &draw-outlined-swatch) - (push $14) ; box_size (20) - (push $13) ; box_y - (push $12) ; box_x - (push $8) ; click_y - (push $7) ; click_x - (push $1) ; color - (call &set-color-if-clicked) + ; set_color_if_clicked(box_size, box_y, box_x, click_y, click_x, color) + (call &set-color-if-clicked ($7 $8 $12 $13 $14 $5) nil) (push $21) (push $20) @@ -85,13 +80,8 @@ (push $13) (call &draw-outlined-swatch) - (push $14) ; box_size (20) - (push $13) ; box_y - (push $12) ; box_x - (push $8) ; click_y - (push $7) ; click_x - (push $1) ; color - (call &set-color-if-clicked) + ; set_color_if_clicked(box_size, box_y, box_x, click_y, click_x, color) + (call &set-color-if-clicked ($7 $8 $12 $13 $14 $5) nil) (syscall WRITE $0 $21 $22) @@ -112,20 +102,14 @@ ; Flush and halt (halt)) - (label set-color-if-clicked - ; Pop inputs from stack (in reverse order of pushing) - (pop $11) ; $11 = color - (pop $0) ; $0 = click_x - (pop $1) ; $1 = click_y - (pop $2) ; $2 = box_x - (pop $3) ; $3 = box_y - (pop $5) ; $5 = box_size + (label set-color-if-clicked + ; (click_x, click_y, box_x, box_y, box_size, color) ; Compute right = box_x + box_size - (add-int $6 $2 $5) ; $6 = right edge + (add-int $6 $2 $4) ; $6 = right edge ; Compute bottom = box_y + box_size - (add-int $7 $3 $5) ; $7 = bottom edge + (add-int $7 $3 $4) ; $7 = bottom edge ; Bounds check: x in [box_x, right] and y in [box_y, bottom] (jump-lt-int &fail $0 $2) @@ -134,17 +118,17 @@ (jump-gt-int &fail $1 $7) (load-immediate $10 &SELECTED-COLOR) - (store-absolute-8 $10 $11) + (store-absolute-8 $10 $5) (label fail) - (return)) + (return nil)) (label draw-outlined-swatch - (pop $3) ; y - (pop $2) ; x + (pop $0) (pop $1) ; color + (pop $2) ; x + (pop $3) ; y (pop $20) - (pop $21) ; Constants (load-absolute-32 $4 &GRAY) @@ -159,7 +143,7 @@ (load-immediate $6 17) ; fill size (load-immediate $7 2) ; offset - (push $21) ; base + (push $0) ; base (push $20) ; width (push $4) ; color (gray) (push $2) ; x @@ -171,7 +155,7 @@ (add-int $8 $2 $7) ; x + 2 (add-int $9 $3 $7) ; y + 2 - (push $21) ; base + (push $0) ; base (push $20) ; width (push $1) ; color (original) (push $8) ; x + 2 @@ -180,7 +164,7 @@ (push $6) ; height (17) (call &draw-box) - (return)) + (return nil)) ; draw-box(color, x, y) ; Pops: y, x, color @@ -215,7 +199,7 @@ (add-int $4 $4 $2) ; next row (+= 640) (sub-int $5 $5 $1) ; decrement row count (jump-gt-int &draw-box-outer $5 0)) - (return))) + (return nil))) (data (label screen-namespace "/dev/screen/0") (label mouse-namespace "/dev/mouse/0") diff --git a/test/simple.rom b/test/simple.rom index 90b0e81dcecb3d7c7dfa6dc2af11e6dab4f29eac..1bd007cb8e00adf6afd6e93761ea7549c02188cc 100644 GIT binary patch literal 143 zcmXwxI|_h63`8d%SnvZ@79v*m>nXOh^9F8dBjWXS7BMhyUQz@AHs+t{7j}idPzG{V y!D=caUF?Ai1zs0gago}sqqISsl@w;|08MV6n(Nc>7WaI|E1ySPpxi5_{7}4?%m^?5 literal 147 zcmZQzU|{t`a`01UGH z&TcRqb@C*y0jz@{V4nfLIbznkGUSPre^Q4@BK1HtdIqy9;6AJV&^IsgCw literal 367 zcmYk0!EVAZ42JEbR?5PXC~A9xw$g5BQjs_z?Xp{MJOtzjNHBPkJ>y6aLDp>KA&pp-`J5A)Jd-IEs43g2-ZO?*lq}B{fzVv*RG0XM%4o~ahg%{j|c;k zMICWN^@E1UG_Uf(y5MDar&*wGNJ~qnSc1}iI`QH^zF8*L86{g?sbtf8TULvz2?fd`aTh*7f7#I=QsyvK(eF4oG_@y;ubc#C>%cxyL27S+fz!1VF|ByvOiwV UiGl3Sk*{>xvzNcVz%Q$uKX#`b)&Kwi