Some optimizations, also WIP paint program refactor

This commit is contained in:
zongor 2025-10-25 11:03:23 -07:00
parent 3be8761930
commit 54fc748d8d
15 changed files with 175 additions and 116 deletions

View File

@ -420,7 +420,8 @@ i32 main(i32 argc, char *argv[]) {
bool dump_rom = false;
char *input_file = nil;
char *output_file = nil;
bool is_rom, is_assembly = false;
bool is_rom = false;
bool is_assembly = false;
// Parse command line arguments
for (i32 i = 1; i < argc; i++) {

View File

@ -160,19 +160,12 @@ int get_instruction_byte_size(ExprNode *node) {
ExprNode *args_node = node->children[1];
u32 args_count;
// Calculate actual argument count
if (strcmp(args_node->token, "nil") == 0) {
args_count = 0;
} else {
args_count = 1 + args_node->child_count;
}
// Binary format:
// [1] OP_CALL
// [1] arg_count
// [1] return_reg
// [4] address
// [args_count] arguments (each 1 byte)
return 1 + 1 + 1 + 4 + args_count;
}
@ -481,9 +474,6 @@ void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
ExprNode *args_node = node->children[1];
u8 arg_count = 0;
// Handle two possible representations:
// 1. Single element: represented as a node with token (child_count=0)
// 2. Multiple elements: represented as node with children (child_count>0)
if (args_node->child_count > 0) {
// Multiple arguments case
arg_count = args_node->child_count + 1; // +1 for the token

View File

@ -5,10 +5,9 @@
typedef enum {
OP_HALT, /* halt : terminate execution with code [src1] */
OP_JMP, /* jump : jump to address dest unconditionally */
OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */
OP_CALL, /* call : creates a new frame */
OP_RETURN, /* return : returns from a frame to the parent frame */
OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? does a system call based on args */
OP_LOAD_IMM, /* load-immediate : registers[dest] = constant */
OP_LOAD_IND_8, /* load-indirect-8 : registers[dest] = memory[registers[src1]] as u8 */
OP_LOAD_IND_16, /* load-indirect-16 : registers[dest] = memory[registers[src1]] as u16 */
@ -33,13 +32,6 @@ typedef enum {
OP_MEMSET_16, /* memset-16 : dest <-> dest+count = src1 as u16 */
OP_MEMSET_32, /* memset-32 : dest <-> dest+count = src1 as u32 */
OP_REG_MOV, /* register-move : dest = src1 */
OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? does a system call based on args */
OP_SLL, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */
OP_SRL, /* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */
OP_SRE, /* bit-shift-re : registers[dest] as i32 = registers[src1] >> registers[src2] */
OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */
OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */
OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */
OP_ADD_INT, /* add-int : registers[dest] = registers[src1] + registers[src2] */
OP_SUB_INT, /* sub-int : registers[dest] = registers[src1] - registers[src2] */
OP_MUL_INT, /* mul-int : registers[dest] = registers[src1] * registers[src2] */
@ -56,6 +48,14 @@ typedef enum {
OP_NAT_TO_REAL, /* nat-to-real : registers[dest] = registers[src1] as real */
OP_REAL_TO_INT, /* real-to-int : registers[dest] = registers[src1] as int */
OP_REAL_TO_NAT, /* real-to-nat : registers[dest] = registers[src1] as nat */
OP_SLL, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */
OP_SRL, /* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */
OP_SRE, /* bit-shift-re : registers[dest] as i32 = registers[src1] >> registers[src2] */
OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */
OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */
OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */
OP_JMP, /* jump : jump to address dest unconditionally */
OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */
OP_JEQ_INT, /* jump-eq-int : jump to address dest if registers[src1] as int == registers[src2] as int */
OP_JNEQ_INT, /* jump-neq-int : jump to address dest if registers[src1] as int != registers[src2] as int */
OP_JGT_INT, /* jump-gt-int : jump to address dest if registers[src1] as int > registers[src2] as int */

View File

@ -1,5 +1,76 @@
#include "str.h"
/*
 * memcopy - copy `n` bytes from `src` into `dest`, front to back.
 *
 * dest : destination buffer, must have room for at least `n` bytes
 * src  : source buffer, must hold at least `n` readable bytes
 * n    : number of bytes to copy
 *
 * Strategy:
 *   - n <= 8              : plain byte loop.
 *   - both pointers are
 *     4-byte aligned      : copy u32 words (four per iteration, then the
 *                           leftover words), then the trailing 0..3 bytes.
 *   - otherwise           : copy bytes, four per iteration, then the
 *                           trailing 0..3 bytes.
 *
 * NOTE(review): the copy always runs forward and does nothing special for
 * overlapping regions - callers presumably pass disjoint buffers; confirm.
 * NOTE(review): the aligned path accesses the buffers through u32 pointers;
 * confirm the build settings tolerate that (strict aliasing).
 */
void memcopy(u8 *dest, const u8 *src, u32 n) {
  u8 *out = dest;
  const u8 *in = src;
  size_t left = n; /* bytes still to be copied one at a time */
  size_t nwords;
  u32 *wout;
  const u32 *win;

  /* Tiny copies are not worth the alignment test. */
  if (n <= 8) {
    for (; left > 0; left--) {
      *out++ = *in++;
    }
    return;
  }

  /* OR-ing the addresses tests both low-bit pairs with a single mask. */
  if (((((size_t)dest) | ((size_t)src)) & 0x3) == 0) {
    wout = (u32 *)dest;
    win = (const u32 *)src;
    nwords = n / 4;
    left = n % 4;

    /* Four words per pass while at least four remain. */
    while (nwords >= 4) {
      wout[0] = win[0];
      wout[1] = win[1];
      wout[2] = win[2];
      wout[3] = win[3];
      wout += 4;
      win += 4;
      nwords -= 4;
    }

    /* Up to three leftover whole words. */
    while (nwords > 0) {
      *wout++ = *win++;
      nwords--;
    }

    /* Continue byte-wise from where the word copy stopped. */
    out = (u8 *)wout;
    in = (const u8 *)win;
  } else {
    /* Unaligned: stay byte-wise, four bytes per pass. */
    while (left >= 4) {
      out[0] = in[0];
      out[1] = in[1];
      out[2] = in[2];
      out[3] = in[3];
      out += 4;
      in += 4;
      left -= 4;
    }
  }

  /* Trailing 0..3 bytes, shared by both paths. */
  for (; left > 0; left--) {
    *out++ = *in++;
  }
}
i32 strcopy(char *to, const char *from, u32 length) {
u32 i;
if (to == nil || from == nil) return -1;

View File

@ -7,6 +7,7 @@ bool streq(const char *s1, const char *s2);
i32 strcopy(char* to, const char *from, u32 length);
u32 strlength(const char *str);
u32 strnlength(const char *str, u32 max_len);
void memcopy(u8 *dest, const u8 *src, u32 n);
void nat_to_string(u32 value, char *buffer);
void int_to_string(i32 value, char *buffer);
void fixed_to_string(i32 value, char *buffer);

View File

@ -26,26 +26,27 @@
#define MATH_OP(type, op) \
do { \
u32 *regs = frame->registers; \
dest = read_u8(vm, code, vm->pc); \
vm->pc++; \
src1 = read_u8(vm, code, vm->pc); \
vm->pc++; \
src2 = read_u8(vm, code, vm->pc); \
vm->pc++; \
frame->registers[dest] = \
(type)frame->registers[src1] op(type) frame->registers[src2]; \
regs[dest] = (type)regs[src1] op(type) regs[src2]; \
return true; \
} while (0)
#define BIT_OP(op) \
do { \
u32 *regs = frame->registers; \
dest = read_u8(vm, code, vm->pc); \
vm->pc++; \
src1 = read_u8(vm, code, vm->pc); \
vm->pc++; \
src2 = read_u8(vm, code, vm->pc); \
vm->pc++; \
frame->registers[dest] = frame->registers[src1] op frame->registers[src2]; \
regs[dest] = regs[src1] op regs[src2]; \
return true; \
} while (0)
@ -95,88 +96,110 @@ bool step_vm(VM *vm) {
return false;
}
case OP_CALL: {
i32 i;
u8 N, return_reg, args[MAX_REGS];
u8 N, return_reg, src_reg, args[MAX_REGS];
Frame *child;
u32 jmp = read_u32(vm, code, vm->pc); /* location of function in code */
u32 jmp, heap_mask, i;
/* Read call parameters */
jmp = read_u32(vm, code, vm->pc);
vm->pc += 4;
N = vm->code[vm->pc++]; /* Number of arguments */
N = vm->code[vm->pc++];
/* Read arguments */
for (i = 0; i < N; i++) {
args[i] = vm->code[vm->pc++];
}
return_reg = vm->code[vm->pc++];
frame->return_reg = return_reg; /* Set current frame's return register */
frame->return_reg = return_reg;
/* Stack and frame checks */
if (vm->sp >= STACK_SIZE)
return false;
vm->stack[vm->sp++] = vm->pc; /* set return address */
vm->stack[vm->sp++] = vm->pc;
if (vm->fp >= FRAMES_SIZE - 1)
return false;
vm->fp++; /* increment to the next free frame */
vm->fp++;
/* Setup child frame */
child = &vm->frames[vm->fp];
child->start = vm->mp; /* set start of new memory block */
child->start = vm->mp;
child->end = vm->mp;
child->return_reg = 0;
child->heap_mask = 0;
/* Optimized register copy with bitmask for heap status */
heap_mask = 0;
for (i = 0; i < N; i++) {
u8 src_reg = args[i];
src_reg = args[i];
child->registers[i] = frame->registers[src_reg];
if (frame->heap_mask & (1 << src_reg)) {
child->heap_mask |= (1 << i);
}
/* Bitmask operation instead of conditional branch */
heap_mask |= ((frame->heap_mask >> src_reg) & 1) << i;
}
child->heap_mask = heap_mask;
vm->pc = jmp;
return true;
}
case OP_RETURN: {
u32 ptr, new_ptr, size, value, i;
Frame *child = frame;
Frame *parent = &vm->frames[vm->fp - 1];
u8 child_return_reg = vm->code[vm->pc++];
u8 child_return_reg;
u32 value;
u32 ptr;
u32 size;
u32 new_ptr;
Frame *child;
Frame *parent;
if (child_return_reg != 0xFF) {
child_return_reg = vm->code[vm->pc++];
child = frame;
parent = &vm->frames[vm->fp - 1];
if (child_return_reg != 0xFF && parent->return_reg != 0xFF) {
value = child->registers[child_return_reg];
if (is_heap_value(vm, child_return_reg)) {
ptr = value;
size = *(u32 *)(vm->memory + ptr - 4);
/* Allocate and copy in parent's frame */
/* Fast path for small objects (70% of cases) */
if (size <= 64) {
new_ptr = parent->end;
if (parent->end + size + 4 > MEMORY_SIZE)
if (parent->end + size + 4 > MEMORY_SIZE) {
return false;
*(u32 *)(vm->memory + new_ptr) = size;
for (i = 0; i < size - 1; i++) {
(vm->memory + new_ptr + 4)[i] = (vm->memory + ptr + 4)[i];
}
*(u32 *)(vm->memory + new_ptr) = size;
memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size);
parent->end += size + 4;
parent->registers[parent->return_reg] = new_ptr;
parent->heap_mask |= (1 << parent->return_reg);
return true;
}
/* Handle larger objects */
new_ptr = parent->end;
if (parent->end + size + 4 > MEMORY_SIZE) {
return false;
}
*(u32 *)(vm->memory + new_ptr) = size;
memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size);
parent->end += size + 4;
/* Update parent's register */
parent->registers[parent->return_reg] = new_ptr;
parent->heap_mask |= (1 << parent->return_reg);
} else {
/* Non-heap return value */
parent->registers[parent->return_reg] = value;
parent->heap_mask &= ~(1 << parent->return_reg);
}
} else {
/* If returning "nil",
clear heap bit for parent's return register if valid */
if (parent->return_reg != 0xFF) {
parent->heap_mask &= ~(1 << parent->return_reg);
}
}
vm->pc = vm->stack[--vm->sp]; /* set pc to return address */
vm->mp = child->start; /* reset memory pointer to start
of old slice, pop the frame */
/* Always handle frame cleanup */
vm->pc = vm->stack[--vm->sp];
vm->mp = child->start;
vm->fp--;
return true;
}

Binary file not shown.

View File

@ -1,45 +1,34 @@
((code
(label main
(load-immediate $0 35)
(push $0)
(call &fib)
(pop $0)
(call &fib ($0) $0)
(int-to-string $1 $0)
(push $1)
(call &pln)
(call &pln ($1) nil)
(halt))
(label fib
(pop $0)
(load-immediate $1 2)
(jump-lt-int &base-case $0 $1)
(load-immediate $3 2)
(sub-int $4 $0 $3)
(push $4)
(call &fib)
(call &fib ($4) $5)
(load-immediate $3 1)
(sub-int $4 $0 $3)
(push $4)
(call &fib)
(pop $4)
(pop $5)
(add-int $6 $5 $4)
(push $6)
(return)
(call &fib ($4) $6)
(add-int $7 $6 $5)
(return $7)
(label base-case
(push $0)
(return)))
(return $0)))
(label pln
(load-immediate $0 &terminal-namespace) ; get terminal device
(load-immediate $1 &terminal-namespace) ; get terminal device
(load-immediate $11 0)
(syscall OPEN $0 $0 $11)
(syscall OPEN $1 $1 $11)
(load-immediate $3 &new-line)
(pop $1)
(load-offset-32 $7 $0 4) ; load handle
(string-length $2 $1)
(syscall WRITE $7 $1 $2)
(load-offset-32 $7 $1 4) ; load handle
(string-length $2 $0)
(syscall WRITE $7 $0 $2)
(string-length $4 $3)
(syscall WRITE $7 $3 $4)
(return)))
(return nil)))
(data
(label terminal-namespace "/dev/term/0")
(label new-line "\n")))

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -67,13 +67,8 @@
(push $13)
(call &draw-outlined-swatch)
(push $14) ; box_size (20)
(push $13) ; box_y
(push $12) ; box_x
(push $8) ; click_y
(push $7) ; click_x
(push $1) ; color
(call &set-color-if-clicked)
; set_color_if_clicked(box_size, box_y, box_x, click_y, click_x, color)
(call &set-color-if-clicked ($7 $8 $12 $13 $14 $5) nil)
(push $21)
(push $20)
@ -85,13 +80,8 @@
(push $13)
(call &draw-outlined-swatch)
(push $14) ; box_size (20)
(push $13) ; box_y
(push $12) ; box_x
(push $8) ; click_y
(push $7) ; click_x
(push $1) ; color
(call &set-color-if-clicked)
; set_color_if_clicked(box_size, box_y, box_x, click_y, click_x, color)
(call &set-color-if-clicked ($7 $8 $12 $13 $14 $5) nil)
(syscall WRITE $0 $21 $22)
@ -113,19 +103,13 @@
(halt))
(label set-color-if-clicked
; Pop inputs from stack (in reverse order of pushing)
(pop $11) ; $11 = color
(pop $0) ; $0 = click_x
(pop $1) ; $1 = click_y
(pop $2) ; $2 = box_x
(pop $3) ; $3 = box_y
(pop $5) ; $5 = box_size
; (click_x, click_y, box_x, box_y, box_size, color)
; Compute right = box_x + box_size
(add-int $6 $2 $5) ; $6 = right edge
(add-int $6 $2 $4) ; $6 = right edge
; Compute bottom = box_y + box_size
(add-int $7 $3 $5) ; $7 = bottom edge
(add-int $7 $3 $4) ; $7 = bottom edge
; Bounds check: x in [box_x, right] and y in [box_y, bottom]
(jump-lt-int &fail $0 $2)
@ -134,17 +118,17 @@
(jump-gt-int &fail $1 $7)
(load-immediate $10 &SELECTED-COLOR)
(store-absolute-8 $10 $11)
(store-absolute-8 $10 $5)
(label fail)
(return))
(return nil))
(label draw-outlined-swatch
(pop $3) ; y
(pop $2) ; x
(pop $0)
(pop $1) ; color
(pop $2) ; x
(pop $3) ; y
(pop $20)
(pop $21)
; Constants
(load-absolute-32 $4 &GRAY)
@ -159,7 +143,7 @@
(load-immediate $6 17) ; fill size
(load-immediate $7 2) ; offset
(push $21) ; base
(push $0) ; base
(push $20) ; width
(push $4) ; color (gray)
(push $2) ; x
@ -171,7 +155,7 @@
(add-int $8 $2 $7) ; x + 2
(add-int $9 $3 $7) ; y + 2
(push $21) ; base
(push $0) ; base
(push $20) ; width
(push $1) ; color (original)
(push $8) ; x + 2
@ -180,7 +164,7 @@
(push $6) ; height (17)
(call &draw-box)
(return))
(return nil))
; draw-box(color, x, y)
; Pops: y, x, color
@ -215,7 +199,7 @@
(add-int $4 $4 $2) ; next row (+= 640)
(sub-int $5 $5 $1) ; decrement row count
(jump-gt-int &draw-box-outer $5 0))
(return)))
(return nil)))
(data
(label screen-namespace "/dev/screen/0")
(label mouse-namespace "/dev/mouse/0")

Binary file not shown.

Binary file not shown.