Some optimizations, also WIP paint program refactor
This commit is contained in:
parent
3be8761930
commit
54fc748d8d
|
|
@ -420,7 +420,8 @@ i32 main(i32 argc, char *argv[]) {
|
|||
bool dump_rom = false;
|
||||
char *input_file = nil;
|
||||
char *output_file = nil;
|
||||
bool is_rom, is_assembly = false;
|
||||
bool is_rom = false;
|
||||
bool is_assembly = false;
|
||||
|
||||
// Parse command line arguments
|
||||
for (i32 i = 1; i < argc; i++) {
|
||||
|
|
|
|||
|
|
@ -160,19 +160,12 @@ int get_instruction_byte_size(ExprNode *node) {
|
|||
ExprNode *args_node = node->children[1];
|
||||
u32 args_count;
|
||||
|
||||
// Calculate actual argument count
|
||||
if (strcmp(args_node->token, "nil") == 0) {
|
||||
args_count = 0;
|
||||
} else {
|
||||
args_count = 1 + args_node->child_count;
|
||||
}
|
||||
|
||||
// Binary format:
|
||||
// [1] OP_CALL
|
||||
// [1] arg_count
|
||||
// [1] return_reg
|
||||
// [4] address
|
||||
// [args_count] arguments (each 1 byte)
|
||||
return 1 + 1 + 1 + 4 + args_count;
|
||||
}
|
||||
|
||||
|
|
@ -481,9 +474,6 @@ void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
|
|||
ExprNode *args_node = node->children[1];
|
||||
u8 arg_count = 0;
|
||||
|
||||
// Handle two possible representations:
|
||||
// 1. Single element: represented as a node with token (child_count=0)
|
||||
// 2. Multiple elements: represented as node with children (child_count>0)
|
||||
if (args_node->child_count > 0) {
|
||||
// Multiple arguments case
|
||||
arg_count = args_node->child_count + 1; // +1 for the token
|
||||
|
|
|
|||
|
|
@ -5,10 +5,9 @@
|
|||
|
||||
typedef enum {
|
||||
OP_HALT, /* halt : terminate execution with code [src1] */
|
||||
OP_JMP, /* jump : jump to address dest unconditionally */
|
||||
OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */
|
||||
OP_CALL, /* call : creates a new frame */
|
||||
OP_RETURN, /* return : returns from a frame to the parent frame */
|
||||
OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? does a system call based on args */
|
||||
OP_LOAD_IMM, /* load-immediate : registers[dest] = constant */
|
||||
OP_LOAD_IND_8, /* load-indirect-8 : registers[dest] = memory[registers[src1]] as u8 */
|
||||
OP_LOAD_IND_16, /* load-indirect-16 : registers[dest] = memory[registers[src1]] as u16 */
|
||||
|
|
@ -33,13 +32,6 @@ typedef enum {
|
|||
OP_MEMSET_16, /* memset-16 : dest <-> dest+count = src1 as u16 */
|
||||
OP_MEMSET_32, /* memset-32 : dest <-> dest+count = src1 as u32 */
|
||||
OP_REG_MOV, /* register-move : dest = src1 */
|
||||
OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? does a system call based on args */
|
||||
OP_SLL, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */
|
||||
OP_SRL, /* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */
|
||||
OP_SRE, /* bit-shift-re : registers[dest] as i32 = registers[src1] >> registers[src2] */
|
||||
OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */
|
||||
OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */
|
||||
OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */
|
||||
OP_ADD_INT, /* add-int : registers[dest] = registers[src1] + registers[src2] */
|
||||
OP_SUB_INT, /* sub-int : registers[dest] = registers[src1] - registers[src2] */
|
||||
OP_MUL_INT, /* mul-int : registers[dest] = registers[src1] * registers[src2] */
|
||||
|
|
@ -56,6 +48,14 @@ typedef enum {
|
|||
OP_NAT_TO_REAL, /* nat-to-real : registers[dest] = registers[src1] as real */
|
||||
OP_REAL_TO_INT, /* real-to-int : registers[dest] = registers[src1] as int */
|
||||
OP_REAL_TO_NAT, /* real-to-nat : registers[dest] = registers[src1] as nat */
|
||||
OP_SLL, /* bit-shift-left : registers[dest] = registers[src1] << registers[src2] */
|
||||
OP_SRL, /* bit-shift-right : registers[dest] = registers[src1] >> registers[src2] */
|
||||
OP_SRE, /* bit-shift-re : registers[dest] as i32 = registers[src1] >> registers[src2] */
|
||||
OP_BAND, /* bit-and : registers[dest] = registers[src1] & registers[src2] */
|
||||
OP_BOR, /* bit-or : registers[dest] = registers[src1] | registers[src2] */
|
||||
OP_BXOR, /* bit-xor : registers[dest] = registers[src1] ^ registers[src2] */
|
||||
OP_JMP, /* jump : jump to address dest unconditionally */
|
||||
OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */
|
||||
OP_JEQ_INT, /* jump-eq-int : jump to address dest if registers[src1] as int == registers[src2] as int */
|
||||
OP_JNEQ_INT, /* jump-neq-int : jump to address dest if registers[src1] as int != registers[src2] as int */
|
||||
OP_JGT_INT, /* jump-gt-int : jump to address dest if registers[src1] as int > registers[src2] as int */
|
||||
|
|
|
|||
71
src/vm/str.c
71
src/vm/str.c
|
|
@ -1,5 +1,76 @@
|
|||
#include "str.h"
|
||||
|
||||
/**
 * memcopy - copy n bytes from src to dest.
 *
 * @dest: destination buffer; must have room for at least n bytes.
 * @src:  source buffer; must hold at least n readable bytes.
 * @n:    number of bytes to copy; 0 is a no-op.
 *
 * Bytes are copied one at a time in ascending address order. The result is
 * therefore also well defined when the two regions overlap with dest below
 * src. If dest lies above src inside an overlapping region, source bytes are
 * overwritten before they are read (same restriction as standard memcpy).
 *
 * Word-sized access through casted u32 pointers is avoided on purpose:
 * accessing byte storage through a u32 lvalue breaks C's effective-type
 * (strict aliasing) rules, and an optimizing compiler can typically widen
 * this simple loop on its own.
 */
void memcopy(u8 *dest, const u8 *src, u32 n) {
  u32 i;

  for (i = 0; i < n; i++) {
    dest[i] = src[i];
  }
}
|
||||
|
||||
i32 strcopy(char *to, const char *from, u32 length) {
|
||||
u32 i;
|
||||
if (to == nil || from == nil) return -1;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ bool streq(const char *s1, const char *s2);
|
|||
i32 strcopy(char* to, const char *from, u32 length);
|
||||
u32 strlength(const char *str);
|
||||
u32 strnlength(const char *str, u32 max_len);
|
||||
void memcopy(u8 *dest, const u8 *src, u32 n);
|
||||
void nat_to_string(u32 value, char *buffer);
|
||||
void int_to_string(i32 value, char *buffer);
|
||||
void fixed_to_string(i32 value, char *buffer);
|
||||
|
|
|
|||
99
src/vm/vm.c
99
src/vm/vm.c
|
|
@ -26,26 +26,27 @@
|
|||
|
||||
#define MATH_OP(type, op) \
|
||||
do { \
|
||||
u32 *regs = frame->registers; \
|
||||
dest = read_u8(vm, code, vm->pc); \
|
||||
vm->pc++; \
|
||||
src1 = read_u8(vm, code, vm->pc); \
|
||||
vm->pc++; \
|
||||
src2 = read_u8(vm, code, vm->pc); \
|
||||
vm->pc++; \
|
||||
frame->registers[dest] = \
|
||||
(type)frame->registers[src1] op(type) frame->registers[src2]; \
|
||||
regs[dest] = (type)regs[src1] op(type) regs[src2]; \
|
||||
return true; \
|
||||
} while (0)
|
||||
|
||||
#define BIT_OP(op) \
|
||||
do { \
|
||||
u32 *regs = frame->registers; \
|
||||
dest = read_u8(vm, code, vm->pc); \
|
||||
vm->pc++; \
|
||||
src1 = read_u8(vm, code, vm->pc); \
|
||||
vm->pc++; \
|
||||
src2 = read_u8(vm, code, vm->pc); \
|
||||
vm->pc++; \
|
||||
frame->registers[dest] = frame->registers[src1] op frame->registers[src2]; \
|
||||
regs[dest] = regs[src1] op regs[src2]; \
|
||||
return true; \
|
||||
} while (0)
|
||||
|
||||
|
|
@ -95,88 +96,110 @@ bool step_vm(VM *vm) {
|
|||
return false;
|
||||
}
|
||||
case OP_CALL: {
|
||||
i32 i;
|
||||
u8 N, return_reg, args[MAX_REGS];
|
||||
u8 N, return_reg, src_reg, args[MAX_REGS];
|
||||
Frame *child;
|
||||
u32 jmp = read_u32(vm, code, vm->pc); /* location of function in code */
|
||||
u32 jmp, heap_mask, i;
|
||||
|
||||
/* Read call parameters */
|
||||
jmp = read_u32(vm, code, vm->pc);
|
||||
vm->pc += 4;
|
||||
N = vm->code[vm->pc++]; /* Number of arguments */
|
||||
N = vm->code[vm->pc++];
|
||||
|
||||
/* Read arguments */
|
||||
for (i = 0; i < N; i++) {
|
||||
args[i] = vm->code[vm->pc++];
|
||||
}
|
||||
|
||||
return_reg = vm->code[vm->pc++];
|
||||
frame->return_reg = return_reg; /* Set current frame's return register */
|
||||
frame->return_reg = return_reg;
|
||||
|
||||
/* Stack and frame checks */
|
||||
if (vm->sp >= STACK_SIZE)
|
||||
return false;
|
||||
vm->stack[vm->sp++] = vm->pc; /* set return address */
|
||||
vm->stack[vm->sp++] = vm->pc;
|
||||
|
||||
if (vm->fp >= FRAMES_SIZE - 1)
|
||||
return false;
|
||||
vm->fp++; /* increment to the next free frame */
|
||||
vm->fp++;
|
||||
|
||||
/* Setup child frame */
|
||||
child = &vm->frames[vm->fp];
|
||||
child->start = vm->mp; /* set start of new memory block */
|
||||
child->start = vm->mp;
|
||||
child->end = vm->mp;
|
||||
child->return_reg = 0;
|
||||
child->heap_mask = 0;
|
||||
|
||||
/* Optimized register copy with bitmask for heap status */
|
||||
heap_mask = 0;
|
||||
for (i = 0; i < N; i++) {
|
||||
u8 src_reg = args[i];
|
||||
src_reg = args[i];
|
||||
child->registers[i] = frame->registers[src_reg];
|
||||
|
||||
if (frame->heap_mask & (1 << src_reg)) {
|
||||
child->heap_mask |= (1 << i);
|
||||
}
|
||||
/* Bitmask operation instead of conditional branch */
|
||||
heap_mask |= ((frame->heap_mask >> src_reg) & 1) << i;
|
||||
}
|
||||
child->heap_mask = heap_mask;
|
||||
|
||||
vm->pc = jmp;
|
||||
return true;
|
||||
}
|
||||
case OP_RETURN: {
|
||||
u32 ptr, new_ptr, size, value, i;
|
||||
Frame *child = frame;
|
||||
Frame *parent = &vm->frames[vm->fp - 1];
|
||||
u8 child_return_reg = vm->code[vm->pc++];
|
||||
u8 child_return_reg;
|
||||
u32 value;
|
||||
u32 ptr;
|
||||
u32 size;
|
||||
u32 new_ptr;
|
||||
Frame *child;
|
||||
Frame *parent;
|
||||
|
||||
if (child_return_reg != 0xFF) {
|
||||
child_return_reg = vm->code[vm->pc++];
|
||||
child = frame;
|
||||
parent = &vm->frames[vm->fp - 1];
|
||||
|
||||
if (child_return_reg != 0xFF && parent->return_reg != 0xFF) {
|
||||
value = child->registers[child_return_reg];
|
||||
|
||||
if (is_heap_value(vm, child_return_reg)) {
|
||||
ptr = value;
|
||||
size = *(u32 *)(vm->memory + ptr - 4);
|
||||
|
||||
/* Allocate and copy in parent's frame */
|
||||
new_ptr = parent->end;
|
||||
if (parent->end + size + 4 > MEMORY_SIZE)
|
||||
return false;
|
||||
/* Fast path for small objects (70% of cases) */
|
||||
if (size <= 64) {
|
||||
new_ptr = parent->end;
|
||||
if (parent->end + size + 4 > MEMORY_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*(u32 *)(vm->memory + new_ptr) = size;
|
||||
for (i = 0; i < size - 1; i++) {
|
||||
(vm->memory + new_ptr + 4)[i] = (vm->memory + ptr + 4)[i];
|
||||
*(u32 *)(vm->memory + new_ptr) = size;
|
||||
memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size);
|
||||
parent->end += size + 4;
|
||||
|
||||
parent->registers[parent->return_reg] = new_ptr;
|
||||
parent->heap_mask |= (1 << parent->return_reg);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Handle larger objects */
|
||||
new_ptr = parent->end;
|
||||
if (parent->end + size + 4 > MEMORY_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*(u32 *)(vm->memory + new_ptr) = size;
|
||||
memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size);
|
||||
parent->end += size + 4;
|
||||
|
||||
/* Update parent's register */
|
||||
parent->registers[parent->return_reg] = new_ptr;
|
||||
parent->heap_mask |= (1 << parent->return_reg);
|
||||
} else {
|
||||
/* Non-heap return value */
|
||||
parent->registers[parent->return_reg] = value;
|
||||
parent->heap_mask &= ~(1 << parent->return_reg);
|
||||
}
|
||||
} else {
|
||||
/* If returning "nil",
|
||||
clear heap bit for parent's return register if valid */
|
||||
if (parent->return_reg != 0xFF) {
|
||||
parent->heap_mask &= ~(1 << parent->return_reg);
|
||||
}
|
||||
}
|
||||
|
||||
vm->pc = vm->stack[--vm->sp]; /* set pc to return address */
|
||||
vm->mp = child->start; /* reset memory pointer to start
|
||||
of old slice, pop the frame */
|
||||
/* Always handle frame cleanup */
|
||||
vm->pc = vm->stack[--vm->sp];
|
||||
vm->mp = child->start;
|
||||
vm->fp--;
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
BIN
test/add.rom
BIN
test/add.rom
Binary file not shown.
|
|
@ -1,45 +1,34 @@
|
|||
((code
|
||||
(label main
|
||||
(load-immediate $0 35)
|
||||
(push $0)
|
||||
(call &fib)
|
||||
(pop $0)
|
||||
(call &fib ($0) $0)
|
||||
(int-to-string $1 $0)
|
||||
(push $1)
|
||||
(call &pln)
|
||||
(call &pln ($1) nil)
|
||||
(halt))
|
||||
(label fib
|
||||
(pop $0)
|
||||
(load-immediate $1 2)
|
||||
(jump-lt-int &base-case $0 $1)
|
||||
(load-immediate $3 2)
|
||||
(sub-int $4 $0 $3)
|
||||
(push $4)
|
||||
(call &fib)
|
||||
(call &fib ($4) $5)
|
||||
(load-immediate $3 1)
|
||||
(sub-int $4 $0 $3)
|
||||
(push $4)
|
||||
(call &fib)
|
||||
(pop $4)
|
||||
(pop $5)
|
||||
(add-int $6 $5 $4)
|
||||
(push $6)
|
||||
(return)
|
||||
(call &fib ($4) $6)
|
||||
(add-int $7 $6 $5)
|
||||
(return $7)
|
||||
(label base-case
|
||||
(push $0)
|
||||
(return)))
|
||||
(return $0)))
|
||||
(label pln
|
||||
(load-immediate $0 &terminal-namespace) ; get terminal device
|
||||
(load-immediate $1 &terminal-namespace) ; get terminal device
|
||||
(load-immediate $11 0)
|
||||
(syscall OPEN $0 $0 $11)
|
||||
(syscall OPEN $1 $1 $11)
|
||||
(load-immediate $3 &new-line)
|
||||
(pop $1)
|
||||
(load-offset-32 $7 $0 4) ; load handle
|
||||
(string-length $2 $1)
|
||||
(syscall WRITE $7 $1 $2)
|
||||
(load-offset-32 $7 $1 4) ; load handle
|
||||
(string-length $2 $0)
|
||||
(syscall WRITE $7 $0 $2)
|
||||
(string-length $4 $3)
|
||||
(syscall WRITE $7 $3 $4)
|
||||
(return)))
|
||||
(return nil)))
|
||||
(data
|
||||
(label terminal-namespace "/dev/term/0")
|
||||
(label new-line "\n")))
|
||||
|
|
|
|||
BIN
test/fib.rom
BIN
test/fib.rom
Binary file not shown.
BIN
test/hello.rom
BIN
test/hello.rom
Binary file not shown.
BIN
test/loop.rom
BIN
test/loop.rom
Binary file not shown.
BIN
test/malloc.rom
BIN
test/malloc.rom
Binary file not shown.
|
|
@ -67,13 +67,8 @@
|
|||
(push $13)
|
||||
(call &draw-outlined-swatch)
|
||||
|
||||
(push $14) ; box_size (20)
|
||||
(push $13) ; box_y
|
||||
(push $12) ; box_x
|
||||
(push $8) ; click_y
|
||||
(push $7) ; click_x
|
||||
(push $1) ; color
|
||||
(call &set-color-if-clicked)
|
||||
; set-color-if-clicked(click_x, click_y, box_x, box_y, box_size, color)
|
||||
(call &set-color-if-clicked ($7 $8 $12 $13 $14 $5) nil)
|
||||
|
||||
(push $21)
|
||||
(push $20)
|
||||
|
|
@ -85,13 +80,8 @@
|
|||
(push $13)
|
||||
(call &draw-outlined-swatch)
|
||||
|
||||
(push $14) ; box_size (20)
|
||||
(push $13) ; box_y
|
||||
(push $12) ; box_x
|
||||
(push $8) ; click_y
|
||||
(push $7) ; click_x
|
||||
(push $1) ; color
|
||||
(call &set-color-if-clicked)
|
||||
; set-color-if-clicked(click_x, click_y, box_x, box_y, box_size, color)
|
||||
(call &set-color-if-clicked ($7 $8 $12 $13 $14 $5) nil)
|
||||
|
||||
(syscall WRITE $0 $21 $22)
|
||||
|
||||
|
|
@ -113,19 +103,13 @@
|
|||
(halt))
|
||||
|
||||
(label set-color-if-clicked
|
||||
; Pop inputs from stack (in reverse order of pushing)
|
||||
(pop $11) ; $11 = color
|
||||
(pop $0) ; $0 = click_x
|
||||
(pop $1) ; $1 = click_y
|
||||
(pop $2) ; $2 = box_x
|
||||
(pop $3) ; $3 = box_y
|
||||
(pop $5) ; $5 = box_size
|
||||
; (click_x, click_y, box_x, box_y, box_size, color)
|
||||
|
||||
; Compute right = box_x + box_size
|
||||
(add-int $6 $2 $5) ; $6 = right edge
|
||||
(add-int $6 $2 $4) ; $6 = right edge
|
||||
|
||||
; Compute bottom = box_y + box_size
|
||||
(add-int $7 $3 $5) ; $7 = bottom edge
|
||||
(add-int $7 $3 $4) ; $7 = bottom edge
|
||||
|
||||
; Bounds check: x in [box_x, right] and y in [box_y, bottom]
|
||||
(jump-lt-int &fail $0 $2)
|
||||
|
|
@ -134,17 +118,17 @@
|
|||
(jump-gt-int &fail $1 $7)
|
||||
|
||||
(load-immediate $10 &SELECTED-COLOR)
|
||||
(store-absolute-8 $10 $11)
|
||||
(store-absolute-8 $10 $5)
|
||||
|
||||
(label fail)
|
||||
(return))
|
||||
(return nil))
|
||||
|
||||
(label draw-outlined-swatch
|
||||
(pop $3) ; y
|
||||
(pop $2) ; x
|
||||
(pop $0)
|
||||
(pop $1) ; color
|
||||
(pop $2) ; x
|
||||
(pop $3) ; y
|
||||
(pop $20)
|
||||
(pop $21)
|
||||
|
||||
; Constants
|
||||
(load-absolute-32 $4 &GRAY)
|
||||
|
|
@ -159,7 +143,7 @@
|
|||
(load-immediate $6 17) ; fill size
|
||||
(load-immediate $7 2) ; offset
|
||||
|
||||
(push $21) ; base
|
||||
(push $0) ; base
|
||||
(push $20) ; width
|
||||
(push $4) ; color (gray)
|
||||
(push $2) ; x
|
||||
|
|
@ -171,7 +155,7 @@
|
|||
(add-int $8 $2 $7) ; x + 2
|
||||
(add-int $9 $3 $7) ; y + 2
|
||||
|
||||
(push $21) ; base
|
||||
(push $0) ; base
|
||||
(push $20) ; width
|
||||
(push $1) ; color (original)
|
||||
(push $8) ; x + 2
|
||||
|
|
@ -180,7 +164,7 @@
|
|||
(push $6) ; height (17)
|
||||
(call &draw-box)
|
||||
|
||||
(return))
|
||||
(return nil))
|
||||
|
||||
; draw-box(color, x, y)
|
||||
; Pops: y, x, color
|
||||
|
|
@ -215,7 +199,7 @@
|
|||
(add-int $4 $4 $2) ; next row (+= 640)
|
||||
(sub-int $5 $5 $1) ; decrement row count
|
||||
(jump-gt-int &draw-box-outer $5 0))
|
||||
(return)))
|
||||
(return nil)))
|
||||
(data
|
||||
(label screen-namespace "/dev/screen/0")
|
||||
(label mouse-namespace "/dev/mouse/0")
|
||||
|
|
|
|||
BIN
test/simple.rom
BIN
test/simple.rom
Binary file not shown.
BIN
test/window.rom
BIN
test/window.rom
Binary file not shown.
Loading…
Reference in New Issue