From 3c9603adea66a7d5c57ef55871116106c5974c45 Mon Sep 17 00:00:00 2001 From: zongor Date: Thu, 25 Sep 2025 17:24:56 -0700 Subject: [PATCH] make read syscall more flexible; WIP bit shift ops --- src/arch/linux/main.c | 68 --------------------- src/tools/assembler.c | 17 ++++-- src/vm/opcodes.h | 134 +++++++++++++++++++++++------------------- src/vm/vm.c | 110 ++++++++++++++++++++++++++++++++-- test/loop.asm.lisp | 6 +- 5 files changed, 195 insertions(+), 140 deletions(-) diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index a1b1d2e..8fd0574 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -227,73 +227,6 @@ void register_sdl_devices(VM *vm) { &keyboard_ops); } -const char* opcode_to_string(Opcode op) { - static const char* names[] = { - [OP_HALT] = "halt", - [OP_JMP] = "jump", - [OP_JMPF] = "jump-if-flag", - [OP_CALL] = "call", - [OP_RETURN] = "return", - [OP_LOAD] = "load", - [OP_LOAD_IMM] = "load-immediate", - [OP_STORE] = "store", - [OP_PUSH] = "push", - [OP_POP] = "pop", - [OP_REG_MOV] = "register-move", - [OP_SYSCALL] = "syscall", - [OP_ADD_INT] = "add-int", - [OP_SUB_INT] = "sub-int", - [OP_MUL_INT] = "mul-int", - [OP_DIV_INT] = "div-int", - [OP_ADD_UINT] = "add-nat", - [OP_SUB_UINT] = "sub-nat", - [OP_MUL_UINT] = "mul-nat", - [OP_DIV_UINT] = "div-nat", - [OP_ADD_REAL] = "add-real", - [OP_SUB_REAL] = "sub-real", - [OP_MUL_REAL] = "mul-real", - [OP_DIV_REAL] = "div-real", - [OP_INT_TO_REAL] = "int-to-real", - [OP_UINT_TO_REAL] = "nat-to-real", - [OP_REAL_TO_INT] = "real-to-int", - [OP_REAL_TO_UINT] = "real-to-nat", - [OP_JEQ_INT] = "jump-eq-int", - [OP_JGT_INT] = "jump-gt-int", - [OP_JLT_INT] = "jump-lt-int", - [OP_JLE_INT] = "jump-le-int", - [OP_JGE_INT] = "jump-ge-int", - [OP_JEQ_UINT] = "jump-eq-nat", - [OP_JGT_UINT] = "jump-gt-nat", - [OP_JLT_UINT] = "jump-lt-nat", - [OP_JLE_UINT] = "jump-le-nat", - [OP_JGE_UINT] = "jump-ge-nat", - [OP_JEQ_REAL] = "jump-eq-real", - [OP_JGE_REAL] = "jump-ge-real", - [OP_JGT_REAL] = "jump-gt-real", 
- [OP_JLT_REAL] = "jump-lt-real", - [OP_JLE_REAL] = "jump-le-real", - [OP_STRLEN] = "string-length", - [OP_STREQ] = "string-eq", - [OP_STRCAT] = "string-concat", - [OP_STR_GET_CHAR] = "string-get-char", - [OP_STR_FIND_CHAR]= "string-find-char", - [OP_STR_SLICE] = "string-slice", - [OP_INT_TO_STRING] = "int-to-string", - [OP_UINT_TO_STRING] = "nat-to-string", - [OP_REAL_TO_STRING] = "real-to-string", - [OP_STRING_TO_INT] = "string-to-int", - [OP_STRING_TO_UINT] = "string-to-nat", - [OP_STRING_TO_REAL] = "string-to-real" - }; - - if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { - return ""; - } - - const char* name = names[op]; - return name ? name : ""; -} - i32 main(i32 argc, char *argv[]) { struct CompilerConfig config = {0}; @@ -419,7 +352,6 @@ i32 main(i32 argc, char *argv[]) { } } else { while (running) { - //printf("| %d %s %d\n", vm.code[vm.pc], opcode_to_string(vm.code[vm.pc]), vm.pc); running = step_vm(&vm); } } diff --git a/src/tools/assembler.c b/src/tools/assembler.c index 569d226..d9baa04 100644 --- a/src/tools/assembler.c +++ b/src/tools/assembler.c @@ -95,6 +95,9 @@ int get_instruction_byte_size(ExprNode *node) { // Load/store with register and address (5 bytes: 1 + 1 + 4) if (strcmp(opname, "load") == 0 || strcmp(opname, "store") == 0 || + strcmp(opname, "load-u8") == 0 || strcmp(opname, "load-i8") == 0 || + strcmp(opname, "store-8") == 0 || strcmp(opname, "load-u16") == 0 || + strcmp(opname, "load-i16") == 0 || strcmp(opname, "store-16") == 0 || strcmp(opname, "call") == 0) { return 5; } @@ -105,7 +108,11 @@ int get_instruction_byte_size(ExprNode *node) { strcmp(opname, "add-nat") == 0 || strcmp(opname, "sub-nat") == 0 || strcmp(opname, "mul-nat") == 0 || strcmp(opname, "div-nat") == 0 || strcmp(opname, "add-real") == 0 || strcmp(opname, "sub-real") == 0 || - strcmp(opname, "mul-real") == 0 || strcmp(opname, "div-real") == 0) { + strcmp(opname, "bit-shift-left") == 0 || + strcmp(opname, "bit-shift-right") == 0 || + strcmp(opname, 
"bit-and") == 0 || strcmp(opname, "bit-or") == 0 || + strcmp(opname, "bit-xor") == 0 || strcmp(opname, "mul-real") == 0 || + strcmp(opname, "div-real") == 0) { return 4; } @@ -156,12 +163,12 @@ void collect_symbols_in_node(SymbolTable *table, ExprNode *node, for (int i = 0; i < depth; i++) strcat(indent, " "); - //printf("%s%d %s ", indent, *current_addr, node->token); + // printf("%s%d %s ", indent, *current_addr, node->token); if (strcmp(node->token, "label") == 0) { if (node->child_count >= 1) { const char *name = node->children[0]->token; - //printf(" %s -> %d\n", name, *current_addr); + // printf(" %s -> %d\n", name, *current_addr); symbol_table_add(table, name, *current_addr, SYMBOL_CODE); } @@ -172,8 +179,8 @@ void collect_symbols_in_node(SymbolTable *table, ExprNode *node, } else { int size = get_instruction_byte_size(node); *current_addr += size; - //printf(" +%d bytes -> %d\n", size, *current_addr); - //printf("\n"); + // printf(" +%d bytes -> %d\n", size, *current_addr); + // printf("\n"); } } diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index 0975f9c..ed3e132 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -4,61 +4,73 @@ #include "common.h" typedef enum { - OP_HALT, /* halt : terminate execution */ - OP_JMP, /* jump : jump to address dest unconditionally */ - OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */ - OP_CALL, /* call : creates a new frame */ - OP_RETURN, /* return : returns from a frame to the parent frame */ - OP_LOAD, /* load : dest = &[next memory location] */ - OP_LOAD_IMM, /* load-immediate : dest = &[next memory location] */ - OP_STORE, /* store : next memory location = src1 as float */ - OP_PUSH, /* push : push str ref from register onto the stack and copy str */ - OP_POP, /* pop : pop int from stack onto the register */ - OP_REG_MOV, /* register-move : dest = src1 */ - OP_SYSCALL, /* syscall : src1 src2 src3 src4 ? 
does a system call based on args */ - OP_ADD_INT, /* add-int : dest = src1 + src2 */ - OP_SUB_INT, /* sub-int : dest = src1 - src2 */ - OP_MUL_INT, /* mul-int : dest = src1 * src2 */ - OP_DIV_INT, /* div-int : dest = src1 / src2 */ - OP_ADD_UINT, /* add-nat : dest = src1 + src2 */ - OP_SUB_UINT, /* sub-nat : dest = src1 - src2 */ - OP_MUL_UINT, /* mul-nat : dest = src1 * src2 */ - OP_DIV_UINT, /* div-nat : dest = src1 / src2 */ - OP_ADD_REAL, /* add-real : dest = src1 + src2 */ - OP_SUB_REAL, /* sub-real : dest = src1 - src2 */ - OP_MUL_REAL, /* mul-real : dest = src1 * src2 */ - OP_DIV_REAL, /* div-real : dest = src1 / src2 */ - OP_INT_TO_REAL, /* int-to-real : dest = src1 as real */ - OP_UINT_TO_REAL, /* nat-to-real : dest = src1 as real */ - OP_REAL_TO_INT, /* real-to-int : dest = src1 as int */ - OP_REAL_TO_UINT, /* real-to-nat : dest = src1 as uint */ - OP_JEQ_INT, /* jump-eq-int : jump to address dest if src1 as int == src2 as int */ - OP_JGT_INT, /* jump-gt-int : jump to address dest if src1 as int > src2 as int*/ - OP_JLT_INT, /* jump-lt-int : jump to address dest if src1 as int < src2 as int */ - OP_JLE_INT, /* jump-le-int : jump to address dest if src1 as int <= src2 as int */ - OP_JGE_INT, /* jump-ge-int : jump to address dest if src1 as int >= src2 as int*/ - OP_JEQ_UINT, /* jump-eq-nat : jump to address dest if src1 as int == src2 as uint */ - OP_JGT_UINT, /* jump-gt-nat : jump to address dest if src1 as int > src2 as uint*/ - OP_JLT_UINT, /* jump-lt-nat : jump to address dest if src1 as int < src2 as uint */ - OP_JLE_UINT, /* jump-le-nat : jump to address dest if src1 as int <= src2 as uint */ - OP_JGE_UINT, /* jump-ge-nat : jump to address dest if src1 as int >= src2 as uint*/ - OP_JEQ_REAL, /* jump-eq-real : jump to address dest if src1 as real == src2 as real */ - OP_JGE_REAL, /* jump-gt-real : jump to address dest if src1 as real >= src2 as real */ - OP_JGT_REAL, /* jump-lt-real : jump to address dest if src1 as real > src2 as real */ - 
OP_JLT_REAL, /* jump-le-real : jump to address dest if src1 as real < src2 as real */ - OP_JLE_REAL, /* jump-ge-real : jump to address dest if src1 as real <= src2 as real */ - OP_STRLEN, /* string-length : dest = length of str at src1 ptr */ - OP_STREQ, /* string-eq : dest = src1 ptr string == src2 ptr string */ - OP_STRCAT, /* string-concat : dest = ptr of src1 ptr string + src2 ptr string */ - OP_STR_GET_CHAR, /* string-get-char : dest = ptr of src1 ptr str, src2 index of str */ - OP_STR_FIND_CHAR, /* string-find-char : dest = ptr of src1 ptr string, src2 uint8 char */ - OP_STR_SLICE, /* string-slice : dest = ptr of src1 ptr str, src2 start index, src3 end index */ - OP_INT_TO_STRING, /* int-to-string : dest = src1 as str */ - OP_UINT_TO_STRING, /* nat-to-string : dest = src1 as str */ - OP_REAL_TO_STRING, /* real-to-string : dest = src1 as str */ - OP_STRING_TO_INT, /* string-to-int : dest = src1 as int */ - OP_STRING_TO_UINT, /* string-to-nat : dest = src1 as uint */ - OP_STRING_TO_REAL /* string-to-real : dest = src1 as real */ + OP_HALT, /* halt : terminate execution with code [src1] */ + OP_JMP, /* jump : jump to address dest unconditionally */ + OP_JMPF, /* jump-if-flag : jump to address dest if flag is ne 0 */ + OP_CALL, /* call : creates a new frame */ + OP_RETURN, /* return : returns from a frame to the parent frame */ + OP_LOAD, /* load : dest = memory[src1] */ + OP_LOADI8, /* load-i8 : dest = memory[src1] as i8 */ + OP_LOADU8, /* load-u8 : dest = memory[src1] as u8 */ + OP_LOADI16, /* load-i16 : dest = memory[src1] as i16 */ + OP_LOADU16, /* load-u16 : dest = memory[src1] as u16 */ + OP_LOAD_IMM, /* load-immediate : dest = constant */ + OP_STORE, /* store : memory[dest] = src1 */ + OP_STORE8, /* store-8 : memory[dest] = src1 & 0xFF */ + OP_STORE16, /* store-16 : memory[dest] = src1 (16-bit write) */ + OP_PUSH, /* push : push const or ref */ + OP_POP, /* pop : pop const or ref */ + OP_REG_MOV, /* register-move : dest = src1 */ + OP_SYSCALL, /* syscall : src1 src2 src3 src4 more? 
does a system call based on args */ + OP_SLL, /* bit-shift-left : dest = src1 << src2 */ + OP_SRL, /* bit-shift-right : dest = src1 >> src2 */ + OP_SRE, /* bit-shift-right-extend : dest as i32 = src1 >> src2 */ + OP_BAND, /* bit-and : dest = src1 & src2 */ + OP_BOR, /* bit-or : dest = src1 | src2 */ + OP_BXOR, /* bit-xor : dest = src1 ^ src2 */ + OP_ADD_INT, /* add-int : dest = src1 + src2 */ + OP_SUB_INT, /* sub-int : dest = src1 - src2 */ + OP_MUL_INT, /* mul-int : dest = src1 * src2 */ + OP_DIV_INT, /* div-int : dest = src1 / src2 */ + OP_ADD_UINT, /* add-nat : dest = src1 + src2 */ + OP_SUB_UINT, /* sub-nat : dest = src1 - src2 */ + OP_MUL_UINT, /* mul-nat : dest = src1 * src2 */ + OP_DIV_UINT, /* div-nat : dest = src1 / src2 */ + OP_ADD_REAL, /* add-real : dest = src1 + src2 */ + OP_SUB_REAL, /* sub-real : dest = src1 - src2 */ + OP_MUL_REAL, /* mul-real : dest = src1 * src2 */ + OP_DIV_REAL, /* div-real : dest = src1 / src2 */ + OP_INT_TO_REAL, /* int-to-real : dest = src1 as real */ + OP_UINT_TO_REAL, /* nat-to-real : dest = src1 as real */ + OP_REAL_TO_INT, /* real-to-int : dest = src1 as int */ + OP_REAL_TO_UINT, /* real-to-nat : dest = src1 as uint */ + OP_JEQ_INT, /* jump-eq-int : jump to address dest if src1 as int == src2 as int */ + OP_JGT_INT, /* jump-gt-int : jump to address dest if src1 as int > src2 as int */ + OP_JLT_INT, /* jump-lt-int : jump to address dest if src1 as int < src2 as int */ + OP_JLE_INT, /* jump-le-int : jump to address dest if src1 as int <= src2 as int */ + OP_JGE_INT, /* jump-ge-int : jump to address dest if src1 as int >= src2 as int */ + OP_JEQ_UINT, /* jump-eq-nat : jump to address dest if src1 as int == src2 as uint */ + OP_JGT_UINT, /* jump-gt-nat : jump to address dest if src1 as int > src2 as uint */ + OP_JLT_UINT, /* jump-lt-nat : jump to address dest if src1 as int < src2 as uint */ + OP_JLE_UINT, /* jump-le-nat : jump to address dest if src1 as int <= src2 as uint */ + OP_JGE_UINT, /* jump-ge-nat : jump to address 
dest if src1 as int >= src2 as uint */ + OP_JEQ_REAL, /* jump-eq-real : jump to address dest if src1 as real == src2 as real */ + OP_JGE_REAL, /* jump-ge-real : jump to address dest if src1 as real >= src2 as real */ + OP_JGT_REAL, /* jump-gt-real : jump to address dest if src1 as real > src2 as real */ + OP_JLT_REAL, /* jump-lt-real : jump to address dest if src1 as real < src2 as real */ + OP_JLE_REAL, /* jump-le-real : jump to address dest if src1 as real <= src2 as real */ + OP_STRLEN, /* string-length : dest = length of str at src1 ptr */ + OP_STREQ, /* string-eq : dest = src1 ptr string == src2 ptr string */ + OP_STRCAT, /* string-concat : dest = ptr of src1 ptr string + src2 ptr string */ + OP_STR_GET_CHAR, /* string-get-char : dest = ptr of src1 ptr str, src2 index of str */ + OP_STR_FIND_CHAR, /* string-find-char : dest = ptr of src1 ptr string, src2 uint8 char */ + OP_STR_SLICE, /* string-slice : dest = ptr of src1 ptr str, src2 start index, src3 end index */ + OP_INT_TO_STRING, /* int-to-string : dest = src1 as str */ + OP_UINT_TO_STRING, /* nat-to-string : dest = src1 as str */ + OP_REAL_TO_STRING, /* real-to-string : dest = src1 as str */ + OP_STRING_TO_INT, /* string-to-int : dest = src1 as int */ + OP_STRING_TO_UINT, /* string-to-nat : dest = src1 as uint */ + OP_STRING_TO_REAL /* string-to-real : dest = src1 as real */ } Opcode; #define MAX_REGS 32 @@ -71,11 +83,11 @@ typedef struct frame_s { typedef enum { SYSCALL_EXIT = 0, - SYSCALL_DEVICE_OPEN, - SYSCALL_DEVICE_READ, - SYSCALL_DEVICE_WRITE, - SYSCALL_DEVICE_CLOSE, - SYSCALL_DEVICE_IOCTL + SYSCALL_DEVICE_OPEN, /* */ + SYSCALL_DEVICE_READ, /* */ + SYSCALL_DEVICE_WRITE, /* */ + SYSCALL_DEVICE_CLOSE, /* */ + SYSCALL_DEVICE_IOCTL /* */ } SyscallID; typedef struct device_ops_s { @@ -131,7 +143,7 @@ typedef struct vm_s { #define write_u8(vm, location, addr, value) \ do { \ if ((addr) < sizeof((vm)->location)) { \ - (vm)->location[(addr)] = (value); \ + (vm)->location[(addr)] = (value) & 0xFF; \ } \ } 
while (0) diff --git a/src/vm/vm.c b/src/vm/vm.c index efde85f..be2d58c 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -37,6 +37,20 @@ return true; \ } while (0) +#define BIT_OP(op) \ +do { \ + dest = read_u8(vm, code, vm->pc); \ + vm->pc++; \ + src1 = read_u8(vm, code, vm->pc); \ + vm->pc++; \ + src2 = read_u8(vm, code, vm->pc); \ + vm->pc++; \ + frame->registers[dest] = \ + frame->registers[src1] op frame->registers[src2]; \ + return true; \ +} while (0) + + u32 str_alloc(VM *vm, Frame *frame, const char *str, u32 length) { u32 str_addr = vm->mp; u32 i = 0; @@ -55,7 +69,7 @@ u32 str_alloc(VM *vm, Frame *frame, const char *str, u32 length) { * Step to the next opcode in the vm. */ bool step_vm(VM *vm) { - u8 opcode, dest, src1, src2; + u16 opcode, dest, src1, src2; u32 v, ptr; i32 value; Frame *frame; @@ -102,6 +116,46 @@ bool step_vm(VM *vm) { frame->registers[dest] = v; return true; } + case OP_LOADI8: { + i8 v8; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v8 = (i8)read_u8(vm, memory, ptr); + frame->registers[dest] = v8; + return true; + } + case OP_LOADU8: { + u8 v8; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v8 = read_u8(vm, memory, ptr); + frame->registers[dest] = v8; + return true; + } + case OP_LOADI16: { + i16 v16; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v16 = (i16)read_u16(vm, memory, ptr); + frame->registers[dest] = v16; + return true; + } + case OP_LOADU16: { + u16 v16; + dest = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v16 = read_u16(vm, memory, ptr); + frame->registers[dest] = v16; + return true; + } case OP_STORE: { src1 = read_u8(vm, code, vm->pc); vm->pc++; @@ -110,6 +164,22 @@ bool step_vm(VM *vm) { v = frame->registers[src1]; write_u32(vm, memory, ptr, v); return true; + }case OP_STORE8: { + src1 = read_u8(vm, code, vm->pc); 
+ vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v = frame->registers[src1]; + write_u8(vm, memory, ptr, v); + return true; + }case OP_STORE16: { + src1 = read_u8(vm, code, vm->pc); + vm->pc++; + ptr = read_u32(vm, code, vm->pc); + vm->pc += 4; + v = frame->registers[src1]; + write_u16(vm, memory, ptr, v); + return true; } case OP_PUSH: { dest = read_u8(vm, code, vm->pc); @@ -180,25 +250,27 @@ bool step_vm(VM *vm) { case SYSCALL_DEVICE_READ: { Device *dev; u32 path_ptr, buffer_ptr, size; - u8 path_reg, buffer_reg, size_reg; + u16 path_reg, buffer_reg, size_reg; path_reg = read_u8(vm, code, vm->pc); vm->pc++; buffer_reg = read_u8(vm, code, vm->pc); vm->pc++; size_reg = read_u8(vm, code, vm->pc); vm->pc++; + dest = read_u8(vm, code, vm->pc); + vm->pc++; path_ptr = frame->registers[path_reg]; /* path pointer */ size = frame->registers[size_reg]; /* size */ - buffer_ptr = vm->mp; + buffer_ptr = (dest > 32) ? vm->mp : dest; /* dest operand byte: if > 32, append at vm->mp; otherwise the operand value itself (0..32) is used as the buffer address, not frame->registers[dest]. NOTE(review): MAX_REGS is 32, so operand 32 also takes the address branch -- confirm both are intended */ dev = find_device_by_path(vm, (const char *)&vm->memory[path_ptr + 4]); if (dev && dev->ops->read) { vm->flag = dev->ops->read(dev->data, (u8 *)&vm->memory[buffer_ptr + 4], size); - vm->mp += size + 4; + if (dest > 32) { vm->mp += size + 4;} write_u32(vm, memory, buffer_ptr, size); - frame->end = vm->mp; + if (dest > 32) { frame->end = vm->mp; } frame->registers[buffer_reg] = buffer_ptr; } else { vm->flag = 0; @@ -292,6 +364,18 @@ bool step_vm(VM *vm) { } return true; } + case OP_SLL: + BIT_OP(<<); + case OP_SRL: + BIT_OP(>>); + case OP_SRE: + MATH_OP(i32, >>); + case OP_BAND: + BIT_OP(&); + case OP_BOR: + BIT_OP(|); + case OP_BXOR: + BIT_OP(^); case OP_ADD_INT: MATH_OP(i32, +); case OP_SUB_INT: @@ -488,6 +572,22 @@ bool step_vm(VM *vm) { frame->registers[dest] = length; return true; } + case OP_STRCAT: { + /* not implemented yet */ + return false; + } + case OP_STR_GET_CHAR: { + /* not implemented yet */ + return false; + } + case OP_STR_FIND_CHAR: { + /* not implemented yet */ + return 
false; + } + case OP_STR_SLICE: { + /* not implemented yet */ + return false; + } case OP_STRING_TO_INT: { /* not implemented yet */ return false; diff --git a/test/loop.asm.lisp b/test/loop.asm.lisp index 6ad068e..c0b5b55 100644 --- a/test/loop.asm.lisp +++ b/test/loop.asm.lisp @@ -15,7 +15,11 @@ (string-length $8 $7) (syscall DEVICE-WRITE $10 $7 $8) (load-immediate $8 32) - (syscall DEVICE-READ $10 $2 $8) + (load-immediate $9 255) + ; note 255 is not a real register + ; it just means "append to end of memory" + ; technically any register > 32 will work + (syscall DEVICE-READ $10 $2 $8 $255) (push $2) (call &println) (nat-to-string $4 $1)