From 07528b1f3f341b24ed13a6128a1166cbbec91bfd Mon Sep 17 00:00:00 2001 From: zongor Date: Wed, 3 Dec 2025 21:45:03 -0800 Subject: [PATCH] fix silly missing `get_reg` error. all non gui ones are working now --- src/tools/assembler/assembler.c | 1053 ++++++++++++------------------- test/fib.ul.ir | 9 +- test/hello.ul.ir | 8 +- test/loop.ul.ir | 14 +- test/malloc.ul.ir | 18 +- 5 files changed, 431 insertions(+), 671 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 244cdcd..0ae0285 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -10,127 +10,126 @@ const char *opcode_to_string(Opcode op) { static const char *names[] = { - [OP_EXIT] = "exit", - [OP_JMP] = "jump", - [OP_JMPF] = "jump-if-flag", - [OP_CALL] = "call", - [OP_RETURN] = "return", - - /* Immediate loads (only 32-bit variant needed) */ - [OP_LOAD_IMM] = "load-immediate", - - /* Register-indirect loads */ - [OP_LOAD_IND_8] = "load-indirect-8", - [OP_LOAD_IND_16] = "load-indirect-16", - [OP_LOAD_IND_32] = "load-indirect-32", - - /* Absolute address loads */ - [OP_LOAD_ABS_8] = "load-absolute-8", - [OP_LOAD_ABS_16] = "load-absolute-16", - [OP_LOAD_ABS_32] = "load-absolute-32", - - /* Base+offset loads */ - [OP_LOAD_OFF_8] = "load-offset-8", - [OP_LOAD_OFF_16] = "load-offset-16", - [OP_LOAD_OFF_32] = "load-offset-32", - - /* Absolute address stores */ - [OP_STORE_ABS_8] = "store-absolute-8", - [OP_STORE_ABS_16] = "store-absolute-16", - [OP_STORE_ABS_32] = "store-absolute-32", - - /* Register-indirect stores */ - [OP_STORE_IND_8] = "store-indirect-8", - [OP_STORE_IND_16] = "store-indirect-16", - [OP_STORE_IND_32] = "store-indirect-32", - - /* Base+offset stores */ - [OP_STORE_OFF_8] = "store-offset-8", - [OP_STORE_OFF_16] = "store-offset-16", - [OP_STORE_OFF_32] = "store-offset-32", - - /* Memory operations */ - [OP_MALLOC] = "malloc", - [OP_MEMSET_8] = "memset-8", - [OP_MEMSET_16] = "memset-16", - [OP_MEMSET_32] = "memset-32", - - /* Register operations */ - [OP_REG_MOV] = "register-move", - [OP_SYSCALL] = "syscall", - - /* Bit operations */ - [OP_BIT_SHIFT_LEFT] = "bit-shift-left", - [OP_BIT_SHIFT_RIGHT] = "bit-shift-right", - [OP_BIT_SHIFT_R_EXT] = "bit-shift-re", - [OP_BAND] = "bit-and", - [OP_BOR] = "bit-or", - [OP_BXOR] = "bit-xor", - - /* Integer arithmetic */ - [OP_ADD_INT] = "add-int", - [OP_SUB_INT] = "sub-int", - [OP_MUL_INT] = "mul-int", - [OP_DIV_INT] = "div-int", - - /* Natural number arithmetic */ - [OP_ADD_NAT] = "add-nat", - [OP_SUB_NAT] = "sub-nat", - [OP_MUL_NAT] = "mul-nat", - [OP_DIV_NAT] = "div-nat", - - /* Floating point operations */ - [OP_ADD_REAL] = "add-real", - [OP_SUB_REAL] = "sub-real", - [OP_MUL_REAL] = "mul-real", - [OP_DIV_REAL] = "div-real", - - /* Type conversions */ - [OP_INT_TO_REAL] = "int-to-real", - [OP_NAT_TO_REAL] = "nat-to-real", - [OP_REAL_TO_INT] = "real-to-int", - [OP_REAL_TO_NAT] = "real-to-nat", - - /* Integer comparisons */ - [OP_JEQ_INT] = "jump-eq-int", - [OP_JNEQ_INT] = "jump-neq-int", - [OP_JGT_INT] = "jump-gt-int", - [OP_JLT_INT] = "jump-lt-int", - [OP_JLE_INT] = "jump-le-int", - [OP_JGE_INT] = "jump-ge-int", - - /* Natural number comparisons */ - [OP_JEQ_NAT] = "jump-eq-nat", - [OP_JNEQ_NAT] = "jump-neq-nat", - [OP_JGT_NAT] = "jump-gt-nat", - [OP_JLT_NAT] = "jump-lt-nat", - [OP_JLE_NAT] = "jump-le-nat", - [OP_JGE_NAT] = "jump-ge-nat", - - /* Floating point comparisons */ - [OP_JEQ_REAL] = "jump-eq-real", - [OP_JNEQ_REAL] = "jump-neq-real", - [OP_JGE_REAL] = "jump-ge-real", - [OP_JGT_REAL] = "jump-gt-real", - [OP_JLT_REAL] = "jump-lt-real", - [OP_JLE_REAL] = "jump-le-real", - - /* String operations */ - [OP_STRLEN] = "string-length", - [OP_STREQ] = "string-eq", - [OP_STRCAT] = "string-concat", - [OP_STR_GET_CHAR] = "string-get-char", - [OP_STR_FIND_CHAR] = "string-find-char", - [OP_STR_SLICE] = "string-slice", - - /* String conversions */ - [OP_INT_TO_STRING] = "int-to-string", - [OP_NAT_TO_STRING] = "nat-to-string", - [OP_REAL_TO_STRING] = "real-to-string", - [OP_STRING_TO_INT] = "string-to-int", - [OP_STRING_TO_NAT] = "string-to-nat", - [OP_STRING_TO_REAL] = "string-to-real" - }; + [OP_EXIT] = "exit", + [OP_JMP] = "jump", + [OP_JMPF] = "jump-if-flag", + [OP_CALL] = "call", + [OP_RETURN] = "return", + + /* Immediate loads (only 32-bit variant needed) */ + [OP_LOAD_IMM] = "load-immediate", + + /* Register-indirect loads */ + [OP_LOAD_IND_8] = "load-indirect-8", + [OP_LOAD_IND_16] = "load-indirect-16", + [OP_LOAD_IND_32] = "load-indirect-32", + + /* Absolute address loads */ + [OP_LOAD_ABS_8] = "load-absolute-8", + [OP_LOAD_ABS_16] = "load-absolute-16", + [OP_LOAD_ABS_32] = "load-absolute-32", + + /* Base+offset loads */ + [OP_LOAD_OFF_8] = "load-offset-8", + [OP_LOAD_OFF_16] = "load-offset-16", + [OP_LOAD_OFF_32] = "load-offset-32", + + /* Absolute address stores */ + [OP_STORE_ABS_8] = "store-absolute-8", + [OP_STORE_ABS_16] = "store-absolute-16", + [OP_STORE_ABS_32] = "store-absolute-32", + + /* Register-indirect stores */ + [OP_STORE_IND_8] = "store-indirect-8", + [OP_STORE_IND_16] = "store-indirect-16", + [OP_STORE_IND_32] = "store-indirect-32", + + /* Base+offset stores */ + [OP_STORE_OFF_8] = "store-offset-8", + [OP_STORE_OFF_16] = "store-offset-16", + [OP_STORE_OFF_32] = "store-offset-32", + + /* Memory operations */ + [OP_MALLOC] = "malloc", + [OP_MEMSET_8] = "memset-8", + [OP_MEMSET_16] = "memset-16", + [OP_MEMSET_32] = "memset-32", + + /* Register operations */ + [OP_REG_MOV] = "register-move", + [OP_SYSCALL] = "syscall", + + /* Bit operations */ + [OP_BIT_SHIFT_LEFT] = "bit-shift-left", + [OP_BIT_SHIFT_RIGHT] = "bit-shift-right", + [OP_BIT_SHIFT_R_EXT] = "bit-shift-re", + [OP_BAND] = "bit-and", + [OP_BOR] = "bit-or", + [OP_BXOR] = "bit-xor", + + /* Integer arithmetic */ + [OP_ADD_INT] = "add-int", + [OP_SUB_INT] = "sub-int", + [OP_MUL_INT] = "mul-int", + [OP_DIV_INT] = "div-int", + + /* Natural number arithmetic */ + [OP_ADD_NAT] = "add-nat", + [OP_SUB_NAT] = "sub-nat", + [OP_MUL_NAT] = "mul-nat", + [OP_DIV_NAT] = "div-nat", + + /* Floating point operations */ + [OP_ADD_REAL] = "add-real", + [OP_SUB_REAL] = "sub-real", + [OP_MUL_REAL] = "mul-real", + [OP_DIV_REAL] = "div-real", + + /* Type conversions */ + [OP_INT_TO_REAL] = "int-to-real", + [OP_NAT_TO_REAL] = "nat-to-real", + [OP_REAL_TO_INT] = "real-to-int", + [OP_REAL_TO_NAT] = "real-to-nat", + + /* Integer comparisons */ + [OP_JEQ_INT] = "jump-eq-int", + [OP_JNEQ_INT] = "jump-neq-int", + [OP_JGT_INT] = "jump-gt-int", + [OP_JLT_INT] = "jump-lt-int", + [OP_JLE_INT] = "jump-le-int", + [OP_JGE_INT] = "jump-ge-int", + + /* Natural number comparisons */ + [OP_JEQ_NAT] = "jump-eq-nat", + [OP_JNEQ_NAT] = "jump-neq-nat", + [OP_JGT_NAT] = "jump-gt-nat", + [OP_JLT_NAT] = "jump-lt-nat", + [OP_JLE_NAT] = "jump-le-nat", + [OP_JGE_NAT] = "jump-ge-nat", + + /* Floating point comparisons */ + [OP_JEQ_REAL] = "jump-eq-real", + [OP_JNEQ_REAL] = "jump-neq-real", + [OP_JGE_REAL] = "jump-ge-real", + [OP_JGT_REAL] = "jump-gt-real", + [OP_JLT_REAL] = "jump-lt-real", + [OP_JLE_REAL] = "jump-le-real", + + /* String operations */ + [OP_STRLEN] = "string-length", + [OP_STREQ] = "string-eq", + [OP_STRCAT] = "string-concat", + [OP_STR_GET_CHAR] = "string-get-char", + [OP_STR_FIND_CHAR] = "string-find-char", + [OP_STR_SLICE] = "string-slice", + + /* String conversions */ + [OP_INT_TO_STRING] = "int-to-string", + [OP_NAT_TO_STRING] = "nat-to-string", + [OP_REAL_TO_STRING] = "real-to-string", + [OP_STRING_TO_INT] = "string-to-int", + [OP_STRING_TO_NAT] = "string-to-nat", + [OP_STRING_TO_REAL] = "string-to-real"}; if (op < 0 || op >= (int)(sizeof(names) / sizeof(names[0]))) { return ""; @@ -140,19 +139,19 @@ const char *opcode_to_string(Opcode op) { return name ? name : ""; } -void emit_op(VM *vm, u8 byte) { +void emit_op(VM *vm, u8 byte) { printf("vm->code[%d] = %s\n", vm->cp, opcode_to_string(byte)); - vm->code[vm->cp] = byte; + vm->code[vm->cp] = byte; } -void emit_byte(VM *vm, u8 byte) { +void emit_byte(VM *vm, u8 byte) { printf("vm->code[%d] = %d\n", vm->cp, byte); - vm->code[vm->cp] = byte; + vm->code[vm->cp] = byte; } -void emit_u32(VM *vm, u32 value) { - printf("vm->code[%d..%d] = %d\n", vm->cp, vm->cp+3, value); - write_u32(vm, code, vm->cp, value); +void emit_u32(VM *vm, u32 value) { + printf("vm->code[%d..%d] = %d\n", vm->cp, vm->cp + 3, value); + write_u32(vm, code, vm->cp, value); } void symbol_table_init(SymbolTable *table) { @@ -161,7 +160,28 @@ void symbol_table_init(SymbolTable *table) { table->capacity = 16; } +Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { + for (u32 i = 0; i < table->count; i++) { + if (table->symbols[i].name_length == length) { + if (strleq(table->symbols[i].name, name, length)) { + return &table->symbols[i]; + } + } + } + return nil; +} + u32 symbol_table_add(SymbolTable *table, Symbol s) { + Symbol *sym = symbol_table_lookup(table, s.name, s.name_length); + if (sym != nil) { + fprintf(stderr, + "Error: Symbol '%.*s' already defined, the assembler is not smart " + "enough to do scope properly so please pick a different variable " + "name (hard I know)\n", + s.name_length, s.name); + exit(1); + } + if (table->count >= table->capacity) { table->capacity *= 2; table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); @@ -182,17 +202,6 @@ u32 symbol_table_add(SymbolTable *table, Symbol s) { return index; } -Symbol *symbol_table_lookup(SymbolTable *table, const char *name, u32 length) { - for (u32 i = 0; i < table->count; i++) { - if (table->symbols[i].name_length == length) { - if (strleq(table->symbols[i].name, name, length)) { - return &table->symbols[i]; - } - } - } - return nil; -} - u32 get_ref(SymbolTable *st, const char *name, u32 length) { Symbol *sym = symbol_table_lookup(st, name, length); if (!sym) { @@ -591,6 +600,109 @@ void define_branch(VM *vm, SymbolTable *st) { symbol_table_add(st, s); } +int get_instruction_byte_size(const char *opname) { + + // Return (1 + 1) + if (strcmp(opname, "return") == 0) { + return 2; // 1 byte opcode + 1 byte return register + } + + if (strcmp(opname, "neg_int") == 0 || strcmp(opname, "abs_int") == 0 || + strcmp(opname, "neg_nat") == 0 || strcmp(opname, "abs_nat") == 0 || + strcmp(opname, "neg_real") == 0 || strcmp(opname, "abs_real") == 0 || + strcmp(opname, "int_to_string") == 0 || + strcmp(opname, "load_indirect_8") == 0 || + strcmp(opname, "nat_to_string") == 0 || + strcmp(opname, "load_indirect_16") == 0 || + strcmp(opname, "real_to_string") == 0 || + strcmp(opname, "load_indirect_32") == 0 || + strcmp(opname, "int_to_real") == 0 || + strcmp(opname, "store_indirect_8") == 0 || + strcmp(opname, "nat_to_real") == 0 || + strcmp(opname, "store_indirect_16") == 0 || + strcmp(opname, "real_to_int") == 0 || + strcmp(opname, "store_indirect_32") == 0 || + strcmp(opname, "real_to_nat") == 0 || strcmp(opname, "nat_to_int") == 0 || + strcmp(opname, "int_to_nat") == 0 || + strcmp(opname, "string_length") == 0 || + strcmp(opname, "store_absolute_32") == 0 || + strcmp(opname, "store_absolute_8") == 0 || + strcmp(opname, "store_absolute_16") == 0 || + strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 || + strcmp(opname, "memset_8") == 0 || strcmp(opname, "memset_16") == 0 || + strcmp(opname, "register_move") == 0 || strcmp(opname, "malloc") == 0) { + return 3; + } + + // Register_register_register opcodes (4 bytes: 1 + 3) + if (strcmp(opname, "add_int") == 0 || strcmp(opname, "sub_int") == 0 || + strcmp(opname, "mul_int") == 0 || strcmp(opname, "div_int") == 0 || + strcmp(opname, "add_nat") == 0 || strcmp(opname, "sub_nat") == 0 || + strcmp(opname, "mul_nat") == 0 || strcmp(opname, "div_nat") == 0 || + strcmp(opname, "add_real") == 0 || strcmp(opname, "sub_real") == 0 || + strcmp(opname, "bit_shift_left") == 0 || + strcmp(opname, "bit_shift_right") == 0 || + strcmp(opname, "bit_shift_r_ext") == 0 || + strcmp(opname, "bit_and") == 0 || strcmp(opname, "bit_or") == 0 || + strcmp(opname, "bit_xor") == 0 || strcmp(opname, "mul_real") == 0 || + strcmp(opname, "div_real") == 0) { + return 4; + } + + // (5 bytes: 1 + 4) + if (strcmp(opname, "halt") == 0 || strcmp(opname, "jump_if_flag") == 0 || + strcmp(opname, "jump") == 0) { + return 5; + } + + // Load, Load_immediate (6 bytes: 1 + 1 + 4) + if (strcmp(opname, "load_absolute_32") == 0 || + strcmp(opname, "load_immediate") == 0 || + strcmp(opname, "load_address") == 0 || + strcmp(opname, "load_absolute_16") == 0 || + strcmp(opname, "load_absolute_8") == 0) { + return 6; + } + + // jump compare (7 bytes: 1 + 4 + 1 + 1) + if (strcmp(opname, "jump_eq_int") == 0 || + strcmp(opname, "jump_neq_int") == 0 || + strcmp(opname, "jump_gt_int") == 0 || + strcmp(opname, "jump_lt_int") == 0 || + strcmp(opname, "jump_le_int") == 0 || + strcmp(opname, "jump_ge_int") == 0 || + strcmp(opname, "jump_eq_nat") == 0 || + strcmp(opname, "jump_neq_nat") == 0 || + strcmp(opname, "jump_gt_nat") == 0 || + strcmp(opname, "jump_lt_nat") == 0 || + strcmp(opname, "jump_le_nat") == 0 || + strcmp(opname, "jump_ge_nat") == 0 || + strcmp(opname, "jump_eq_real") == 0 || + strcmp(opname, "jump_neq_real") == 0 || + strcmp(opname, "jump_gt_real") == 0 || + strcmp(opname, "jump_lt_real") == 0 || + strcmp(opname, "jump_le_real") == 0 || + strcmp(opname, "jump_ge_real") == 0 || + strcmp(opname, "store_offset_8") == 0 || + strcmp(opname, "store_offset_16") == 0 || + strcmp(opname, "store_offset_32") == 0 || + strcmp(opname, "load_offset_8") == 0 || + strcmp(opname, "load_offset_16") == 0 || + strcmp(opname, "load_offset_32") == 0) { + return 7; + } + + fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); + exit(-1); +} + +#define FAKE_OP(op) \ + } \ + else if (strleq(token.start, op, token.length)) { \ + while (token.type != TOKEN_SEMICOLON) \ + token = next_token(); \ + vm->cp += get_instruction_byte_size(op); + /** * Build the symbol table and calculate the types/size/offsets of all values. */ @@ -604,69 +716,69 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { exit(1); } - printf("Line %d [%s]: %.*s cp=%d mp=%d\n", token.line, - token_type_to_string(token.type), token.length, token.start, vm->cp, - vm->mp); - - if (token.type == TOKEN_KEYWORD_GLOBAL) { - define_global(vm, st); - continue; - } - - if (token.type == TOKEN_KEYWORD_FN) { - define_function(vm, st); - continue; - } - - if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || - token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || - token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || - token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || - token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { - define_var(st, token); - next_token_is(TOKEN_SEMICOLON); - continue; - } - - if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || - token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || - token.type == TOKEN_KEYWORD_FOR) { - define_branch(vm, st); - continue; - } - - if (token.type == TOKEN_KEYWORD_RETURN) { - vm->cp++; - - Token next = next_token(); - if (next.type == TOKEN_SEMICOLON) { - /* put 0xFF as return register */ - vm->cp++; + if (token.type != TOKEN_EOF) { + if (token.type == TOKEN_KEYWORD_GLOBAL) { + define_global(vm, st); continue; } - get_reg(next, st); - vm->cp++; - next_token_is(TOKEN_SEMICOLON); - continue; - } + if (token.type == TOKEN_KEYWORD_FN) { + define_function(vm, st); + continue; + } - if (token.type == TOKEN_IDENTIFIER) { - // check to see if it is an opcode first - if (strleq(token.start, "exit", token.length)) { + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { + define_var(st, token); + next_token_is(TOKEN_SEMICOLON); + continue; + } + + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(vm, st); + continue; + } + + if (token.type == TOKEN_KEYWORD_RETURN) { vm->cp++; - next_token(); - vm->cp += 4; + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + vm->cp++; + continue; + } + get_reg(next, st); + vm->cp++; next_token_is(TOKEN_SEMICOLON); - } else if (strleq(token.start, "call", token.length)) { + continue; + } + + if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first + if (strleq(token.start, "exit", token.length)) { + + vm->cp++; + + next_token(); + vm->cp += 4; + + next_token_is(TOKEN_SEMICOLON); + } else if (strleq(token.start, "call", token.length)) { + vm->cp++; next_token_is(TOKEN_IDENTIFIER); vm->cp += 4; bool has_return = false; + u8 arg_count = 0; vm->cp++; Token next = next_token(); @@ -674,471 +786,125 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { if (next.type != TOKEN_ARROW_RIGHT) { get_reg(next, st); vm->cp++; + arg_count++; } else { has_return = true; + arg_count--; // is a return not an arg } next = next_token(); } if (!has_return) { - vm->cp+=2; + vm->cp++; continue; } - } else if (strleq(token.start, "syscall", token.length)) { - vm->cp++; + } else if (strleq(token.start, "syscall", token.length)) { - Token next = next_token(); - vm->cp += 4; + vm->cp++; + + Token next = next_token(); + vm->cp += 4; - next = next_token(); - while (next.type != TOKEN_SEMICOLON) { - if (next.type != TOKEN_ARROW_RIGHT) { - vm->cp++; - } next = next_token(); - } + while (next.type != TOKEN_SEMICOLON) { + if (next.type != TOKEN_ARROW_RIGHT) { + get_reg(next, st); + vm->cp++; + } + next = next_token(); + } - } else if (strleq(token.start, "load_immediate", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_address", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "malloc", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "memset_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "memset_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "memset_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "load_offset_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_offset_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_offset_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_indirect_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - } else if (strleq(token.start, "load_indirect_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - } else if (strleq(token.start, "load_indirect_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_absolute_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_absolute_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "load_absolute_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "store_absolute_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_absolute_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_absolute_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_indirect_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_indirect_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_indirect_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - } else if (strleq(token.start, "store_offset_8", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "store_offset_16", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "store_offset_32", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp += 4; - vm->cp++; - } else if (strleq(token.start, "register_move", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "add_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "sub_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "mul_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "div_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "abs_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "neg_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "add_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "sub_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "mul_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "div_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "abs_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "neg_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "add_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "sub_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "mul_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "div_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "abs_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "neg_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "int_to_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "nat_to_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "real_to_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "real_to_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_shift_left", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_shift_right", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_and", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_or", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "bit_xor", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - } else if (strleq(token.start, "jump_if_flag", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - } else if (strleq(token.start, "jump_eq_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_neq_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_gt_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_lt_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_le_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_ge_int", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_eq_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_neq_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_gt_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_lt_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_le_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_ge_nat", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_eq_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_neq_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_ge_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_gt_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_lt_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "jump_le_real", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp+=4; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "string_length", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "int_to_string", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "nat_to_string", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "real_to_string", token.length)) { - while (token.type != TOKEN_SEMICOLON) token = next_token(); - vm->cp++; - vm->cp++; - vm->cp++; - } else if (strleq(token.start, "string_eq", token.length)) { - } else if (strleq(token.start, "string_concat", token.length)) { - } else if (strleq(token.start, "string_get_char", token.length)) { - } else if (strleq(token.start, "string_find_char", token.length)) { - } else if (strleq(token.start, "string_slice", token.length)) { - } else if (strleq(token.start, "string_to_int", token.length)) { - } else if (strleq(token.start, "string_to_nat", token.length)) { - } else if (strleq(token.start, "string_to_real", token.length)) { - } else { - // some other identifier - printf("Unknown id at line %d: %.*s\n", token.line, token.length, - token.start); - exit(1); + FAKE_OP("load_immediate") + FAKE_OP("load_address") + FAKE_OP("malloc") + FAKE_OP("memset_8") + FAKE_OP("memset_16") + FAKE_OP("memset_32") + FAKE_OP("load_offset_8") + FAKE_OP("load_offset_16") + FAKE_OP("load_offset_32") + FAKE_OP("load_indirect_8") + FAKE_OP("load_indirect_16") + FAKE_OP("load_indirect_32") + FAKE_OP("load_absolute_8") + FAKE_OP("load_absolute_16") + FAKE_OP("load_absolute_32") + FAKE_OP("store_absolute_8") + FAKE_OP("store_absolute_16") + FAKE_OP("store_absolute_32") + FAKE_OP("store_indirect_8") + FAKE_OP("store_indirect_16") + FAKE_OP("store_indirect_32") + FAKE_OP("store_offset_8") + FAKE_OP("store_offset_16") + FAKE_OP("store_offset_32") + FAKE_OP("register_move") + FAKE_OP("add_int") + FAKE_OP("sub_int") + FAKE_OP("mul_int") + FAKE_OP("div_int") + FAKE_OP("abs_int") + FAKE_OP("neg_int") + FAKE_OP("add_nat") + FAKE_OP("sub_nat") + FAKE_OP("mul_nat") + FAKE_OP("div_nat") + FAKE_OP("abs_nat") + FAKE_OP("neg_nat") + FAKE_OP("add_real") + FAKE_OP("sub_real") + FAKE_OP("mul_real") + FAKE_OP("div_real") + FAKE_OP("abs_real") + FAKE_OP("neg_real") + FAKE_OP("int_to_real") + FAKE_OP("nat_to_real") + FAKE_OP("real_to_int") + FAKE_OP("real_to_nat") + FAKE_OP("bit_shift_left") + FAKE_OP("bit_shift_right") + FAKE_OP("bit_shift_r_ext") + FAKE_OP("bit_and") + FAKE_OP("bit_or") + FAKE_OP("bit_xor") + FAKE_OP("jump") + FAKE_OP("jump_if_flag") + FAKE_OP("jump_eq_int") + FAKE_OP("jump_neq_int") + FAKE_OP("jump_gt_int") + FAKE_OP("jump_lt_int") + FAKE_OP("jump_le_int") + FAKE_OP("jump_ge_int") + FAKE_OP("jump_eq_nat") + FAKE_OP("jump_neq_nat") + FAKE_OP("jump_gt_nat") + FAKE_OP("jump_lt_nat") + FAKE_OP("jump_le_nat") + FAKE_OP("jump_ge_nat") + FAKE_OP("jump_eq_real") + FAKE_OP("jump_neq_real") + FAKE_OP("jump_ge_real") + FAKE_OP("jump_gt_real") + FAKE_OP("jump_lt_real") + FAKE_OP("jump_le_real") + FAKE_OP("string_length") + FAKE_OP("int_to_string") + FAKE_OP("nat_to_string") + FAKE_OP("real_to_string") + FAKE_OP("string_eq") + FAKE_OP("string_concat") + FAKE_OP("string_get_char") + FAKE_OP("string_find_char") + FAKE_OP("string_slice") + FAKE_OP("string_to_int") + FAKE_OP("string_to_nat") + FAKE_OP("string_to_real") + } else { + // some other identifier + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } } } } while (token.type != TOKEN_EOF); @@ -1148,8 +914,6 @@ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { * 2nd pass, emit the bytecode */ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { - USED(st); - Token token; init_lexer(source); do { @@ -1159,9 +923,6 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { break; } if (token.type != TOKEN_EOF) { - //printf("[Generate Bytecode cp=%d mp=%d ] Line %d [%s]: %.*s\n", vm->cp, - // vm->mp, token.line, token_type_to_string(token.type), token.length, - // token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed @@ -1269,7 +1030,7 @@ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { printf("^vm->code[%d] = %d\n", arg_pos, arg_count); if (!has_return) { - vm->cp+=2; + vm->cp++; emit_byte(vm, 255); continue; } diff --git a/test/fib.ul.ir b/test/fib.ul.ir index d283508..36f4de0 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -2,13 +2,12 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; function main () - int n $0; int str_n $1; - load_immediate 35 -> n; - call fib n -> n; - int_to_string n -> str_n; - call pln str_n -> void; + load_immediate 36 -> $0; + call fib $0 -> $0; + int_to_string $0 -> str_n; + call pln str_n; exit 0; function fib (int n $0) diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 4f0a609..8243a1e 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -1,12 +1,12 @@ global str terminal_namespace = "/dev/term/0"; global str new_line = "\n"; -global str message = "nuqneH 'u'?"; +global str hello = "nuqneH 'u'?"; function main () - str hello $0; + str msg $0; - load_address message -> hello; - call pln hello; + load_address hello -> msg; + call pln msg; exit 0; function pln (str message $0) diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 06db690..54dbede 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -5,8 +5,8 @@ global str new_line = "\n"; function main () real a $0; int i $1; - int mode $11; - str term $10; + int in_mode $11; + str in_term $10; load_immediate 5.0 -> a; load_immediate 5000 -> i; @@ -18,20 +18,20 @@ function main () add_int i $3 -> i; jump_ge_int loop_body i $2; - load_address terminal_namespace -> term; - load_immediate 0 -> mode; - syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> in_term; + load_immediate 0 -> in_mode; + syscall OPEN in_term in_mode -> in_term; // Terminal term = open("/dev/term/0", 0); nat b $1; real_to_nat a -> b; load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE term $7 $8; // print prompt + syscall WRITE in_term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ term user_string $8; // read in max 32 byte string + syscall READ in_term user_string $8; // read in max 32 byte string call pln user_string; nat_to_string b -> $4; diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index 87e4109..8eb06fd 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -3,23 +3,23 @@ global str prompt = "Enter a string:"; global str new_line = "\n"; function main () - int mode $11; - str term $10; + int in_mode $11; + str in_term $10; - load_immediate terminal_namespace -> term; - load_immediate 0 -> mode; - syscall OPEN term mode -> term; // Terminal term = open("/dev/term/0", 0); + load_address terminal_namespace -> in_term; + load_immediate 0 -> in_mode; + syscall OPEN in_term in_mode -> in_term; // Terminal term = open("/dev/term/0", 0); - load_immediate prompt -> $7; + load_address prompt -> $7; string_length $7 -> $8; - syscall WRITE term $7 $8; // print prompt + syscall WRITE in_term $7 $8; // print prompt str user_string $9; load_immediate 32 -> $8; malloc $8 -> user_string; - syscall READ term user_string $8; // read in max 32 byte string + syscall READ in_term user_string $8; // read in max 32 byte string - call pln user_string -> void; + call pln user_string; exit 0; function pln (str message $0)