From 4f73339efb6108ec64c0c46b1b01673f89fff9c6 Mon Sep 17 00:00:00 2001 From: zongor Date: Sat, 29 Nov 2025 00:50:25 -0800 Subject: [PATCH] wip symbols + offsets --- src/tools/assembler/assembler.c | 623 ++++++++++++++++++++++---------- src/tools/assembler/assembler.h | 26 +- src/vm/opcodes.h | 39 +- test/add.ul.ir | 12 +- test/loop.ul.ir | 5 +- test/malloc.ul.ir | 21 +- test/paint.ul.ir | 118 +++--- test/window.ul.ir | 14 +- 8 files changed, 559 insertions(+), 299 deletions(-) diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 39bf0da..3110e60 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -8,76 +8,6 @@ #include #include -SymbolTable *symbol_table_init() { - SymbolTable *table = malloc(sizeof(SymbolTable)); - table->symbols = malloc(16 * sizeof(Symbol)); - table->count = 0; - table->capacity = 16; - return table; -} - -NamesTable *names_table_init() { - NamesTable *table = malloc(sizeof(NamesTable)); - table->names = malloc(16 * sizeof(char *)); - table->count = 0; - table->capacity = 16; - return table; -} - -u32 names_table_add(NamesTable *table, const char *name) { - for (u32 i = 0; i < table->count; i++) { - if (strcmp(table->names[i], name) == 0) { - return i; - } - } - - if (table->count >= table->capacity) { - table->capacity *= 2; - table->names = realloc(table->names, table->capacity * sizeof(char *)); - } - - table->names[table->count] = malloc(strlen(name) + 1); - strcpy(table->names[table->count], name); - u32 index = table->count; - table->count++; - return index; -} - -u32 symbol_table_add(SymbolTable *table, Symbol *s) { - if (table->count >= table->capacity) { - table->capacity *= 2; - table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); - } - - table->symbols[table->count] = *s; - u32 index = table->count; - table->count++; - return index; -} - -Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table, - const char *name) { - for (u32 i = 0; i < nt->count; i++) { - if (strcmp(nt->names[i], name) == 0) { - for (u32 j = 0; j < table->count; j++) { - if (table->symbols[j].name == i) { - return &table->symbols[j]; - } - } - } - } - return nil; -} - -u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) { - Symbol *sym = symbol_table_lookup(nt, table, name); - if (!sym) { - fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); - exit(1); - } - return sym->ref; -} - void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } void emit_u32(VM *vm, u32 value) { @@ -87,70 +17,96 @@ void emit_u32(VM *vm, u32 value) { void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } -int parse_register(const char *reg_str) { - if (reg_str[0] != '$') - return -1; - return atoi(reg_str + 1); +SymbolTable *symbol_table_init() { + SymbolTable *table = malloc(sizeof(SymbolTable)); + table->symbols = malloc(16 * sizeof(Symbol)); + table->count = 0; + table->capacity = 16; + return table; } -u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { - // symbol references (e.g., &label) - if (ref[0] == '&') { - return get_ref(nt, table, ref + 1); +u32 symbol_table_add(SymbolTable *table, Symbol s) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); } - // fixed-point numbers - if (strchr(ref, '.')) { - return float_to_fixed(atof(ref)); + table->symbols[table->count] = s; + u32 index = table->count; + table->count++; + return index; +} + +Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { + for (u32 i = 0; i < table->count; i++) { + if (streq(table->symbols[i].name, name)) { + return &table->symbols[i]; + } } + return nil; +} - // decimal literals - char *endptr; - u32 value = (u32)strtoul(ref, &endptr, 10); +u32 get_ref(VM *vm, SymbolTable *st, const char *name, ScopeType scope) { + Symbol *sym = symbol_table_lookup(st, name); + if (!sym) { + fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); + exit(1); + return 0; + } + return sym->ref; +} - if (endptr == ref || *endptr != '\0') { - fprintf(stderr, "Invalid decimal literal: %s\n", ref); +Token nextTokenIs(TokenType type) { + Token token = nextToken(); + if (token.type != type) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); exit(1); } - return value; + return token; } -bool global(VM *vm, NamesTable *nt, SymbolTable *st) { - Symbol *s = (Symbol *)malloc(sizeof(Symbol)); - ValueType t; +/** + * Global . + */ +bool define_global(VM *vm, SymbolTable *st) { + Symbol s; Token token_type = nextToken(); switch (token_type.type) { + case TOKEN_TYPE_BOOL: + s.type = BOOL; + s.size = 1; + break; case TOKEN_TYPE_I8: - t.type = I8; - t.size = 1; + s.type = I8; + s.size = 1; break; case TOKEN_TYPE_U8: - t.type = U8; - t.size = 1; + s.type = U8; + s.size = 1; break; case TOKEN_TYPE_I16: - t.type = I16; - t.size = 2; + s.type = I16; + s.size = 2; break; case TOKEN_TYPE_U16: - t.type = U16; - t.size = 2; + s.type = U16; + s.size = 2; break; case TOKEN_TYPE_INT: - t.type = I32; - t.size = 4; + s.type = I32; + s.size = 4; break; case TOKEN_TYPE_NAT: - t.type = U32; - t.size = 4; + s.type = U32; + s.size = 4; break; case TOKEN_TYPE_REAL: - t.type = F32; - t.size = 4; + s.type = F32; + s.size = 4; break; case TOKEN_TYPE_STR: - t.type = STR; + s.type = STR; break; case TOKEN_IDENTIFIER: break; @@ -158,134 +114,291 @@ bool global(VM *vm, NamesTable *nt, SymbolTable *st) { return false; } - Token eq = nextToken(); - if (eq.type != TOKEN_EQ) - return false; + Token eq = nextTokenIs(TOKEN_EQ); + Token name = nextTokenIs(TOKEN_IDENTIFIER); - Token name = nextToken(); - if (name.type != TOKEN_IDENTIFIER) + if (name.length > MAX_SYMBOL_NAME_LENGTH) { return false; + } - s->name = names_table_add(nt, name.start); + memcpy(s.name, name.start, name.length); u32 addr = vm->mp; - s->ref = addr; + s.ref = addr; + s.scope = GLOBAL; - u32 result; Token value = nextToken(); switch (value.type) { - case TOKEN_LITERAL_INT: - case TOKEN_LITERAL_NAT: - case TOKEN_LITERAL_REAL: - result = resolve_symbol(nt, st, value.start); - write_u32(vm, memory, addr, result); + case TOKEN_KEYWORD_TRUE: { + u32 addr = vm->mp; + write_u8(vm, memory, addr, 1); - vm->mp += t.size; - vm->frames[vm->fp].end += t.size; + vm->mp += 1; + vm->frames[vm->fp].end += 1; + } + case TOKEN_KEYWORD_FALSE: { + u32 addr = vm->mp; + write_u8(vm, memory, addr, 0); + + vm->mp += 1; + vm->frames[vm->fp].end += 1; + } + case TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", value.start); + exit(1); + } + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; + } + case TOKEN_LITERAL_REAL: { + fixed_t out = float_to_fixed(atof(value.start)); + + u32 addr = vm->mp; + write_u32(vm, memory, addr, out); + + vm->mp += s.size; + vm->frames[vm->fp].end += s.size; break; + } case TOKEN_LITERAL_STR: { - const char* src = value.start; - u32 len = 0; - u32 i = 0; + const char *src = value.start; + u32 len = 0; + u32 i = 0; - while (i < value.length) { - char c = src[i++]; - if (c == '\\' && i < value.length) { - switch (src[i++]) { - case 'n': c = '\n'; break; - case 't': c = '\t'; break; - case 'r': c = '\r'; break; - case '\\': case '"': case '\'': break; // Keep as-is - default: i--; // Rewind for unknown escapes - } - } - write_u8(vm, memory, addr + 4 + len++, c); + while (i < value.length) { + char c = src[i++]; + if (c == '\\' && i < value.length) { + switch (src[i++]) { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + case '\\': + case '"': + case '\'': + break; // Keep as-is + default: + i--; // Rewind for unknown escapes + } } + write_u8(vm, memory, addr + 4 + len++, c); + } - u32 size = len + 5; // 4 (len) + dst_len + 1 (null) - vm->mp += size; - vm->frames[vm->fp].end += size; - write_u32(vm, memory, addr, len); - write_u8(vm, memory, addr + 4 + len, '\0'); - break; + u32 size = len + 5; // 4 (len) + dst_len + 1 (null) + s.size = size; + + vm->mp += size; + vm->frames[vm->fp].end += size; + + write_u32(vm, memory, addr, len); + write_u8(vm, memory, addr + 4 + len, '\0'); + break; } default: return false; } - s->type = t; symbol_table_add(st, s); return true; } -bool function(VM *vm, NamesTable *nt, SymbolTable *st) { - USED(vm); - USED(nt); - USED(st); - return true; +/** + * Var . + */ +void define_var(VM *vm, SymbolTable *st, Token regType) { + Symbol s; + s.scope = VAR; + switch (regType.type) { + case TOKEN_KEYWORD_PLEX: { + s.type = PLEX; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; + } + case TOKEN_TYPE_I8: { + s.type = I8; + s.size = 1; + break; + } + case TOKEN_TYPE_I16: { + s.type = I16; + s.size = 2; + break; + } + case TOKEN_TYPE_INT: { + s.type = I32; + s.size = 4; + break; + } + case TOKEN_TYPE_U8: { + s.type = U8; + s.size = 1; + break; + } + case TOKEN_TYPE_U16: { + s.type = U16; + s.size = 2; + break; + } + case TOKEN_TYPE_NAT: { + s.type = U32; + s.size = 4; + break; + } + case TOKEN_TYPE_REAL: { + s.type = REAL; + s.size = 4; + break; + } + case TOKEN_TYPE_BOOL: { + s.type = BOOL; + s.size = 1; + break; + } + case TOKEN_TYPE_STR: { + s.type = STR; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; + } + default: + printf("ERROR at line %d: %.*s\n", regType.line, regType.length, + regType.start); + exit(1); + } + + Token name = nextTokenIs(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + + memcpy(s.name, name.start, name.length); + + nextTokenIs(TOKEN_BIG_MONEY); + + Token reg_num = nextTokenIs(TOKEN_LITERAL_INT); + s.ref = atoi(reg_num.start); + symbol_table_add(st, s); } -bool variable(VM *vm, NamesTable *nt, SymbolTable *st) { - USED(vm); - USED(nt); - USED(st); - return true; +/** + * function . + */ +void define_function(vm *vm, SymbolTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = FUNCTION; + + Token name = nextTokenIs(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + memcpy(s.name, name.start, name.length); + + nextTokenIs(TOKEN_LPAREN); + + Token next = nextToken(); + while (next.type != TOKEN_RPAREN) { + Token regType = nextToken(); + define_var(vm, st, regType); + + Token comma = nextToken(); + if (comma.type == TOKEN_COMMA) { + continue; + } else if (comma.type == TOKEN_RPAREN) { + break; + } else { + printf("ERROR at line %d: %.*s\n", comma.line, comma.length, comma.start); + exit(1); + } + } + s.ref = vm->pc; + symbol_table_add(st, s); } -bool label(VM *vm, NamesTable *nt, SymbolTable *st) { - USED(vm); - USED(nt); - USED(st); - return true; +/** + * Branch. + */ +void define_branch(VM *vm, SymbolTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = VOID; + + token name = nextTokenIs(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("BRANCH NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + memcpy(s.name, name.start, name.length); + + s.ref = vm->pc; + symbol_table_add(st, s); } -void assemble(VM *vm, char *source) { - SymbolTable *st = symbol_table_init(); - NamesTable *nt = names_table_init(); - - initLexer(source); +/** + * Build the symbol table and calculate the types/size/offsets of all values. + */ +void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token token; + initLexer(source); do { token = nextToken(); if (token.type == TOKEN_ERROR) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); - break; + exit(1); } if (token.type != TOKEN_EOF) { printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { - if (!global(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + define_global(vm, st); + continue; } if (token.type == TOKEN_KEYWORD_FN) { - if (!function(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + define_function(vm, st); + continue; } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || - token.type == TOKEN_TYPE_STR) { - if (!variable(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { + define_var(vm, st, token); + continue; } - if (token.type == TOKEN_KEYWORD_LOOP || - token.type == TOKEN_KEYWORD_ELSE) { - if (!label(vm, nt, st)) { - printf("ERROR at line %d: %.*s\n", token.line, token.length, - token.start); - } + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(vm, st); + continue; } if (token.type == TOKEN_IDENTIFIER) { @@ -385,3 +498,149 @@ void assemble(VM *vm, char *source) { } } while (token.type != TOKEN_EOF); } + +/** + * 2nd pass, emit the bytecode + */ +void emit_bytecode(VM *vm, char *source, SymbolTable *st) { + Token token; + initLexer(source); + do { + token = nextToken(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + break; + } + if (token.type != TOKEN_EOF) { + printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), + token.length, token.start); + + if (token.type == TOKEN_KEYWORD_GLOBAL) { + // ignore, already processed + } + + if (token.type == TOKEN_KEYWORD_FN) { + // ignore, already processed + } + + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR) { + // ignore, already processed + } + + if (token.type == TOKEN_KEYWORD_LOOP || + token.type == TOKEN_KEYWORD_ELSE) { + // ignore, already processed + } + + if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first + if (streq(token.start, "exit")) { + } else if (streq(token.start, "call")) { + } else if (streq(token.start, "syscall")) { + } else if (streq(token.start, "load_immediate")) { + } else if (streq(token.start, "load_indirect_8")) { + } else if (streq(token.start, "load_indirect_16")) { + } else if (streq(token.start, "load_indirect_32")) { + } else if (streq(token.start, "load_absolute_8")) { + } else if (streq(token.start, "load_absolute_16")) { + } else if (streq(token.start, "load_absolute_32")) { + } else if (streq(token.start, "load_offset_8")) { + } else if (streq(token.start, "load_offset_16")) { + } else if (streq(token.start, "load_offset_32")) { + } else if (streq(token.start, "store_absolute_8")) { + } else if (streq(token.start, "store_absolute_16")) { + } else if (streq(token.start, "store_absolute_32")) { + } else if (streq(token.start, "store_indirect_8")) { + } else if (streq(token.start, "store_indirect_16")) { + } else if (streq(token.start, "store_indirect_32")) { + } else if (streq(token.start, "store_offset_8")) { + } else if (streq(token.start, "store_offset_16")) { + } else if (streq(token.start, "store_offset_32")) { + } else if (streq(token.start, "malloc")) { + } else if (streq(token.start, "malloc_immediate")) { + } else if (streq(token.start, "memset_8")) { + } else if (streq(token.start, "memset_16")) { + } else if (streq(token.start, "memset_32")) { + } else if (streq(token.start, "register_move")) { + } else if (streq(token.start, "add_int")) { + } else if (streq(token.start, "sub_int")) { + } else if (streq(token.start, "mul_int")) { + } else if (streq(token.start, "div_int")) { + } else if (streq(token.start, "abs_int")) { + } else if (streq(token.start, "neg_int")) { + } else if (streq(token.start, "add_nat")) { + } else if (streq(token.start, "sub_nat")) { + } else if (streq(token.start, "mul_nat")) { + } else if (streq(token.start, "div_nat")) { + } else if (streq(token.start, "abs_nat")) { + } else if (streq(token.start, "neg_nat")) { + } else if (streq(token.start, "add_real")) { + } else if (streq(token.start, "sub_real")) { + } else if (streq(token.start, "mul_real")) { + } else if (streq(token.start, "div_real")) { + } else if (streq(token.start, "abs_real")) { + } else if (streq(token.start, "neg_real")) { + } else if (streq(token.start, "int_to_real")) { + } else if (streq(token.start, "nat_to_real")) { + } else if (streq(token.start, "real_to_int")) { + } else if (streq(token.start, "real_to_nat")) { + } else if (streq(token.start, "bit_shift_left")) { + } else if (streq(token.start, "bit_shift_right")) { + } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (streq(token.start, "bit_and")) { + } else if (streq(token.start, "bit_or")) { + } else if (streq(token.start, "bit_xor")) { + } else if (streq(token.start, "jump")) { + } else if (streq(token.start, "jump_if_flag")) { + } else if (streq(token.start, "jump_eq_int")) { + } else if (streq(token.start, "jump_neq_int")) { + } else if (streq(token.start, "jump_gt_int")) { + } else if (streq(token.start, "jump_lt_int")) { + } else if (streq(token.start, "jump_le_int")) { + } else if (streq(token.start, "jump_ge_int")) { + } else if (streq(token.start, "jump_eq_nat")) { + } else if (streq(token.start, "jump_neq_nat")) { + } else if (streq(token.start, "jump_gt_nat")) { + } else if (streq(token.start, "jump_lt_nat")) { + } else if (streq(token.start, "jump_le_nat")) { + } else if (streq(token.start, "jump_ge_nat")) { + } else if (streq(token.start, "jump_eq_real")) { + } else if (streq(token.start, "jump_neq_real")) { + } else if (streq(token.start, "jump_ge_real")) { + } else if (streq(token.start, "jump_gt_real")) { + } else if (streq(token.start, "jump_lt_real")) { + } else if (streq(token.start, "jump_le_real")) { + } else if (streq(token.start, "string_length")) { + } else if (streq(token.start, "string_eq")) { + } else if (streq(token.start, "string_concat")) { + } else if (streq(token.start, "string_get_char")) { + } else if (streq(token.start, "string_find_char")) { + } else if (streq(token.start, "string_slice")) { + } else if (streq(token.start, "int_to_string")) { + } else if (streq(token.start, "nat_to_string")) { + } else if (streq(token.start, "real_to_string")) { + } else if (streq(token.start, "string_to_int")) { + } else if (streq(token.start, "string_to_nat")) { + } else if (streq(token.start, "string_to_real")) { + } else { + // some other identifier + } + } + } + } while (token.type != TOKEN_EOF); +} + +/** + * Emit bytecode to the VM from the source string. + */ +void assemble(VM *vm, char *source) { + SymbolTable *st = symbol_table_init(); + build_symbol_table(vm, source, st); + emit_bytecode(vm, source, st); + free(st->symbols); + free(st); +} diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 515a74c..657dd40 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -5,7 +5,7 @@ #include "../../vm/opcodes.h" #include "lexer.h" -typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { GLOBAL, LOCAL, VAR } ScopeType; typedef enum { VOID, BOOL, @@ -24,28 +24,16 @@ typedef enum { FUNCTION } SymbolType; -typedef struct names_tab_s NamesTable; -typedef struct value_type_s ValueType; typedef struct symbol_s Symbol; typedef struct symbol_tab_s SymbolTable; -struct names_tab_s { - char **names; - u32 count; - u32 capacity; -}; - -struct value_type_s { - SymbolType type; - u32 name; - u32 size; -}; - +#define MAX_SYMBOL_NAME_LENGTH 64 struct symbol_s { - u32 name; - ValueType type; - ScopeType scope; - u32 ref; // address if global, register if local + char name[MAX_SYMBOL_NAME_LENGTH]; + SymbolType type; + ScopeType scope; + u32 ref; // vm->mp if global, vm->pc local, register if var + u32 size; // size of symbol }; struct symbol_tab_s { diff --git a/src/vm/opcodes.h b/src/vm/opcodes.h index fe22ff4..6a53937 100644 --- a/src/vm/opcodes.h +++ b/src/vm/opcodes.h @@ -52,7 +52,9 @@ typedef enum { OP_ABS_REAL, /* abs_real : locals[dest] = | locals[src1] | */ OP_NEG_REAL, /* neg_real : locals[dest] = _locals[src1] */ OP_INT_TO_REAL, /* int_to_real : locals[dest] = locals[src1] as real */ + OP_INT_TO_NAT, /* int_to_nat : locals[dest] = locals[src1] as nat */ OP_NAT_TO_REAL, /* nat_to_real : locals[dest] = locals[src1] as real */ + OP_NAT_TO_INT, /* nat_to_int : locals[dest] = locals[src1] as int */ OP_REAL_TO_INT, /* real_to_int : locals[dest] = locals[src1] as int */ OP_REAL_TO_NAT, /* real_to_nat : locals[dest] = locals[src1] as nat */ OP_BIT_SHIFT_LEFT, /* bit_shift_left : locals[dest] = locals[src1] << locals[src2] */ @@ -92,7 +94,8 @@ typedef enum { OP_REAL_TO_STRING, /* real_to_string : locals[dest] = src1 as str */ OP_STRING_TO_INT, /* string_to_int : locals[dest] = src1 as int */ OP_STRING_TO_NAT, /* string_to_nat : locals[dest] = src1 as nat */ - OP_STRING_TO_REAL /* string_to_real : locals[dest] = src1 as real */ + OP_STRING_TO_REAL, /* string_to_real : locals[dest] = src1 as real */ + OP_MAX_OPCODE /* not really an opcode but used to check max length of ops */ } Opcode; #define MAX_LOCALS 32 @@ -141,20 +144,30 @@ typedef struct device_s { #define STACK_SIZE 256 #define DEVICES_SIZE 8 typedef struct vm_s { - u32 pc; /* program counter */ - u32 cp; /* code pointer (last allocated opcode) */ - u32 fp; /* frame pointer (current frame) */ - u32 sp; /* stack pointer (top of stack) */ - u32 mp; /* memory pointer (last allocated value) */ - u32 dc; /* device count */ - i32 flag; /* flag (temporary results like SYSCALL status) */ - Frame frames[FRAMES_SIZE]; /* function call frames */ - u32 stack[STACK_SIZE]; /* main stack */ - Device devices[DEVICES_SIZE]; /* device definitions */ - u8 code[CODE_SIZE]; /* code block */ - u8 memory[MEMORY_SIZE]; /* memory block */ + u32 pc; /* program counter */ + u32 cp; /* code pointer (last allocated opcode) */ + u32 fp; /* frame pointer (current frame) */ + u32 sp; /* stack pointer (top of stack) */ + u32 mp; /* memory pointer (last allocated value) */ + u32 dc; /* device count */ + i32 flag; /* flag (temporary results like SYSCALL status) */ + Frame *frames; /* function call frames */ + u32 frames_size; /* max frames */ + u32 *stack; /* main stack */ + u32 stack_size; /* max stack */ + Device *devices; /* device definitions */ + u32 device_size; /* max devices */ + u8 *code; /* code block */ + u32 code_size; /* max code size */ + u8 *memory; /* memory block */ + u32 memory_size; /* max memory size */ } VM; +/** + * Creates a new vm based on the arch. + */ +bool init_vm(VM *vm); + #define read_u8(vm, location, addr) ((vm)->location[addr]) #define read_u16(vm, location, addr) \ diff --git a/test/add.ul.ir b/test/add.ul.ir index abdfc8b..f7e6a8a 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -4,14 +4,12 @@ global int x = 1 global int y = 1 function main () - int a $0 - int b $1 int ans $2 str ans_string $3 - load_absolute_32 &x -> a - load_absolute_32 &y -> b - call add a b -> ans + load_absolute_32 x -> $0 + load_absolute_32 y -> $1 + call add $0 $1 -> ans int_to_string ans -> ans_string call pln ans_string exit 0 @@ -29,9 +27,9 @@ function pln (str message $0) int mode $5 load_immediate 0 -> mode - syscall OPEN &terminal_namespace mode -> term + syscall OPEN terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length - strlen &new_line -> nl_length + strlen new_line -> nl_length syscall WRITE term nl nl_length return diff --git a/test/loop.ul.ir b/test/loop.ul.ir index 88f7660..a72d97c 100644 --- a/test/loop.ul.ir +++ b/test/loop.ul.ir @@ -10,10 +10,11 @@ function main () load_immediate 0 -> $2 load_immediate -1 -> $3 load_immediate 5.0 -> $5 - &loop_body + loop loop_body add_real a $5 -> a add_int i $3 -> i - jump_ge_int &loop_body i $2 + jump_ge_int loop_body i $2 + malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode syscall OPEN term mode -> term # Terminal term = open("/dev/term/0", 0); diff --git a/test/malloc.ul.ir b/test/malloc.ul.ir index ef7f66b..cc69b12 100644 --- a/test/malloc.ul.ir +++ b/test/malloc.ul.ir @@ -1,7 +1,7 @@ function main () - int mode is $11 - str term is $10 + int mode $11; + str term $10; malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode @@ -11,20 +11,20 @@ function main () string_length $7 -> $8 syscall WRITE term $7 $8 # print prompt - str user_string is $9 + str user_string $9 load_immediate 32 -> $8 malloc $8 -> user_string syscall READ term user_string $8 # read in max 32 byte string - call pln user_string + call pln user_string; exit 0 -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode @@ -34,3 +34,4 @@ function pln (str message is $0) malloc_immediate "\n" -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length + return diff --git a/test/paint.ul.ir b/test/paint.ul.ir index e7ebc4c..3dac05e 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -8,113 +8,113 @@ global const byte LIGHT_GRAY = 182 global byte SELECTED_COLOR = 255 function main () - # Open screen + // Open screen plex screen $0 str screen_name $18 int mode $11 nat screen_buffer $21 - # use load immediate because it a pointer to a string, not a value - load_address &screen_namespace -> screen_name + // use load immediate because it a pointer to a string, not a value + load_address screen_namespace -> screen_name load_immediate 0 -> mode - syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); + syscall OPEN screen_name mode -> screen // Screen screen = open("/dev/screen/0", 0); nat width $20 nat size $22 - load_offset_32 screen 8 -> width # load width - load_offset_32 screen 12 -> size # load size - load_immediate 16 -> $1 # offset for screen buffer + load_offset_32 screen 8 -> width // load width + load_offset_32 screen 12 -> size // load size + load_immediate 16 -> $1 // offset for screen buffer add_nat screen $1 -> screen_buffer - # open mouse + // open mouse plex mouse $15 str mouse_name $16 - load_address &mouse_namespace -> mouse_name - syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); + load_address mouse_namespace -> mouse_name + syscall OPEN mouse_name mode -> mouse // Mouse mouse = open("/dev/mouse/0", 0); byte color $1 nat x_pos $12 nat y_pos $13 - load_absolute_32 &BLACK -> color + load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width - load_absolute_32 &WHITE -> color + load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width + call draw_outlined_swatch screen_buffer color x_pos y_pos width - # screen.draw# + // screen.draw// syscall WRITE screen screen_buffer size nat zero $11 loop draw_loop - # load mouse click data + // load mouse click data syscall REFRESH mouse byte left_down $9 - load_offset_8 mouse 16 -> left_down # load btn1 pressed + load_offset_8 mouse 16 -> left_down // load btn1 pressed - jump_eq_nat &draw_loop left_down zero + jump_eq_nat draw_loop left_down zero nat mouse_x $7 nat mouse_y $8 - load_offset_32 mouse 8 -> mouse_x # load x - load_offset_32 mouse 12 -> mouse_y # load y + load_offset_32 mouse 8 -> mouse_x // load x + load_offset_32 mouse 12 -> mouse_y // load y nat box_size $14 load_immediate 20 -> box_size - # first row - load_absolute_32 &BLACK -> color + // first row + load_absolute_32 BLACK -> color load_immediate 1 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width - call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size - load_absolute_32 &WHITE -> color + load_absolute_32 WHITE -> color load_immediate 21 -> x_pos load_immediate 1 -> y_pos - call &draw_outlined_swatch screen_buffer color x_pos y_pos width - call &set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size + call draw_outlined_swatch screen_buffer color x_pos y_pos width + call set_color_if_clicked mouse_x mouse_y x_pos y_pos color box_size syscall WRITE screen screen_buffer size byte selected_color $25 - load_absolute_32 &SELECTED_COLOR -> selected_color + load_absolute_32 SELECTED_COLOR -> selected_color nat brush_size $19 load_immediate 5 -> brush_size - call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size + call draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size - jump &draw_loop + jump draw_loop - # Flush and exit + // Flush and exit exit 0 function set_color_if_clicked (int click_x $0, int click_y $1, int box_x $2, int box_y $3, byte color $4, int box_size $5) - # Compute right + // Compute right int right_edge $6 add_int box_x box_size -> right_edge - # Compute bottom = box_y + box_size + // Compute bottom = box_y + box_size int bottom_edge $7 add_int box_y box_size -> bottom_edge - # Bounds check: x in [box_x, right] and y in [box_y, bottom] - jump_lt_int &fail click_x box_x - jump_ge_int &fail click_x right_edge - jump_lt_int &fail click_y box_y - jump_ge_int &fail click_y bottom_edge + // Bounds check: x in [box_x, right] and y in [box_y, bottom] + jump_lt_int fail click_x box_x + jump_ge_int fail click_x right_edge + jump_lt_int fail click_y box_y + jump_ge_int fail click_y bottom_edge - store_absolute_8 &SELECTED_COLOR color + store_absolute_8 SELECTED_COLOR color else fail return @@ -122,18 +122,18 @@ function set_color_if_clicked (int click_x $0, int click_y $1, function draw_outlined_swatch(nat base $0, byte color $1, int x $2, int y $3, int width $4) - # Constants + // Constants nat background_color $5 - load_absolute_32 &GRAY -> background_color + load_absolute_32 GRAY -> background_color byte selected_color $10 - load_absolute_32 &SELECTED_COLOR -> selected_color + load_absolute_32 SELECTED_COLOR -> selected_color - jump_eq_int &set_selected selected_color color - jump &end_set_selected - set_selected: - load_absolute_32 &DARK_GRAY -> background_color - end_set_selected: + jump_eq_int set_selected selected_color color + jump end_set_selected + do set_selected + load_absolute_32 DARK_GRAY -> background_color + else end_set_selected nat outline_size $6 load_immediate 20 -> outline_size @@ -144,26 +144,26 @@ function draw_outlined_swatch(nat base $0, nat offset $8 load_immediate 2 -> offset - call &draw_box base width background_color x y outline_size outline_size + call draw_box base width background_color x y outline_size outline_size - add_int x offset -> $9 # x + 2 - add_int y offset -> $10 # y + 2 + add_int x offset -> $9 // x + 2 + add_int y offset -> $10 // y + 2 - call &draw_box base width color $9 $10 fill_size fill_size + call draw_box base width color $9 $10 fill_size fill_size return function draw_box (nat base $0, nat screen_width $1, byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6) - # Compute start address: base + y*640 + x + // Compute start address: base + y*640 + x nat offset $15 mul_int y_start screen_width -> offset add_int offset x_start -> offset add_nat offset base -> offset nat fat_ptr_size $25 load_immediate 4 -> fat_ptr_size - add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size + add_nat offset fat_ptr_size -> offset // need to add offset for fat pointer size int i $30 load_immediate 1 -> i @@ -175,10 +175,10 @@ function draw_box (nat base $0, nat screen_width $1, nat pixel_ptr $29 loop draw_box_outer - add_int offset width -> row_end # current + width - register_move offset -> pixel_ptr # set pixel point - memset_8 pixel_ptr color width # draw row - add_int offset screen_width -> offset # next row += 640 - sub_int height i -> height # decrement row count - jump_gt_int &draw_box_outer height zero + add_int offset width -> row_end // current + width + register_move offset -> pixel_ptr // set pixel point + memset_8 pixel_ptr color width // draw row + add_int offset screen_width -> offset // next row += 640 + sub_int height i -> height // decrement row count + jump_gt_int draw_box_outer height zero return diff --git a/test/window.ul.ir b/test/window.ul.ir index 4fb6c04..c942eeb 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -19,29 +19,29 @@ function main () nat buffer_size $11 nat pixel_pos $12 - load_immediate &screen_namespace -> screen + load_immediate screen_namespace -> screen load_immediate 0 -> mode syscall OPEN screen mode -> screen nat_to_string screen -> tmp_str - call &pln tmp_str + call pln tmp_str load_offset_32 screen 8 -> width nat_to_string width -> tmp_str - call &pln tmp_str + call pln tmp_str load_offset_32 screen 12 -> buffer_size nat_to_string buffer_size -> tmp_str - call &pln tmp_str + call pln tmp_str load_immediate 16 -> offset_temp add_nat screen offset_temp -> screen_buffer nat_to_string screen_buffer -> tmp_str - call &pln tmp_str + call pln tmp_str // open mouse - load_immediate &mouse_namespace -> mouse + load_immediate mouse_namespace -> mouse syscall OPEN mouse mode -> mouse syscall WRITE screen screen_buffer buffer_size // redraw @@ -79,7 +79,7 @@ function pln (str message $0) int mode $5 load_immediate 0 -> mode - syscall OPEN &terminal_namespace mode -> term + syscall OPEN terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length load_address new_line -> nl