From 6f47ee7ea17b82637547b5329dcf7294d269262e Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 23 Nov 2025 23:48:26 -0800 Subject: [PATCH] wip new assembler; start of compiler --- src/tools/assembler/assembler.c | 311 +++++++++++++++++-- src/tools/assembler/assembler.h | 51 +++- src/tools/assembler/lexer.c | 26 ++ src/tools/assembler/lexer.h | 4 + src/tools/compiler/compiler.c | 502 +++++++++++++++++++++++++++++++ src/tools/compiler/compiler.h | 176 ++++++----- src/tools/compiler/lexer.c | 510 ++++++++++++++++++++++++++++++++ src/tools/compiler/lexer.h | 85 ++++++ src/vm/vm.c | 44 +-- test/add.ul.ir | 40 +-- test/fib.ul.ir | 36 ++- test/hello.ul.ir | 29 +- test/window.ul.ir | 54 ++-- test/window.ul.ir2 | 73 ----- test/window.ul.vuir | 95 ------ 15 files changed, 1662 insertions(+), 374 deletions(-) create mode 100644 src/tools/compiler/compiler.c create mode 100644 src/tools/compiler/lexer.c create mode 100644 src/tools/compiler/lexer.h delete mode 100644 test/window.ul.ir2 delete mode 100644 test/window.ul.vuir diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index c137e64..8a21f2d 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,25 +1,291 @@ -#include "../../vm/libc.h" #include "assembler.h" +#include "../../vm/common.h" +#include "../../vm/fixed.h" +#include "../../vm/libc.h" +#include "../../vm/opcodes.h" #include +#include +#include -bool global() { - Token token = nextToken(); - if (token.type == TOKEN_KEYWORD_CONST) { - token = nextToken(); - } - - if (token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_NAT || - token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { - return true; - } - - return false; +SymbolTable *symbol_table_init() { + SymbolTable *table = malloc(sizeof(SymbolTable)); + table->symbols = malloc(16 * sizeof(Symbol)); + table->count = 0; + table->capacity = 16; + return table; } -void function() {} +NamesTable *names_table_init() { + NamesTable *table = malloc(sizeof(NamesTable)); + table->names = malloc(16 * sizeof(char *)); + table->count = 0; + table->capacity = 16; + return table; +} + +u32 names_table_add(NamesTable *table, const char *name) { + for (u32 i = 0; i < table->count; i++) { + if (strcmp(table->names[i], name) == 0) { + return i; + } + } + + if (table->count >= table->capacity) { + table->capacity *= 2; + table->names = realloc(table->names, table->capacity * sizeof(char *)); + } + + table->names[table->count] = malloc(strlen(name) + 1); + strcpy(table->names[table->count], name); + u32 index = table->count; + table->count++; + return index; +} + +u32 symbol_table_add(SymbolTable *table, Symbol s) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); + } + + table->symbols[table->count] = s; + u32 index = table->count; + table->count++; + return index; +} + +Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table, + const char *name) { + for (u32 i = 0; i < nt->count; i++) { + if (strcmp(nt->names[i], name) == 0) { + for (int j = 0; j < table->count; j++) { + if (table->symbols[j].name == i) { + return &table->symbols[j]; + } + } + } + } + return nil; +} + +u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) { + Symbol *sym = symbol_table_lookup(nt, table, name); + if (!sym) { + fprintf(stderr, "Error: Undefined Symbol '%s'\n", name); + exit(1); + } + return sym->ref; +} + +void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } + +void emit_u32(VM *vm, u32 value) { + write_u32(vm, code, vm->cp, value); + vm->cp += 4; +} + +void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } + +int parse_register(const char *reg_str) { + if (reg_str[0] != '$') + return -1; + return atoi(reg_str + 1); +} + +u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { + // Handle symbol references (e.g., &label) + if (ref[0] == '&') { + return get_ref(nt, table, ref + 1); + } + + // fixed-point numbers (e.g., 0.5) + if (strchr(ref, '.')) { + return float_to_fixed(atof(ref)); + } + + // decimal literals (e.g., 7) + char *endptr; + u32 value = (u32)strtoul(ref, &endptr, 10); + + if (endptr == ref || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", ref); + exit(1); + } + return value; +} + +static char *unwrap_string(const char *quoted_str) { + if (!quoted_str) + return nil; + + size_t len = strlen(quoted_str); + if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { + // Remove quotes and process escape sequences + const char *src = quoted_str + 1; + size_t src_len = len - 2; + + // First pass: calculate the actual length needed after escape processing + size_t actual_len = 0; + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Escape sequence + actual_len++; + i++; // Skip the next character + } else { + actual_len++; + } + } + + char *unwrapped = (char *)malloc(actual_len + 1); + size_t dst_idx = 0; + + // Second pass: process escape sequences + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Handle escape sequences + switch (src[i + 1]) { + case 'n': + unwrapped[dst_idx++] = '\n'; + break; + case 't': + unwrapped[dst_idx++] = '\t'; + break; + case 'r': + unwrapped[dst_idx++] = '\r'; + break; + case '\\': + unwrapped[dst_idx++] = '\\'; + break; + case '"': + unwrapped[dst_idx++] = '"'; + break; + case '\'': + unwrapped[dst_idx++] = '\''; + break; + default: + // Unknown escape, keep both characters + unwrapped[dst_idx++] = src[i]; + unwrapped[dst_idx++] = src[i + 1]; + break; + } + i++; // Skip the next character + } else { + unwrapped[dst_idx++] = src[i]; + } + } + unwrapped[dst_idx] = '\0'; + return unwrapped; + } + // Not quoted, return copy + return strdup(quoted_str); +} + +Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { + Symbol *s = (Symbol *)malloc(sizeof(Symbol)); + ValueType t; + + Token token_type = nextToken(); + switch (token_type.type) { + case TOKEN_TYPE_I8: + t.type = I8; + t.size = 1; + break; + case TOKEN_TYPE_I16: + t.type = I16; + t.size = 2; + break; + case TOKEN_TYPE_U8: + t.type = U8; + t.size = 1; + break; + case TOKEN_TYPE_U16: + t.type = U16; + t.size = 2; + break; + case TOKEN_TYPE_INT: + t.type = I32; + t.size = 4; + break; + case TOKEN_TYPE_NAT: + t.type = U32; + t.size = 4; + break; + case TOKEN_TYPE_REAL: + t.type = F32; + t.size = 4; + break; + case TOKEN_TYPE_STR: + t.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: + return nil; + } + + Token eq = nextToken(); + if (eq.type != TOKEN_EQ) + return nil; + + Token name = nextToken(); + if (name.type != TOKEN_IDENTIFIER) + return nil; + + s->name = names_table_add(nt, name.start); + + u32 addr = vm->mp; + s->ref = addr; + + u32 result; + Token value = nextToken(); + switch (value.type) { + case TOKEN_LITERAL_INT: + case TOKEN_LITERAL_NAT: + case TOKEN_LITERAL_REAL: + result = resolve_symbol(nt, st, value.start); + write_u32(vm, memory, addr, result); + + vm->mp += t.size; + vm->frames[vm->fp].end += t.size; + break; + case TOKEN_LITERAL_STR: { + char *unwrapped = unwrap_string(value.start); + int len = strlen(unwrapped); + + u32 addr = vm->mp; + u32 size = len + 1 + 4; + t.size = size; + + vm->mp += size; + vm->frames[vm->fp].end += size; + + write_u32(vm, memory, addr, len); + for (int i = 0; i < len; i++) { + write_u8(vm, memory, addr + 4 + i, unwrapped[i]); + } + write_u8(vm, memory, addr + 4 + len, '\0'); + free(unwrapped); + break; + } + default: + return nil; + } + + s->type = t; + return s; +} + +Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) { + USED(vm); + USED(nt); + USED(st); + return nil; +} void assemble(VM *vm, char *source) { - USED(vm); + SymbolTable *st = symbol_table_init(); + NamesTable *nt = names_table_init(); + initLexer(source); Token token; do { @@ -33,17 +299,26 @@ void assemble(VM *vm, char *source) { token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { - if (!global()) { + if (!global(vm, nt, st)) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); } } if (token.type == TOKEN_KEYWORD_FN) { - function(); + function(vm, nt, st); + } + + if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || + token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || + token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || + token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || + token.type == TOKEN_TYPE_STR) { + } if (token.type == TOKEN_IDENTIFIER) { + // check to see if it is an opcode first if (streq(token.start, "exit")) { } else if (streq(token.start, "call")) { } else if (streq(token.start, "syscall")) { @@ -132,6 +407,8 @@ void assemble(VM *vm, char *source) { } else if (streq(token.start, "string_to_int")) { } else if (streq(token.start, "string_to_nat")) { } else if (streq(token.start, "string_to_real")) { + } else { + // some other identifier } } } diff --git a/src/tools/assembler/assembler.h b/src/tools/assembler/assembler.h index 1b3c38a..515a74c 100644 --- a/src/tools/assembler/assembler.h +++ b/src/tools/assembler/assembler.h @@ -5,6 +5,55 @@ #include "../../vm/opcodes.h" #include "lexer.h" +typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION +} SymbolType; + +typedef struct names_tab_s NamesTable; +typedef struct value_type_s ValueType; +typedef struct symbol_s Symbol; +typedef struct symbol_tab_s SymbolTable; + +struct names_tab_s { + char **names; + u32 count; + u32 capacity; +}; + +struct value_type_s { + SymbolType type; + u32 name; + u32 size; +}; + +struct symbol_s { + u32 name; + ValueType type; + ScopeType scope; + u32 ref; // address if global, register if local +}; + +struct symbol_tab_s { + Symbol *symbols; + u32 count; + u32 capacity; +}; + void assemble(VM *vm, char *source); -#endif \ No newline at end of file +#endif diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index 8c26f61..ee0bdab 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -257,6 +257,32 @@ static TokenType identifierType() { break; case 'g': return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); + case 'I': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); + } + } + break; + case 'U': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); + } + } + break; + case 'F': + return checkKeyword(1, 2, "32", TOKEN_TYPE_REAL); } return TOKEN_IDENTIFIER; diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index 4b7a8ae..eaa137c 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -8,7 +8,11 @@ typedef enum { TOKEN_LITERAL_NAT, TOKEN_LITERAL_REAL, TOKEN_LITERAL_STR, + TOKEN_TYPE_I8, + TOKEN_TYPE_I16, TOKEN_TYPE_INT, + TOKEN_TYPE_U8, + TOKEN_TYPE_U16, TOKEN_TYPE_NAT, TOKEN_TYPE_REAL, TOKEN_TYPE_STR, diff --git a/src/tools/compiler/compiler.c b/src/tools/compiler/compiler.c new file mode 100644 index 0000000..1b29e68 --- /dev/null +++ b/src/tools/compiler/compiler.c @@ -0,0 +1,502 @@ +#include "compiler.h" +#include "../../vm/common.h" +#include "../../vm/libc.h" +#include +#include +#include + +NamesTable *names_table_init() { + NamesTable *table = malloc(sizeof(NamesTable)); + table->names = malloc(16 * sizeof(char *)); + table->count = 0; + table->capacity = 16; + return table; +} + +FunctionTable *function_table_init() { + FunctionTable *table = malloc(sizeof(FunctionTable)); + table->symbols = malloc(16 * sizeof(FunctionDef)); + table->count = 0; + table->capacity = 16; + return table; +} + +ArrayTable *array_table_init() { + ArrayTable *table = malloc(sizeof(ArrayTable)); + table->symbols = malloc(16 * sizeof(ArrayDef)); + table->count = 0; + table->capacity = 16; + return table; +} + +PlexTable *plex_table_init() { + PlexTable *table = malloc(sizeof(PlexTable)); + table->symbols = malloc(16 * sizeof(PlexDef)); + table->count = 0; + table->capacity = 16; + return table; +} + +PlexFieldsTable *plex_fields_table_init() { + PlexFieldsTable *table = malloc(sizeof(PlexFieldsTable)); + table->plex_refs = malloc(64 * sizeof(u32)); + table->fields = malloc(64 * sizeof(ValueType)); + table->count = 0; + table->capacity = 64; + return table; +} + +u32 names_table_add(NamesTable *table, const char *name) { + for (u32 i = 0; i < table->count; i++) { + if (strcmp(table->names[i], name) == 0) { + return (u32)i; + } + } + + if (table->count >= table->capacity) { + table->capacity *= 2; + table->names = realloc(table->names, table->capacity * sizeof(char *)); + } + + table->names[table->count] = malloc(strlen(name) + 1); + strcpy(table->names[table->count], name); + u32 index = (u32)table->count; + table->count++; + return index; +} + +u32 function_table_add(FunctionTable *table, FunctionDef def) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = + realloc(table->symbols, table->capacity * sizeof(FunctionDef)); + } + + table->symbols[table->count] = def; + u32 index = (u32)table->count; + table->count++; + return index; +} + +u32 array_table_add(ArrayTable *table, ArrayDef def) { + if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(ArrayDef)); + } + + table->symbols[table->count] = def; + u32 index = (u32)table->count; + table->count++; + return index; +} + +u32 plex_add(PlexTable *plex_table, u32 name, u32 size, u32 field_start, + u32 field_count) { + if (plex_table->count >= plex_table->capacity) { + plex_table->capacity *= 2; + plex_table->symbols = + realloc(plex_table->symbols, plex_table->capacity * sizeof(PlexDef)); + } + + plex_table->symbols[plex_table->count].name = name; + plex_table->symbols[plex_table->count].size = size; + plex_table->symbols[plex_table->count].field_ref_start = field_start; + plex_table->symbols[plex_table->count].field_count = field_count; + + u32 index = (u32)plex_table->count; + plex_table->count++; + return index; +} + +u32 plex_fields_add(PlexFieldsTable *fields_table, u32 plex_ref, + ValueType field) { + if (fields_table->count + 1 > fields_table->capacity) { + u32 new_capacity = fields_table->capacity * 2; + if (new_capacity < fields_table->count + 1) { + new_capacity = fields_table->count + 1; + } + fields_table->plex_refs = + realloc(fields_table->plex_refs, new_capacity * sizeof(u32)); + fields_table->fields = + realloc(fields_table->fields, new_capacity * sizeof(ValueType)); + fields_table->capacity = new_capacity; + } + + u32 start_index = fields_table->count; + fields_table->plex_refs[start_index] = plex_ref; + fields_table->fields[start_index] = field; + fields_table->count++; + return start_index; +} + +int plex_get_field_index_by_name(PlexTable *plex_table, + PlexFieldsTable *fields_table, + NamesTable *names_table, u32 plex_index, + const char *field_name) { + if (plex_index >= plex_table->count) + return -1; + + PlexDef *plex_def = &plex_table->symbols[plex_index]; + u32 field_start = plex_def->field_ref_start; + u32 field_count = plex_def->field_count; + + for (u32 i = 0; i < field_count; i++) { + u32 field_table_index = field_start + i; + ValueType *field = &fields_table->fields[field_table_index]; + + if (field->name < names_table->count) { + if (strcmp(names_table->names[field->name], field_name) == 0) { + return (int)i; // Return field index within the plex + } + } + } + return -1; // Not found +} + +ValueType *plex_get_field(PlexTable *plex_table, PlexFieldsTable *fields_table, + u32 plex_index, u32 field_in_plex_index) { + if (plex_index >= plex_table->count) + return nil; + + PlexDef *plex_def = &plex_table->symbols[plex_index]; + if (field_in_plex_index >= plex_def->field_count) + return nil; + + u32 field_table_index = plex_def->field_ref_start + field_in_plex_index; + return &fields_table->fields[field_table_index]; +} + +ValueType *plex_get_field_by_name(PlexTable *plex_table, + PlexFieldsTable *fields_table, + NamesTable *names_table, u32 plex_index, + const char *field_name) { + int field_index = plex_get_field_index_by_name( + plex_table, fields_table, names_table, plex_index, field_name); + if (field_index == -1) + return nil; + + return plex_get_field(plex_table, fields_table, plex_index, (u32)field_index); +} + +Symbol *global(VM *vm) { + Symbol s; + ValueType t; + + s.ref.global = vm->mp; + + Token token_type = nextToken(); + Token array_or_eq = nextToken(); + if (array_or_eq.type == TOKEN_LBRACKET) { + Token rb = nextToken(); + if (rb.type != TOKEN_RBRACKET) + return nil; + + Token eq = nextToken(); + if (eq.type != TOKEN_EQ) + return nil; + + t.type = ARRAY; + ValueType array_type; + + switch (token_type.type) { + case TOKEN_TYPE_I8: + array_type.type = I8; + break; + case TOKEN_TYPE_I16: + array_type.type = I16; + break; + case TOKEN_TYPE_INT: + array_type.type = I32; + break; + case TOKEN_TYPE_U8: + array_type.type = U8; + break; + case TOKEN_TYPE_U16: + array_type.type = U16; + break; + case TOKEN_TYPE_NAT: + array_type.type = U32; + break; + case TOKEN_TYPE_REAL: + array_type.type = F32; + break; + case TOKEN_TYPE_STR: + array_type.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: + return nil; + } + + } else { + // its not an array, so should be = + if (array_or_eq.type != TOKEN_EQ) + return nil; + + switch (token_type.type) { + case TOKEN_TYPE_I8: + t.type = I8; + break; + case TOKEN_TYPE_I16: + t.type = I16; + break; + case TOKEN_TYPE_INT: + t.type = I32; + break; + case TOKEN_TYPE_U8: + t.type = U8; + break; + case TOKEN_TYPE_U16: + t.type = U16; + break; + case TOKEN_TYPE_NAT: + t.type = U32; + break; + case TOKEN_TYPE_REAL: + t.type = F32; + break; + case TOKEN_TYPE_STR: + t.type = STR; + break; + case TOKEN_IDENTIFIER: + break; + default: + return nil; + } + } + + s.type = t; + + Token value = nextToken(); + + return nil; +} + +typedef struct { + Token current; + Token previous; + bool hadError; + bool panicMode; +} Parser; + +typedef enum { + PREC_NONE, + PREC_ASSIGNMENT, /* = */ + PREC_OR, /* or */ + PREC_AND, /* and */ + PREC_EQUALITY, /* == != */ + PREC_COMPARISON, /* < > <= >= */ + PREC_TERM, /* + - */ + PREC_FACTOR, /* * / */ + PREC_UNARY, /* not */ + PREC_CALL, /* . () */ + PREC_PRIMARY +} Precedence; + +typedef void (*ParseFn)(char *program); + +typedef struct { + ParseFn prefix; + ParseFn infix; + Precedence precedence; +} ParseRule; + +typedef struct { + SymbolTable table; + Symbol current; + Symbol last; + i8 rp; // Next free register +} Compiler; + +Parser parser; + +const char *internalErrorMsg = + "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes."; + +bool isType(TokenType type) { + return type == TOKEN_TYPE_INT || type == TOKEN_TYPE_NAT || + type == TOKEN_TYPE_REAL || type == TOKEN_TYPE_STR || + type == TOKEN_TYPE_BOOL; +} + +void errorAt(Token *token, const char *message) { + if (parser.panicMode) + return; + parser.panicMode = true; + fprintf(stderr, "[line %d] Error", token->line); + + if (token->type == TOKEN_EOF) { + fprintf(stderr, " at end"); + } else if (token->type == TOKEN_ERROR) { + } else { + fprintf(stderr, " at '%.*s'", token->length, token->start); + } + + fprintf(stderr, ": %s\n", message); + parser.hadError = true; +} + +void error(const char *message) { errorAt(&parser.previous, message); } + +void errorAtCurrent(const char *message) { errorAt(&parser.current, message); } + +void advance() { + parser.previous = parser.current; + + for (;;) { + parser.current = nextToken(); + if (parser.current.type != TOKEN_ERROR) + break; + + errorAtCurrent(parser.current.start); + } +} + +void consume(TokenType type, const char *message) { + if (parser.current.type == type) { + advance(); + return; + } + + errorAtCurrent(message); +} + +static int allocateRegister(Compiler *c) { + char buffer[38]; + if (c->rp + 1 > 31) { + sprintf(buffer, "Out of registers (used %d, max 32)", c->rp + 1); + error(buffer); + return -1; + } + + return c->rp++; +} + +static void popRegister(Compiler *c) { + if (c->rp - 1 > 0) { + c->rp--; + } +} + +static void freeRegister(Compiler *c, u8 reg) { + if (reg == c->rp - 1) { + c->rp--; + } +} + +static void clearRegisters(Compiler *c, u8 reg) { c->rp = 0; } + +void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; } + +void emit_u32(VM *vm, u32 value) { + write_u32(vm, code, vm->cp, value); + vm->cp += 4; +} + +void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); } + +static bool check(TokenType type) { return parser.current.type == type; } + +static bool match(TokenType type) { + if (!check(type)) + return false; + advance(); + return true; +} + +static void expression(Compiler *c, VM *vm) { + USED(c); + USED(vm); +} + +void number(Compiler *c, VM *vm) { + emit_opcode(vm, OP_LOAD_IMM); + int reg = allocateRegister(c); + if (reg < 0) + return; + emit_byte(vm, reg); + + c->last = Symbol{ .type=parser.previous.type }; + + switch (parser.previous.type) { + case TOKEN_INT_LITERAL: { + char *endptr; + i32 value = (i32)strtol(parser.previous.start, &endptr, 10); + emit_u32(vm, value); + return; + } + case TOKEN_UINT_LITERAL: { + long value = atol(parser.previous.start); + emit_u32(vm, value); + return; + } + case TOKEN_FLOAT_LITERAL: { + float value = atof(parser.previous.start); + fixed_t fvalue = float_to_fixed(value); + emit_u32(vm, fvalue); + return; + } + default: + return; // Unreachable. + } + + errorAtCurrent("Invalid number format"); +} + +static void unary(Compiler *c, VM *vm) { + TokenType operatorType = parser.previous.type; + + // Compile the operand. + expression(c, vm); + + // Emit the operator instruction. + switch (operatorType) { + case TOKEN_MINUS: { + switch (c->last.type) { + case TOKEN_UINT_LITERAL: + emit_opcode(vm, OP_NEG_UINT); + case TOKEN_FLOAT_LITERAL: + emit_opcode(vm, OP_NEG_FLOAT); + default: + emit_opcode(vm, OP_NEG_INT); + } + + int dest = allocateRegister(); + emit_byte(vm, dest); + emit_byte(vm, dest); + } + default: + return; // Unreachable. + } +} + +static void emitHalt(Compiler *c, VM *vm) { + emit_opcode(vm, OP_HALT); + advance(); + number(c, vm); +} + +static void endCompiler(Compiler *c, VM *vm) { emitHalt(c, vm); } + +static void grouping(Compiler *c, VM *vm) { + expression(c, vm); + consume(TOKEN_RPAREN, "Expect ')' after expression."); +} + +bool compile(const char *source, VM *vm) { + USED(source); + USED(vm); + initLexer(source); + + parser.hadError = false; + parser.panicMode = false; + + Compiler compiler; + advance(); + expression(&compiler, vm); + consume(TOKEN_EOF, "Expect end of expression."); + endCompiler(&compiler, vm); + + return parser.hadError; +} diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h index 21f1a25..e223513 100644 --- a/src/tools/compiler/compiler.h +++ b/src/tools/compiler/compiler.h @@ -4,87 +4,109 @@ #import "../../vm/common.h" typedef enum { GLOBAL, LOCAL } ScopeType; -typedef enum { - VOID, - BOOL, - I8, - I16, - I32, - U8, - U16, - U32, - F8, - F16, - F32, - STR, - PLEX, - ARRAY, - FUNCTION - } SymbolType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION +} SymbolType; -typedef struct field_s { - char *name; - SymbolType type; - u32 offset; - u32 size; -} Field; +typedef struct value_type_s ValueType; +typedef struct function_def_s FunctionDef; +typedef struct function_tab_s FunctionTable; +typedef struct plex_def_s PlexDef; +typedef struct plex_tab_s PlexTable; +typedef struct array_def_s ArrayDef; +typedef struct array_tab_s ArrayTable; +typedef struct symbol_s Symbol; +typedef struct symbol_tab_s SymbolTable; +typedef struct names_tab_s NamesTable; +typedef struct plex_fields_tab_s PlexFieldsTable; -typedef struct function_def_s { - char *name; - SymbolType args[8]; - u8 arg_count; - SymbolType return_type; -} FunctionDef; - -typedef struct trait_def_s { - char *name; - Field *fields; - u32 field_count; - FunctionDef *methods; - u32 method_count; -} TraitDef; - -typedef struct plex_def_s { - char *name; - u32 logical_size; - u32 physical_size; - Field *fields; - u32 field_count; - TraitDef *traits; - u32 trait_count; - FunctionDef *methods; - u32 method_count; -} PlexDef; - -typedef struct array_def_s { - SymbolType type; - u32 length; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - struct array_def_s *array; - } ref; -} ArrayDef; - -typedef struct symbol_s { - char *name; - u32 address; - ScopeType scope; +struct value_type_s { SymbolType type; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - ArrayDef *array; - FunctionDef *function; - } ref; -} Symbol; + u32 name; + u32 size; + u32 table_ref; // if it is a heap object +}; -typedef struct symbol_tab_s { +struct function_def_s { + u32 name; + ValueType args[8]; + u8 arg_count; + ValueType return_type; +}; + +struct plex_def_s { + u32 name; + u32 size; + u32 field_ref_start; + u32 field_count; +}; + +struct array_def_s { + ValueType type; + u32 length; + u32 logical_size; // length of the array + u32 physical_size; // logical_size * type_size + fat pointer +}; + +struct symbol_s { + u32 name; + ValueType type; + ScopeType scope; + union { + u32 local; // register + u32 global; // address + } ref; +}; + +struct plex_fields_tab_s { + u32 *plex_refs; + ValueType *fields; + u32 count; + u32 capacity; +}; + +struct plex_tab_s { + PlexDef *symbols; + u32 count; + u32 capacity; +}; + +struct array_tab_s { + ArrayDef *symbols; + u32 count; + u32 capacity; +}; + +struct function_tab_s { + FunctionDef *symbols; + u32 count; + u32 capacity; +}; + +struct names_tab_s { + char **names; + u32 count; + u32 capacity; +}; + +struct symbol_tab_s { Symbol *symbols; - int count; - int capacity; -} SymbolTable; + u32 count; + u32 capacity; +}; #endif diff --git a/src/tools/compiler/lexer.c b/src/tools/compiler/lexer.c new file mode 100644 index 0000000..397d7cc --- /dev/null +++ b/src/tools/compiler/lexer.c @@ -0,0 +1,510 @@ +#include + +#include "../../vm/common.h" +#include "lexer.h" + +typedef struct { + const char *start; + const char *current; + int line; +} Lexer; + +Lexer lexer; + +void initLexer(const char *source) { + lexer.start = source; + lexer.current = source; + lexer.line = 1; +} + +static bool isAlpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +static bool isDigit(char c) { return c >= '0' && c <= '9'; } + +static bool isAtEnd() { return *lexer.current == '\0'; } + +static char advance() { + lexer.current++; + return lexer.current[-1]; +} + +static char peek() { return *lexer.current; } + +static char peekNext() { + if (isAtEnd()) + return '\0'; + return lexer.current[1]; +} + +static bool match(char expected) { + if (isAtEnd()) + return false; + if (*lexer.current != expected) + return false; + lexer.current++; + return true; +} + +static Token makeToken(TokenType type) { + Token token; + token.type = type; + token.start = lexer.start; + token.length = (int)(lexer.current - lexer.start); + token.line = lexer.line; + return token; +} + +static Token errorToken(const char *message) { + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (int)strlen(message); + token.line = lexer.line; + return token; +} + +static void skipWhitespace() { + for (;;) { + char c = peek(); + switch (c) { + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + lexer.line++; + advance(); + break; + case '/': + if (peekNext() == '/') { + // Single-line comment: skip until newline or end of file + advance(); + while (peek() != '\n' && !isAtEnd()) + advance(); + } else if (peekNext() == '*') { + // Multi-line comment: skip until '*/' or end of file + advance(); + advance(); + while (!isAtEnd()) { + if (peek() == '\n') + lexer.line++; + if (peek() == '*' && peekNext() == '/') { + advance(); + advance(); + break; // Exit loop, comment ended + } + advance(); + } + } else { + return; // Not a comment, let tokenization handle it + } + break; + default: + return; + } + } +} + +static TokenType checkKeyword(int start, int length, const char *rest, + TokenType type) { + if (lexer.current - lexer.start == start + length && + memcmp(lexer.start + start, rest, length) == 0) { + return type; + } + + return TOKEN_IDENTIFIER; +} + +static TokenType identifierType() { + switch (lexer.start[0]) { + case 'a': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'n': + return checkKeyword(2, 1, "d", TOKEN_OPERATOR_AND); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS); + } + } + break; + case 'c': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'l': + return checkKeyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); + case 'o': + return checkKeyword(2, 3, "nst", TOKEN_KEYWORD_CONST); + } + } + break; + case 'e': + return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); + case 'f': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'a': + return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); + case 'o': + return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_REAL); + } + return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN); + } + break; + case 'i': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'f': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); + case 'n': + if (lexer.current - lexer.start > 2) { + switch (lexer.start[2]) { + case 'i': + return checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT); + case 't': + return checkKeyword(3, 0, "", TOKEN_TYPE_INT); + } + } + break; + } + } + break; + case 'n': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'a': + return checkKeyword(2, 1, "t", TOKEN_TYPE_NAT); + case 'i': + return checkKeyword(2, 1, "l", TOKEN_KEYWORD_NIL); + } + } + break; + case 'o': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'p': + return checkKeyword(2, 2, "en", TOKEN_KEYWORD_OPEN); + case 'r': + return checkKeyword(2, 0, "", TOKEN_OPERATOR_OR); + } + } + break; + case 'p': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'l': + return checkKeyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); + } + } + break; + case 'r': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'e': + if (lexer.current - lexer.start > 2) { + switch (lexer.start[2]) { + case 'a': + return checkKeyword(3, 1, "d", TOKEN_KEYWORD_READ); + case 'f': + return checkKeyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); + case 't': + return checkKeyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); + } + } + break; + } + } + break; + case 's': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 't': + return checkKeyword(2, 1, "r", TOKEN_TYPE_STR); + } + } + break; + case 't': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'h': + return checkKeyword(2, 2, "is", TOKEN_KEYWORD_THIS); + case 'r': + return checkKeyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); + } + } + break; + case 'u': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 's': + return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); + } + } + break; + case 'w': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'h': + return checkKeyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); + case 'r': + return checkKeyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); + } + } + break; + case 'g': + return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); + } + + return TOKEN_IDENTIFIER; +} + +static Token identifier() { + while (isAlpha(peek()) || isDigit(peek())) + advance(); + return makeToken(identifierType()); +} + +static Token number() { + while (isDigit(peek())) + advance(); + + /* Look for a fractional part. */ + if (peek() == '.' && isDigit(peekNext())) { + /* Consume the ".". */ + advance(); + + while (isDigit(peek())) + advance(); + + return makeToken(TOKEN_LITERAL_REAL); + } + + return makeToken(TOKEN_LITERAL_INT); +} + +static Token string() { + while (peek() != '"' && !isAtEnd()) { + if (peek() == '\n') + lexer.line++; + advance(); + } + + if (isAtEnd()) + return errorToken("Unterminated string."); + + /* The closing quote. */ + advance(); + return makeToken(TOKEN_LITERAL_STR); +} + +Token nextToken() { + skipWhitespace(); + lexer.start = lexer.current; + + if (isAtEnd()) + return makeToken(TOKEN_EOF); + + char c = advance(); + if (isAlpha(c)) + return identifier(); + if (isDigit(c)) + return number(); + + switch (c) { + case '(': + return makeToken(TOKEN_LPAREN); + case ')': + return makeToken(TOKEN_RPAREN); + case '{': + return makeToken(TOKEN_LBRACE); + case '}': + return makeToken(TOKEN_RBRACE); + case '[': + return makeToken(TOKEN_LBRACKET); + case ']': + return makeToken(TOKEN_RBRACKET); + case ';': + return makeToken(TOKEN_SEMICOLON); + case ',': + return makeToken(TOKEN_COMMA); + case '.': + return makeToken(TOKEN_DOT); + case '-': + return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); + case '+': + return makeToken(TOKEN_PLUS); + case '/': + return makeToken(TOKEN_SLASH); + case '&': + return makeToken(match('&') ? TOKEN_AND_AND : TOKEN_AND); + case '#': + return makeToken(TOKEN_MESH); + case '$': + return makeToken(TOKEN_BIG_MONEY); + case '*': + return makeToken(TOKEN_STAR); + case '!': + return makeToken(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + case '=': + return makeToken(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + case '<': + return makeToken(match('=') ? TOKEN_LTE : TOKEN_LT); + case '>': + return makeToken(match('=') ? TOKEN_GTE : TOKEN_GT); + case '"': + return string(); + } + + return errorToken("Unexpected character."); +} + +const char *tokenTypeToString(TokenType type) { + switch (type) { + case TOKEN_EOF: + return "EOF"; + case TOKEN_IDENTIFIER: + return "IDENTIFIER"; + case TOKEN_LITERAL_INT: + return "LITERAL_INT"; + case TOKEN_LITERAL_NAT: + return "LITERAL_NAT"; + case TOKEN_LITERAL_REAL: + return "LITERAL_REAL"; + case TOKEN_LITERAL_STR: + return "LITERAL_STR"; + case TOKEN_TYPE_INT: + return "TYPE_INT"; + case TOKEN_TYPE_NAT: + return "TYPE_NAT"; + case TOKEN_TYPE_REAL: + return "TYPE_REAL"; + case TOKEN_TYPE_STR: + return "TYPE_STR"; + case TOKEN_KEYWORD_PLEX: + return "KEYWORD_PLEX"; + case TOKEN_KEYWORD_FN: + return "KEYWORD_FN"; + case TOKEN_KEYWORD_CONST: + return "KEYWORD_CONST"; + case TOKEN_KEYWORD_IF: + return "KEYWORD_IF"; + case TOKEN_KEYWORD_IS: + return "IS"; + case TOKEN_KEYWORD_AS: + return "AS"; + case TOKEN_KEYWORD_ELSE: + return "KEYWORD_ELSE"; + case TOKEN_KEYWORD_WHILE: + return "KEYWORD_WHILE"; + case TOKEN_KEYWORD_FOR: + return "KEYWORD_FOR"; + case TOKEN_KEYWORD_RETURN: + return "KEYWORD_RETURN"; + case TOKEN_KEYWORD_USE: + return "KEYWORD_USE"; + case TOKEN_KEYWORD_INIT: + return "KEYWORD_INIT"; + case TOKEN_KEYWORD_THIS: + return "KEYWORD_THIS"; + case TOKEN_KEYWORD_OPEN: + return "TOKEN_KEYWORD_OPEN"; + case TOKEN_KEYWORD_READ: + return "TOKEN_KEYWORD_READ"; + case TOKEN_KEYWORD_WRITE: + return "TOKEN_KEYWORD_WRITE"; + case TOKEN_KEYWORD_REFRESH: + return "TOKEN_KEYWORD_REFRESH"; + case TOKEN_KEYWORD_CLOSE: + return "TOKEN_KEYWORD_CLOSE"; + case TOKEN_KEYWORD_NIL: + return "KEYWORD_NIL"; + case TOKEN_KEYWORD_TRUE: + return "KEYWORD_TRUE"; + case TOKEN_KEYWORD_FALSE: + return "KEYWORD_FALSE"; + case TOKEN_KEYWORD_GLOBAL: + return "KEYWORD_GLOBAL"; + case TOKEN_OPERATOR_NOT: + return "OPERATOR_NOT"; + case TOKEN_OPERATOR_AND: + return "OPERATOR_AND"; + case TOKEN_OPERATOR_OR: + return "OPERATOR_OR"; + case TOKEN_BANG: + return "BANG"; + case TOKEN_BANG_EQ: + return "BANG_EQ"; + case TOKEN_EQ: + return "EQ"; + case TOKEN_EQ_EQ: + return "EQ_EQ"; + case TOKEN_GT: + return "GT"; + case TOKEN_LT: + return "LT"; + case TOKEN_GTE: + return "GTE"; + case TOKEN_LTE: + return "LTE"; + case TOKEN_DOT: + return "DOT"; + case TOKEN_COMMA: + return "COMMA"; + case TOKEN_COLON: + return "COLON"; + case TOKEN_SEMICOLON: + return "SEMICOLON"; + case TOKEN_PLUS: + return "PLUS"; + case TOKEN_MINUS: + return "MINUS"; + case TOKEN_STAR: + return "STAR"; + case TOKEN_SLASH: + return "SLASH"; + case TOKEN_LPAREN: + return "LPAREN"; + case TOKEN_RPAREN: + return "RPAREN"; + case TOKEN_LBRACE: + return "LBRACE"; + case TOKEN_RBRACE: + return "RBRACE"; + case TOKEN_LBRACKET: + return "LBRACKET"; + case TOKEN_RBRACKET: + return "RBRACKET"; + case TOKEN_ARROW_LEFT: + return "ARROW_LEFT"; + case TOKEN_MESH: + return "MESH"; + case TOKEN_BIG_MONEY: + return "BIG_MONEY"; + case TOKEN_AND: + return "AND"; + case TOKEN_AND_AND: + return "AND_AND"; + case TOKEN_ERROR: + return "ERROR"; + default: + return "UNKNOWN_TOKEN"; + } +} diff --git a/src/tools/compiler/lexer.h b/src/tools/compiler/lexer.h new file mode 100644 index 0000000..eaa137c --- /dev/null +++ b/src/tools/compiler/lexer.h @@ -0,0 +1,85 @@ +#ifndef UNDAR_LEXER_H +#define UNDAR_LEXER_H + +typedef enum { + TOKEN_EOF, + TOKEN_IDENTIFIER, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_NAT, + TOKEN_LITERAL_REAL, + TOKEN_LITERAL_STR, + TOKEN_TYPE_I8, + TOKEN_TYPE_I16, + TOKEN_TYPE_INT, + TOKEN_TYPE_U8, + TOKEN_TYPE_U16, + TOKEN_TYPE_NAT, + TOKEN_TYPE_REAL, + TOKEN_TYPE_STR, + TOKEN_KEYWORD_PLEX, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_IS, + TOKEN_KEYWORD_AS, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_INIT, + TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_GLOBAL, + TOKEN_KEYWORD_OPEN, + TOKEN_KEYWORD_READ, + TOKEN_KEYWORD_WRITE, + TOKEN_KEYWORD_REFRESH, + TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_NIL, + TOKEN_KEYWORD_TRUE, + TOKEN_KEYWORD_FALSE, + TOKEN_OPERATOR_NOT, + TOKEN_OPERATOR_AND, + TOKEN_OPERATOR_OR, + TOKEN_BANG, + TOKEN_BANG_EQ, + TOKEN_EQ, + TOKEN_EQ_EQ, + TOKEN_AND, + TOKEN_AND_AND, + TOKEN_GT, + TOKEN_LT, + TOKEN_GTE, + TOKEN_LTE, + TOKEN_DOT, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_MESH, + TOKEN_BIG_MONEY, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_ARROW_LEFT, + TOKEN_ERROR +} TokenType; + +typedef struct { + TokenType type; + const char *start; + int length; + int line; +} Token; + +void initLexer(const char *source); +Token nextToken(); +const char* tokenTypeToString(TokenType type); + +#endif diff --git a/src/vm/vm.c b/src/vm/vm.c index e606efd..cca75e5 100644 --- a/src/vm/vm.c +++ b/src/vm/vm.c @@ -17,8 +17,8 @@ vm->pc++; \ src2 = read_u8(vm, code, vm->pc); \ vm->pc++; \ - value = (type)frame->locals[src1]; \ - value2 = (type)frame->locals[src2]; \ + value = (type)frame->locals[src1]; \ + value2 = (type)frame->locals[src2]; \ cond = !!(value op value2); \ mask = -(u32)cond; \ vm->pc = (target & mask) | (vm->pc & ~mask); \ @@ -27,7 +27,7 @@ #define MATH_OP(type, op) \ do { \ - u32 *regs = frame->locals; \ + u32 *regs = frame->locals; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ @@ -40,7 +40,7 @@ #define BIT_OP(op) \ do { \ - u32 *regs = frame->locals; \ + u32 *regs = frame->locals; \ dest = read_u8(vm, code, vm->pc); \ vm->pc++; \ src1 = read_u8(vm, code, vm->pc); \ @@ -94,7 +94,7 @@ bool step_vm(VM *vm) { switch (opcode) { case OP_EXIT: { - vm->flag = read_u32(vm, code, vm->pc); + vm->flag = read_u32(vm, code, vm->pc); return false; } case OP_CALL: { @@ -131,8 +131,6 @@ bool step_vm(VM *vm) { for (i = 0; i < N; i++) { src_reg = args[i]; child->locals[i] = frame->locals[src_reg]; - - /* Bitmask operation instead of conditional branch */ heap_mask |= ((frame->heap_mask >> src_reg) & 1) << i; } child->heap_mask = heap_mask; @@ -159,24 +157,6 @@ bool step_vm(VM *vm) { if (is_heap_value(vm, child_return_reg)) { ptr = value; size = *(u32 *)(vm->memory + ptr - 4); - - /* Fast path for small objects (70% of cases) */ - if (size <= 64) { - new_ptr = parent->end; - if (parent->end + size + 4 > MEMORY_SIZE) { - return false; - } - - *(u32 *)(vm->memory + new_ptr) = size; - memcopy(vm->memory + new_ptr + 4, vm->memory + ptr + 4, size); - parent->end += size + 4; - - parent->locals[parent->return_reg] = new_ptr; - parent->heap_mask |= (1 << parent->return_reg); - return true; - } - - /* Handle larger objects */ new_ptr = parent->end; if (parent->end + size + 4 > MEMORY_SIZE) { return false; @@ -617,7 +597,7 @@ bool step_vm(VM *vm) { vm->pc++; device_ptr = frame->locals[device_reg]; /* device pointer */ - handle = vm->memory[device_ptr + 4]; /* get device handle */ + handle = vm->memory[device_ptr + 4]; /* get device handle */ dev = &vm->devices[handle]; if (dev && dev->ops->refresh) { vm->flag = dev->ops->refresh(dev->data, &vm->memory[device_ptr + 4]); @@ -773,8 +753,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_mul(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_mul(frame->locals[src1], frame->locals[src2]); return true; } @@ -785,8 +764,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_div(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_div(frame->locals[src1], frame->locals[src2]); return true; } @@ -797,8 +775,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_add(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_add(frame->locals[src1], frame->locals[src2]); return true; } @@ -809,8 +786,7 @@ bool step_vm(VM *vm) { vm->pc++; src2 = read_u8(vm, code, vm->pc); vm->pc++; - frame->locals[dest] = - fixed_sub(frame->locals[src1], frame->locals[src2]); + frame->locals[dest] = fixed_sub(frame->locals[src1], frame->locals[src2]); return true; } case OP_REAL_TO_INT: { diff --git a/test/add.ul.ir b/test/add.ul.ir index 3c54c04..abdfc8b 100644 --- a/test/add.ul.ir +++ b/test/add.ul.ir @@ -1,11 +1,13 @@ -global const int x = 1 -global const int y = 1 +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global int x = 1 +global int y = 1 function main () - int a is $0 - int b is $1 - int ans is $2 - str ans_string is $3 + int a $0 + int b $1 + int ans $2 + str ans_string $3 load_absolute_32 &x -> a load_absolute_32 &y -> b @@ -14,24 +16,22 @@ function main () call pln ans_string exit 0 -function add (int a is $0, int b is $1) - int result is $2 +function add (int a $0, int b $1) + int result $2 add_int a b -> result return result -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str term $1 + int msg_length $2 + str nl $3 + int nl_length $4 + int mode $5 - malloc_immediate "/dev/term/0" -> ts load_immediate 0 -> mode - syscall OPEN ts mode -> ts + syscall OPEN &terminal_namespace mode -> term strlen message -> msg_length - syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl - strlen nl -> nl_length - syscall WRITE ts nl nl_length + syscall WRITE term message msg_length + strlen &new_line -> nl_length + syscall WRITE term nl nl_length return diff --git a/test/fib.ul.ir b/test/fib.ul.ir index 03760c8..7f73714 100644 --- a/test/fib.ul.ir +++ b/test/fib.ul.ir @@ -1,44 +1,48 @@ +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" + function main () - int n is $0 - int str_n is $1 + int n $0 + int str_n $1 load_immediate 35 -> n - call fib n -> n + call &fib n -> n int_to_string n -> str_n - call pln str_nn + call &pln str_n exit 0 -function fib (int n is $0) +function fib (int n $0) load_immediate 2 -> $1 jump_lt_int &base_case n $1 load_immediate 2 -> $3 sub_int n $3 -> $4 - call fib $4 -> $5 + call &fib $4 -> $5 load_immediate 1 -> $3 sub_int n $3 -> $4 - call fib $4 -> $6 + call &fib $4 -> $6 add_int $6 $5 -> $7 return $7 -&base_case + + else base_case return n -function pln (str message is $0) - str ts is $1 - int mode is $5 - int msg_length is $2 - str nl is $3 - int nl_length is $4 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + load_immediate &terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate &new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length return diff --git a/test/hello.ul.ir b/test/hello.ul.ir index 67e729e..3c48000 100644 --- a/test/hello.ul.ir +++ b/test/hello.ul.ir @@ -1,23 +1,28 @@ -function main () - str hello is $0 +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global str message = "nuqneH 'u'?" - malloc_immediate "nuqneH 'u'?" -> hello +function main () + str hello $0 + + load_immediate &message -> hello call pln hello exit 0 -function pln (str message is $0) - str ts is $1 - int msg_length is $2 - str nl is $3 - int nl_length is $4 - int mode is $5 +function pln (str message $0) + str ts $1 + int mode $5 + int msg_length $2 + str nl $3 + int nl_length $4 - malloc_immediate "/dev/term/0" -> ts + + load_immediate &terminal_namespace -> ts load_immediate 0 -> mode syscall OPEN ts mode -> ts strlen message -> msg_length syscall WRITE ts message msg_length - malloc_immediate "\n" -> nl + load_immediate &new_line -> nl strlen nl -> nl_length syscall WRITE ts nl nl_length - return \ No newline at end of file + return diff --git a/test/window.ul.ir b/test/window.ul.ir index 7f52163..4fb6c04 100644 --- a/test/window.ul.ir +++ b/test/window.ul.ir @@ -2,18 +2,15 @@ global str screen_namespace = "/dev/screen/0" global str mouse_namespace = "/dev/mouse/0" global str terminal_namespace = "/dev/term/0" global str new_line = "\n" -global byte WHITE = 255 +global byte white = 255 function main () - // Open screen - // use load immediate because it a pointer to a string, not a value - plex screen $0 plex mouse $1 str tmp_str $2 byte color $3 - byte left_down $4 - int mode $5 + bool left_down $4 + int mode $5 nat offset_temp $6 nat x $7 nat y $8 @@ -22,30 +19,30 @@ function main () nat buffer_size $11 nat pixel_pos $12 - load_address screen_namespace -> tmp_str + load_immediate &screen_namespace -> screen load_immediate 0 -> mode - syscall OPEN tmp_str mode -> screen // open Plex screen, in namespace, in flags + syscall OPEN screen mode -> screen nat_to_string screen -> tmp_str - call pln tmp_str + call &pln tmp_str - load_offset_32 screen 8 -> width // load width + load_offset_32 screen 8 -> width nat_to_string width -> tmp_str - call pln tmp_str + call &pln tmp_str - load_offset_32 screen 12 -> buffer_size // load size + load_offset_32 screen 12 -> buffer_size nat_to_string buffer_size -> tmp_str - call pln tmp_str + call &pln tmp_str - load_immediate 16 -> offset_temp // offset for screen buffer + load_immediate 16 -> offset_temp add_nat screen offset_temp -> screen_buffer nat_to_string screen_buffer -> tmp_str - call pln tmp_str + call &pln tmp_str // open mouse - load_address mouse_namespace -> tmp_str - syscall OPEN tmp_str mode -> mouse // open Plex mouse, in namespace, in flags + load_immediate &mouse_namespace -> mouse + syscall OPEN mouse mode -> mouse syscall WRITE screen screen_buffer buffer_size // redraw @@ -53,21 +50,21 @@ function main () // load mouse click data syscall STAT mouse - load_offset_8 mouse 16 -> left_down // load btn1 pressed + load_offset_8 mouse 16 -> left_down - jump_eq_nat draw_loop left_down mode // mode 0 which an alias for false + jump_eq_nat draw_loop left_down mode // mode = 0 / false - load_offset_32 mouse 8 -> x // load x - load_offset_32 mouse 12 -> y // load y + load_offset_32 mouse 8 -> x + load_offset_32 mouse 12 -> y - // Compute start address: y*width + x - mul_nat y width -> pixel_pos // = y * width - add_nat x pixel_pos -> pixel_pos // += x - add_nat screen_buffer pixel_pos -> pixel_pos // += pixel_offset - load_immediate 4 -> fat_ptr_size // need to add offset for fat pointer size + // Compute start address: y *width + x + mul_nat y width -> pixel_pos + add_nat x pixel_pos -> pixel_pos + add_nat screen_buffer pixel_pos -> pixel_pos + load_immediate 4 -> fat_ptr_size add_nat pixel_pos fat_ptr_size -> pixel_pos - load_absolute_32 WHITE -> color + load_absolute_32 white -> color store_absolute_8 pixel_pos color // draw color at screen [x,y] syscall WRITE screen screen_buffer buffer_size // redraw @@ -81,9 +78,8 @@ function pln (str message $0) int nl_length $4 int mode $5 - load_address terminal_namespace -> term // get terminal device load_immediate 0 -> mode - syscall OPEN term mode -> term + syscall OPEN &terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length load_address new_line -> nl diff --git a/test/window.ul.ir2 b/test/window.ul.ir2 deleted file mode 100644 index e47579c..0000000 --- a/test/window.ul.ir2 +++ /dev/null @@ -1,73 +0,0 @@ -global str screen_namespace = "/dev/screen/0" -global str mouse_namespace = "/dev/mouse/0" -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global byte WHITE = 255 - -function main () - // open screen - // use load immediate because it is a pointer to a string not a value - - ptr tmp_ptr $0 = &screen_namespace - int mode $1 = 0 - ptr screen $2 = open tmp_ptr mode - - nat screen_handle $3 = @memory.u32[screen + 4] - str tmp_str $4 = nat_to_string screen_handle - pln(tmp_str) - - nat width $5 = @memory.u32[screen + 8] - tmp_str = nat_to_string width - pln(tmp_str) - - nat buffer_size $6 = @memory.u32[screen + 12] - tmp_str = nat_to_string buffer_size - pln(tmp_str) - - nat offset_temp $7 = 16 - ptr screen_buffer = add_nat screen offset_temp - - tmp_str = nat_to_string screen_buffer - pln(tmp_str) - - // open mouse - tmp_ptr = &mouse_namespace - ptr mouse $8 = open tmp_ptr mode - - write screen screen_buffer buffer_size // redraw - - loop draw_loop - // load mouse click data - stat mouse - - bool left_down $9 = @memory.u8[mouse + 16] // load btn1 pressed - - jump_eq_nat &draw_loop left_down mode // mode is 0 which is an alias for false - - nat x $10 = @memory.u32[mouse + 8] - nat y $11 = @memory.u32[mouse + 12] - - // Compute start address: y*width + x - nat pixel_pos $12 = mul_nat y width - pixel_pos = add_nat x pixel_pos - pixel_pos = add_nat screen_buffer pixel_pos - nat fat_ptr_size $13 = 4 // need to add offset for fat pointer size - pixel_pos = add_nat pixel_pos fat_ptr_size - - byte color $14 = @memory.u8[ &WHITE ] - @memory.u8[pixel_pos] = color // draw color at screen [xy] - write screen screen_buffer buffer_size // redraw - - jump &draw_loop - exit 0 - -function pln (str message $0) - nat term_ns $1 = &terminal_namespace // get terminal device - int mode $2 = 0 - ptr term $3 = open term_ns mode - int msg_length $4 = strlen message - write term message msg_length - str nl $5 = &new_line - int nl_length $6 = strlen nl - write term nl nl_length - return diff --git a/test/window.ul.vuir b/test/window.ul.vuir deleted file mode 100644 index aacf89b..0000000 --- a/test/window.ul.vuir +++ /dev/null @@ -1,95 +0,0 @@ -global str screen_namespace = "/dev/screen/0" -global str mouse_namespace = "/dev/mouse/0" -global str terminal_namespace = "/dev/term/0" -global str new_line = "\n" -global byte WHITE = 255 - -/** - * Devices - */ -plex Terminal - nat handle - -plex Screen - nat handle - nat width - nat height - byte[] buffer - -plex Mouse - nat handle - nat x - nat y - bool left - bool right - bool middle - bool btn4 - nat size - -function main () - // open screen - // use load immediate because it is a pointer to a string, not a value - - nat tmp_ptr = &screen_namespace - int mode = 0 - ptr screen = open(tmp_ptr, mode) - - nat screen_handle = screen.handle - str tmp_str = nat_to_string(screen_handle) - pln(tmp_str) - - nat width = screen.width - tmp_str = nat_to_string(width) - pln(tmp_str) - - nat buffer_size = screen.buffer - tmp_str = nat_to_string(buffer_size) - pln(tmp_str) - - nat offset_temp = 16 - nat screen_buffer = add_nat(screen, offset_temp) - - tmp_str = nat_to_string(screen_buffer) - pln(tmp_str) - - // open mouse - tmp_ptr = &mouse_namespace - ptr mouse = open(tmp_ptr, mode) - - write(screen, screen_buffer, buffer_size) // redraw - - loop draw_loop - // load mouse click data - stat(mouse) - - byte left_down = mouse.left // load btn1 pressed - - jump_eq_nat(&draw_loop, left_down, mode) // mode is 0 which is an alias for false - - nat x = mouse.x - nat y = mouse.y - - // Compute start address: y*width + x - nat pixel_pos = mul_nat(y, width) - pixel_pos = add_nat(x, pixel_pos) - pixel_pos = add_nat(screen_buffer, pixel_pos) - nat fat_ptr_size = 4 // need to add offset for fat pointer size - pixel_pos = add_nat(pixel_pos, fat_ptr_size) - - byte color = WHITE - screen.buffer[pixel_pos] = color // draw color at screen [x,y] - write(screen, screen_buffer, buffer_size) // redraw - - jump(&draw_loop) - exit 0 - -function pln (str message) - nat term_ns = &terminal_namespace // get terminal device - int mode = 0 - ptr term = open(term_ns, mode) - int msg_length = strlen(message) - write(term, message, msg_length) - str nl = &new_line - int nl_length = strlen(nl) - write(term, nl, nl_length) - return