#include "assembler.h"
#include "../../vm/common.h"
#include "../../vm/fixed.h"
#include "../../vm/libc.h"
#include "../../vm/opcodes.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Number of slots each table starts with; tables double when full.
enum { INITIAL_TABLE_CAPACITY = 16 };

// malloc wrapper: the assembler cannot continue without memory, so report
// and exit in the same way get_ref() does for undefined symbols.
static void *asm_alloc(size_t size) {
  void *ptr = malloc(size);
  if (!ptr) {
    fprintf(stderr, "Error: Out of memory\n");
    exit(1);
  }
  return ptr;
}

// realloc wrapper with the same exit-on-failure policy as asm_alloc().
static void *asm_realloc(void *old, size_t size) {
  void *ptr = realloc(old, size);
  if (!ptr) {
    fprintf(stderr, "Error: Out of memory\n");
    exit(1);
  }
  return ptr;
}

// Returns a freshly allocated, NUL-terminated copy of a token's text.
// Tokens are printed elsewhere in this file with "%.*s" and an explicit
// length, so token.start is not treated as a terminated string here.
// The caller frees the result.
static char *token_text(Token token) {
  size_t len = token.length > 0 ? (size_t)token.length : 0;
  char *text = asm_alloc(len + 1);
  memcpy(text, token.start, len);
  text[len] = '\0';
  return text;
}

// Allocates an empty symbol table with INITIAL_TABLE_CAPACITY slots.
SymbolTable *symbol_table_init(void) {
  SymbolTable *table = asm_alloc(sizeof *table);
  table->symbols = asm_alloc(INITIAL_TABLE_CAPACITY * sizeof *table->symbols);
  table->count = 0;
  table->capacity = INITIAL_TABLE_CAPACITY;
  return table;
}

// Allocates an empty names table with INITIAL_TABLE_CAPACITY slots.
NamesTable *names_table_init(void) {
  NamesTable *table = asm_alloc(sizeof *table);
  table->names = asm_alloc(INITIAL_TABLE_CAPACITY * sizeof *table->names);
  table->count = 0;
  table->capacity = INITIAL_TABLE_CAPACITY;
  return table;
}

// Releases a names table together with every name string it owns.
static void names_table_release(NamesTable *table) {
  if (!table) {
    return;
  }
  for (u32 i = 0; i < table->count; i++) {
    free(table->names[i]);
  }
  free(table->names);
  free(table);
}

// Releases a symbol table and its symbol array.
static void symbol_table_release(SymbolTable *table) {
  if (!table) {
    return;
  }
  free(table->symbols);
  free(table);
}

// Interns `name` in the names table and returns its index.
// If an equal string is already stored, the existing index is returned and
// nothing is added; otherwise a private copy of `name` is appended.
u32 names_table_add(NamesTable *table, const char *name) {
  for (u32 i = 0; i < table->count; i++) {
    if (strcmp(table->names[i], name) == 0) {
      return i;
    }
  }

  if (table->count >= table->capacity) {
    table->capacity *= 2;
    table->names =
        asm_realloc(table->names, table->capacity * sizeof *table->names);
  }

  size_t len = strlen(name);
  char *copy = asm_alloc(len + 1);
  memcpy(copy, name, len + 1); // includes the terminator

  u32 index = table->count;
  table->names[index] = copy;
  table->count++;
  return index;
}

// Appends a copy of `s` to the symbol table, growing it when full.
// Returns the index the symbol was stored at.
u32 symbol_table_add(SymbolTable *table, Symbol s) {
  if (table->count >= table->capacity) {
    table->capacity *= 2;
    table->symbols =
        asm_realloc(table->symbols, table->capacity * sizeof *table->symbols);
  }

  u32 index = table->count;
  table->symbols[index] = s;
  table->count++;
  return index;
}

// Finds the symbol whose name index refers to a names-table entry equal to
// `name`. Returns a pointer into the symbol array (invalidated by a later
// symbol_table_add that grows the table), or nil when there is no match.
Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table,
                            const char *name) {
  for (u32 i = 0; i < nt->count; i++) {
    if (strcmp(nt->names[i], name) != 0) {
      continue;
    }
    for (u32 j = 0; j < table->count; j++) {
      if (table->symbols[j].name == i) {
        return &table->symbols[j];
      }
    }
  }
  return nil;
}

// Returns the `ref` of the symbol called `name`.
// Prints an error and exits the process when the symbol is not defined.
u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) {
  Symbol *sym = symbol_table_lookup(nt, table, name);
  if (!sym) {
    fprintf(stderr, "Error: Undefined Symbol '%s'\n", name);
    exit(1);
  }
  return sym->ref;
}

// Stores one byte at the code cursor and advances the cursor by one.
// No bounds check against the size of vm->code is performed here.
void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; }

// Writes a 32-bit value at the code cursor via write_u32 and advances the
// cursor by four.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp += 4;
}

// Emits an opcode as a single byte.
void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); }

// Parses a register operand of the form "$<digits>".
// Returns the register number, or -1 when the text does not start with '$'
// followed by at least one digit or the number does not fit in an int.
// Characters after the digits are ignored, as they were with atoi().
int parse_register(const char *reg_str) {
  if (!reg_str || reg_str[0] != '$') {
    return -1;
  }

  const char *digits = reg_str + 1;
  if (*digits < '0' || *digits > '9') {
    return -1; // atoi() would have silently produced register 0 here
  }

  long number = strtol(digits, nil, 10);
  if (number < 0 || number != (long)(int)number) {
    return -1;
  }
  return (int)number;
}

// Converts an operand string to its 32-bit value:
//   "&name"  -> the ref of symbol `name` (exits if undefined)
//   contains '.' -> fixed-point via float_to_fixed
//   otherwise -> base-10 integer via strtoul
// Prints an error and exits on a malformed literal.
u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) {
  // Handle symbol references (e.g., &label)
  if (ref[0] == '&') {
    return get_ref(nt, table, ref + 1);
  }

  char *endptr;

  // fixed-point numbers (e.g., 0.5)
  if (strchr(ref, '.')) {
    double real = strtod(ref, &endptr);
    if (endptr == ref || *endptr != '\0') {
      fprintf(stderr, "Invalid real literal: %s\n", ref);
      exit(1);
    }
    return float_to_fixed(real);
  }

  // decimal literals (e.g., 7)
  // The result is truncated to 32 bits on purpose: strtoul negates a
  // leading '-', so "-5" becomes the two's-complement pattern of -5.
  u32 value = (u32)strtoul(ref, &endptr, 10);
  if (endptr == ref || *endptr != '\0') {
    fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    exit(1);
  }
  return value;
}

// Returns a heap-allocated copy of `quoted_str` with surrounding double
// quotes removed and the escapes \n \t \r \\ \" \' translated.
// An unknown escape is kept as both characters. Text that is not wrapped in
// double quotes is copied unchanged. Returns nil for a nil input.
// The caller frees the result.
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str) {
    return nil;
  }

  size_t len = strlen(quoted_str);
  int is_quoted =
      len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"';

  if (!is_quoted) {
    // Not quoted, return copy
    char *copy = asm_alloc(len + 1);
    memcpy(copy, quoted_str, len + 1);
    return copy;
  }

  const char *src = quoted_str + 1;
  size_t src_len = len - 2;

  // Every escape consumes two input characters and produces at most two,
  // so the output never exceeds src_len. Sizing the buffer this way also
  // covers unknown escapes, which emit both characters.
  char *unwrapped = asm_alloc(src_len + 1);
  size_t dst_idx = 0;

  for (size_t i = 0; i < src_len; ++i) {
    if (src[i] != '\\' || i + 1 >= src_len) {
      unwrapped[dst_idx++] = src[i];
      continue;
    }

    switch (src[i + 1]) {
    case 'n':
      unwrapped[dst_idx++] = '\n';
      break;
    case 't':
      unwrapped[dst_idx++] = '\t';
      break;
    case 'r':
      unwrapped[dst_idx++] = '\r';
      break;
    case '\\':
      unwrapped[dst_idx++] = '\\';
      break;
    case '"':
      unwrapped[dst_idx++] = '"';
      break;
    case '\'':
      unwrapped[dst_idx++] = '\'';
      break;
    default:
      // Unknown escape, keep both characters
      unwrapped[dst_idx++] = src[i];
      unwrapped[dst_idx++] = src[i + 1];
      break;
    }
    i++; // Skip the escaped character
  }

  unwrapped[dst_idx] = '\0';
  return unwrapped;
}

// Parses the tokens that follow a `global` keyword, in the order
// <type> '=' <identifier> <literal>, and stores the literal in VM memory at
// the current memory cursor (vm->mp).
//
// Numeric literals are written with write_u32 and advance vm->mp and the
// current frame's end by the size of the declared type. String literals are
// stored as a 32-bit length, the bytes, and a terminating '\0'.
//
// Returns a heap-allocated Symbol (name index, ref = storage address, type)
// that the caller must free, or nil when the token sequence is not a valid
// declaration. The symbol is NOT added to `st` here.
Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) {
  Symbol *s = asm_alloc(sizeof *s);

  // Zeroed so that the `str` and identifier type cases, which set no size,
  // never leave indeterminate values behind.
  ValueType t;
  memset(&t, 0, sizeof t);

  Token token_type = nextToken();
  switch (token_type.type) {
  case TOKEN_TYPE_I8:
    t.type = I8;
    t.size = 1;
    break;
  case TOKEN_TYPE_I16:
    t.type = I16;
    t.size = 2;
    break;
  case TOKEN_TYPE_U8:
    t.type = U8;
    t.size = 1;
    break;
  case TOKEN_TYPE_U16:
    t.type = U16;
    t.size = 2;
    break;
  case TOKEN_TYPE_INT:
    t.type = I32;
    t.size = 4;
    break;
  case TOKEN_TYPE_NAT:
    t.type = U32;
    t.size = 4;
    break;
  case TOKEN_TYPE_REAL:
    t.type = F32;
    t.size = 4;
    break;
  case TOKEN_TYPE_STR:
    t.type = STR;
    break;
  case TOKEN_IDENTIFIER:
    break;
  default:
    goto fail;
  }

  Token eq = nextToken();
  if (eq.type != TOKEN_EQ) {
    goto fail;
  }

  Token name = nextToken();
  if (name.type != TOKEN_IDENTIFIER) {
    goto fail;
  }

  // Intern only the identifier itself, not everything after it in the source.
  char *name_text = token_text(name);
  s->name = names_table_add(nt, name_text);
  free(name_text);

  u32 addr = vm->mp;
  s->ref = addr;

  Token value = nextToken();
  switch (value.type) {
  case TOKEN_LITERAL_INT:
  case TOKEN_LITERAL_NAT:
  case TOKEN_LITERAL_REAL: {
    if (t.size == 0) {
      goto fail; // declared type has no numeric storage size
    }
    char *literal = token_text(value);
    u32 result = resolve_symbol(nt, st, literal);
    free(literal);

    // NOTE(review): write_u32 is used for every width while the cursor only
    // advances by t.size; for 1- and 2-byte types this presumably touches
    // bytes beyond the reserved slot. Confirm against write_u32.
    write_u32(vm, memory, addr, result);
    vm->mp += t.size;
    vm->frames[vm->fp].end += t.size;
    break;
  }
  case TOKEN_LITERAL_STR: {
    char *quoted = token_text(value);
    char *unwrapped = unwrap_string(quoted);
    free(quoted);

    u32 len = (u32)strlen(unwrapped);
    u32 size = len + 1 + 4; // bytes + terminator + length prefix
    t.size = size;
    vm->mp += size;
    vm->frames[vm->fp].end += size;

    write_u32(vm, memory, addr, len);
    for (u32 i = 0; i < len; i++) {
      write_u8(vm, memory, addr + 4 + i, unwrapped[i]);
    }
    write_u8(vm, memory, addr + 4 + len, '\0');
    free(unwrapped);
    break;
  }
  default:
    goto fail;
  }

  s->type = t;
  return s;

fail:
  free(s);
  return nil;
}

// Placeholder for function definitions: consumes nothing and returns nil.
Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) {
  USED(vm);
  USED(nt);
  USED(st);
  return nil;
}

// Tokenizes `source` and processes it token by token.
// Each token is printed; `global` declarations are parsed, stored in VM
// memory, and recorded in the symbol table. Opcode mnemonics are recognised
// but nothing is emitted for them yet (all branches are empty).
// Stops at end of input or at the first lexer error.
void assemble(VM *vm, char *source) {
  SymbolTable *st = symbol_table_init();
  NamesTable *nt = names_table_init();

  initLexer(source);
  Token token;
  do {
    token = nextToken();
    if (token.type == TOKEN_ERROR) {
      printf("ERROR at line %d: %.*s\n", token.line, token.length,
             token.start);
      break;
    }

    if (token.type != TOKEN_EOF) {
      printf("Line %d [%s]: %.*s\n", token.line,
             tokenTypeToString(token.type), token.length, token.start);

      if (token.type == TOKEN_KEYWORD_GLOBAL) {
        Symbol *sym = global(vm, nt, st);
        if (!sym) {
          printf("ERROR at line %d: %.*s\n", token.line, token.length,
                 token.start);
        } else {
          // Record the symbol so that get_ref() can resolve it later; the
          // table keeps its own copy, so the heap object is released here.
          symbol_table_add(st, *sym);
          free(sym);
        }
      }

      if (token.type == TOKEN_KEYWORD_FN) {
        function(vm, nt, st);
      }

      if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 ||
          token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT ||
          token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 ||
          token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL ||
          token.type == TOKEN_TYPE_STR) {
        // not handled yet
      }

      if (token.type == TOKEN_IDENTIFIER) {
        // Compare against a terminated copy so that only the identifier
        // itself, not the source text following it, takes part in the match.
        char *ident = token_text(token);

        // check to see if it is an opcode first
        if (streq(ident, "exit")) {
        } else if (streq(ident, "call")) {
        } else if (streq(ident, "syscall")) {
        } else if (streq(ident, "load_immediate")) {
        } else if (streq(ident, "load_indirect_8")) {
        } else if (streq(ident, "load_indirect_16")) {
        } else if (streq(ident, "load_indirect_32")) {
        } else if (streq(ident, "load_absolute_8")) {
        } else if (streq(ident, "load_absolute_16")) {
        } else if (streq(ident, "load_absolute_32")) {
        } else if (streq(ident, "load_offset_8")) {
        } else if (streq(ident, "load_offset_16")) {
        } else if (streq(ident, "load_offset_32")) {
        } else if (streq(ident, "store_absolute_8")) {
        } else if (streq(ident, "store_absolute_16")) {
        } else if (streq(ident, "store_absolute_32")) {
        } else if (streq(ident, "store_indirect_8")) {
        } else if (streq(ident, "store_indirect_16")) {
        } else if (streq(ident, "store_indirect_32")) {
        } else if (streq(ident, "store_offset_8")) {
        } else if (streq(ident, "store_offset_16")) {
        } else if (streq(ident, "store_offset_32")) {
        } else if (streq(ident, "malloc")) {
        } else if (streq(ident, "malloc_immediate")) {
        } else if (streq(ident, "memset_8")) {
        } else if (streq(ident, "memset_16")) {
        } else if (streq(ident, "memset_32")) {
        } else if (streq(ident, "register_move")) {
        } else if (streq(ident, "add_int")) {
        } else if (streq(ident, "sub_int")) {
        } else if (streq(ident, "mul_int")) {
        } else if (streq(ident, "div_int")) {
        } else if (streq(ident, "abs_int")) {
        } else if (streq(ident, "neg_int")) {
        } else if (streq(ident, "add_nat")) {
        } else if (streq(ident, "sub_nat")) {
        } else if (streq(ident, "mul_nat")) {
        } else if (streq(ident, "div_nat")) {
        } else if (streq(ident, "abs_nat")) {
        } else if (streq(ident, "neg_nat")) {
        } else if (streq(ident, "add_real")) {
        } else if (streq(ident, "sub_real")) {
        } else if (streq(ident, "mul_real")) {
        } else if (streq(ident, "div_real")) {
        } else if (streq(ident, "abs_real")) {
        } else if (streq(ident, "neg_real")) {
        } else if (streq(ident, "int_to_real")) {
        } else if (streq(ident, "nat_to_real")) {
        } else if (streq(ident, "real_to_int")) {
        } else if (streq(ident, "real_to_nat")) {
        } else if (streq(ident, "bit_shift_left")) {
        } else if (streq(ident, "bit_shift_right")) {
        } else if (streq(ident, "bit_shift_r_ext")) {
        } else if (streq(ident, "bit_and")) {
        } else if (streq(ident, "bit_or")) {
        } else if (streq(ident, "bit_xor")) {
        } else if (streq(ident, "jump")) {
        } else if (streq(ident, "jump_if_flag")) {
        } else if (streq(ident, "jump_eq_int")) {
        } else if (streq(ident, "jump_neq_int")) {
        } else if (streq(ident, "jump_gt_int")) {
        } else if (streq(ident, "jump_lt_int")) {
        } else if (streq(ident, "jump_le_int")) {
        } else if (streq(ident, "jump_ge_int")) {
        } else if (streq(ident, "jump_eq_nat")) {
        } else if (streq(ident, "jump_neq_nat")) {
        } else if (streq(ident, "jump_gt_nat")) {
        } else if (streq(ident, "jump_lt_nat")) {
        } else if (streq(ident, "jump_le_nat")) {
        } else if (streq(ident, "jump_ge_nat")) {
        } else if (streq(ident, "jump_eq_real")) {
        } else if (streq(ident, "jump_neq_real")) {
        } else if (streq(ident, "jump_ge_real")) {
        } else if (streq(ident, "jump_gt_real")) {
        } else if (streq(ident, "jump_lt_real")) {
        } else if (streq(ident, "jump_le_real")) {
        } else if (streq(ident, "string_length")) {
        } else if (streq(ident, "string_eq")) {
        } else if (streq(ident, "string_concat")) {
        } else if (streq(ident, "string_get_char")) {
        } else if (streq(ident, "string_find_char")) {
        } else if (streq(ident, "string_slice")) {
        } else if (streq(ident, "int_to_string")) {
        } else if (streq(ident, "nat_to_string")) {
        } else if (streq(ident, "real_to_string")) {
        } else if (streq(ident, "string_to_int")) {
        } else if (streq(ident, "string_to_nat")) {
        } else if (streq(ident, "string_to_real")) {
        } else {
          // some other identifier
        }

        free(ident);
      }
    }
  } while (token.type != TOKEN_EOF);

  // Both tables are local to this call; release them before returning.
  symbol_table_release(st);
  names_table_release(nt);
}