#include "assembler.h"

// Three-pass assembler:
//   pass 1 - collect data and code labels into a symbol table,
//   pass 2 - lay out the data section in VM memory,
//   pass 3 - emit bytecode for the code section.

typedef enum { SYMBOL_CODE, SYMBOL_DATA, SYMBOL_PLEX } SymbolType;

typedef struct {
  char *name; // Heap copy of the label text; freed in assemble()
  u32 address;
  SymbolType type;
  int size;        // How much memory this symbol occupies
  int is_constant; // 1 = constant, 0 = variable
} Symbol;

typedef struct {
  Symbol *symbols;
  int count;
  int capacity;
} SymbolTable;

// Allocation failures are fatal for the assembler: report and stop.
static void die_out_of_memory(void) {
  fprintf(stderr, "Error: Out of memory\n");
  exit(1);
}

// Prepare an empty table with room for 32 symbols.
void symbol_table_init(SymbolTable *table) {
  table->capacity = 32;
  table->count = 0;
  table->symbols = malloc((size_t)table->capacity * sizeof *table->symbols);
  if (!table->symbols) {
    die_out_of_memory();
  }
}

// Append a symbol. A duplicate name is a fatal error, except that a PLEX
// symbol may be re-added over an existing PLEX symbol, in which case the
// first definition is kept and the call is a no-op.
void symbol_table_add(SymbolTable *table, const char *name, u32 address,
                      SymbolType type) {
  // Check for duplicates
  for (int i = 0; i < table->count; i++) {
    if (strcmp(table->symbols[i].name, name) == 0) {
      // Allow plex redefinition for compiler evolution
      if (type == SYMBOL_PLEX && table->symbols[i].type == SYMBOL_PLEX) {
        return;
      }
      fprintf(stderr, "Error: Duplicate label '%s'\n", name);
      exit(1);
    }
  }

  if (table->count >= table->capacity) {
    // Grow through a temporary so the old array is not lost on failure.
    int new_capacity = table->capacity * 2;
    Symbol *grown =
        realloc(table->symbols, (size_t)new_capacity * sizeof *grown);
    if (!grown) {
      die_out_of_memory();
    }
    table->symbols = grown;
    table->capacity = new_capacity;
  }

  char *name_copy = strdup(name);
  if (!name_copy) {
    die_out_of_memory();
  }

  Symbol *sym = &table->symbols[table->count++];
  sym->name = name_copy;
  sym->address = address;
  sym->type = type;
  sym->size = 4; // Default size
  sym->is_constant = 0;
}

// Linear search by exact name. Returns NULL when the name is not present.
// The returned pointer is invalidated by a later symbol_table_add that grows
// the table.
Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
  for (int i = 0; i < table->count; i++) {
    if (strcmp(table->symbols[i].name, name) == 0) {
      return &table->symbols[i];
    }
  }
  return NULL;
}

// Address of a label; an undefined label is a fatal error.
u32 find_label_in_table(SymbolTable *table, const char *name) {
  Symbol *sym = symbol_table_lookup(table, name);
  if (!sym) {
    fprintf(stderr, "Error: Undefined label '%s'\n", name);
    exit(1);
  }
  return sym->address;
}

// One fixed-format instruction: mnemonic, opcode byte and operand layout.
// `operands` has one character per operand, in source and emission order:
//   'r' - register, encoded as 1 byte via parse_register()
//   'v' - 32-bit value (label address or literal) via resolve_symbol()
typedef struct {
  const char *name;
  Opcode op;
  const char *operands;
} AsmInstructionSpec;

// Single source of truth for both sizing (pass 1) and emission (pass 3), so
// label addresses always agree with the bytes actually written.
// "syscall" is variable-length and handled separately.
//
// NOTE(review): the layouts below are exactly what the emitter has always
// written. The previous sizing routine disagreed for jump / jump-if-flag
// (sized 2, emitted 5) and load-i8 / load-u16 / load-i16 (sized 6, emitted 3);
// confirm these layouts against the VM's decoder.
static const AsmInstructionSpec asm_instruction_specs[] = {
    // Opcode only (1 byte)
    {"halt", OP_HALT, ""},
    {"return", OP_RETURN, ""},

    // Opcode + 32-bit target (5 bytes)
    {"jump", OP_JMP, "v"},
    {"jump-if-flag", OP_JMPF, "v"},
    {"call", OP_CALL, "v"},

    // Opcode + register (2 bytes)
    {"push", OP_PUSH, "r"},
    {"pop", OP_POP, "r"},

    // Opcode + register + 32-bit value (6 bytes)
    {"load-immediate", OP_LOAD_IMM, "rv"},
    {"load", OP_LOAD, "rv"},

    // Opcode + two registers (3 bytes)
    {"load-r", OP_LOAD_REG, "rr"},
    {"load-r8", OP_LOAD_REG8, "rr"},
    {"malloc", OP_MALLOC, "rr"},
    {"load-i8", OP_LOADI8, "rr"},
    {"load-u8", OP_LOADU8, "rr"},
    {"load-u16", OP_LOADU16, "rr"},
    {"load-i16", OP_LOADI16, "rr"},
    {"store", OP_STORE, "rr"},
    {"store-8", OP_STORE8, "rr"},
    {"store-16", OP_STORE16, "rr"},
    {"register-move", OP_REG_MOV, "rr"},
    {"int-to-real", OP_INT_TO_REAL, "rr"},
    {"nat-to-real", OP_UINT_TO_REAL, "rr"},
    {"real-to-int", OP_REAL_TO_INT, "rr"},
    {"real-to-nat", OP_REAL_TO_UINT, "rr"},
    {"string-length", OP_STRLEN, "rr"},
    {"int-to-string", OP_INT_TO_STRING, "rr"},
    {"nat-to-string", OP_UINT_TO_STRING, "rr"},
    {"real-to-string", OP_REAL_TO_STRING, "rr"},
    {"string-to-int", OP_STRING_TO_INT, "rr"},
    {"string-to-nat", OP_STRING_TO_UINT, "rr"},
    {"string-to-real", OP_STRING_TO_REAL, "rr"},

    // Opcode + three registers (4 bytes)
    {"bit-shift-left", OP_SLL, "rrr"},
    {"bit-shift-right", OP_SRL, "rrr"},
    {"bit-shift-right-extend", OP_SRE, "rrr"},
    {"bit-and", OP_BAND, "rrr"},
    {"bit-or", OP_BOR, "rrr"},
    {"bit-xor", OP_BXOR, "rrr"},
    {"add-int", OP_ADD_INT, "rrr"},
    {"sub-int", OP_SUB_INT, "rrr"},
    {"mul-int", OP_MUL_INT, "rrr"},
    {"div-int", OP_DIV_INT, "rrr"},
    {"add-nat", OP_ADD_UINT, "rrr"},
    {"sub-nat", OP_SUB_UINT, "rrr"},
    {"mul-nat", OP_MUL_UINT, "rrr"},
    {"div-nat", OP_DIV_UINT, "rrr"},
    {"add-real", OP_ADD_REAL, "rrr"},
    {"sub-real", OP_SUB_REAL, "rrr"},
    {"mul-real", OP_MUL_REAL, "rrr"},
    {"div-real", OP_DIV_REAL, "rrr"},
    {"string-eq", OP_STREQ, "rrr"},
    {"string-concat", OP_STRCAT, "rrr"},
    {"string-get-char", OP_STR_GET_CHAR, "rrr"},
    {"string-find-char", OP_STR_FIND_CHAR, "rrr"},

    // Opcode + four registers (5 bytes)
    {"string-slice", OP_STR_SLICE, "rrrr"},

    // Compare-and-jump: opcode + 32-bit target + two registers (7 bytes)
    {"jump-eq-int", OP_JEQ_INT, "vrr"},
    {"jump-neq-int", OP_JNEQ_INT, "vrr"},
    {"jump-gt-int", OP_JGT_INT, "vrr"},
    {"jump-lt-int", OP_JLT_INT, "vrr"},
    {"jump-le-int", OP_JLE_INT, "vrr"},
    {"jump-ge-int", OP_JGE_INT, "vrr"},
    {"jump-eq-nat", OP_JEQ_UINT, "vrr"},
    {"jump-neq-nat", OP_JNEQ_UINT, "vrr"},
    {"jump-gt-nat", OP_JGT_UINT, "vrr"},
    {"jump-lt-nat", OP_JLT_UINT, "vrr"},
    {"jump-le-nat", OP_JLE_UINT, "vrr"},
    {"jump-ge-nat", OP_JGE_UINT, "vrr"},
    {"jump-eq-real", OP_JEQ_REAL, "vrr"},
    {"jump-neq-real", OP_JNEQ_REAL, "vrr"},
    {"jump-gt-real", OP_JGT_REAL, "vrr"},
    {"jump-lt-real", OP_JLT_REAL, "vrr"},
    {"jump-le-real", OP_JLE_REAL, "vrr"},
    {"jump-ge-real", OP_JGE_REAL, "vrr"},
};

// Look up a fixed-format instruction by mnemonic; NULL when unknown.
static const AsmInstructionSpec *find_instruction_spec(const char *opname) {
  size_t spec_count =
      sizeof asm_instruction_specs / sizeof asm_instruction_specs[0];
  for (size_t i = 0; i < spec_count; i++) {
    if (strcmp(asm_instruction_specs[i].name, opname) == 0) {
      return &asm_instruction_specs[i];
    }
  }
  return NULL;
}

// Encoded length for an operand layout: 1 opcode byte, 1 byte per register,
// 4 bytes per 32-bit value.
static int encoded_size(const char *operands) {
  int size = 1;
  for (const char *p = operands; *p != '\0'; p++) {
    size += (*p == 'r') ? 1 : 4;
  }
  return size;
}

// Fatal error unless the node has at least `needed` children.
static void require_operands(const ExprNode *node, size_t needed) {
  if ((size_t)node->child_count < needed) {
    fprintf(stderr, "Error: '%s' expects %lu operand(s), got %lu\n",
            node->token, (unsigned long)needed,
            (unsigned long)node->child_count);
    exit(1);
  }
}

// Number of bytes process_code_expr() writes for this instruction node.
// An unknown mnemonic is a fatal error. This includes int-to-nat and
// nat-to-int, which used to be sized here although no emitter exists for them.
int get_instruction_byte_size(ExprNode *node) {
  const char *opname = node->token;

  // Syscall (1 + syscall_id (4) + one byte per register argument)
  if (strcmp(opname, "syscall") == 0) {
    int arg_count = node->child_count > 0 ? (int)(node->child_count - 1) : 0;
    return 1 + 4 + arg_count;
  }

  const AsmInstructionSpec *spec = find_instruction_spec(opname);
  if (!spec) {
    fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
    exit(-1);
  }
  return encoded_size(spec->operands);
}

// Like get_instruction_byte_size(), but a node without children counts as
// zero bytes.
int calculate_instruction_size(ExprNode *node) {
  if (node->child_count == 0)
    return 0;
  return get_instruction_byte_size(node);
}

// Walk one code node. A "label" node records its first child as a code symbol
// at the current address and then recurses into the remaining children; any
// other node advances *current_addr by its encoded size.
// `depth` only affects debug-output indentation.
void collect_symbols_in_node(SymbolTable *table, ExprNode *node,
                             u32 *current_addr, int depth) {
#ifdef ASM_DEBUG
  // One space of indentation per nesting level; no fixed-size buffer, so any
  // depth is safe.
  printf("%*s%u %s ", depth > 0 ? depth : 0, "", (unsigned)*current_addr,
         node->token);
#endif

  if (strcmp(node->token, "label") == 0) {
    if (node->child_count >= 1) {
      const char *name = node->children[0]->token;
#ifdef ASM_DEBUG
      printf(" %s -> %u\n", name, (unsigned)*current_addr);
#endif
      symbol_table_add(table, name, *current_addr, SYMBOL_CODE);
    }

    for (size_t i = 1; i < node->child_count; i++) {
      collect_symbols_in_node(table, node->children[i], current_addr,
                              depth + 1);
    }
  } else {
    int size = get_instruction_byte_size(node);
    *current_addr += size;
#ifdef ASM_DEBUG
    printf(" +%d bytes -> %u\n", size, (unsigned)*current_addr);
#endif
  }
}

// Pass 1: register every data label (address filled in later by
// allocate_data) and every code label (address computed here, starting at 0).
void collect_symbols(SymbolTable *table, ExprNode *program) {
  // First, collect all data labels (with placeholder address)
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "data") != 0) {
      continue;
    }
    for (size_t j = 0; j < section->child_count; ++j) {
      ExprNode *item = section->children[j];
      if (strcmp(item->token, "label") == 0 && item->child_count >= 2) {
        const char *name = item->children[0]->token;
        symbol_table_add(table, name, 0, SYMBOL_DATA);
      }
    }
  }

  // Second, collect all code labels with proper nesting
  u32 code_addr = 0;
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "code") != 0) {
      continue;
    }
    for (size_t j = 0; j < section->child_count; ++j) {
      collect_symbols_in_node(table, section->children[j], &code_addr, 0);
    }
  }
}

// Reserve `size` bytes at the VM's memory pointer, extend the current frame by
// the same amount, and record the address and size on the named data symbol
// (if one exists). Returns the start address of the reservation.
u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) {
  u32 addr = vm->mp;
  vm->mp += size;
  vm->frames[vm->fp].end += size;

  // Update the symbol's address
  Symbol *sym = symbol_table_lookup(table, name);
  if (sym && sym->type == SYMBOL_DATA) {
    sym->address = addr;
    sym->size = (int)size;
  }
  return addr;
}

// Append one byte to the code buffer.
void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; }

// Append a 32-bit value to the code buffer.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp += 4;
}

// Append an opcode as a single byte.
void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); }

// Parse "$N" into N. Returns -1 when the text does not start with '$'.
// The digits are read with atoi, so "$" followed by non-digits yields 0.
int parse_register(const char *reg_str) {
  if (reg_str[0] != '$')
    return -1;
  return atoi(reg_str + 1);
}

// Turn an operand token into a 32-bit value. Checked in this order:
//   &label    -> label address (fatal if undefined)
//   contains '.' -> fixed-point via TO_FIXED
//   0x... / 0X... -> hexadecimal
//   otherwise -> decimal
// Malformed hex or decimal text is a fatal error.
u32 resolve_symbol(SymbolTable *table, const char *ref) {
  // Handle symbol references (e.g., &label)
  if (ref[0] == '&') {
    return find_label_in_table(table, ref + 1);
  }

  // Handle fixed-point numbers (e.g., 0.5)
  if (strchr(ref, '.')) {
    return TO_FIXED(atof(ref));
  }

  char *endptr;

  // Handle hexadecimal literals (e.g., 0x7)
  if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) {
    const char *digits = ref + 2; // Skip "0x"
    u32 value = (u32)strtoul(digits, &endptr, 16);
    if (endptr == digits || *endptr != '\0') {
      fprintf(stderr, "Invalid hex literal: %s\n", ref);
      exit(1);
    }
    return value;
  }

  // Handle decimal literals (e.g., 7)
  u32 value = (u32)strtoul(ref, &endptr, 10);
  if (endptr == ref || *endptr != '\0') {
    fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    exit(1);
  }
  return value;
}

// Return a newly allocated copy of `quoted_str` with the surrounding double
// quotes removed and \n \t \r \\ \" \' escapes decoded. Unknown escapes are
// kept verbatim (backslash and character). Text that is not wrapped in quotes
// is copied unchanged. Returns NULL only for NULL input; the caller frees the
// result.
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str)
    return NULL;

  size_t len = strlen(quoted_str);
  int is_quoted =
      len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"';

  if (!is_quoted) {
    // Not quoted, return copy
    char *copy = strdup(quoted_str);
    if (!copy) {
      die_out_of_memory();
    }
    return copy;
  }

  const char *src = quoted_str + 1;
  size_t src_len = len - 2;

  // Decoding never produces more bytes than it consumes, so src_len + 1 is a
  // safe upper bound. (Counting each escape as one byte under-allocates when
  // an unknown escape is kept as two characters.)
  char *unwrapped = malloc(src_len + 1);
  if (!unwrapped) {
    die_out_of_memory();
  }

  size_t dst_idx = 0;
  for (size_t i = 0; i < src_len; ++i) {
    if (src[i] != '\\' || i + 1 >= src_len) {
      // Ordinary character, or a lone trailing backslash
      unwrapped[dst_idx++] = src[i];
      continue;
    }

    switch (src[i + 1]) {
    case 'n':
      unwrapped[dst_idx++] = '\n';
      break;
    case 't':
      unwrapped[dst_idx++] = '\t';
      break;
    case 'r':
      unwrapped[dst_idx++] = '\r';
      break;
    case '\\':
      unwrapped[dst_idx++] = '\\';
      break;
    case '"':
      unwrapped[dst_idx++] = '"';
      break;
    case '\'':
      unwrapped[dst_idx++] = '\'';
      break;
    default:
      // Unknown escape, keep both characters
      unwrapped[dst_idx++] = src[i];
      unwrapped[dst_idx++] = src[i + 1];
      break;
    }
    i++; // Skip the escaped character
  }
  unwrapped[dst_idx] = '\0';
  return unwrapped;
}

// Allocate a 4-byte data slot for `name` and store `value` in it.
static void store_data_word(VM *vm, SymbolTable *table, const char *name,
                            u32 value) {
  u32 addr = allocate_data(vm, table, name, 4);
  write_u32(vm, memory, addr, value);
}

// Pass 2 for one "data" section: every (label name value) item with a leaf
// value is written to VM memory. Strings are stored as a 32-bit length
// (including the terminating NUL) followed by the bytes; numbers occupy one
// 32-bit word. A value with children is a fatal error.
void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) {
  for (size_t i = 0; i < block->child_count; ++i) {
    ExprNode *item = block->children[i];
    if (strcmp(item->token, "label") != 0 || item->child_count < 2) {
      continue;
    }

    const char *name = item->children[0]->token;
    ExprNode *val = item->children[1];

    if (val->child_count != 0) {
      fprintf(stderr, "Unsupported data item\n");
      exit(1);
    }

    const char *token = val->token;
    const char *token_end = token + strlen(token);

    // Case 1: String literal (enclosed in quotes)
    if (token[0] == '"' && token_end[-1] == '"') {
      char *unwrapped = unwrap_string(token);
      u32 len = (u32)strlen(unwrapped) + 1; // include the NUL terminator
      u32 addr = allocate_data(vm, table, name, len + 4);
      write_u32(vm, memory, addr, len);
      for (u32 k = 0; k < len; k++) {
        write_u8(vm, memory, addr + 4 + k, unwrapped[k]);
      }
      free(unwrapped);
    }
    // Case 2: Hexadecimal integer (0x...)
    else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) {
      char *endptr;
      u32 value = (u32)strtoul(token + 2, &endptr, 16);
      if (endptr == token + 2 || endptr != token_end) {
        fprintf(stderr, "Invalid hex in data block: %s\n", token);
        exit(1);
      }
      store_data_word(vm, table, name, value);
    }
    // Case 3: Floating-point (has decimal point)
    else if (strchr(token, '.')) {
      float f = atof(token);
      store_data_word(vm, table, name, TO_FIXED(f));
    }
    // Case 4: Decimal integer
    else {
      char *endptr;
      u32 value = (u32)strtoul(token, &endptr, 10);
      if (endptr == token || endptr != token_end) {
        fprintf(stderr, "Invalid decimal in data block: %s\n", token);
        exit(1);
      }
      // allocate_data already advanced vm->mp; advancing it again here would
      // leave 4 unaccounted bytes after every decimal item.
      store_data_word(vm, table, name, value);
    }
  }
}

// Emit a syscall: opcode, 32-bit syscall id, then one register byte per
// remaining child. The first child names the syscall.
static void emit_syscall(VM *vm, ExprNode *node) {
  require_operands(node, 1);
  emit_opcode(vm, OP_SYSCALL);

  // Parse syscall ID
  const char *syscall_name = node->children[0]->token;
  u32 syscall_id = 0;
  if (strcmp(syscall_name, "EXIT") == 0)
    syscall_id = SYSCALL_EXIT;
  else if (strcmp(syscall_name, "OPEN") == 0)
    syscall_id = SYSCALL_DEVICE_OPEN;
  else if (strcmp(syscall_name, "READ") == 0)
    syscall_id = SYSCALL_DEVICE_READ;
  else if (strcmp(syscall_name, "WRITE") == 0)
    syscall_id = SYSCALL_DEVICE_WRITE;
  else if (strcmp(syscall_name, "CLOSE") == 0)
    syscall_id = SYSCALL_DEVICE_CLOSE;
  else if (strcmp(syscall_name, "IOCTL") == 0)
    syscall_id = SYSCALL_DEVICE_IOCTL;
  else
    // Still encoded as id 0, as before, but no longer silently.
    fprintf(stderr, "Warning: Unknown syscall '%s', using id 0\n",
            syscall_name);

  emit_u32(vm, syscall_id);

  // Emit register arguments
  for (size_t i = 1; i < node->child_count; ++i) {
    emit_byte(vm, (u8)parse_register(node->children[i]->token));
  }
}

// Pass 3 for one code node. "label" nodes emit nothing themselves and recurse
// into their body (children after the name). Other nodes emit the opcode
// followed by their operands in source order. An unknown mnemonic is reported
// on stderr and skipped; too few operands is a fatal error.
void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
  const char *opname = node->token;

  if (strcmp(opname, "label") == 0) {
    for (size_t i = 1; i < node->child_count; i++) {
      process_code_expr(vm, table, node->children[i]);
    }
    return;
  }

  if (strcmp(opname, "syscall") == 0) {
    emit_syscall(vm, node);
    return;
  }

  const AsmInstructionSpec *spec = find_instruction_spec(opname);
  if (!spec) {
    fprintf(stderr, "Unknown opcode: %s\n", opname);
    return;
  }

  // Validate before touching children[] so malformed input cannot read past
  // the end of the child array.
  require_operands(node, strlen(spec->operands));

  emit_opcode(vm, spec->op);
  for (size_t i = 0; spec->operands[i] != '\0'; i++) {
    const char *operand = node->children[i]->token;
    if (spec->operands[i] == 'r') {
      emit_byte(vm, (u8)parse_register(operand));
    } else {
      emit_u32(vm, resolve_symbol(table, operand));
    }
  }
}

// Assemble `program` into `vm`: collect symbols, lay out every "data"
// section, then emit every "code" section. The symbol table is local to this
// call and released before returning.
void assemble(VM *vm, ExprNode *program) {
  SymbolTable table;
  symbol_table_init(&table);

  // PASS 1: Collect all symbols (both code and data)
  collect_symbols(&table, program);

  // PASS 2: Process data section using symbol table
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "data") == 0) {
      process_data_block(vm, &table, section);
    }
  }

  // PASS 3: Process code section using complete symbol table
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "code") != 0) {
      continue;
    }
    for (size_t j = 0; j < section->child_count; ++j) {
      process_code_expr(vm, &table, section->children[j]);
    }
  }

  // Cleanup symbol table
  for (int i = 0; i < table.count; i++) {
#ifdef ASM_DEBUG
    Symbol s = table.symbols[i];
    printf("%s[%u]\n", s.name, (unsigned)s.address);
#endif
    free(table.symbols[i].name);
  }
  free(table.symbols);
}