#include "assembler.h"
#include "parser.h"

// Standard headers for the libc calls made directly in this file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef enum { SYMBOL_CODE, SYMBOL_DATA } SymbolType;

typedef struct {
  char *name;
  u32 address;
  SymbolType type;
  int size;        // How much memory this symbol occupies
  int is_constant; // 1 = constant, 0 = variable
} Symbol;

typedef struct {
  Symbol *symbols;
  int count;
  int capacity;
} SymbolTable;

// Prepare an empty symbol table with room for 32 entries.
// Terminates the process if the initial allocation fails.
void symbol_table_init(SymbolTable *table) {
  table->capacity = 32;
  table->count = 0;
  table->symbols = malloc((size_t)table->capacity * sizeof *table->symbols);
  if (!table->symbols) {
    fprintf(stderr, "Error: out of memory allocating symbol table\n");
    exit(1);
  }
}

// Append a symbol (name is copied) with the default size of 4 and
// is_constant = 0. Code and data labels share one namespace: a name that is
// already present is a fatal error. Terminates the process on allocation
// failure.
void symbol_table_add(SymbolTable *table, const char *name, u32 address,
                      SymbolType type) {
  // Check for duplicates
  for (int i = 0; i < table->count; i++) {
    if (strcmp(table->symbols[i].name, name) == 0) {
      fprintf(stderr, "Error: Duplicate label '%s'\n", name);
      exit(1);
    }
  }

  if (table->count >= table->capacity) {
    // Grow through a temporary so the old array is not lost if realloc fails,
    // and make sure a zero capacity can still grow.
    int new_capacity = table->capacity > 0 ? table->capacity * 2 : 32;
    Symbol *grown =
        realloc(table->symbols, (size_t)new_capacity * sizeof *grown);
    if (!grown) {
      fprintf(stderr, "Error: out of memory growing symbol table\n");
      exit(1);
    }
    table->symbols = grown;
    table->capacity = new_capacity;
  }

  char *name_copy = strdup(name);
  if (!name_copy) {
    fprintf(stderr, "Error: out of memory storing label '%s'\n", name);
    exit(1);
  }

  Symbol *sym = &table->symbols[table->count++];
  sym->name = name_copy;
  sym->address = address;
  sym->type = type;
  sym->size = 4; // Default size
  sym->is_constant = 0;
}

// Linear search by exact name. Returns a pointer into the table's array
// (invalidated by a later symbol_table_add that grows it), or NULL.
Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
  for (int i = 0; i < table->count; i++) {
    if (strcmp(table->symbols[i].name, name) == 0) {
      return &table->symbols[i];
    }
  }
  return NULL;
}

// Address of the named symbol; an unknown name is a fatal error.
u32 find_label_in_table(SymbolTable *table, const char *name) {
  Symbol *sym = symbol_table_lookup(table, name);
  if (!sym) {
    fprintf(stderr, "Error: Undefined label '%s'\n", name);
    exit(1);
  }
  return sym->address;
}

// Encoded size in bytes of every instruction whose length does not depend on
// its operands (1 opcode byte + operand bytes). Each entry has to equal what
// process_code_expr emits for that mnemonic; otherwise the label addresses
// computed by collect_symbols drift away from the emitted code.
static const struct {
  const char *name;
  int size;
} fixed_size_ops[] = {
    // Opcode only
    {"halt", 1},

    // Opcode + 1 register
    {"return", 2},

    // Opcode + 2 registers
    {"int-to-string", 3},
    {"nat-to-string", 3},
    {"real-to-string", 3},
    {"string-to-int", 3},
    {"string-to-nat", 3},
    {"string-to-real", 3},
    {"int-to-real", 3},
    {"nat-to-real", 3},
    {"real-to-int", 3},
    {"real-to-nat", 3},
    {"nat-to-int", 3},
    {"int-to-nat", 3},
    {"string-length", 3},
    {"load-indirect-8", 3},
    {"load-indirect-16", 3},
    {"load-indirect-32", 3},
    {"store-indirect-8", 3},
    {"store-indirect-16", 3},
    {"store-indirect-32", 3},
    {"store-absolute-8", 3},
    {"store-absolute-16", 3},
    {"store-absolute-32", 3},
    {"register-move", 3},
    {"malloc", 3},

    // Opcode + 3 registers
    {"add-int", 4},
    {"sub-int", 4},
    {"mul-int", 4},
    {"div-int", 4},
    {"add-nat", 4},
    {"sub-nat", 4},
    {"mul-nat", 4},
    {"div-nat", 4},
    {"add-real", 4},
    {"sub-real", 4},
    {"mul-real", 4},
    {"div-real", 4},
    {"bit-shift-left", 4},
    {"bit-shift-right", 4},
    {"bit-shift-re", 4},
    {"bit-and", 4},
    {"bit-or", 4},
    {"bit-xor", 4},
    {"memset", 4},
    {"memset-8", 4},
    {"memset-16", 4},
    {"string-eq", 4},
    {"string-concat", 4},
    {"string-get-char", 4},
    {"string-find-char", 4},

    // Opcode + 4-byte address, or opcode + 4 registers
    {"jump", 5},
    {"jump-if-flag", 5},
    {"string-slice", 5},

    // Opcode + 1 register + 4-byte value
    {"load-immediate", 6},
    {"load-absolute-8", 6},
    {"load-absolute-16", 6},
    {"load-absolute-32", 6},

    // Opcode + 4-byte address + 2 registers
    {"jump-eq-int", 7},
    {"jump-neq-int", 7},
    {"jump-gt-int", 7},
    {"jump-lt-int", 7},
    {"jump-le-int", 7},
    {"jump-ge-int", 7},
    {"jump-eq-nat", 7},
    {"jump-neq-nat", 7},
    {"jump-gt-nat", 7},
    {"jump-lt-nat", 7},
    {"jump-le-nat", 7},
    {"jump-ge-nat", 7},
    {"jump-eq-real", 7},
    {"jump-neq-real", 7},
    {"jump-gt-real", 7},
    {"jump-lt-real", 7},
    {"jump-le-real", 7},
    {"jump-ge-real", 7},

    // Opcode + 2 registers + 4-byte offset
    {"store-offset-8", 7},
    {"store-offset-16", 7},
    {"store-offset-32", 7},
    {"load-offset-8", 7},
    {"load-offset-16", 7},
    {"load-offset-32", 7},
};

// Number of bytes the instruction in `node` occupies in the code segment.
// `node->token` is the mnemonic. "call" and "syscall" are variable-length and
// sized from their children; everything else comes from fixed_size_ops.
// An unknown mnemonic is a fatal error (exit status -1).
int get_instruction_byte_size(ExprNode *node) {
  const char *opname = node->token;

  size_t op_count = sizeof fixed_size_ops / sizeof fixed_size_ops[0];
  for (size_t i = 0; i < op_count; i++) {
    if (strcmp(opname, fixed_size_ops[i].name) == 0) {
      return fixed_size_ops[i].size;
    }
  }

  // Call (1 + 4 + 1 + args + 1):
  // opcode, target address, argument count, argument registers, return reg
  if (strcmp(opname, "call") == 0) {
    if (node->child_count < 2) {
      // children[1] below would be read out of bounds
      fprintf(stderr, "Error: call requires (args) and return register\n");
      exit(1);
    }
    ExprNode *args_node = node->children[1];
    u32 args_count;
    if (strcmp(args_node->token, "nil") == 0) {
      args_count = 0;
    } else {
      // The list's own token is the first argument, its children the rest
      args_count = 1 + args_node->child_count;
    }
    return 1 + 1 + 1 + 4 + args_count;
  }

  // Syscall (1 + syscall_id (4) + args): children[0] is the syscall name,
  // every further child is one register byte
  if (strcmp(opname, "syscall") == 0) {
    return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0);
  }

  fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
  exit(-1);
}

// Size of `node`, or 0 when the node has no children.
// NOTE(review): this reports 0 for operand-less instructions such as "halt",
// which get_instruction_byte_size sizes as 1 -- confirm callers expect that.
int calculate_instruction_size(ExprNode *node) {
  if (node->child_count == 0)
    return 0;
  return get_instruction_byte_size(node);
}

// First-pass walk over one code expression.
// A "label" node records its name (children[0]) at the current address and
// then recurses into its remaining children; any other node is an instruction
// and advances *current_addr by its encoded size. `depth` only affects the
// indentation of the ASM_DEBUG trace.
void collect_symbols_in_node(SymbolTable *table, ExprNode *node,
                             u32 *current_addr, int depth) {
#ifdef ASM_DEBUG
  // "%*s" pads with `depth` spaces without needing a fixed-size buffer
  printf("%*s%u %s ", depth > 0 ? depth : 0, "", (unsigned)*current_addr,
         node->token);
#endif

  if (strcmp(node->token, "label") != 0) {
    int size = get_instruction_byte_size(node);
    *current_addr += size;
#ifdef ASM_DEBUG
    printf(" +%d bytes -> %u\n", size, (unsigned)*current_addr);
#endif
    return;
  }

  if (node->child_count >= 1) {
    const char *name = node->children[0]->token;
#ifdef ASM_DEBUG
    printf(" %s -> %u\n", name, (unsigned)*current_addr);
#endif
    symbol_table_add(table, name, *current_addr, SYMBOL_CODE);
  }

  for (size_t i = 1; i < node->child_count; i++) {
    collect_symbols_in_node(table, node->children[i], current_addr, depth + 1);
  }
}

// Build the symbol table for a whole program.
// Data labels are registered first with a placeholder address of 0 (the real
// address is filled in by allocate_data); code labels are then assigned
// addresses by walking every "code" section in order, starting at 0.
void collect_symbols(SymbolTable *table, ExprNode *program) {
  // First, collect all data labels (with placeholder address)
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "data") != 0) {
      continue;
    }
    for (size_t j = 0; j < section->child_count; ++j) {
      ExprNode *item = section->children[j];
      if (strcmp(item->token, "label") == 0 && item->child_count >= 2) {
        symbol_table_add(table, item->children[0]->token, 0, SYMBOL_DATA);
      }
    }
  }

  // Second, collect all code labels with proper nesting
  u32 code_addr = 0;
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "code") != 0) {
      continue;
    }
    for (size_t j = 0; j < section->child_count; ++j) {
      collect_symbols_in_node(table, section->children[j], &code_addr, 0);
    }
  }
}

// Reserve `size` bytes at the VM's memory pointer and return their address.
// Advances vm->mp and the end of the current frame by `size`, and, when
// `name` is a known data symbol, stores the address and size in it.
// NOTE(review): nothing here checks the reservation against the size of VM
// memory; that capacity is not visible in this file.
u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) {
  u32 addr = vm->mp;
  vm->mp += size;
  vm->frames[vm->fp].end += size;

  // Update the symbol's address
  Symbol *sym = symbol_table_lookup(table, name);
  if (sym && sym->type == SYMBOL_DATA) {
    sym->address = addr;
    sym->size = size;
  }
  return addr;
}

// Append one byte to the code segment at vm->cp and advance vm->cp.
void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; }

// Append a 32-bit value to the code segment via write_u32 and advance vm->cp
// by 4.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp += 4;
}

// Append an opcode as a single byte.
void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); }

// Parse a register reference of the form "$<decimal>".
// Returns the register number, or -1 when the token is NULL, lacks the '$'
// prefix, has no digits, has trailing characters, or does not fit in the
// single byte callers emit (> 255).
int parse_register(const char *reg_str) {
  if (!reg_str || reg_str[0] != '$')
    return -1;

  const char *digits = reg_str + 1;
  // Require a leading digit: rejects "$", signs and leading whitespace that
  // strtol would otherwise accept
  if (*digits < '0' || *digits > '9')
    return -1;

  char *end;
  long value = strtol(digits, &end, 10);
  // An out-of-range input saturates at LONG_MAX and is caught by the bound
  if (*end != '\0' || value > 255)
    return -1;
  return (int)value;
}

// Turn an operand token into its 32-bit encoding:
//   &name   -> address of the label (fatal if undefined)
//   has '.' -> fixed-point value via TO_FIXED(atof(...))
//   0x...   -> hexadecimal literal
//   other   -> decimal literal
// Malformed hex or decimal literals are fatal errors.
// NOTE(review): the fixed-point branch uses atof, so trailing garbage after
// the number is not reported.
u32 resolve_symbol(SymbolTable *table, const char *ref) {
  // Handle symbol references (e.g., &label)
  if (ref[0] == '&') {
    return find_label_in_table(table, ref + 1);
  }

  // Handle fixed-point numbers (e.g., 0.5)
  if (strchr(ref, '.')) {
    return TO_FIXED(atof(ref));
  }

  // Handle hexadecimal literals (e.g., 0x7)
  if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) {
    char *endptr;
    u32 value = (u32)strtoul(ref + 2, &endptr, 16); // Skip "0x"
    if (endptr == ref + 2 || *endptr != '\0') {
      fprintf(stderr, "Invalid hex literal: %s\n", ref);
      exit(1);
    }
    return value;
  }

  // Handle decimal literals (e.g., 7)
  char *endptr;
  u32 value = (u32)strtoul(ref, &endptr, 10);
  if (endptr == ref || *endptr != '\0') {
    fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    exit(1);
  }
  return value;
}

// Return a newly allocated copy of `quoted_str` with the surrounding double
// quotes removed and the escapes \n \t \r \\ \" \' decoded. An unknown escape
// keeps both characters; a string that is not quoted is copied unchanged.
// Returns NULL for NULL input or when allocation fails. The caller frees the
// result.
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str)
    return NULL;

  size_t len = strlen(quoted_str);
  if (len < 2 || quoted_str[0] != '"' || quoted_str[len - 1] != '"') {
    // Not quoted, return copy
    return strdup(quoted_str);
  }

  const char *src = quoted_str + 1;
  size_t src_len = len - 2;

  // Decoding never produces more bytes than it consumes (an escape yields one
  // or two bytes from two), so src_len + 1 is always large enough.
  char *unwrapped = malloc(src_len + 1);
  if (!unwrapped)
    return NULL;

  size_t dst_idx = 0;
  for (size_t i = 0; i < src_len; ++i) {
    if (src[i] != '\\' || i + 1 >= src_len) {
      // Ordinary character, or a lone backslash at the very end
      unwrapped[dst_idx++] = src[i];
      continue;
    }

    switch (src[i + 1]) {
    case 'n':
      unwrapped[dst_idx++] = '\n';
      break;
    case 't':
      unwrapped[dst_idx++] = '\t';
      break;
    case 'r':
      unwrapped[dst_idx++] = '\r';
      break;
    case '\\':
      unwrapped[dst_idx++] = '\\';
      break;
    case '"':
      unwrapped[dst_idx++] = '"';
      break;
    case '\'':
      unwrapped[dst_idx++] = '\'';
      break;
    default:
      // Unknown escape, keep both characters
      unwrapped[dst_idx++] = src[i];
      unwrapped[dst_idx++] = src[i + 1];
      break;
    }
    i++; // Skip the escaped character
  }

  unwrapped[dst_idx] = '\0';
  return unwrapped;
}

// Lay out one "data" section in VM memory.
// Every `label` item with at least two children is (name, value); the value
// must be a leaf token and is stored as:
//   "text"  -> u32 length (including the terminating NUL) followed by bytes
//   0x...   -> 4-byte hexadecimal integer
//   has '.' -> 4-byte fixed-point value via TO_FIXED
//   other   -> 4-byte decimal integer
// Malformed numbers and non-leaf values are fatal errors. Other items are
// ignored.
void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) {
  for (size_t i = 0; i < block->child_count; ++i) {
    ExprNode *item = block->children[i];
    if (strcmp(item->token, "label") != 0 || item->child_count < 2) {
      continue;
    }

    const char *name = item->children[0]->token;
    ExprNode *val = item->children[1];
    if (val->child_count != 0) {
      fprintf(stderr, "Unsupported data item\n");
      exit(1);
    }

    const char *token = val->token;
    size_t token_len = strlen(token);

    // Case 1: String literal (enclosed in quotes)
    if (token[0] == '"' && token[token_len - 1] == '"') {
      char *unwrapped = unwrap_string(token);
      if (!unwrapped) {
        fprintf(stderr, "Error: out of memory processing string '%s'\n", name);
        exit(1);
      }
      u32 len = (u32)strlen(unwrapped) + 1; // stored length counts the NUL
      u32 addr = allocate_data(vm, table, name, len + 4);
      write_u32(vm, memory, addr, len);
      for (u32 k = 0; k < len; k++) {
        write_u8(vm, memory, addr + 4 + k, unwrapped[k]);
      }
      free(unwrapped);
    }
    // Case 2: Hexadecimal integer (0x...)
    else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) {
      char *endptr;
      u32 value = (u32)strtoul(token + 2, &endptr, 16);
      // Reject trailing garbage and a bare "0x" with no digits
      if (endptr == token + 2 || *endptr != '\0') {
        fprintf(stderr, "Invalid hex in data block: %s\n", token);
        exit(1);
      }
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, value);
    }
    // Case 3: Floating-point (has decimal point)
    else if (strchr(token, '.')) {
      float f = atof(token);
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, TO_FIXED(f));
    }
    // Case 4: Decimal integer
    else {
      char *endptr;
      u32 value = (u32)strtoul(token, &endptr, 10);
      if (endptr == token || *endptr != '\0') {
        fprintf(stderr, "Invalid decimal in data block: %s\n", token);
        exit(1);
      }
      // allocate_data already advances vm->mp and the frame end; advancing
      // vm->mp a second time here would leave the two out of step.
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, value);
    }
  }
}

void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
  const char *opname = node->token;
  if (strcmp(opname, "label") == 0) {
    for (size_t i = 1; i < node->child_count; i++) {
      process_code_expr(vm, table, node->children[i]);
    }
  } else if (strcmp(opname, "halt") == 0) {
    emit_opcode(vm, OP_HALT);
  } else if (strcmp(opname, "jump") == 0) {
    emit_opcode(vm, OP_JMP);
    u32 addr = resolve_symbol(table, node->children[0]->token);
    emit_u32(vm, addr);
  } else if (strcmp(opname, "jump-if-flag") == 0) {
    emit_opcode(vm, OP_JMPF);
    u32 addr = resolve_symbol(table, node->children[0]->token);
    emit_u32(vm, addr);
  } else if (strcmp(opname, "call") == 0) {
    emit_opcode(vm, OP_CALL);
    if (node->child_count < 3) {
      fprintf(stderr, "Error: call requires (args) and return register\n");
      return;
    }
    // Parse function address (first child)
    u32 addr = resolve_symbol(table, node->children[0]->token);
    if (addr == (u32)-1) {
      fprintf(stderr, "Error: undefined symbol '%s'\n", node->children[0]->token);
      return;
    }
    emit_u32(vm, addr);
    // Parse argument list (second child)
    ExprNode *args_node = node->children[1];
    u8 arg_count =
0; if (args_node->child_count > 0) { // Multiple arguments case arg_count = args_node->child_count + 1; // +1 for the token } else { // Single argument case - token is the argument arg_count = (args_node->token[0] != '\0') ? 1 : 0; } emit_byte(vm, arg_count); // Emit arguments based on representation if (arg_count > 0) { // First argument is always the token const char *reg_str = args_node->token; int reg = parse_register(reg_str); if (reg < 0) { fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); return; } emit_byte(vm, (u8)reg); // Emit children if present for (size_t i = 0; i < args_node->child_count; i++) { reg_str = args_node->children[i]->token; reg = parse_register(reg_str); if (reg < 0) { fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str); return; } emit_byte(vm, (u8)reg); } } // Parse return register (third child) const char *return_reg_str = node->children[2]->token; int return_reg = parse_register(return_reg_str); if (return_reg < 0) { if (strcmp(return_reg_str, "nil") == 0) { return_reg = 0xFF; } else { fprintf(stderr, "Error: invalid return register '%s'\n", return_reg_str); return; } } emit_byte(vm, (u8)return_reg); } else if (strcmp(opname, "return") == 0) { emit_opcode(vm, OP_RETURN); if (node->child_count != 1) { fprintf(stderr, "Error: return requires exactly one argument\n"); return; } const char *reg_str = node->children[0]->token; int reg = parse_register(reg_str); // Handle "nil" as special case (no return value) if (reg < 0) { if (strcmp(reg_str, "nil") == 0) { reg = 0xFF; // Special value for "no return" } else { fprintf(stderr, "Error: invalid return register '%s'\n", reg_str); return; } } emit_byte(vm, (u8)reg); } else if (strcmp(opname, "load-immediate") == 0) { emit_opcode(vm, OP_LOAD_IMM); int reg = parse_register(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[1]->token); emit_byte(vm, reg); emit_u32(vm, addr); } else if (strcmp(opname, "load-absolute-8") == 0) { 
emit_opcode(vm, OP_LOAD_ABS_8); int dest = parse_register(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[1]->token); emit_byte(vm, dest); emit_u32(vm, addr); } else if (strcmp(opname, "load-absolute-16") == 0) { emit_opcode(vm, OP_LOAD_ABS_16); int dest = parse_register(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[1]->token); emit_byte(vm, dest); emit_u32(vm, addr); } else if (strcmp(opname, "load-absolute-32") == 0) { emit_opcode(vm, OP_LOAD_ABS_32); int dest = parse_register(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[1]->token); emit_byte(vm, dest); emit_u32(vm, addr); } else if (strcmp(opname, "load-indirect-8") == 0) { emit_opcode(vm, OP_LOAD_IND_8); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "load-indirect-16") == 0) { emit_opcode(vm, OP_LOAD_IND_16); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "load-indirect-32") == 0) { emit_opcode(vm, OP_LOAD_IND_32); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "malloc") == 0) { emit_opcode(vm, OP_MALLOC); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "memset-8") == 0) { emit_opcode(vm, OP_MEMSET_8); int dest = parse_register(node->children[0]->token); int value = parse_register(node->children[1]->token); int count = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, value); emit_byte(vm, count); } else if (strcmp(opname, "memset-16") == 0) { emit_opcode(vm, OP_MEMSET_16); int dest = 
parse_register(node->children[0]->token); int value = parse_register(node->children[1]->token); int count = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, value); emit_byte(vm, count); } else if (strcmp(opname, "memset") == 0) { emit_opcode(vm, OP_MEMSET_32); int dest = parse_register(node->children[0]->token); int value = parse_register(node->children[1]->token); int count = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, value); emit_byte(vm, count); } else if (strcmp(opname, "store-absolute-8") == 0) { emit_opcode(vm, OP_STORE_ABS_8); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "store-absolute-16") == 0) { emit_opcode(vm, OP_STORE_ABS_16); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "store-absolute-32") == 0) { emit_opcode(vm, OP_STORE_ABS_32); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "store-indirect-8") == 0) { emit_opcode(vm, OP_STORE_IND_8); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "store-indirect-16") == 0) { emit_opcode(vm, OP_STORE_IND_16); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "store-indirect-32") == 0) { emit_opcode(vm, OP_STORE_IND_32); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src1); } else if (strcmp(opname, "store-offset-8") == 0) { 
emit_opcode(vm, OP_STORE_OFF_8); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_u32(vm, addr); } else if (strcmp(opname, "store-offset-16") == 0) { emit_opcode(vm, OP_STORE_OFF_16); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_u32(vm, addr); } else if (strcmp(opname, "store-offset-32") == 0) { emit_opcode(vm, OP_STORE_OFF_32); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_u32(vm, addr); } else if (strcmp(opname, "load-offset-8") == 0) { emit_opcode(vm, OP_LOAD_OFF_8); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_u32(vm, addr); } else if (strcmp(opname, "load-offset-16") == 0) { emit_opcode(vm, OP_LOAD_OFF_16); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_u32(vm, addr); } else if (strcmp(opname, "load-offset-32") == 0) { emit_opcode(vm, OP_LOAD_OFF_32); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_u32(vm, addr); } else if (strcmp(opname, "register-move") == 0) { emit_opcode(vm, OP_REG_MOV); int dest = parse_register(node->children[0]->token); int src = 
parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "syscall") == 0) { emit_opcode(vm, OP_SYSCALL); // Parse syscall ID u32 syscall_id = 0; const char *syscall_name = node->children[0]->token; if (strcmp(syscall_name, "EXIT") == 0) syscall_id = SYSCALL_EXIT; else if (strcmp(syscall_name, "OPEN") == 0) syscall_id = SYSCALL_DEVICE_OPEN; else if (strcmp(syscall_name, "READ") == 0) syscall_id = SYSCALL_DEVICE_READ; else if (strcmp(syscall_name, "WRITE") == 0) syscall_id = SYSCALL_DEVICE_WRITE; else if (strcmp(syscall_name, "CLOSE") == 0) syscall_id = SYSCALL_DEVICE_CLOSE; else if (strcmp(syscall_name, "IOCTL") == 0) syscall_id = SYSCALL_DEVICE_IOCTL; else if (strcmp(syscall_name, "REFRESH") == 0) syscall_id = SYSCALL_DEVICE_REFRESH; emit_u32(vm, syscall_id); // Emit register arguments for (size_t i = 1; i < node->child_count; ++i) { int reg = parse_register(node->children[i]->token); emit_byte(vm, reg); } } else if (strcmp(opname, "bit-shift-left") == 0) { emit_opcode(vm, OP_SLL); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "bit-shift-right") == 0) { emit_opcode(vm, OP_SRL); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "bit-shift-re") == 0) { emit_opcode(vm, OP_SRE); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "bit-and") == 0) { emit_opcode(vm, OP_BAND); int dest = parse_register(node->children[0]->token); int 
src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "bit-or") == 0) { emit_opcode(vm, OP_BOR); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "bit-xor") == 0) { emit_opcode(vm, OP_BXOR); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "add-int") == 0) { emit_opcode(vm, OP_ADD_INT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "sub-int") == 0) { emit_opcode(vm, OP_SUB_INT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "mul-int") == 0) { emit_opcode(vm, OP_MUL_INT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "div-int") == 0) { emit_opcode(vm, OP_DIV_INT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "add-nat") == 0) { emit_opcode(vm, 
OP_ADD_NAT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "sub-nat") == 0) { emit_opcode(vm, OP_SUB_NAT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "mul-nat") == 0) { emit_opcode(vm, OP_MUL_NAT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "div-nat") == 0) { emit_opcode(vm, OP_DIV_NAT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "add-real") == 0) { emit_opcode(vm, OP_ADD_REAL); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "sub-real") == 0) { emit_opcode(vm, OP_SUB_REAL); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "mul-real") == 0) { emit_opcode(vm, OP_MUL_REAL); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); 
emit_byte(vm, src2); } else if (strcmp(opname, "div-real") == 0) { emit_opcode(vm, OP_DIV_REAL); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "int-to-real") == 0) { emit_opcode(vm, OP_INT_TO_REAL); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "nat-to-real") == 0) { emit_opcode(vm, OP_NAT_TO_REAL); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "real-to-int") == 0) { emit_opcode(vm, OP_REAL_TO_INT); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "real-to-nat") == 0) { emit_opcode(vm, OP_REAL_TO_NAT); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "jump-eq-int") == 0) { emit_opcode(vm, OP_JEQ_INT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-neq-int") == 0) { emit_opcode(vm, OP_JNEQ_INT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-gt-int") == 0) { emit_opcode(vm, OP_JGT_INT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = 
parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-lt-int") == 0) { emit_opcode(vm, OP_JLT_INT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-le-int") == 0) { emit_opcode(vm, OP_JLE_INT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-ge-int") == 0) { emit_opcode(vm, OP_JGE_INT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-eq-nat") == 0) { emit_opcode(vm, OP_JEQ_NAT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-neq-nat") == 0) { emit_opcode(vm, OP_JNEQ_NAT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-gt-nat") == 0) { emit_opcode(vm, OP_JGT_NAT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } 
else if (strcmp(opname, "jump-lt-nat") == 0) { emit_opcode(vm, OP_JLT_NAT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-le-nat") == 0) { emit_opcode(vm, OP_JLE_NAT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-ge-nat") == 0) { emit_opcode(vm, OP_JGE_NAT); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-eq-real") == 0) { emit_opcode(vm, OP_JEQ_REAL); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-neq-real") == 0) { emit_opcode(vm, OP_JNEQ_REAL); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-gt-real") == 0) { emit_opcode(vm, OP_JGT_REAL); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-lt-real") == 0) { emit_opcode(vm, OP_JLT_REAL); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = 
parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-le-real") == 0) { emit_opcode(vm, OP_JLE_REAL); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "jump-ge-real") == 0) { emit_opcode(vm, OP_JGE_REAL); u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "string-length") == 0) { emit_opcode(vm, OP_STRLEN); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "string-eq") == 0) { emit_opcode(vm, OP_STREQ); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "string-concat") == 0) { emit_opcode(vm, OP_STRCAT); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "string-get-char") == 0) { emit_opcode(vm, OP_STR_GET_CHAR); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "string-find-char") == 0) { emit_opcode(vm, OP_STR_FIND_CHAR); int 
dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); } else if (strcmp(opname, "string-slice") == 0) { emit_opcode(vm, OP_STR_SLICE); int dest = parse_register(node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); int src3 = parse_register(node->children[3]->token); emit_byte(vm, dest); emit_byte(vm, src1); emit_byte(vm, src2); emit_byte(vm, src3); } else if (strcmp(opname, "int-to-string") == 0) { emit_opcode(vm, OP_INT_TO_STRING); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "nat-to-string") == 0) { emit_opcode(vm, OP_NAT_TO_STRING); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "real-to-string") == 0) { emit_opcode(vm, OP_REAL_TO_STRING); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "string-to-int") == 0) { emit_opcode(vm, OP_STRING_TO_INT); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "string-to-nat") == 0) { emit_opcode(vm, OP_STRING_TO_NAT); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else if (strcmp(opname, "string-to-real") == 0) { emit_opcode(vm, OP_STRING_TO_REAL); int dest = parse_register(node->children[0]->token); int src = parse_register(node->children[1]->token); emit_byte(vm, dest); emit_byte(vm, src); } else { 
fprintf(stderr, "Unknown opcode: %s\n", opname); } }

/**
 * Assemble a parsed program into the VM in three passes.
 *
 * @param vm      Target VM; handed to process_data_block() and
 *                process_code_expr(), which do the actual emission.
 * @param program Root expression node; each child whose token is "data" or
 *                "code" is treated as a section. Children with any other
 *                token are ignored by this function.
 *
 * The symbol table is local to this call: it is created here, filled by
 * collect_symbols(), consulted by the later passes, and released before
 * returning.
 */
void assemble(VM *vm, ExprNode *program) {
  SymbolTable table;
  symbol_table_init(&table);

  // PASS 1: Collect all symbols (both code and data) so that later passes
  // can resolve labels regardless of where they are defined.
  collect_symbols(&table, program);

  // PASS 2: Process every data section using the symbol table.
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "data") == 0) {
      process_data_block(vm, &table, section);
    }
  }

  // PASS 3: Process every code section, one expression at a time, using the
  // now-complete symbol table.
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "code") == 0) {
      for (size_t j = 0; j < section->child_count; ++j) {
        process_code_expr(vm, &table, section->children[j]);
      }
    }
  }

  // Cleanup: each symbol name was strdup'ed by symbol_table_add(), and the
  // array itself was allocated by symbol_table_init().
  for (int i = 0; i < table.count; i++) {
#ifdef ASM_DEBUG
    // The address is an unsigned 32-bit value: print it through an explicit
    // unsigned conversion so the format specifier matches the argument type
    // and large addresses are not shown as negative numbers.
    const Symbol *sym = &table.symbols[i];
    printf("%s[%lu]\n", sym->name, (unsigned long)sym->address);
#endif
    free(table.symbols[i].name);
  }
  free(table.symbols);
}