undar-lang/src/tools/assembler.c

1170 lines
41 KiB
C

#include "assembler.h"
#include "parser.h"
// Which section a symbol was declared in: labels found in "code" sections are
// SYMBOL_CODE, labels found in "data" sections are SYMBOL_DATA.
typedef enum {
  SYMBOL_CODE = 0,
  SYMBOL_DATA = 1
} SymbolType;
// One symbol-table entry: a named label together with its resolved location.
typedef struct {
char *name; // Heap copy of the label name (made with strdup when added)
u32 address; // Code offset for code labels; data labels start at 0 and get
             // their real address when their storage is allocated
SymbolType type; // Section the label was declared in (code or data)
int size; // How much memory this symbol occupies (defaults to 4 when added)
int is_constant; // 1 = constant, 0 = variable
} Symbol;
// Growable array of Symbol entries, searched linearly by name.
typedef struct {
Symbol *symbols; // Heap-allocated array of entries
int count; // Number of entries currently in use
int capacity; // Number of entries the array can hold before it must grow
} SymbolTable;
// Prepare an empty symbol table with room for 32 entries.
//
// table: table to initialise; any previous contents are overwritten, not freed.
//
// Exits the process with status 1 if the initial allocation fails, matching
// the fail-fast error handling used by the rest of the assembler.
void symbol_table_init(SymbolTable *table) {
  table->capacity = 32;
  table->count = 0;
  table->symbols = malloc((size_t)table->capacity * sizeof *table->symbols);
  if (table->symbols == NULL) {
    fprintf(stderr, "Error: Out of memory allocating symbol table\n");
    exit(1);
  }
}
// Append a new symbol to the table.
//
// table:   table to add to.
// name:    label name; copied, so the caller keeps ownership of its string.
// address: initial address recorded for the symbol.
// type:    SYMBOL_CODE or SYMBOL_DATA.
//
// The new entry gets a default size of 4 and is marked as non-constant.
// Exits the process with status 1 if the name is already present or if
// memory cannot be allocated.
void symbol_table_add(SymbolTable *table, const char *name, u32 address,
                      SymbolType type) {
  // Label names must be unique across the whole table.
  for (int i = 0; i < table->count; i++) {
    if (strcmp(table->symbols[i].name, name) == 0) {
      fprintf(stderr, "Error: Duplicate label '%s'\n", name);
      exit(1);
    }
  }

  if (table->count >= table->capacity) {
    // Double the storage; fall back to 32 so a zero capacity can still grow.
    int new_capacity = table->capacity > 0 ? table->capacity * 2 : 32;
    // Keep the old pointer until realloc succeeds so it is never lost.
    Symbol *grown =
        realloc(table->symbols, (size_t)new_capacity * sizeof *grown);
    if (grown == NULL) {
      fprintf(stderr, "Error: Out of memory growing symbol table\n");
      exit(1);
    }
    table->symbols = grown;
    table->capacity = new_capacity;
  }

  char *name_copy = strdup(name);
  if (name_copy == NULL) {
    fprintf(stderr, "Error: Out of memory copying label '%s'\n", name);
    exit(1);
  }

  Symbol *sym = &table->symbols[table->count++];
  sym->name = name_copy;
  sym->address = address;
  sym->type = type;
  sym->size = 4; // Default size
  sym->is_constant = 0;
}
// Find a symbol by exact name.
//
// Returns a pointer to the matching entry inside the table's array, or NULL
// when no entry has that name.
Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
  int idx = 0;
  while (idx < table->count) {
    Symbol *candidate = table->symbols + idx;
    if (strcmp(candidate->name, name) == 0) {
      return candidate;
    }
    idx++;
  }
  return NULL;
}
// Return the address recorded for the label `name`.
//
// Unlike symbol_table_lookup, a missing label is fatal: an error is printed
// to stderr and the process exits with status 1.
u32 find_label_in_table(SymbolTable *table, const char *name) {
  Symbol *match = symbol_table_lookup(table, name);
  if (match != NULL) {
    return match->address;
  }
  fprintf(stderr, "Error: Undefined label '%s'\n", name);
  exit(1);
}
// Return how many bytes the instruction `node` occupies once encoded.
//
// This is used by the symbol-collection pass to compute label addresses, so
// every size here must match the number of bytes the emitter writes for the
// same mnemonic.
//
// node: expression whose token is the instruction mnemonic.
//
// Exits the process (status -1) for an unrecognised mnemonic, and (status 1)
// for a `call` that has no argument-list child.
int get_instruction_byte_size(ExprNode *node) {
  // Mnemonics whose encoded size does not depend on their operands.
  static const struct {
    const char *name;
    int size;
  } fixed_sizes[] = {
      // Opcode only
      {"halt", 1},
      // Opcode + return register
      {"return", 2},
      // Opcode + two registers
      {"int-to-string", 3},
      {"nat-to-string", 3},
      {"real-to-string", 3},
      {"int-to-real", 3},
      {"nat-to-real", 3},
      {"real-to-int", 3},
      {"real-to-nat", 3},
      {"nat-to-int", 3},
      {"int-to-nat", 3},
      {"load-indirect-8", 3},
      {"load-indirect-16", 3},
      {"load-indirect-32", 3},
      {"store-indirect-8", 3},
      {"store-indirect-16", 3},
      {"store-indirect-32", 3},
      {"store-absolute-8", 3},
      {"store-absolute-16", 3},
      {"store-absolute-32", 3},
      {"string-length", 3},
      {"register-move", 3},
      {"malloc", 3},
      // Opcode + three registers
      {"add-int", 4},
      {"sub-int", 4},
      {"mul-int", 4},
      {"div-int", 4},
      {"add-nat", 4},
      {"sub-nat", 4},
      {"mul-nat", 4},
      {"div-nat", 4},
      {"add-real", 4},
      {"sub-real", 4},
      {"mul-real", 4},
      {"div-real", 4},
      {"bit-shift-left", 4},
      {"bit-shift-right", 4},
      {"bit-shift-re", 4},
      {"bit-and", 4},
      {"bit-or", 4},
      {"bit-xor", 4},
      // The memset family is emitted with dest, value and count registers,
      // so it is 4 bytes, not 3.
      {"memset", 4},
      {"memset-8", 4},
      {"memset-16", 4},
      {"string-eq", 4},
      // Opcode + 32-bit address
      {"jump", 5},
      {"jump-if-flag", 5},
      // Opcode + register + 32-bit value
      {"load-immediate", 6},
      {"load-absolute-8", 6},
      {"load-absolute-16", 6},
      {"load-absolute-32", 6},
      // Opcode + 32-bit value + two registers
      {"jump-eq-int", 7},
      {"jump-neq-int", 7},
      {"jump-gt-int", 7},
      {"jump-lt-int", 7},
      {"jump-le-int", 7},
      {"jump-ge-int", 7},
      {"jump-eq-nat", 7},
      {"jump-neq-nat", 7},
      {"jump-gt-nat", 7},
      {"jump-lt-nat", 7},
      {"jump-le-nat", 7},
      {"jump-ge-nat", 7},
      {"jump-eq-real", 7},
      {"jump-neq-real", 7},
      {"jump-gt-real", 7},
      {"jump-lt-real", 7},
      {"jump-le-real", 7},
      {"jump-ge-real", 7},
      {"store-offset-8", 7},
      {"store-offset-16", 7},
      {"store-offset-32", 7},
      {"load-offset-8", 7},
      {"load-offset-16", 7},
      {"load-offset-32", 7},
      // NOTE(review): "string-concat" is also emitted by the assembler but is
      // not sized here; confirm its operand count and add it.
  };

  const char *opname = node->token;
  for (size_t i = 0; i < sizeof fixed_sizes / sizeof fixed_sizes[0]; i++) {
    if (strcmp(opname, fixed_sizes[i].name) == 0) {
      return fixed_sizes[i].size;
    }
  }

  // Call: opcode + 32-bit address + arg count + one byte per arg + return reg
  if (strcmp(opname, "call") == 0) {
    if (node->child_count < 2) {
      // Without this check the argument list would be read out of bounds.
      fprintf(stderr, "Error: call requires (args) and return register\n");
      exit(1);
    }
    ExprNode *args_node = node->children[1];
    u32 args_count;
    if (strcmp(args_node->token, "nil") == 0) {
      args_count = 0;
    } else {
      // The list's own token is the first argument; children are the rest.
      args_count = 1 + args_node->child_count;
    }
    return 1 + 1 + 1 + 4 + args_count;
  }

  // Syscall: opcode + 32-bit syscall id + one byte per register argument
  // (the first child is the syscall name, not an argument).
  if (strcmp(opname, "syscall") == 0) {
    return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0);
  }

  fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
  exit(-1);
}
// Size in bytes of the instruction `node`, treating a node without children
// as occupying no space at all.
int calculate_instruction_size(ExprNode *node) {
  return (node->child_count == 0) ? 0 : get_instruction_byte_size(node);
}
// Walk one node of a code section, recording label addresses.
//
// table:        symbol table that receives code labels.
// node:         either a `label` node (first child is the name, remaining
//               children are the labelled body) or a single instruction.
// current_addr: running code offset; advanced by the size of each
//               instruction visited.
// depth:        nesting level, used only to indent ASM_DEBUG trace output.
//
// The trace indentation is produced with a printf field width instead of a
// fixed-size buffer, so arbitrarily deep nesting cannot overflow anything.
void collect_symbols_in_node(SymbolTable *table, ExprNode *node,
                             u32 *current_addr, int depth) {
#ifdef ASM_DEBUG
  printf("%*s%u %s ", depth > 0 ? depth : 0, "", (unsigned)*current_addr,
         node->token);
#endif
  if (strcmp(node->token, "label") == 0) {
    if (node->child_count >= 1) {
      const char *name = node->children[0]->token;
#ifdef ASM_DEBUG
      printf(" %s -> %u\n", name, (unsigned)*current_addr);
#endif
      // A label takes no space itself; it names the current offset.
      symbol_table_add(table, name, *current_addr, SYMBOL_CODE);
    }
    for (size_t i = 1; i < node->child_count; i++) {
      collect_symbols_in_node(table, node->children[i], current_addr,
                              depth + 1);
    }
  } else {
    int size = get_instruction_byte_size(node);
    *current_addr += size;
#ifdef ASM_DEBUG
    printf(" +%d bytes -> %u\n", size, (unsigned)*current_addr);
#endif
  }
}
// First assembler pass: fill `table` with every label in `program`.
//
// Data labels are registered first with address 0 as a placeholder. Code
// labels are then registered with their byte offset, counted from 0 across
// all "code" sections in program order.
void collect_symbols(SymbolTable *table, ExprNode *program) {
  // Pass over "data" sections: register each label that carries a value.
  for (size_t s = 0; s < program->child_count; ++s) {
    ExprNode *data_section = program->children[s];
    if (strcmp(data_section->token, "data") != 0) {
      continue;
    }
    for (size_t k = 0; k < data_section->child_count; ++k) {
      ExprNode *entry = data_section->children[k];
      if (strcmp(entry->token, "label") != 0) {
        continue;
      }
      if (entry->child_count >= 2) {
        symbol_table_add(table, entry->children[0]->token, 0, SYMBOL_DATA);
      }
    }
  }

  // Pass over "code" sections: walk nested labels while tracking the offset.
  u32 offset = 0;
  for (size_t s = 0; s < program->child_count; ++s) {
    ExprNode *code_section = program->children[s];
    if (strcmp(code_section->token, "code") != 0) {
      continue;
    }
    for (size_t k = 0; k < code_section->child_count; ++k) {
      collect_symbols_in_node(table, code_section->children[k], &offset, 0);
    }
  }
}
// Reserve `size` bytes at the VM's current memory pointer for the data item
// `name` and return the address where the reservation starts.
//
// Both vm->mp and the current frame's end are advanced by `size`. If `name`
// is a data symbol in `table`, its address and size are updated to match;
// otherwise the table is left untouched.
u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) {
  const u32 base = vm->mp;
  vm->mp += size;
  vm->frames[vm->fp].end += size;

  Symbol *entry = symbol_table_lookup(table, name);
  if (entry == NULL || entry->type != SYMBOL_DATA) {
    return base;
  }
  // Replace the placeholder with the real location.
  entry->address = base;
  entry->size = size;
  return base;
}
// Append one byte to the VM's code buffer and advance the code pointer.
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
  vm->cp++;
}
// Append a 32-bit value to the VM's code buffer at the current code pointer,
// then advance the code pointer past it.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp = vm->cp + 4;
}
// Append an opcode to the code buffer; opcodes are encoded as a single byte.
void emit_opcode(VM *vm, Opcode op) {
  emit_byte(vm, op);
}
// Parse a register operand of the form "$<decimal number>".
//
// reg_str: operand text, e.g. "$0" or "$12".
//
// Returns the register number (0..255), or -1 when the text is not a valid
// register: NULL, missing the '$' prefix, no digits, any non-digit
// character, or a number that does not fit in the single byte used to encode
// a register. Rejecting these explicitly avoids silently turning garbage
// such as "$abc" into register 0.
int parse_register(const char *reg_str) {
  if (reg_str == NULL || reg_str[0] != '$') {
    return -1;
  }
  const char *digits = reg_str + 1;
  if (*digits == '\0') {
    return -1;
  }
  int value = 0;
  for (const char *p = digits; *p != '\0'; p++) {
    if (*p < '0' || *p > '9') {
      return -1;
    }
    value = value * 10 + (*p - '0');
    // Checked on every digit so the accumulator can never overflow.
    if (value > 0xFF) {
      return -1;
    }
  }
  return value;
}
// Turn an operand token into the 32-bit value to encode.
//
// Forms are tried in this order:
//   "&label"  -> address of the label (fatal if undefined)
//   contains '.' -> converted with atof and TO_FIXED
//   "0x..." / "0X..." -> hexadecimal integer
//   anything else -> decimal integer
//
// A malformed hexadecimal or decimal literal prints an error and exits the
// process with status 1.
u32 resolve_symbol(SymbolTable *table, const char *ref) {
  if (ref[0] == '&') {
    return find_label_in_table(table, ref + 1);
  }
  if (strchr(ref, '.') != NULL) {
    return TO_FIXED(atof(ref));
  }

  const int is_hex = ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X');
  // For hex literals the digits start after the "0x" prefix.
  const char *digits = is_hex ? ref + 2 : ref;
  char *stop;
  u32 parsed = (u32)strtoul(digits, &stop, is_hex ? 16 : 10);

  // Valid only if at least one digit was consumed and nothing trails it.
  if (stop == digits || *stop != '\0') {
    if (is_hex) {
      fprintf(stderr, "Invalid hex literal: %s\n", ref);
    } else {
      fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    }
    exit(1);
  }
  return parsed;
}
// Strip the surrounding double quotes from a string token and decode its
// escape sequences.
//
// quoted_str: token text; may be NULL.
//
// Recognised escapes are \n, \t, \r, \\, \" and \'. An unknown escape is kept
// verbatim as both characters, and a trailing lone backslash is kept as-is.
// Text that is not wrapped in double quotes is copied unchanged.
//
// Returns a newly allocated NUL-terminated string that the caller must free,
// or NULL if quoted_str is NULL or the allocation fails.
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str) {
    return NULL;
  }

  size_t len = strlen(quoted_str);
  int is_quoted =
      len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"';
  const char *src = is_quoted ? quoted_str + 1 : quoted_str;
  size_t src_len = is_quoted ? len - 2 : len;

  // Decoding never produces more bytes than it consumes (an escape yields
  // either one byte or, when unknown, the same two bytes), so the source
  // length is a safe upper bound for the output.
  char *unwrapped = malloc(src_len + 1);
  if (!unwrapped) {
    return NULL;
  }

  if (!is_quoted) {
    // Not quoted, return copy
    memcpy(unwrapped, src, src_len);
    unwrapped[src_len] = '\0';
    return unwrapped;
  }

  size_t dst_idx = 0;
  for (size_t i = 0; i < src_len; ++i) {
    if (src[i] == '\\' && i + 1 < src_len) {
      switch (src[i + 1]) {
      case 'n':
        unwrapped[dst_idx++] = '\n';
        break;
      case 't':
        unwrapped[dst_idx++] = '\t';
        break;
      case 'r':
        unwrapped[dst_idx++] = '\r';
        break;
      case '\\':
        unwrapped[dst_idx++] = '\\';
        break;
      case '"':
        unwrapped[dst_idx++] = '"';
        break;
      case '\'':
        unwrapped[dst_idx++] = '\'';
        break;
      default:
        // Unknown escape, keep both characters
        unwrapped[dst_idx++] = src[i];
        unwrapped[dst_idx++] = src[i + 1];
        break;
      }
      i++; // Skip the escaped character
    } else {
      unwrapped[dst_idx++] = src[i];
    }
  }
  unwrapped[dst_idx] = '\0';
  return unwrapped;
}
// Allocate and initialise every labelled item of a "data" section.
//
// vm:    VM whose memory receives the data.
// table: symbol table; each item's data symbol gets its final address.
// block: the "data" section node. Children that are not `label` nodes with
//        at least a name and a value are ignored.
//
// Supported values (checked in this order):
//   "string" -> 4-byte length (counting the terminating NUL) followed by the
//               decoded bytes including the NUL
//   0x...    -> 32-bit hexadecimal integer
//   has '.'  -> number converted with TO_FIXED
//   other    -> 32-bit decimal integer
//
// Every scalar takes exactly 4 bytes; allocate_data is the only place that
// advances the memory pointer, so all cases stay in step with the frame end.
// Exits the process with status 1 on a malformed or unsupported item.
void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) {
  for (size_t i = 0; i < block->child_count; ++i) {
    ExprNode *item = block->children[i];
    if (strcmp(item->token, "label") != 0 || item->child_count < 2) {
      continue;
    }

    const char *name = item->children[0]->token;
    ExprNode *val = item->children[1];
    if (val->child_count != 0) {
      fprintf(stderr, "Unsupported data item\n");
      exit(1);
    }

    const char *token = val->token;
    size_t token_len = strlen(token);

    // Case 1: String literal (enclosed in quotes)
    if (token[0] == '"' && token[token_len - 1] == '"') {
      char *unwrapped = unwrap_string(token);
      if (unwrapped == NULL) {
        fprintf(stderr, "Error: Out of memory processing string data\n");
        exit(1);
      }
      int len = strlen(unwrapped) + 1;
      u32 addr = allocate_data(vm, table, name, len + 4);
      write_u32(vm, memory, addr, len);
      for (int k = 0; k < len; k++) {
        write_u8(vm, memory, addr + 4 + k, unwrapped[k]);
      }
      free(unwrapped);
    }
    // Case 2: Hexadecimal integer (0x...)
    else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) {
      char *endptr;
      u32 value = (u32)strtoul(token + 2, &endptr, 16);
      // Require at least one digit and no trailing characters.
      if (endptr == token + 2 || endptr != token + token_len) {
        fprintf(stderr, "Invalid hex in data block: %s\n", token);
        exit(1);
      }
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, value);
    }
    // Case 3: Floating-point (has decimal point)
    else if (strchr(token, '.')) {
      float f = atof(token);
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, TO_FIXED(f));
    }
    // Case 4: Decimal integer
    else {
      char *endptr;
      u32 value = (u32)strtoul(token, &endptr, 10);
      if (endptr != token + token_len) {
        fprintf(stderr, "Invalid decimal in data block: %s\n", token);
        exit(1);
      }
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, value);
    }
  }
}
void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
const char *opname = node->token;
if (strcmp(opname, "label") == 0) {
for (size_t i = 1; i < node->child_count; i++) {
process_code_expr(vm, table, node->children[i]);
}
} else if (strcmp(opname, "halt") == 0) {
emit_opcode(vm, OP_HALT);
} else if (strcmp(opname, "jump") == 0) {
emit_opcode(vm, OP_JMP);
u32 addr = resolve_symbol(table, node->children[0]->token);
emit_u32(vm, addr);
} else if (strcmp(opname, "jump-if-flag") == 0) {
emit_opcode(vm, OP_JMPF);
u32 addr = resolve_symbol(table, node->children[0]->token);
emit_u32(vm, addr);
} else if (strcmp(opname, "call") == 0) {
emit_opcode(vm, OP_CALL);
if (node->child_count < 3) {
fprintf(stderr, "Error: call requires (args) and return register\n");
return;
}
// Parse function address (first child)
u32 addr = resolve_symbol(table, node->children[0]->token);
if (addr == (u32)-1) {
fprintf(stderr, "Error: undefined symbol '%s'\n",
node->children[0]->token);
return;
}
emit_u32(vm, addr);
// Parse argument list (second child)
ExprNode *args_node = node->children[1];
u8 arg_count = 0;
if (args_node->child_count > 0) {
// Multiple arguments case
arg_count = args_node->child_count + 1; // +1 for the token
} else {
// Single argument case - token is the argument
arg_count = (args_node->token[0] != '\0') ? 1 : 0;
}
emit_byte(vm, arg_count);
// Emit arguments based on representation
if (arg_count > 0) {
// First argument is always the token
const char *reg_str = args_node->token;
int reg = parse_register(reg_str);
if (reg < 0) {
fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str);
return;
}
emit_byte(vm, (u8)reg);
// Emit children if present
for (size_t i = 0; i < args_node->child_count; i++) {
reg_str = args_node->children[i]->token;
reg = parse_register(reg_str);
if (reg < 0) {
fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str);
return;
}
emit_byte(vm, (u8)reg);
}
}
// Parse return register (third child)
const char *return_reg_str = node->children[2]->token;
int return_reg = parse_register(return_reg_str);
if (return_reg < 0) {
if (strcmp(return_reg_str, "nil") == 0) {
return_reg = 0xFF;
} else {
fprintf(stderr, "Error: invalid return register '%s'\n",
return_reg_str);
return;
}
}
emit_byte(vm, (u8)return_reg);
} else if (strcmp(opname, "return") == 0) {
emit_opcode(vm, OP_RETURN);
if (node->child_count != 1) {
fprintf(stderr, "Error: return requires exactly one argument\n");
return;
}
const char *reg_str = node->children[0]->token;
int reg = parse_register(reg_str);
// Handle "nil" as special case (no return value)
if (reg < 0) {
if (strcmp(reg_str, "nil") == 0) {
reg = 0xFF; // Special value for "no return"
} else {
fprintf(stderr, "Error: invalid return register '%s'\n", reg_str);
return;
}
}
emit_byte(vm, (u8)reg);
} else if (strcmp(opname, "load-immediate") == 0) {
emit_opcode(vm, OP_LOAD_IMM);
int reg = parse_register(node->children[0]->token);
u32 addr = resolve_symbol(table, node->children[1]->token);
emit_byte(vm, reg);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-absolute-8") == 0) {
emit_opcode(vm, OP_LOAD_ABS_8);
int dest = parse_register(node->children[0]->token);
u32 addr = resolve_symbol(table, node->children[1]->token);
emit_byte(vm, dest);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-absolute-16") == 0) {
emit_opcode(vm, OP_LOAD_ABS_16);
int dest = parse_register(node->children[0]->token);
u32 addr = resolve_symbol(table, node->children[1]->token);
emit_byte(vm, dest);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-absolute-32") == 0) {
emit_opcode(vm, OP_LOAD_ABS_32);
int dest = parse_register(node->children[0]->token);
u32 addr = resolve_symbol(table, node->children[1]->token);
emit_byte(vm, dest);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-indirect-8") == 0) {
emit_opcode(vm, OP_LOAD_IND_8);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "load-indirect-16") == 0) {
emit_opcode(vm, OP_LOAD_IND_16);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "load-indirect-32") == 0) {
emit_opcode(vm, OP_LOAD_IND_32);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "malloc") == 0) {
emit_opcode(vm, OP_MALLOC);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "memset-8") == 0) {
emit_opcode(vm, OP_MEMSET_8);
int dest = parse_register(node->children[0]->token);
int value = parse_register(node->children[1]->token);
int count = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, value);
emit_byte(vm, count);
} else if (strcmp(opname, "memset-16") == 0) {
emit_opcode(vm, OP_MEMSET_16);
int dest = parse_register(node->children[0]->token);
int value = parse_register(node->children[1]->token);
int count = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, value);
emit_byte(vm, count);
} else if (strcmp(opname, "memset") == 0) {
emit_opcode(vm, OP_MEMSET_32);
int dest = parse_register(node->children[0]->token);
int value = parse_register(node->children[1]->token);
int count = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, value);
emit_byte(vm, count);
} else if (strcmp(opname, "store-absolute-8") == 0) {
emit_opcode(vm, OP_STORE_ABS_8);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "store-absolute-16") == 0) {
emit_opcode(vm, OP_STORE_ABS_16);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "store-absolute-32") == 0) {
emit_opcode(vm, OP_STORE_ABS_32);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "store-indirect-8") == 0) {
emit_opcode(vm, OP_STORE_IND_8);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "store-indirect-16") == 0) {
emit_opcode(vm, OP_STORE_IND_16);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "store-indirect-32") == 0) {
emit_opcode(vm, OP_STORE_IND_32);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
} else if (strcmp(opname, "store-offset-8") == 0) {
emit_opcode(vm, OP_STORE_OFF_8);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
u32 addr = resolve_symbol(table, node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_u32(vm, addr);
} else if (strcmp(opname, "store-offset-16") == 0) {
emit_opcode(vm, OP_STORE_OFF_16);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
u32 addr = resolve_symbol(table, node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_u32(vm, addr);
} else if (strcmp(opname, "store-offset-32") == 0) {
emit_opcode(vm, OP_STORE_OFF_32);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
u32 addr = resolve_symbol(table, node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-offset-8") == 0) {
emit_opcode(vm, OP_LOAD_OFF_8);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
u32 addr = resolve_symbol(table, node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-offset-16") == 0) {
emit_opcode(vm, OP_LOAD_OFF_16);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
u32 addr = resolve_symbol(table, node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_u32(vm, addr);
} else if (strcmp(opname, "load-offset-32") == 0) {
emit_opcode(vm, OP_LOAD_OFF_32);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
u32 addr = resolve_symbol(table, node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_u32(vm, addr);
} else if (strcmp(opname, "register-move") == 0) {
emit_opcode(vm, OP_REG_MOV);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "syscall") == 0) {
emit_opcode(vm, OP_SYSCALL);
// Parse syscall ID
u32 syscall_id = 0;
const char *syscall_name = node->children[0]->token;
if (strcmp(syscall_name, "EXIT") == 0)
syscall_id = SYSCALL_EXIT;
else if (strcmp(syscall_name, "OPEN") == 0)
syscall_id = SYSCALL_DEVICE_OPEN;
else if (strcmp(syscall_name, "READ") == 0)
syscall_id = SYSCALL_DEVICE_READ;
else if (strcmp(syscall_name, "WRITE") == 0)
syscall_id = SYSCALL_DEVICE_WRITE;
else if (strcmp(syscall_name, "CLOSE") == 0)
syscall_id = SYSCALL_DEVICE_CLOSE;
else if (strcmp(syscall_name, "IOCTL") == 0)
syscall_id = SYSCALL_DEVICE_IOCTL;
else if (strcmp(syscall_name, "REFRESH") == 0)
syscall_id = SYSCALL_DEVICE_REFRESH;
emit_u32(vm, syscall_id);
// Emit register arguments
for (size_t i = 1; i < node->child_count; ++i) {
int reg = parse_register(node->children[i]->token);
emit_byte(vm, reg);
}
} else if (strcmp(opname, "bit-shift-left") == 0) {
emit_opcode(vm, OP_SLL);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "bit-shift-right") == 0) {
emit_opcode(vm, OP_SRL);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "bit-shift-re") == 0) {
emit_opcode(vm, OP_SRE);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "bit-and") == 0) {
emit_opcode(vm, OP_BAND);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "bit-or") == 0) {
emit_opcode(vm, OP_BOR);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "bit-xor") == 0) {
emit_opcode(vm, OP_BXOR);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "add-int") == 0) {
emit_opcode(vm, OP_ADD_INT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "sub-int") == 0) {
emit_opcode(vm, OP_SUB_INT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "mul-int") == 0) {
emit_opcode(vm, OP_MUL_INT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "div-int") == 0) {
emit_opcode(vm, OP_DIV_INT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "add-nat") == 0) {
emit_opcode(vm, OP_ADD_NAT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "sub-nat") == 0) {
emit_opcode(vm, OP_SUB_NAT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "mul-nat") == 0) {
emit_opcode(vm, OP_MUL_NAT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "div-nat") == 0) {
emit_opcode(vm, OP_DIV_NAT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "add-real") == 0) {
emit_opcode(vm, OP_ADD_REAL);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "sub-real") == 0) {
emit_opcode(vm, OP_SUB_REAL);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "mul-real") == 0) {
emit_opcode(vm, OP_MUL_REAL);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "div-real") == 0) {
emit_opcode(vm, OP_DIV_REAL);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "int-to-real") == 0) {
emit_opcode(vm, OP_INT_TO_REAL);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "nat-to-real") == 0) {
emit_opcode(vm, OP_NAT_TO_REAL);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "real-to-int") == 0) {
emit_opcode(vm, OP_REAL_TO_INT);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "real-to-nat") == 0) {
emit_opcode(vm, OP_REAL_TO_NAT);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "jump-eq-int") == 0) {
emit_opcode(vm, OP_JEQ_INT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-neq-int") == 0) {
emit_opcode(vm, OP_JNEQ_INT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-gt-int") == 0) {
emit_opcode(vm, OP_JGT_INT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-lt-int") == 0) {
emit_opcode(vm, OP_JLT_INT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-le-int") == 0) {
emit_opcode(vm, OP_JLE_INT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-ge-int") == 0) {
emit_opcode(vm, OP_JGE_INT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-eq-nat") == 0) {
emit_opcode(vm, OP_JEQ_NAT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-neq-nat") == 0) {
emit_opcode(vm, OP_JNEQ_NAT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-gt-nat") == 0) {
emit_opcode(vm, OP_JGT_NAT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-lt-nat") == 0) {
emit_opcode(vm, OP_JLT_NAT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-le-nat") == 0) {
emit_opcode(vm, OP_JLE_NAT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-ge-nat") == 0) {
emit_opcode(vm, OP_JGE_NAT);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-eq-real") == 0) {
emit_opcode(vm, OP_JEQ_REAL);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-neq-real") == 0) {
emit_opcode(vm, OP_JNEQ_REAL);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-gt-real") == 0) {
emit_opcode(vm, OP_JGT_REAL);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-lt-real") == 0) {
emit_opcode(vm, OP_JLT_REAL);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-le-real") == 0) {
emit_opcode(vm, OP_JLE_REAL);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "jump-ge-real") == 0) {
emit_opcode(vm, OP_JGE_REAL);
u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "string-length") == 0) {
emit_opcode(vm, OP_STRLEN);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "string-eq") == 0) {
emit_opcode(vm, OP_STREQ);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "string-concat") == 0) {
emit_opcode(vm, OP_STRCAT);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "string-get-char") == 0) {
emit_opcode(vm, OP_STR_GET_CHAR);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "string-find-char") == 0) {
emit_opcode(vm, OP_STR_FIND_CHAR);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
} else if (strcmp(opname, "string-slice") == 0) {
emit_opcode(vm, OP_STR_SLICE);
int dest = parse_register(node->children[0]->token);
int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token);
int src3 = parse_register(node->children[3]->token);
emit_byte(vm, dest);
emit_byte(vm, src1);
emit_byte(vm, src2);
emit_byte(vm, src3);
} else if (strcmp(opname, "int-to-string") == 0) {
emit_opcode(vm, OP_INT_TO_STRING);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "nat-to-string") == 0) {
emit_opcode(vm, OP_NAT_TO_STRING);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "real-to-string") == 0) {
emit_opcode(vm, OP_REAL_TO_STRING);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "string-to-int") == 0) {
emit_opcode(vm, OP_STRING_TO_INT);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "string-to-nat") == 0) {
emit_opcode(vm, OP_STRING_TO_NAT);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else if (strcmp(opname, "string-to-real") == 0) {
emit_opcode(vm, OP_STRING_TO_REAL);
int dest = parse_register(node->children[0]->token);
int src = parse_register(node->children[1]->token);
emit_byte(vm, dest);
emit_byte(vm, src);
} else {
fprintf(stderr, "Unknown opcode: %s\n", opname);
}
}
// Assemble a parsed program into `vm` in three passes over the top-level
// sections of `program`:
//   1. collect_symbols fills a local symbol table,
//   2. every section whose token is "data" is handed to process_data_block,
//   3. every child expression of each section whose token is "code" is
//      handed to process_code_expr.
// The symbol table exists only for the duration of this call: each symbol
// name and the backing array are freed before returning.
//
// vm:      target VM, passed through to the data/code processing helpers.
// program: root node whose children are the sections; it is dereferenced
//          unconditionally, so it must not be NULL.
void assemble(VM *vm, ExprNode *program) {
  SymbolTable table;
  symbol_table_init(&table);

  // PASS 1: Collect all symbols (both code and data)
  collect_symbols(&table, program);

  // PASS 2: Process data section using symbol table
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "data") == 0) {
      process_data_block(vm, &table, section);
    }
  }

  // PASS 3: Process code section using complete symbol table
  for (size_t i = 0; i < program->child_count; ++i) {
    ExprNode *section = program->children[i];
    if (strcmp(section->token, "code") == 0) {
      for (size_t j = 0; j < section->child_count; ++j) {
        process_code_expr(vm, &table, section->children[j]);
      }
    }
  }

  // Cleanup symbol table: release each name, then the array itself.
  for (int i = 0; i < table.count; i++) {
#ifdef ASM_DEBUG
    // `address` is a u32, so it is printed as unsigned; a signed %d would
    // show addresses with the top bit set as negative numbers.
    const Symbol *s = &table.symbols[i];
    printf("%s[%u]\n", s->name, (unsigned int)s->address);
#endif
    free(table.symbols[i].name);
  }
  free(table.symbols);
}