955 lines
34 KiB
C
955 lines
34 KiB
C
#include "assembler.h"

#include <limits.h>
|
|
/* Category of a symbol-table entry: code label, data label, or plex. */
typedef enum {
  SYMBOL_CODE,
  SYMBOL_DATA,
  SYMBOL_PLEX
} SymbolType;
|
|
|
|
typedef struct {
|
|
char *name;
|
|
u32 address;
|
|
SymbolType type;
|
|
int size; // How much memory this symbol occupies
|
|
int is_constant; // 1 = constant, 0 = variable
|
|
} Symbol;
|
|
|
|
typedef struct {
|
|
Symbol *symbols;
|
|
int count;
|
|
int capacity;
|
|
} SymbolTable;
|
|
|
|
/*
 * Initialise `table` as empty with room for 32 symbols.
 *
 * Terminates the process with exit status 1 if the backing array cannot be
 * allocated, matching how the rest of the assembler reports fatal errors.
 */
void symbol_table_init(SymbolTable *table) {
  table->capacity = 32;
  table->count = 0;
  table->symbols = malloc((size_t)table->capacity * sizeof *table->symbols);
  if (table->symbols == NULL) {
    fprintf(stderr, "Error: Out of memory allocating symbol table\n");
    exit(1);
  }
}
|
|
|
|
void symbol_table_add(SymbolTable *table, const char *name, u32 address,
|
|
SymbolType type) {
|
|
// Check for duplicates
|
|
for (int i = 0; i < table->count; i++) {
|
|
if (strcmp(table->symbols[i].name, name) == 0) {
|
|
// Allow plex redefinition for compiler evolution
|
|
if (type == SYMBOL_PLEX && table->symbols[i].type == SYMBOL_PLEX) {
|
|
return;
|
|
}
|
|
fprintf(stderr, "Error: Duplicate label '%s'\n", name);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
if (table->count >= table->capacity) {
|
|
table->capacity *= 2;
|
|
table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol));
|
|
}
|
|
|
|
Symbol *sym = &table->symbols[table->count++];
|
|
sym->name = strdup(name);
|
|
sym->address = address;
|
|
sym->type = type;
|
|
sym->size = 4; // Default size
|
|
sym->is_constant = 0;
|
|
}
|
|
|
|
/*
 * Linear search for the symbol whose name equals `name`.
 * Returns a pointer into the table's array, or NULL when nothing matches.
 */
Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
  int idx = 0;
  while (idx < table->count) {
    Symbol *entry = &table->symbols[idx];
    if (!strcmp(entry->name, name)) {
      return entry;
    }
    idx++;
  }
  return NULL;
}
|
|
|
|
/*
 * Return the address recorded for `name`.
 * An unknown name is fatal: prints "Undefined label" and exits with status 1.
 */
u32 find_label_in_table(SymbolTable *table, const char *name) {
  Symbol *found = symbol_table_lookup(table, name);
  if (found != NULL) {
    return found->address;
  }
  fprintf(stderr, "Error: Undefined label '%s'\n", name);
  exit(1);
}
|
|
|
|
/*
 * Return how many bytes the instruction `node` occupies once assembled.
 *
 * Label addresses are computed from these sizes, so every entry must equal
 * the number of bytes process_code_expr() emits for the same mnemonic:
 * one opcode byte, one byte per register operand, four bytes per
 * address/immediate operand.
 *
 * "syscall" is variable-length: opcode + 4-byte id + one byte per argument
 * after the syscall name.
 *
 * An unrecognised mnemonic is fatal (message on stderr, exit(-1)).
 */
int get_instruction_byte_size(ExprNode *node) {
  static const struct {
    const char *name;
    int size;
  } fixed_sizes[] = {
      // Opcode only (1 byte)
      {"halt", 1},
      {"return", 1},

      // Opcode + one register (2 bytes)
      {"push", 2},
      {"pop", 2},

      // Opcode + two registers (3 bytes)
      {"load-r", 3},
      {"load-r8", 3},
      {"load-i8", 3},
      {"load-u8", 3},
      {"load-i16", 3},
      {"load-u16", 3},
      {"malloc", 3},
      {"store", 3},
      {"store-8", 3},
      {"store-16", 3},
      {"register-move", 3},
      {"int-to-real", 3},
      {"nat-to-real", 3},
      {"real-to-int", 3},
      {"real-to-nat", 3},
      // NOTE(review): the two entries below are sized here but the emitter
      // has no case for them yet — confirm the intended encoding.
      {"int-to-nat", 3},
      {"nat-to-int", 3},
      {"string-length", 3},
      {"int-to-string", 3},
      {"nat-to-string", 3},
      {"real-to-string", 3},
      {"string-to-int", 3},
      {"string-to-nat", 3},
      {"string-to-real", 3},

      // Opcode + three registers (4 bytes)
      {"add-int", 4},
      {"sub-int", 4},
      {"mul-int", 4},
      {"div-int", 4},
      {"add-nat", 4},
      {"sub-nat", 4},
      {"mul-nat", 4},
      {"div-nat", 4},
      {"add-real", 4},
      {"sub-real", 4},
      {"mul-real", 4},
      {"div-real", 4},
      {"bit-shift-left", 4},
      {"bit-shift-right", 4},
      {"bit-shift-right-extend", 4},
      {"bit-and", 4},
      {"bit-or", 4},
      {"bit-xor", 4},
      {"string-eq", 4},
      {"string-concat", 4},
      {"string-get-char", 4},
      {"string-find-char", 4},

      // Opcode + 4-byte address, or opcode + four registers (5 bytes)
      {"jump", 5},
      {"jump-if-flag", 5},
      {"call", 5},
      {"string-slice", 5},

      // Opcode + register + 4-byte value (6 bytes)
      {"load", 6},
      {"load-immediate", 6},

      // Opcode + 4-byte address + two registers (7 bytes)
      {"jump-eq-int", 7},
      {"jump-gt-int", 7},
      {"jump-lt-int", 7},
      {"jump-le-int", 7},
      {"jump-ge-int", 7},
      {"jump-eq-nat", 7},
      {"jump-gt-nat", 7},
      {"jump-lt-nat", 7},
      {"jump-le-nat", 7},
      {"jump-ge-nat", 7},
      {"jump-eq-real", 7},
      {"jump-gt-real", 7},
      {"jump-lt-real", 7},
      {"jump-le-real", 7},
      {"jump-ge-real", 7},
  };

  const char *opname = node->token;

  // Syscall (1 + syscall_id (4) + args)
  if (strcmp(opname, "syscall") == 0) {
    return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0);
  }

  for (size_t i = 0; i < sizeof fixed_sizes / sizeof fixed_sizes[0]; i++) {
    if (strcmp(opname, fixed_sizes[i].name) == 0) {
      return fixed_sizes[i].size;
    }
  }

  fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
  exit(-1);
}
|
|
|
|
/*
 * Size in bytes of `node`, treating a node with no children as occupying
 * nothing; otherwise defers to get_instruction_byte_size().
 */
int calculate_instruction_size(ExprNode *node) {
  return node->child_count != 0 ? get_instruction_byte_size(node) : 0;
}
|
|
|
|
void collect_symbols_in_node(SymbolTable *table, ExprNode *node,
|
|
u32 *current_addr, int depth) {
|
|
char indent[32] = "";
|
|
for (int i = 0; i < depth; i++)
|
|
strcat(indent, " ");
|
|
|
|
#ifdef ASM_DEBUG
|
|
printf("%s%d %s ", indent, *current_addr, node->token);
|
|
#endif
|
|
|
|
if (strcmp(node->token, "label") == 0) {
|
|
if (node->child_count >= 1) {
|
|
const char *name = node->children[0]->token;
|
|
#ifdef ASM_DEBUG
|
|
printf(" %s -> %d\n", name, *current_addr);
|
|
#endif
|
|
symbol_table_add(table, name, *current_addr, SYMBOL_CODE);
|
|
}
|
|
|
|
for (size_t i = 1; i < node->child_count; i++) {
|
|
collect_symbols_in_node(table, node->children[i], current_addr,
|
|
depth + 1);
|
|
}
|
|
} else {
|
|
int size = get_instruction_byte_size(node);
|
|
*current_addr += size;
|
|
#ifdef ASM_DEBUG
|
|
printf(" +%d bytes -> %d\n", size, *current_addr);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
 * Pass 1: fill `table` with every label in `program`.
 *
 * Data labels are registered first with address 0 as a placeholder; code
 * labels follow, addressed by accumulating instruction sizes from offset 0
 * across all "code" sections.
 */
void collect_symbols(SymbolTable *table, ExprNode *program) {
  // Sweep 1: data labels (placeholder address)
  for (size_t s = 0; s < program->child_count; ++s) {
    ExprNode *sect = program->children[s];
    if (strcmp(sect->token, "data") != 0) {
      continue;
    }
    for (size_t k = 0; k < sect->child_count; ++k) {
      ExprNode *entry = sect->children[k];
      if (strcmp(entry->token, "label") != 0 || entry->child_count < 2) {
        continue;
      }
      symbol_table_add(table, entry->children[0]->token, 0, SYMBOL_DATA);
    }
  }

  // Sweep 2: code labels, following the nesting of label nodes
  u32 offset = 0;
  for (size_t s = 0; s < program->child_count; ++s) {
    ExprNode *sect = program->children[s];
    if (strcmp(sect->token, "code") != 0) {
      continue;
    }
    for (size_t k = 0; k < sect->child_count; ++k) {
      collect_symbols_in_node(table, sect->children[k], &offset, 0);
    }
  }
}
|
|
|
|
/*
 * Reserve `size` bytes at the VM's current memory pointer and return the
 * start address of the reservation.
 *
 * Both vm->mp and the end of the current frame advance by `size`. If `name`
 * is a SYMBOL_DATA entry in `table`, its address and size are updated too.
 */
u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) {
  const u32 base = vm->mp;

  vm->mp += size;
  vm->frames[vm->fp].end += size;

  Symbol *entry = symbol_table_lookup(table, name);
  if (entry == NULL || entry->type != SYMBOL_DATA) {
    return base;
  }

  // Record where the data symbol ended up
  entry->address = base;
  entry->size = size;
  return base;
}
|
|
|
|
/* Store `byte` at the code pointer and advance the pointer by one. */
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
  vm->cp++;
}
|
|
|
|
/*
 * Write the 32-bit `value` into the code buffer at the code pointer via
 * write_u32, then advance the pointer past the four bytes.
 */
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp += 4;
}
|
|
|
|
/* Emit `op` as a single byte of code. */
void emit_opcode(VM *vm, Opcode op) {
  emit_byte(vm, (u8)op);
}
|
|
|
|
/*
 * Parse a register operand of the form "$<decimal digits>".
 *
 * Returns the register number, or -1 when `reg_str` is NULL, lacks the '$'
 * prefix, has no digits, contains anything other than digits after the
 * prefix, or does not fit in an int.
 */
int parse_register(const char *reg_str) {
  if (reg_str == NULL || reg_str[0] != '$') {
    return -1;
  }

  const char *p = reg_str + 1;
  if (*p < '0' || *p > '9') {
    return -1; // "$" alone or "$abc" is not a register
  }

  int value = 0;
  for (; *p >= '0' && *p <= '9'; ++p) {
    int digit = *p - '0';
    // Reject before the multiply-add could overflow.
    if (value > (INT_MAX - digit) / 10) {
      return -1;
    }
    value = value * 10 + digit;
  }

  // Trailing junk such as "$1x" is an error, not register 1.
  return *p == '\0' ? value : -1;
}
|
|
|
|
/*
 * Turn an operand token into the 32-bit value to emit.
 *
 *   &name   -> address of label `name` (fatal if undefined)
 *   has '.' -> TO_FIXED(atof(ref))
 *   0x.../0X... -> hexadecimal literal
 *   otherwise   -> decimal literal
 *
 * A malformed hex or decimal literal is fatal (message on stderr, exit 1).
 */
u32 resolve_symbol(SymbolTable *table, const char *ref) {
  // Symbol reference (e.g., &label)
  if (ref[0] == '&') {
    return find_label_in_table(table, ref + 1);
  }

  // Fixed-point number (e.g., 0.5)
  if (strchr(ref, '.') != NULL) {
    return TO_FIXED(atof(ref));
  }

  // Integer literal: hexadecimal (e.g., 0x7) or decimal (e.g., 7)
  const int is_hex = ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X');
  const char *digits = is_hex ? ref + 2 : ref; // hex skips the "0x"
  char *stop;
  u32 value = (u32)strtoul(digits, &stop, is_hex ? 16 : 10);

  // Must consume at least one digit and the whole token
  if (stop == digits || *stop != '\0') {
    if (is_hex) {
      fprintf(stderr, "Invalid hex literal: %s\n", ref);
    } else {
      fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    }
    exit(1);
  }
  return value;
}
|
|
|
|
/*
 * Return a newly allocated copy of `quoted_str` with surrounding double
 * quotes removed and escape sequences decoded.
 *
 * Recognised escapes: \n \t \r \\ \" \'. An unknown escape keeps both the
 * backslash and the following character. A backslash that is the last
 * character inside the quotes is kept literally.
 *
 * Input that is not wrapped in double quotes is copied unchanged.
 * Returns NULL when `quoted_str` is NULL or allocation fails; the caller
 * owns and must free() the result.
 */
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str) {
    return NULL;
  }

  size_t len = strlen(quoted_str);
  if (len < 2 || quoted_str[0] != '"' || quoted_str[len - 1] != '"') {
    // Not quoted, return copy
    char *copy = malloc(len + 1);
    if (copy) {
      memcpy(copy, quoted_str, len + 1);
    }
    return copy;
  }

  const char *src = quoted_str + 1;
  size_t src_len = len - 2;

  // Decoding never lengthens the text (a two-character escape yields one
  // or two bytes), so src_len + 1 is always enough. A separate sizing
  // pass is what used to under-count unknown escapes.
  char *unwrapped = malloc(src_len + 1);
  if (!unwrapped) {
    return NULL;
  }

  size_t dst_idx = 0;
  for (size_t i = 0; i < src_len; ++i) {
    if (src[i] != '\\' || i + 1 >= src_len) {
      unwrapped[dst_idx++] = src[i];
      continue;
    }

    char esc = src[++i]; // consume the character after the backslash
    switch (esc) {
    case 'n':
      unwrapped[dst_idx++] = '\n';
      break;
    case 't':
      unwrapped[dst_idx++] = '\t';
      break;
    case 'r':
      unwrapped[dst_idx++] = '\r';
      break;
    case '\\':
      unwrapped[dst_idx++] = '\\';
      break;
    case '"':
      unwrapped[dst_idx++] = '"';
      break;
    case '\'':
      unwrapped[dst_idx++] = '\'';
      break;
    default:
      // Unknown escape, keep both characters
      unwrapped[dst_idx++] = '\\';
      unwrapped[dst_idx++] = esc;
      break;
    }
  }
  unwrapped[dst_idx] = '\0';
  return unwrapped;
}
|
|
|
|
/*
 * Pass 2: lay out every `label` item of a data section in VM memory.
 *
 * Each item with at least two children is (name, value). The value token
 * picks the encoding:
 *   "..."    string: 4-byte length (including the NUL) followed by the
 *            unescaped bytes and the NUL
 *   0x...    4-byte hexadecimal integer
 *   has '.'  4-byte fixed-point value via TO_FIXED
 *   else     4-byte decimal integer
 *
 * Storage comes from allocate_data(), which also records the address on the
 * symbol. Items that are not labels, or have fewer than two children, are
 * skipped. A value node with children, or a malformed number, is fatal.
 */
void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) {
  for (size_t i = 0; i < block->child_count; ++i) {
    ExprNode *item = block->children[i];
    if (strcmp(item->token, "label") != 0 || item->child_count < 2) {
      continue;
    }

    const char *name = item->children[0]->token;
    ExprNode *val = item->children[1];

    if (val->child_count != 0) {
      fprintf(stderr, "Unsupported data item\n");
      exit(1);
    }

    const char *token = val->token;
    size_t token_len = strlen(token);

    // Case 1: String literal (enclosed in quotes)
    if (token_len > 0 && token[0] == '"' && token[token_len - 1] == '"') {
      char *unwrapped = unwrap_string(token);
      if (unwrapped == NULL) {
        fprintf(stderr, "Error: Out of memory decoding string '%s'\n", name);
        exit(1);
      }
      u32 len = (u32)strlen(unwrapped) + 1; // stored length counts the NUL
      u32 addr = allocate_data(vm, table, name, len + 4);

      write_u32(vm, memory, addr, len);
      for (u32 k = 0; k < len; k++) {
        write_u8(vm, memory, addr + 4 + k, unwrapped[k]);
      }
      free(unwrapped);
    }
    // Case 2: Hexadecimal integer (0x...)
    else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) {
      char *endptr;
      u32 value = (u32)strtoul(token + 2, &endptr, 16);

      // Need at least one digit and no trailing junk.
      if (endptr == token + 2 || endptr != token + token_len) {
        fprintf(stderr, "Invalid hex in data block: %s\n", token);
        exit(1);
      }

      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, value);
    }
    // Case 3: Floating-point (has decimal point)
    else if (strchr(token, '.')) {
      float f = atof(token);
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, TO_FIXED(f));
    }
    // Case 4: Decimal integer
    else {
      char *endptr;
      u32 value = (u32)strtoul(token, &endptr, 10);

      if (endptr == token || endptr != token + token_len) {
        fprintf(stderr, "Invalid decimal in data block: %s\n", token);
        exit(1);
      }

      // allocate_data() already advances vm->mp; bumping it again here
      // would leave a 4-byte hole the frame end does not account for.
      u32 addr = allocate_data(vm, table, name, 4);
      write_u32(vm, memory, addr, value);
    }
  }
}
|
|
|
|
void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
|
|
const char *opname = node->token;
|
|
if (strcmp(opname, "label") == 0) {
|
|
for (size_t i = 1; i < node->child_count; i++) {
|
|
process_code_expr(vm, table, node->children[i]);
|
|
}
|
|
} else if (strcmp(opname, "halt") == 0) {
|
|
emit_opcode(vm, OP_HALT);
|
|
} else if (strcmp(opname, "jump") == 0) {
|
|
emit_opcode(vm, OP_JMP);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
emit_u32(vm, addr);
|
|
} else if (strcmp(opname, "jump-if-flag") == 0) {
|
|
emit_opcode(vm, OP_JMPF);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
emit_u32(vm, addr);
|
|
} else if (strcmp(opname, "call") == 0) {
|
|
emit_opcode(vm, OP_CALL);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
emit_u32(vm, addr);
|
|
} else if (strcmp(opname, "return") == 0) {
|
|
emit_opcode(vm, OP_RETURN);
|
|
} else if (strcmp(opname, "load-immediate") == 0) {
|
|
emit_opcode(vm, OP_LOAD_IMM);
|
|
int reg = parse_register(node->children[0]->token);
|
|
u32 addr = resolve_symbol(table, node->children[1]->token);
|
|
emit_byte(vm, reg);
|
|
emit_u32(vm, addr);
|
|
} else if (strcmp(opname, "load") == 0) {
|
|
emit_opcode(vm, OP_LOAD);
|
|
int dest = parse_register(node->children[0]->token);
|
|
u32 addr = resolve_symbol(table, node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_u32(vm, addr);
|
|
} else if (strcmp(opname, "load-r") == 0) {
|
|
emit_opcode(vm, OP_LOAD_REG);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "load-r8") == 0) {
|
|
emit_opcode(vm, OP_LOAD_REG8);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "malloc") == 0) {
|
|
emit_opcode(vm, OP_MALLOC);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "load-i8") == 0) {
|
|
emit_opcode(vm, OP_LOADI8);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "load-u8") == 0) {
|
|
emit_opcode(vm, OP_LOADU8);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "load-u16") == 0) {
|
|
emit_opcode(vm, OP_LOADU16);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "load-i16") == 0) {
|
|
emit_opcode(vm, OP_LOADI16);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "store") == 0) {
|
|
emit_opcode(vm, OP_STORE);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "store-8") == 0) {
|
|
emit_opcode(vm, OP_STORE8);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "store-16") == 0) {
|
|
emit_opcode(vm, OP_STORE16);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
} else if (strcmp(opname, "push") == 0) {
|
|
emit_opcode(vm, OP_PUSH);
|
|
int reg = parse_register(node->children[0]->token);
|
|
emit_byte(vm, reg);
|
|
} else if (strcmp(opname, "pop") == 0) {
|
|
emit_opcode(vm, OP_POP);
|
|
int reg = parse_register(node->children[0]->token);
|
|
emit_byte(vm, reg);
|
|
} else if (strcmp(opname, "register-move") == 0) {
|
|
emit_opcode(vm, OP_REG_MOV);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "syscall") == 0) {
|
|
emit_opcode(vm, OP_SYSCALL);
|
|
|
|
// Parse syscall ID
|
|
u32 syscall_id = 0;
|
|
const char *syscall_name = node->children[0]->token;
|
|
if (strcmp(syscall_name, "EXIT") == 0)
|
|
syscall_id = SYSCALL_EXIT;
|
|
else if (strcmp(syscall_name, "OPEN") == 0)
|
|
syscall_id = SYSCALL_DEVICE_OPEN;
|
|
else if (strcmp(syscall_name, "READ") == 0)
|
|
syscall_id = SYSCALL_DEVICE_READ;
|
|
else if (strcmp(syscall_name, "WRITE") == 0)
|
|
syscall_id = SYSCALL_DEVICE_WRITE;
|
|
else if (strcmp(syscall_name, "CLOSE") == 0)
|
|
syscall_id = SYSCALL_DEVICE_CLOSE;
|
|
else if (strcmp(syscall_name, "IOCTL") == 0)
|
|
syscall_id = SYSCALL_DEVICE_IOCTL;
|
|
|
|
emit_u32(vm, syscall_id);
|
|
|
|
// Emit register arguments
|
|
for (size_t i = 1; i < node->child_count; ++i) {
|
|
int reg = parse_register(node->children[i]->token);
|
|
emit_byte(vm, reg);
|
|
}
|
|
} else if (strcmp(opname, "bit-shift-left") == 0) {
|
|
emit_opcode(vm, OP_SLL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "bit-shift-right") == 0) {
|
|
emit_opcode(vm, OP_SRL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "bit-shift-right-extend") == 0) {
|
|
emit_opcode(vm, OP_SRE);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "bit-and") == 0) {
|
|
emit_opcode(vm, OP_BAND);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "bit-or") == 0) {
|
|
emit_opcode(vm, OP_BOR);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "bit-xor") == 0) {
|
|
emit_opcode(vm, OP_BXOR);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "add-int") == 0) {
|
|
emit_opcode(vm, OP_ADD_INT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "sub-int") == 0) {
|
|
emit_opcode(vm, OP_SUB_INT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "mul-int") == 0) {
|
|
emit_opcode(vm, OP_MUL_INT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "div-int") == 0) {
|
|
emit_opcode(vm, OP_DIV_INT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "add-nat") == 0) {
|
|
emit_opcode(vm, OP_ADD_UINT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "sub-nat") == 0) {
|
|
emit_opcode(vm, OP_SUB_UINT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "mul-nat") == 0) {
|
|
emit_opcode(vm, OP_MUL_UINT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "div-nat") == 0) {
|
|
emit_opcode(vm, OP_DIV_UINT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "add-real") == 0) {
|
|
emit_opcode(vm, OP_ADD_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "sub-real") == 0) {
|
|
emit_opcode(vm, OP_SUB_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "mul-real") == 0) {
|
|
emit_opcode(vm, OP_MUL_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "div-real") == 0) {
|
|
emit_opcode(vm, OP_DIV_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "int-to-real") == 0) {
|
|
emit_opcode(vm, OP_INT_TO_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "nat-to-real") == 0) {
|
|
emit_opcode(vm, OP_UINT_TO_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "real-to-int") == 0) {
|
|
emit_opcode(vm, OP_REAL_TO_INT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "real-to-nat") == 0) {
|
|
emit_opcode(vm, OP_REAL_TO_UINT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "jump-eq-int") == 0) {
|
|
emit_opcode(vm, OP_JEQ_INT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-gt-int") == 0) {
|
|
emit_opcode(vm, OP_JGT_INT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-lt-int") == 0) {
|
|
emit_opcode(vm, OP_JLT_INT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-le-int") == 0) {
|
|
emit_opcode(vm, OP_JLE_INT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-ge-int") == 0) {
|
|
emit_opcode(vm, OP_JGE_INT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-eq-nat") == 0) {
|
|
emit_opcode(vm, OP_JEQ_UINT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-gt-nat") == 0) {
|
|
emit_opcode(vm, OP_JGT_UINT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-lt-nat") == 0) {
|
|
emit_opcode(vm, OP_JLT_UINT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-le-nat") == 0) {
|
|
emit_opcode(vm, OP_JLE_UINT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-ge-nat") == 0) {
|
|
emit_opcode(vm, OP_JGE_UINT);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-eq-real") == 0) {
|
|
emit_opcode(vm, OP_JEQ_REAL);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-gt-real") == 0) {
|
|
emit_opcode(vm, OP_JGT_REAL);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-lt-real") == 0) {
|
|
emit_opcode(vm, OP_JLT_REAL);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-le-real") == 0) {
|
|
emit_opcode(vm, OP_JLE_REAL);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "jump-ge-real") == 0) {
|
|
emit_opcode(vm, OP_JGE_REAL);
|
|
u32 addr = resolve_symbol(table, node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_u32(vm, addr);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "string-length") == 0) {
|
|
emit_opcode(vm, OP_STRLEN);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "string-eq") == 0) {
|
|
emit_opcode(vm, OP_STREQ);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "string-concat") == 0) {
|
|
emit_opcode(vm, OP_STRCAT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "string-get-char") == 0) {
|
|
emit_opcode(vm, OP_STR_GET_CHAR);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "string-find-char") == 0) {
|
|
emit_opcode(vm, OP_STR_FIND_CHAR);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
} else if (strcmp(opname, "string-slice") == 0) {
|
|
emit_opcode(vm, OP_STR_SLICE);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src1 = parse_register(node->children[1]->token);
|
|
int src2 = parse_register(node->children[2]->token);
|
|
int src3 = parse_register(node->children[3]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src1);
|
|
emit_byte(vm, src2);
|
|
emit_byte(vm, src3);
|
|
} else if (strcmp(opname, "int-to-string") == 0) {
|
|
emit_opcode(vm, OP_INT_TO_STRING);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "nat-to-string") == 0) {
|
|
emit_opcode(vm, OP_UINT_TO_STRING);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "real-to-string") == 0) {
|
|
emit_opcode(vm, OP_REAL_TO_STRING);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "string-to-int") == 0) {
|
|
emit_opcode(vm, OP_STRING_TO_INT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "string-to-nat") == 0) {
|
|
emit_opcode(vm, OP_STRING_TO_UINT);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else if (strcmp(opname, "string-to-real") == 0) {
|
|
emit_opcode(vm, OP_STRING_TO_REAL);
|
|
int dest = parse_register(node->children[0]->token);
|
|
int src = parse_register(node->children[1]->token);
|
|
emit_byte(vm, dest);
|
|
emit_byte(vm, src);
|
|
} else {
|
|
fprintf(stderr, "Unknown opcode: %s\n", opname);
|
|
}
|
|
}
|
|
|
|
/*
 * Assemble `program` into `vm` in three passes over its sections:
 *   1. collect every data and code label into a symbol table,
 *   2. lay out each "data" section in memory,
 *   3. emit each "code" section.
 * The symbol table is local to this call and freed before returning.
 */
void assemble(VM *vm, ExprNode *program) {
  SymbolTable symtab;
  symbol_table_init(&symtab);

  // PASS 1: Collect all symbols (both code and data)
  collect_symbols(&symtab, program);

  // PASS 2: Process data section using symbol table
  for (size_t s = 0; s < program->child_count; ++s) {
    ExprNode *sect = program->children[s];
    if (strcmp(sect->token, "data") != 0) {
      continue;
    }
    process_data_block(vm, &symtab, sect);
  }

  // PASS 3: Process code section using complete symbol table
  for (size_t s = 0; s < program->child_count; ++s) {
    ExprNode *sect = program->children[s];
    if (strcmp(sect->token, "code") != 0) {
      continue;
    }
    for (size_t k = 0; k < sect->child_count; ++k) {
      process_code_expr(vm, &symtab, sect->children[k]);
    }
  }

  // Cleanup symbol table
  for (int k = 0; k < symtab.count; k++) {
    Symbol *entry = &symtab.symbols[k];
#ifdef ASM_DEBUG
    printf("%s[%d]\n", entry->name, entry->address);
#endif
    free(entry->name);
  }
  free(symtab.symbols);
}