WIP: nested labels, add, hello, and simple work now!

This commit is contained in:
zongor 2025-09-22 23:29:25 -07:00
parent 2f08c6893c
commit 77745c8880
7 changed files with 357 additions and 192 deletions

View File

@ -1,45 +1,189 @@
#include "assembler.h" #include "assembler.h"
Label labels[256]; // For simplicity typedef enum { SYMBOL_CODE, SYMBOL_DATA, SYMBOL_PLEX } SymbolType;
int label_count = 0;
void add_label(const char *name, u32 address) { typedef struct {
Label *l = &labels[label_count++]; char *name;
l->name = strdup(name); u32 address;
l->address = address; SymbolType type;
int size; // How much memory this symbol occupies
int is_constant; // 1 = constant, 0 = variable
} Symbol;
// Growable collection of Symbol records used while assembling a program.
typedef struct {
// Heap buffer with room for `capacity` Symbol entries.
Symbol *symbols;
// Number of entries currently stored in `symbols`.
int count;
// Number of entries `symbols` can hold before it has to grow.
int capacity;
} SymbolTable;
void symbol_table_init(SymbolTable *table) {
table->capacity = 32;
table->count = 0;
table->symbols = malloc(table->capacity * sizeof(Symbol));
} }
u32 find_label(const char *name) { void symbol_table_add(SymbolTable *table, const char *name, u32 address,
for (int i = 0; i < label_count; ++i) { SymbolType type) {
if (strcmp(labels[i].name, name) == 0) // Check for duplicates
return labels[i].address; for (int i = 0; i < table->count; i++) {
if (strcmp(table->symbols[i].name, name) == 0) {
// Allow plex redefinition for compiler evolution
if (type == SYMBOL_PLEX && table->symbols[i].type == SYMBOL_PLEX) {
return;
}
fprintf(stderr, "Error: Duplicate label '%s'\n", name);
exit(1);
}
} }
fprintf(stderr, "Error: Undefined label '%s'\n", name);
exit(1); if (table->count >= table->capacity) {
table->capacity *= 2;
table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol));
}
Symbol *sym = &table->symbols[table->count++];
sym->name = strdup(name);
sym->address = address;
sym->type = type;
sym->size = 4; // Default size
sym->is_constant = 0;
} }
u32 real_alloc(VM *vm, float v) { Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
i32 fixed = TO_FIXED(v); for (int i = 0; i < table->count; i++) {
u32 addr = vm->mp; if (strcmp(table->symbols[i].name, name) == 0) {
write_u32(vm, memory, vm->mp, fixed); return &table->symbols[i];
vm->mp += 4; }
vm->frames[vm->fp].end += 4; }
return addr; return NULL;
} }
u32 nat_alloc(VM *vm, u32 v) { // Add this helper for your current code
u32 addr = vm->mp; u32 find_label_in_table(SymbolTable *table, const char *name) {
write_u32(vm, memory, vm->mp, v); Symbol *sym = symbol_table_lookup(table, name);
vm->mp += 4; if (!sym) {
vm->frames[vm->fp].end += 4; fprintf(stderr, "Error: Undefined label '%s'\n", name);
return addr; exit(1);
}
return sym->address;
} }
u32 int_alloc(VM *vm, i32 v) { int get_instruction_byte_size(ExprNode *node) {
const char *opname = node->token;
// Simple opcodes (1 byte)
if (strcmp(opname, "halt") == 0 || strcmp(opname, "return") == 0) {
return 1;
}
// Register-based opcodes (2 bytes: opcode + register)
if (strcmp(opname, "pop") == 0 || strcmp(opname, "push") == 0) {
return 2;
}
if (strcmp(opname, "int-to-string") == 0 ||
strcmp(opname, "string-length") == 0) {
return 3;
}
// Load/store with register and address (5 bytes: 1 + 1 + 4)
if (strcmp(opname, "load") == 0 || strcmp(opname, "store") == 0 ||
strcmp(opname, "jump") == 0 || strcmp(opname, "jump-if-flag") == 0 ||
strcmp(opname, "call") == 0) {
return 5;
}
// Register-register-register opcodes (4 bytes: 1 + 3)
if (strcmp(opname, "add-int") == 0 || strcmp(opname, "sub-int") == 0) {
return 4;
}
// Load-immediate (5 bytes: 1 + 1 + 4)
if (strcmp(opname, "load-immediate") == 0) {
return 6;
}
// Syscall (1 + syscall_id (4) + args)
if (strcmp(opname, "syscall") == 0) {
return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0);
}
fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
return 4; // Conservative fallback
}
// Size in bytes of the instruction in `node`; a node without children is
// reported as occupying no space at all.
int calculate_instruction_size(ExprNode *node) {
  return node->child_count == 0 ? 0 : get_instruction_byte_size(node);
}
// Walks one node of the code section and records label addresses.
//
//   table        - symbol table that receives every label found
//   node         - expression to visit
//   current_addr - running byte offset into the code; advanced in place by
//                  the size of every non-label instruction visited
//   depth        - nesting level, used only to indent the trace output
//
// A `label` node registers its first child as a SYMBOL_CODE symbol at the
// current address and then visits its remaining children, which is what
// lets labels nest. Any other node is treated as a single instruction.
void collect_symbols_in_node(SymbolTable *table, ExprNode *node,
                             u32 *current_addr, int depth) {
  // Indentation is produced with a "%*s" field width rather than by
  // strcat-ing into a fixed buffer, so deep nesting cannot overflow anything.
  printf("%*sProcessing: %s (addr=%u)\n", depth, "", node->token,
         (unsigned)*current_addr);

  if (strcmp(node->token, "label") != 0) {
    // Plain instruction: it only moves the address forward.
    int size = get_instruction_byte_size(node);
    *current_addr += size;
    printf("%*s +%d bytes -> %u\n", depth, "", size,
           (unsigned)*current_addr);
    return;
  }

  if (node->child_count >= 1) {
    const char *name = node->children[0]->token;
    printf("%*s ADDING LABEL: %s -> %u\n", depth, "", name,
           (unsigned)*current_addr);
    symbol_table_add(table, name, *current_addr, SYMBOL_CODE);
  }

  // Children after the name are the label's body.
  for (size_t i = 1; i < node->child_count; i++) {
    collect_symbols_in_node(table, node->children[i], current_addr,
                            depth + 1);
  }
}
// Fills `table` with every symbol declared in `program`.
//
// Data labels are registered first with address 0 as a placeholder; code
// labels follow, each at its byte offset from the start of the code,
// counted from zero across all `code` sections in program order.
void collect_symbols(SymbolTable *table, ExprNode *program) {
  size_t s, k;
  u32 next_code_addr = 0;

  // Data sections: only `label` entries that also carry a value count.
  for (s = 0; s < program->child_count; s++) {
    ExprNode *sec = program->children[s];
    if (strcmp(sec->token, "data") != 0)
      continue;
    for (k = 0; k < sec->child_count; k++) {
      ExprNode *entry = sec->children[k];
      if (entry->child_count < 2 || strcmp(entry->token, "label") != 0)
        continue;
      symbol_table_add(table, entry->children[0]->token, 0, SYMBOL_DATA);
    }
  }

  // Code sections: nested labels are handled by the per-node walker.
  for (s = 0; s < program->child_count; s++) {
    ExprNode *sec = program->children[s];
    if (strcmp(sec->token, "code") != 0)
      continue;
    for (k = 0; k < sec->child_count; k++)
      collect_symbols_in_node(table, sec->children[k], &next_code_addr, 0);
  }
}
u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) {
u32 addr = vm->mp; u32 addr = vm->mp;
write_u32(vm, memory, vm->mp, v); vm->mp += size;
vm->mp += 4; vm->frames[vm->fp].end += size;
vm->frames[vm->fp].end += 4;
// Update the symbol's address
Symbol *sym = symbol_table_lookup(table, name);
if (sym && sym->type == SYMBOL_DATA) {
sym->address = addr;
sym->size = size;
}
return addr; return addr;
} }
@ -58,78 +202,85 @@ int parse_register(const char *reg_str) {
return atoi(reg_str + 1); return atoi(reg_str + 1);
} }
u32 parse_memory_ref(const char *ref) { u32 resolve_symbol(SymbolTable *table, const char *ref) {
if (ref[0] == '&') { if (ref[0] == '&') {
return find_label(ref + 1); return find_label_in_table(table, ref + 1);
} }
// Or parse as immediate number?
return 0;
}
void codegen_expr(VM *vm, ExprNode *node); // Handle immediate values
if (strchr(ref, '.')) {
void codegen_code_block(VM *vm, ExprNode *block) { return TO_FIXED(atof(ref));
for (size_t i = 0; i < block->child_count; ++i) {
ExprNode *stmt = block->children[i];
codegen_expr(vm, stmt);
} }
return (u32)atoi(ref);
} }
static char *unwrap_string(const char *quoted_str) { static char *unwrap_string(const char *quoted_str) {
if (!quoted_str) if (!quoted_str)
return NULL; return NULL;
size_t len = strlen(quoted_str); size_t len = strlen(quoted_str);
if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') {
// Remove quotes and process escape sequences // Remove quotes and process escape sequences
const char *src = quoted_str + 1; const char *src = quoted_str + 1;
size_t src_len = len - 2; size_t src_len = len - 2;
// First pass: calculate the actual length needed after escape processing // First pass: calculate the actual length needed after escape processing
size_t actual_len = 0; size_t actual_len = 0;
for (size_t i = 0; i < src_len; ++i) { for (size_t i = 0; i < src_len; ++i) {
if (src[i] == '\\' && i + 1 < src_len) { if (src[i] == '\\' && i + 1 < src_len) {
// Escape sequence // Escape sequence
actual_len++; actual_len++;
i++; // Skip the next character i++; // Skip the next character
} else { } else {
actual_len++; actual_len++;
} }
}
char *unwrapped = (char *)malloc(actual_len + 1);
size_t dst_idx = 0;
// Second pass: process escape sequences
for (size_t i = 0; i < src_len; ++i) {
if (src[i] == '\\' && i + 1 < src_len) {
// Handle escape sequences
switch (src[i + 1]) {
case 'n': unwrapped[dst_idx++] = '\n'; break;
case 't': unwrapped[dst_idx++] = '\t'; break;
case 'r': unwrapped[dst_idx++] = '\r'; break;
case '\\': unwrapped[dst_idx++] = '\\'; break;
case '"': unwrapped[dst_idx++] = '"'; break;
case '\'': unwrapped[dst_idx++] = '\''; break;
default:
// Unknown escape, keep both characters
unwrapped[dst_idx++] = src[i];
unwrapped[dst_idx++] = src[i + 1];
break;
}
i++; // Skip the next character
} else {
unwrapped[dst_idx++] = src[i];
}
}
unwrapped[dst_idx] = '\0';
return unwrapped;
} }
// Not quoted, return copy
return strdup(quoted_str); char *unwrapped = (char *)malloc(actual_len + 1);
size_t dst_idx = 0;
// Second pass: process escape sequences
for (size_t i = 0; i < src_len; ++i) {
if (src[i] == '\\' && i + 1 < src_len) {
// Handle escape sequences
switch (src[i + 1]) {
case 'n':
unwrapped[dst_idx++] = '\n';
break;
case 't':
unwrapped[dst_idx++] = '\t';
break;
case 'r':
unwrapped[dst_idx++] = '\r';
break;
case '\\':
unwrapped[dst_idx++] = '\\';
break;
case '"':
unwrapped[dst_idx++] = '"';
break;
case '\'':
unwrapped[dst_idx++] = '\'';
break;
default:
// Unknown escape, keep both characters
unwrapped[dst_idx++] = src[i];
unwrapped[dst_idx++] = src[i + 1];
break;
}
i++; // Skip the next character
} else {
unwrapped[dst_idx++] = src[i];
}
}
unwrapped[dst_idx] = '\0';
return unwrapped;
}
// Not quoted, return copy
return strdup(quoted_str);
} }
void codegen_data_block(VM *vm, ExprNode *block) { void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) {
for (size_t i = 0; i < block->child_count; ++i) { for (size_t i = 0; i < block->child_count; ++i) {
ExprNode *item = block->children[i]; ExprNode *item = block->children[i];
if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { if (strcmp(item->token, "label") == 0 && item->child_count >= 2) {
@ -139,80 +290,67 @@ void codegen_data_block(VM *vm, ExprNode *block) {
if (val->child_count == 0) { if (val->child_count == 0) {
if (strchr(val->token, '.')) { if (strchr(val->token, '.')) {
float f = atof(val->token); float f = atof(val->token);
u32 addr = real_alloc(vm, f); u32 addr = allocate_data(vm, table, name, 4);
add_label(name, addr); write_u32(vm, memory, addr, TO_FIXED(f));
} else { } else {
// Assume string // unwrap deals with control characters and "" literals
char *unwrapped = unwrap_string(val->token); char *unwrapped = unwrap_string(val->token);
u32 addr = str_alloc(vm, &vm->frames[vm->fp], unwrapped, int len = strlen(unwrapped) + 1; // Include length + null terminator
strlen(unwrapped) + 1); u32 addr = allocate_data(vm, table, name, len + 4);
write_u32(vm, memory, addr, len);
// Copy string to memory
for (int i = 0; i < len; i++) {
write_u8(vm, memory, addr + 4 + i, unwrapped[i]);
}
free(unwrapped); free(unwrapped);
add_label(name, addr);
} }
} else { } else {
// Complex expression?
fprintf(stderr, "Unsupported data item\n"); fprintf(stderr, "Unsupported data item\n");
} }
} }
} }
} }
void codegen_expr(VM *vm, ExprNode *node) { void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
if (node->child_count == 0)
return;
const char *opname = node->token; const char *opname = node->token;
if (strcmp(opname, "label") == 0) { if (strcmp(opname, "label") == 0) {
if (node->child_count < 2) { for (size_t i = 1; i < node->child_count; i++) {
fprintf(stderr, "Error: label requires at least a name\n"); process_code_expr(vm, table, node->children[i]);
return;
}
const char *label_name = node->children[0]->token;
add_label(label_name, vm->cp);
for (size_t i = 1; i < node->child_count; ++i) {
codegen_expr(vm, node->children[i]);
} }
} else if (strcmp(opname, "halt") == 0) { } else if (strcmp(opname, "halt") == 0) {
emit_opcode(vm, OP_HALT); emit_opcode(vm, OP_HALT);
} else if (strcmp(opname, "jump") == 0) { } else if (strcmp(opname, "jump") == 0) {
emit_opcode(vm, OP_JMP); emit_opcode(vm, OP_JMP);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
} else if (strcmp(opname, "jump-if-flag") == 0) { } else if (strcmp(opname, "jump-if-flag") == 0) {
emit_opcode(vm, OP_JMPF); emit_opcode(vm, OP_JMPF);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
} else if (strcmp(opname, "call") == 0) { } else if (strcmp(opname, "call") == 0) {
emit_opcode(vm, OP_CALL); emit_opcode(vm, OP_CALL);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
} else if (strcmp(opname, "return") == 0) { } else if (strcmp(opname, "return") == 0) {
emit_opcode(vm, OP_RETURN); emit_opcode(vm, OP_RETURN);
} else if (strcmp(opname, "load") == 0) { } else if (strcmp(opname, "load") == 0) {
emit_opcode(vm, OP_LOAD); emit_opcode(vm, OP_LOAD);
int reg = parse_register(node->children[0]->token); int reg = parse_register(node->children[0]->token);
u32 addr = parse_memory_ref(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[1]->token);
emit_byte(vm, reg); emit_byte(vm, reg);
emit_u32(vm, addr); emit_u32(vm, addr);
} else if (strcmp(opname, "load-immediate") == 0) { } else if (strcmp(opname, "load-immediate") == 0) {
emit_opcode(vm, OP_LOAD_IMM); emit_opcode(vm, OP_LOAD_IMM);
int reg = parse_register(node->children[0]->token); int reg = parse_register(node->children[0]->token);
if (strchr(node->children[1]->token, '&')) { u32 addr = resolve_symbol(table, node->children[1]->token);
u32 addr = parse_memory_ref(node->children[1]->token); emit_byte(vm, reg);
emit_byte(vm, reg); emit_u32(vm, addr);
emit_u32(vm, addr);
} else {
u32 val = (u32)atoi(node->children[1]->token);
emit_byte(vm, reg);
emit_u32(vm, val);
}
} else if (strcmp(opname, "store") == 0) { } else if (strcmp(opname, "store") == 0) {
emit_opcode(vm, OP_STORE); emit_opcode(vm, OP_STORE);
int reg = parse_register(node->children[0]->token); int reg = parse_register(node->children[0]->token);
u32 addr = parse_memory_ref(node->children[1]->token); u32 addr = resolve_symbol(table, node->children[1]->token);
emit_byte(vm, reg); emit_byte(vm, reg);
emit_u32(vm, addr); emit_u32(vm, addr);
} else if (strcmp(opname, "push") == 0) { } else if (strcmp(opname, "push") == 0) {
@ -377,7 +515,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src); emit_byte(vm, src);
} else if (strcmp(opname, "jump-eq-int") == 0) { } else if (strcmp(opname, "jump-eq-int") == 0) {
emit_opcode(vm, OP_JEQ_INT); emit_opcode(vm, OP_JEQ_INT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -385,7 +523,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-gt-int") == 0) { } else if (strcmp(opname, "jump-gt-int") == 0) {
emit_opcode(vm, OP_JGT_INT); emit_opcode(vm, OP_JGT_INT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -393,7 +531,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-lt-int") == 0) { } else if (strcmp(opname, "jump-lt-int") == 0) {
emit_opcode(vm, OP_JLT_INT); emit_opcode(vm, OP_JLT_INT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -401,7 +539,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-le-int") == 0) { } else if (strcmp(opname, "jump-le-int") == 0) {
emit_opcode(vm, OP_JLE_INT); emit_opcode(vm, OP_JLE_INT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -409,7 +547,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-ge-int") == 0) { } else if (strcmp(opname, "jump-ge-int") == 0) {
emit_opcode(vm, OP_JGE_INT); emit_opcode(vm, OP_JGE_INT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -417,7 +555,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-eq-nat") == 0) { } else if (strcmp(opname, "jump-eq-nat") == 0) {
emit_opcode(vm, OP_JEQ_UINT); emit_opcode(vm, OP_JEQ_UINT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -425,7 +563,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-gt-nat") == 0) { } else if (strcmp(opname, "jump-gt-nat") == 0) {
emit_opcode(vm, OP_JGT_UINT); emit_opcode(vm, OP_JGT_UINT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -433,7 +571,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-lt-nat") == 0) { } else if (strcmp(opname, "jump-lt-nat") == 0) {
emit_opcode(vm, OP_JLT_UINT); emit_opcode(vm, OP_JLT_UINT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -441,7 +579,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-le-nat") == 0) { } else if (strcmp(opname, "jump-le-nat") == 0) {
emit_opcode(vm, OP_JLE_UINT); emit_opcode(vm, OP_JLE_UINT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -449,7 +587,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-ge-nat") == 0) { } else if (strcmp(opname, "jump-ge-nat") == 0) {
emit_opcode(vm, OP_JGE_UINT); emit_opcode(vm, OP_JGE_UINT);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -457,7 +595,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-eq-real") == 0) { } else if (strcmp(opname, "jump-eq-real") == 0) {
emit_opcode(vm, OP_JEQ_REAL); emit_opcode(vm, OP_JEQ_REAL);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -465,7 +603,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-gt-real") == 0) { } else if (strcmp(opname, "jump-gt-real") == 0) {
emit_opcode(vm, OP_JGT_REAL); emit_opcode(vm, OP_JGT_REAL);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -473,7 +611,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-lt-real") == 0) { } else if (strcmp(opname, "jump-lt-real") == 0) {
emit_opcode(vm, OP_JLT_REAL); emit_opcode(vm, OP_JLT_REAL);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -481,7 +619,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-le-real") == 0) { } else if (strcmp(opname, "jump-le-real") == 0) {
emit_opcode(vm, OP_JLE_REAL); emit_opcode(vm, OP_JLE_REAL);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -489,7 +627,7 @@ void codegen_expr(VM *vm, ExprNode *node) {
emit_byte(vm, src2); emit_byte(vm, src2);
} else if (strcmp(opname, "jump-ge-real") == 0) { } else if (strcmp(opname, "jump-ge-real") == 0) {
emit_opcode(vm, OP_JGE_REAL); emit_opcode(vm, OP_JGE_REAL);
u32 addr = find_label(node->children[0]->token); u32 addr = resolve_symbol(table, node->children[0]->token);
int src1 = parse_register(node->children[1]->token); int src1 = parse_register(node->children[1]->token);
int src2 = parse_register(node->children[2]->token); int src2 = parse_register(node->children[2]->token);
emit_u32(vm, addr); emit_u32(vm, addr);
@ -585,19 +723,33 @@ void codegen_expr(VM *vm, ExprNode *node) {
} }
void assemble(VM *vm, ExprNode *program) { void assemble(VM *vm, ExprNode *program) {
// First pass: process data section to define all labels SymbolTable table;
symbol_table_init(&table);
// PASS 1: Collect all symbols (both code and data)
collect_symbols(&table, program);
// PASS 2: Process data section using symbol table
for (size_t i = 0; i < program->child_count; ++i) { for (size_t i = 0; i < program->child_count; ++i) {
ExprNode *section = program->children[i]; ExprNode *section = program->children[i];
if (strcmp(section->token, "data") == 0) { if (strcmp(section->token, "data") == 0) {
codegen_data_block(vm, section); process_data_block(vm, &table, section);
} }
} }
// Second pass: process code section now that all labels are defined // PASS 3: Process code section using complete symbol table
for (size_t i = 0; i < program->child_count; ++i) { for (size_t i = 0; i < program->child_count; ++i) {
ExprNode *section = program->children[i]; ExprNode *section = program->children[i];
if (strcmp(section->token, "code") == 0) { if (strcmp(section->token, "code") == 0) {
codegen_code_block(vm, section); for (size_t i = 0; i < section->child_count; ++i) {
process_code_expr(vm, &table, section->children[i]);
}
} }
} }
// Cleanup symbol table
for (int i = 0; i < table.count; i++) {
free(table.symbols[i].name);
}
free(table.symbols);
} }

View File

@ -15,11 +15,6 @@
((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \ ((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \
))) )))
typedef struct {
char *name;
u32 address;
} Label;
void assemble(VM *vm, ExprNode *program); void assemble(VM *vm, ExprNode *program);
#endif #endif

View File

@ -27,10 +27,24 @@ static ExprNode *expr_node_create(const char *token, int line) {
// Forward declaration // Forward declaration
static ExprNode *parse_expression(const char **ptr, int line); static ExprNode *parse_expression(const char **ptr, int line);
// Skip whitespace characters // Skip whitespace characters and comments
static const char *skip_whitespace(const char *ptr) { static const char *skip_whitespace(const char *ptr) {
while (*ptr && isspace(*ptr)) { while (*ptr) {
ptr++; // Skip regular whitespace
if (isspace(*ptr)) {
ptr++;
continue;
}
// Check for comment start
if (*ptr == ';') {
// Skip everything until end of line
while (*ptr && *ptr != '\n') {
ptr++;
}
continue;
}
break;
} }
return ptr; return ptr;
} }
@ -39,7 +53,7 @@ static const char *skip_whitespace(const char *ptr) {
static char *parse_token(const char **ptr, int line) { static char *parse_token(const char **ptr, int line) {
const char *start = *ptr; const char *start = *ptr;
// Skip leading whitespace // Skip leading whitespace and comments
start = skip_whitespace(start); start = skip_whitespace(start);
if (!*start) { if (!*start) {
printf("Error at line:%d\n", line); printf("Error at line:%d\n", line);
@ -67,13 +81,16 @@ static char *parse_token(const char **ptr, int line) {
else if (*end == '(' || *end == ')') { else if (*end == '(' || *end == ')') {
end++; end++;
} else { } else {
// Read until whitespace or parentheses // Read until whitespace, parentheses, or comment
while (*end && !isspace(*end) && *end != '(' && *end != ')') { while (*end && !isspace(*end) && *end != '(' && *end != ')' && *end != ';') {
end++; end++;
} }
} }
if (end == start) return NULL; if (end == start) {
printf("Error at line:%d\n", line);
return NULL;
}
size_t len = end - start; size_t len = end - start;
char *token = (char *)safe_malloc(len + 1); char *token = (char *)safe_malloc(len + 1);

View File

@ -1,21 +1,26 @@
((code ((code
(label main (label main ; 0
(load-immediate $0 1) (load-immediate $0 1) ; 6
(push $0) (push $0) ; 8
(load-immediate $0 1) (load-immediate $0 1) ; 14
(call &add) (push $0) ; 16
(pop $0) (call &add) ; 19
(int-to-string $1 $0) (pop $0) ; 21
(load-immediate $3 &terminal-str) (int-to-string $1 $0) ; 24
(string-length $2 $1) (load-immediate $3 &terminal-str) ; 30
(syscall DEVICE-WRITE $3 $1 $2) (string-length $2 $1) ; 33
(halt)) (syscall DEVICE-WRITE $3 $1 $2) ; 41
(load-immediate $6 &new-line)
(string-length $7 $6)
(syscall DEVICE-WRITE $5 $6 $7)
(halt)) ; 42
(label add (label add ; 43
(pop $0) (pop $0) ; 45
(pop $1) (pop $1) ; 47
(add-int $2 $1 $0) (add-int $2 $1 $0)
(push $2) (push $2)
(return))) (return)))
(data (data
(label terminal-str "/dev/term/0"))) (label terminal-str "/dev/term/0")
(label new-line "\n")))

View File

@ -14,11 +14,11 @@
(load-immediate $1 2) (load-immediate $1 2)
(load $2 &base-case) (load $2 &base-case)
(jump-lt-int $2 $0 $1) (jump-lt-int $2 $0 $1)
(load $2 2) (load-immediate $2 2)
(sub-int $4 $0 $3) (sub-int $4 $0 $3)
(push $4) (push $4)
(call &fib) (call &fib)
(load $2 1) (load-immediate $2 1)
(sub-int $4 $0 $3) (sub-int $4 $0 $3)
(push $4) (push $4)
(call &fib) (call &fib)
@ -26,8 +26,8 @@
(pop $5) (pop $5)
(add-int $6 $5 $4) (add-int $6 $5 $4)
(push $6) (push $6)
(return) (return))
(label base-case) (label base-case
(push $0) (push $0)
(return))) (return)))
(data (data

View File

@ -1,14 +1,10 @@
((code ((code
(label main (label main
(load-immediate $0 &terminal-str) (load-immediate $0 &terminal-str) ; load terminal namespace
(load-immediate $1 &hello-str) (load-immediate $1 &hello-str) ; load hello string ptr
(string-length $2 $1) (string-length $2 $1) ; get length to write to stdout
(syscall DEVICE-WRITE $0 $1 $2) (syscall DEVICE-WRITE $0 $1 $2) ; do the write syscall
(load-immediate $3 &new-line) (halt))) ; done
(string-length $4 $3)
(syscall DEVICE-WRITE $0 $3 $4)
(halt)))
(data (data
(label terminal-str "/dev/term/0") (label terminal-str "/dev/term/0")
(label new-line "\n") (label hello-str "nuqneH 'u'?\n")))
(label hello-str "nuqneH 'u'?")))

View File

@ -4,12 +4,12 @@
(load-immediate $1 5000) (load-immediate $1 5000)
(load-immediate $2 0) (load-immediate $2 0)
(load-immediate $3 -1) (load-immediate $3 -1)
(label loop-body) (label loop-body
(load $4 &loop-body) (load $4 &loop-body)
(load-immediate $5 5.0) (load-immediate $5 5.0)
(add-real $0 $0 $5) (add-real $0 $0 $5)
(add-int $1 $1 $3) (add-int $1 $1 $3)
(jump-gt-eq-int $4 $1 $2) (jump-gt-eq-int $4 $1 $2))
(real-to-nat $1 $0) (real-to-nat $1 $0)
(load-immediate $6 &terminal-str) (load-immediate $6 &terminal-str)
(load $7 &help) (load $7 &help)