From 77745c8880dfa7fa9d8082f9d9962f93ba4fea4c Mon Sep 17 00:00:00 2001 From: zongor Date: Mon, 22 Sep 2025 23:29:25 -0700 Subject: [PATCH] WIP: nested labels, add, hello, and simple work now! --- src/tools/assembler.c | 442 ++++++++++++++++++++++++++++-------------- src/tools/assembler.h | 5 - src/tools/parser.c | 31 ++- test/add.asm.lisp | 35 ++-- test/fib.asm.lisp | 8 +- test/hello.asm.lisp | 16 +- test/loop.asm.lisp | 12 +- 7 files changed, 357 insertions(+), 192 deletions(-) diff --git a/src/tools/assembler.c b/src/tools/assembler.c index 3b3b581..5635817 100644 --- a/src/tools/assembler.c +++ b/src/tools/assembler.c @@ -1,45 +1,189 @@ #include "assembler.h" -Label labels[256]; // For simplicity -int label_count = 0; +typedef enum { SYMBOL_CODE, SYMBOL_DATA, SYMBOL_PLEX } SymbolType; -void add_label(const char *name, u32 address) { - Label *l = &labels[label_count++]; - l->name = strdup(name); - l->address = address; +typedef struct { + char *name; + u32 address; + SymbolType type; + int size; // How much memory this symbol occupies + int is_constant; // 1 = constant, 0 = variable +} Symbol; + +typedef struct { + Symbol *symbols; + int count; + int capacity; +} SymbolTable; + +void symbol_table_init(SymbolTable *table) { + table->capacity = 32; + table->count = 0; + table->symbols = malloc(table->capacity * sizeof(Symbol)); } -u32 find_label(const char *name) { - for (int i = 0; i < label_count; ++i) { - if (strcmp(labels[i].name, name) == 0) - return labels[i].address; +void symbol_table_add(SymbolTable *table, const char *name, u32 address, + SymbolType type) { + // Check for duplicates + for (int i = 0; i < table->count; i++) { + if (strcmp(table->symbols[i].name, name) == 0) { + // Allow plex redefinition for compiler evolution + if (type == SYMBOL_PLEX && table->symbols[i].type == SYMBOL_PLEX) { + return; + } + fprintf(stderr, "Error: Duplicate label '%s'\n", name); + exit(1); + } } - fprintf(stderr, "Error: Undefined label '%s'\n", name); - exit(1); + + 
if (table->count >= table->capacity) { + table->capacity *= 2; + table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); + } + + Symbol *sym = &table->symbols[table->count++]; + sym->name = strdup(name); + sym->address = address; + sym->type = type; + sym->size = 4; // Default size + sym->is_constant = 0; } -u32 real_alloc(VM *vm, float v) { - i32 fixed = TO_FIXED(v); - u32 addr = vm->mp; - write_u32(vm, memory, vm->mp, fixed); - vm->mp += 4; - vm->frames[vm->fp].end += 4; - return addr; +Symbol *symbol_table_lookup(SymbolTable *table, const char *name) { + for (int i = 0; i < table->count; i++) { + if (strcmp(table->symbols[i].name, name) == 0) { + return &table->symbols[i]; + } + } + return NULL; } -u32 nat_alloc(VM *vm, u32 v) { - u32 addr = vm->mp; - write_u32(vm, memory, vm->mp, v); - vm->mp += 4; - vm->frames[vm->fp].end += 4; - return addr; +// Resolve a label to its address; exit with an error if it is undefined +u32 find_label_in_table(SymbolTable *table, const char *name) { + Symbol *sym = symbol_table_lookup(table, name); + if (!sym) { + fprintf(stderr, "Error: Undefined label '%s'\n", name); + exit(1); + } + return sym->address; } -u32 int_alloc(VM *vm, i32 v) { +int get_instruction_byte_size(ExprNode *node) { + const char *opname = node->token; + + // Simple opcodes (1 byte) + if (strcmp(opname, "halt") == 0 || strcmp(opname, "return") == 0) { + return 1; + } + + // Register-based opcodes (2 bytes: opcode + register) + if (strcmp(opname, "pop") == 0 || strcmp(opname, "push") == 0) { + return 2; + } + + if (strcmp(opname, "int-to-string") == 0 || + strcmp(opname, "string-length") == 0) { + return 3; + } + + // Address-operand opcodes (5 bytes: 1 + 4); NOTE(review): load/store also emit a register byte (1 + 1 + 4 = 6) - confirm + if (strcmp(opname, "load") == 0 || strcmp(opname, "store") == 0 || + strcmp(opname, "jump") == 0 || strcmp(opname, "jump-if-flag") == 0 || + strcmp(opname, "call") == 0) { + return 5; + } + + // Register-register-register opcodes (4 bytes: 1 + 3) + if (strcmp(opname, "add-int") == 0 || strcmp(opname, 
"sub-int") == 0) { + return 4; + } + + // Load-immediate (6 bytes: 1 + 1 + 4) + if (strcmp(opname, "load-immediate") == 0) { + return 6; + } + + // Syscall (1 + syscall_id (4) + args) + if (strcmp(opname, "syscall") == 0) { + return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0); + } + + fprintf(stderr, "Unknown opcode for sizing: %s\n", opname); + return 4; // Conservative fallback +} + +int calculate_instruction_size(ExprNode *node) { + if (node->child_count == 0) + return 0; + + return get_instruction_byte_size(node); +} + +void collect_symbols_in_node(SymbolTable *table, ExprNode *node, + u32 *current_addr, int depth) { + char indent[32] = ""; + for (int i = 0; i < depth; i++) + strcat(indent, " "); + + printf("%sProcessing: %s (addr=%d)\n", indent, node->token, *current_addr); + + if (strcmp(node->token, "label") == 0) { + if (node->child_count >= 1) { + const char *name = node->children[0]->token; + printf("%s ADDING LABEL: %s -> %d\n", indent, name, *current_addr); + symbol_table_add(table, name, *current_addr, SYMBOL_CODE); + } + + for (size_t i = 1; i < node->child_count; i++) { + collect_symbols_in_node(table, node->children[i], current_addr, + depth + 1); + } + } else { + int size = get_instruction_byte_size(node); + *current_addr += size; + printf("%s +%d bytes -> %d\n", indent, size, *current_addr); + } +} + +void collect_symbols(SymbolTable *table, ExprNode *program) { + // First, collect all data labels (with placeholder address) + for (size_t i = 0; i < program->child_count; ++i) { + ExprNode *section = program->children[i]; + if (strcmp(section->token, "data") == 0) { + for (size_t j = 0; j < section->child_count; ++j) { + ExprNode *item = section->children[j]; + if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { + const char *name = item->children[0]->token; + symbol_table_add(table, name, 0, SYMBOL_DATA); + } + } + } + } + + // Second, collect all code labels with proper nesting + u32 code_addr = 0; + for (size_t i = 
0; i < program->child_count; ++i) { + ExprNode *section = program->children[i]; + if (strcmp(section->token, "code") == 0) { + for (size_t j = 0; j < section->child_count; ++j) { + collect_symbols_in_node(table, section->children[j], &code_addr, 0); + } + } + } +} + +u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) { u32 addr = vm->mp; - write_u32(vm, memory, vm->mp, v); - vm->mp += 4; - vm->frames[vm->fp].end += 4; + vm->mp += size; + vm->frames[vm->fp].end += size; + + // Update the symbol's address + Symbol *sym = symbol_table_lookup(table, name); + if (sym && sym->type == SYMBOL_DATA) { + sym->address = addr; + sym->size = size; + } + return addr; } @@ -58,78 +202,85 @@ int parse_register(const char *reg_str) { return atoi(reg_str + 1); } -u32 parse_memory_ref(const char *ref) { +u32 resolve_symbol(SymbolTable *table, const char *ref) { if (ref[0] == '&') { - return find_label(ref + 1); + return find_label_in_table(table, ref + 1); } - // Or parse as immediate number? 
- return 0; -} -void codegen_expr(VM *vm, ExprNode *node); - -void codegen_code_block(VM *vm, ExprNode *block) { - for (size_t i = 0; i < block->child_count; ++i) { - ExprNode *stmt = block->children[i]; - codegen_expr(vm, stmt); + // Handle immediate values + if (strchr(ref, '.')) { + return TO_FIXED(atof(ref)); } + return (u32)atoi(ref); } static char *unwrap_string(const char *quoted_str) { - if (!quoted_str) - return NULL; + if (!quoted_str) + return NULL; - size_t len = strlen(quoted_str); - if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { - // Remove quotes and process escape sequences - const char *src = quoted_str + 1; - size_t src_len = len - 2; - - // First pass: calculate the actual length needed after escape processing - size_t actual_len = 0; - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Escape sequence - actual_len++; - i++; // Skip the next character - } else { - actual_len++; - } - } - - char *unwrapped = (char *)malloc(actual_len + 1); - size_t dst_idx = 0; - - // Second pass: process escape sequences - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Handle escape sequences - switch (src[i + 1]) { - case 'n': unwrapped[dst_idx++] = '\n'; break; - case 't': unwrapped[dst_idx++] = '\t'; break; - case 'r': unwrapped[dst_idx++] = '\r'; break; - case '\\': unwrapped[dst_idx++] = '\\'; break; - case '"': unwrapped[dst_idx++] = '"'; break; - case '\'': unwrapped[dst_idx++] = '\''; break; - default: - // Unknown escape, keep both characters - unwrapped[dst_idx++] = src[i]; - unwrapped[dst_idx++] = src[i + 1]; - break; - } - i++; // Skip the next character - } else { - unwrapped[dst_idx++] = src[i]; - } - } - unwrapped[dst_idx] = '\0'; - return unwrapped; + size_t len = strlen(quoted_str); + if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { + // Remove quotes and process escape sequences + const char *src = quoted_str + 1; + size_t 
src_len = len - 2; + + // First pass: calculate the actual length needed after escape processing + size_t actual_len = 0; + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Escape sequence + actual_len++; + i++; // Skip the next character + } else { + actual_len++; + } } - // Not quoted, return copy - return strdup(quoted_str); + + char *unwrapped = (char *)malloc(actual_len + 1); + size_t dst_idx = 0; + + // Second pass: process escape sequences + for (size_t i = 0; i < src_len; ++i) { + if (src[i] == '\\' && i + 1 < src_len) { + // Handle escape sequences + switch (src[i + 1]) { + case 'n': + unwrapped[dst_idx++] = '\n'; + break; + case 't': + unwrapped[dst_idx++] = '\t'; + break; + case 'r': + unwrapped[dst_idx++] = '\r'; + break; + case '\\': + unwrapped[dst_idx++] = '\\'; + break; + case '"': + unwrapped[dst_idx++] = '"'; + break; + case '\'': + unwrapped[dst_idx++] = '\''; + break; + default: + // Unknown escape, keep both characters + unwrapped[dst_idx++] = src[i]; + unwrapped[dst_idx++] = src[i + 1]; + break; + } + i++; // Skip the next character + } else { + unwrapped[dst_idx++] = src[i]; + } + } + unwrapped[dst_idx] = '\0'; + return unwrapped; + } + // Not quoted, return copy + return strdup(quoted_str); } -void codegen_data_block(VM *vm, ExprNode *block) { +void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) { for (size_t i = 0; i < block->child_count; ++i) { ExprNode *item = block->children[i]; if (strcmp(item->token, "label") == 0 && item->child_count >= 2) { @@ -139,80 +290,67 @@ void codegen_data_block(VM *vm, ExprNode *block) { if (val->child_count == 0) { if (strchr(val->token, '.')) { float f = atof(val->token); - u32 addr = real_alloc(vm, f); - add_label(name, addr); + u32 addr = allocate_data(vm, table, name, 4); + write_u32(vm, memory, addr, TO_FIXED(f)); } else { - // Assume string + // unwrap deals with control characters and "" literals char *unwrapped = unwrap_string(val->token); - u32 
addr = str_alloc(vm, &vm->frames[vm->fp], unwrapped, - strlen(unwrapped) + 1); + int len = strlen(unwrapped) + 1; // Include length + null terminator + u32 addr = allocate_data(vm, table, name, len + 4); + + write_u32(vm, memory, addr, len); + // Copy string to memory + for (int i = 0; i < len; i++) { + write_u8(vm, memory, addr + 4 + i, unwrapped[i]); + } + free(unwrapped); - add_label(name, addr); } } else { - // Complex expression? fprintf(stderr, "Unsupported data item\n"); } } } } -void codegen_expr(VM *vm, ExprNode *node) { - if (node->child_count == 0) - return; - +void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) { const char *opname = node->token; - if (strcmp(opname, "label") == 0) { - if (node->child_count < 2) { - fprintf(stderr, "Error: label requires at least a name\n"); - return; - } - - const char *label_name = node->children[0]->token; - add_label(label_name, vm->cp); - - for (size_t i = 1; i < node->child_count; ++i) { - codegen_expr(vm, node->children[i]); + for (size_t i = 1; i < node->child_count; i++) { + process_code_expr(vm, table, node->children[i]); } } else if (strcmp(opname, "halt") == 0) { emit_opcode(vm, OP_HALT); } else if (strcmp(opname, "jump") == 0) { emit_opcode(vm, OP_JMP); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); emit_u32(vm, addr); } else if (strcmp(opname, "jump-if-flag") == 0) { emit_opcode(vm, OP_JMPF); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); emit_u32(vm, addr); } else if (strcmp(opname, "call") == 0) { emit_opcode(vm, OP_CALL); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); emit_u32(vm, addr); } else if (strcmp(opname, "return") == 0) { emit_opcode(vm, OP_RETURN); } else if (strcmp(opname, "load") == 0) { emit_opcode(vm, OP_LOAD); int reg = parse_register(node->children[0]->token); - u32 addr = 
parse_memory_ref(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[1]->token); emit_byte(vm, reg); emit_u32(vm, addr); } else if (strcmp(opname, "load-immediate") == 0) { emit_opcode(vm, OP_LOAD_IMM); int reg = parse_register(node->children[0]->token); - if (strchr(node->children[1]->token, '&')) { - u32 addr = parse_memory_ref(node->children[1]->token); - emit_byte(vm, reg); - emit_u32(vm, addr); - } else { - u32 val = (u32)atoi(node->children[1]->token); - emit_byte(vm, reg); - emit_u32(vm, val); - } + u32 addr = resolve_symbol(table, node->children[1]->token); + emit_byte(vm, reg); + emit_u32(vm, addr); } else if (strcmp(opname, "store") == 0) { emit_opcode(vm, OP_STORE); int reg = parse_register(node->children[0]->token); - u32 addr = parse_memory_ref(node->children[1]->token); + u32 addr = resolve_symbol(table, node->children[1]->token); emit_byte(vm, reg); emit_u32(vm, addr); } else if (strcmp(opname, "push") == 0) { @@ -377,7 +515,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src); } else if (strcmp(opname, "jump-eq-int") == 0) { emit_opcode(vm, OP_JEQ_INT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -385,7 +523,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-gt-int") == 0) { emit_opcode(vm, OP_JGT_INT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -393,7 +531,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-lt-int") == 0) { emit_opcode(vm, OP_JLT_INT); - u32 addr = find_label(node->children[0]->token); + u32 addr = 
resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -401,7 +539,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-le-int") == 0) { emit_opcode(vm, OP_JLE_INT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -409,7 +547,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-ge-int") == 0) { emit_opcode(vm, OP_JGE_INT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -417,7 +555,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-eq-nat") == 0) { emit_opcode(vm, OP_JEQ_UINT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -425,7 +563,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-gt-nat") == 0) { emit_opcode(vm, OP_JGT_UINT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -433,7 +571,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-lt-nat") == 0) { emit_opcode(vm, OP_JLT_UINT); - u32 addr = find_label(node->children[0]->token); 
+ u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -441,7 +579,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-le-nat") == 0) { emit_opcode(vm, OP_JLE_UINT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -449,7 +587,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-ge-nat") == 0) { emit_opcode(vm, OP_JGE_UINT); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -457,7 +595,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-eq-real") == 0) { emit_opcode(vm, OP_JEQ_REAL); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -465,7 +603,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-gt-real") == 0) { emit_opcode(vm, OP_JGT_REAL); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -473,7 +611,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-lt-real") == 0) { emit_opcode(vm, OP_JLT_REAL); - u32 addr = 
find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -481,7 +619,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-le-real") == 0) { emit_opcode(vm, OP_JLE_REAL); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -489,7 +627,7 @@ void codegen_expr(VM *vm, ExprNode *node) { emit_byte(vm, src2); } else if (strcmp(opname, "jump-ge-real") == 0) { emit_opcode(vm, OP_JGE_REAL); - u32 addr = find_label(node->children[0]->token); + u32 addr = resolve_symbol(table, node->children[0]->token); int src1 = parse_register(node->children[1]->token); int src2 = parse_register(node->children[2]->token); emit_u32(vm, addr); @@ -585,19 +723,33 @@ void codegen_expr(VM *vm, ExprNode *node) { } void assemble(VM *vm, ExprNode *program) { - // First pass: process data section to define all labels + SymbolTable table; + symbol_table_init(&table); + + // PASS 1: Collect all symbols (both code and data) + collect_symbols(&table, program); + + // PASS 2: Process data section using symbol table for (size_t i = 0; i < program->child_count; ++i) { ExprNode *section = program->children[i]; if (strcmp(section->token, "data") == 0) { - codegen_data_block(vm, section); + process_data_block(vm, &table, section); } } - // Second pass: process code section now that all labels are defined + // PASS 3: Process code section using complete symbol table for (size_t i = 0; i < program->child_count; ++i) { ExprNode *section = program->children[i]; if (strcmp(section->token, "code") == 0) { - codegen_code_block(vm, section); + for (size_t i = 0; i < section->child_count; ++i) { + 
process_code_expr(vm, &table, section->children[i]); + } } } + + // Cleanup symbol table + for (int i = 0; i < table.count; i++) { + free(table.symbols[i].name); + } + free(table.symbols); } \ No newline at end of file diff --git a/src/tools/assembler.h b/src/tools/assembler.h index b7b0e34..9dabb70 100644 --- a/src/tools/assembler.h +++ b/src/tools/assembler.h @@ -15,11 +15,6 @@ ((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \ ))) -typedef struct { - char *name; - u32 address; -} Label; - void assemble(VM *vm, ExprNode *program); #endif diff --git a/src/tools/parser.c b/src/tools/parser.c index 0a8233c..a60c9c4 100644 --- a/src/tools/parser.c +++ b/src/tools/parser.c @@ -27,10 +27,24 @@ static ExprNode *expr_node_create(const char *token, int line) { // Forward declaration static ExprNode *parse_expression(const char **ptr, int line); -// Skip whitespace characters +// Skip whitespace characters and comments static const char *skip_whitespace(const char *ptr) { - while (*ptr && isspace(*ptr)) { - ptr++; + while (*ptr) { + // Skip regular whitespace + if (isspace(*ptr)) { + ptr++; + continue; + } + + // Check for comment start + if (*ptr == ';') { + // Skip everything until end of line + while (*ptr && *ptr != '\n') { + ptr++; + } + continue; + } + break; } return ptr; } @@ -39,7 +53,7 @@ static const char *skip_whitespace(const char *ptr) { static char *parse_token(const char **ptr, int line) { const char *start = *ptr; - // Skip leading whitespace + // Skip leading whitespace and comments start = skip_whitespace(start); if (!*start) { printf("Error at line:%d\n", line); @@ -67,13 +81,16 @@ static char *parse_token(const char **ptr, int line) { else if (*end == '(' || *end == ')') { end++; } else { - // Read until whitespace or parentheses - while (*end && !isspace(*end) && *end != '(' && *end != ')') { + // Read until whitespace, parentheses, or comment + while (*end && !isspace(*end) && *end != '(' && *end != ')' && *end != ';') { end++; } } - 
if (end == start) return NULL; + if (end == start) { + printf("Error at line:%d\n", line); + return NULL; + } size_t len = end - start; char *token = (char *)safe_malloc(len + 1); diff --git a/test/add.asm.lisp b/test/add.asm.lisp index 4d3c9c4..99b9059 100644 --- a/test/add.asm.lisp +++ b/test/add.asm.lisp @@ -1,21 +1,26 @@ ((code - (label main - (load-immediate $0 1) - (push $0) - (load-immediate $0 1) - (call &add) - (pop $0) - (int-to-string $1 $0) - (load-immediate $3 &terminal-str) - (string-length $2 $1) - (syscall DEVICE-WRITE $3 $1 $2) - (halt)) + (label main ; 0 + (load-immediate $0 1) ; 6 + (push $0) ; 8 + (load-immediate $0 1) ; 14 + (push $0) ; 16 + (call &add) ; 19 + (pop $0) ; 21 + (int-to-string $1 $0) ; 24 + (load-immediate $3 &terminal-str) ; 30 + (string-length $2 $1) ; 33 + (syscall DEVICE-WRITE $3 $1 $2) ; 41 + (load-immediate $6 &new-line) + (string-length $7 $6) + (syscall DEVICE-WRITE $5 $6 $7) + (halt)) ; 42 - (label add - (pop $0) - (pop $1) + (label add ; 43 + (pop $0) ; 45 + (pop $1) ; 47 (add-int $2 $1 $0) (push $2) (return))) (data - (label terminal-str "/dev/term/0"))) + (label terminal-str "/dev/term/0") + (label new-line "\n"))) diff --git a/test/fib.asm.lisp b/test/fib.asm.lisp index 634e1d6..875d7ad 100644 --- a/test/fib.asm.lisp +++ b/test/fib.asm.lisp @@ -14,11 +14,11 @@ (load-immediate $1 2) (load $2 &base-case) (jump-lt-int $2 $0 $1) - (load $2 2) + (load-immediate $2 2) (sub-int $4 $0 $3) (push $4) (call &fib) - (load $2 1) + (load-immediate $2 1) (sub-int $4 $0 $3) (push $4) (call &fib) @@ -26,8 +26,8 @@ (pop $5) (add-int $6 $5 $4) (push $6) - (return) - (label base-case) + (return)) + (label base-case (push $0) (return))) (data diff --git a/test/hello.asm.lisp b/test/hello.asm.lisp index 6624d7b..e8f3c46 100644 --- a/test/hello.asm.lisp +++ b/test/hello.asm.lisp @@ -1,14 +1,10 @@ ((code (label main - (load-immediate $0 &terminal-str) - (load-immediate $1 &hello-str) - (string-length $2 $1) - (syscall DEVICE-WRITE $0 $1 $2) 
- (load-immediate $3 &new-line) - (string-length $4 $3) - (syscall DEVICE-WRITE $0 $3 $4) - (halt))) + (load-immediate $0 &terminal-str) ; load terminal namespace + (load-immediate $1 &hello-str) ; load hello string ptr + (string-length $2 $1) ; get length to write to stdout + (syscall DEVICE-WRITE $0 $1 $2) ; do the write syscall + (halt))) ; done (data (label terminal-str "/dev/term/0") - (label new-line "\n") - (label hello-str "nuqneH 'u'?"))) + (label hello-str "nuqneH 'u'?\n"))) diff --git a/test/loop.asm.lisp b/test/loop.asm.lisp index c1e18de..7d86be7 100644 --- a/test/loop.asm.lisp +++ b/test/loop.asm.lisp @@ -4,12 +4,12 @@ (load-immediate $1 5000) (load-immediate $2 0) (load-immediate $3 -1) - (label loop-body) - (load $4 &loop-body) - (load-immediate $5 5.0) - (add-real $0 $0 $5) - (add-int $1 $1 $3) - (jump-gt-eq-int $4 $1 $2) + (label loop-body + (load $4 &loop-body) + (load-immediate $5 5.0) + (add-real $0 $0 $5) + (add-int $1 $1 $3) + (jump-gt-eq-int $4 $1 $2)) (real-to-nat $1 $0) (load-immediate $6 &terminal-str) (load $7 &help)