1170 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			1170 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			C
		
	
	
	
| #include "assembler.h"
 | |
| #include "parser.h"
 | |
// Which section of the program a symbol was declared in.
typedef enum {
  SYMBOL_CODE, // label found while walking a "code" section
  SYMBOL_DATA  // label found while walking a "data" section
} SymbolType;
 | |
| 
 | |
| typedef struct {
 | |
|   char *name;
 | |
|   u32 address;
 | |
|   SymbolType type;
 | |
|   int size;        // How much memory this symbol occupies
 | |
|   int is_constant; // 1 = constant, 0 = variable
 | |
| } Symbol;
 | |
| 
 | |
| typedef struct {
 | |
|   Symbol *symbols;
 | |
|   int count;
 | |
|   int capacity;
 | |
| } SymbolTable;
 | |
| 
 | |
| void symbol_table_init(SymbolTable *table) {
 | |
|   table->capacity = 32;
 | |
|   table->count = 0;
 | |
|   table->symbols = malloc(table->capacity * sizeof(Symbol));
 | |
| }
 | |
| 
 | |
| void symbol_table_add(SymbolTable *table, const char *name, u32 address,
 | |
|                       SymbolType type) {
 | |
|   // Check for duplicates
 | |
|   for (int i = 0; i < table->count; i++) {
 | |
|     if (strcmp(table->symbols[i].name, name) == 0) {
 | |
|       fprintf(stderr, "Error: Duplicate label '%s'\n", name);
 | |
|       exit(1);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (table->count >= table->capacity) {
 | |
|     table->capacity *= 2;
 | |
|     table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol));
 | |
|   }
 | |
| 
 | |
|   Symbol *sym = &table->symbols[table->count++];
 | |
|   sym->name = strdup(name);
 | |
|   sym->address = address;
 | |
|   sym->type = type;
 | |
|   sym->size = 4; // Default size
 | |
|   sym->is_constant = 0;
 | |
| }
 | |
| 
 | |
| Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
 | |
|   for (int i = 0; i < table->count; i++) {
 | |
|     if (strcmp(table->symbols[i].name, name) == 0) {
 | |
|       return &table->symbols[i];
 | |
|     }
 | |
|   }
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
| u32 find_label_in_table(SymbolTable *table, const char *name) {
 | |
|   Symbol *sym = symbol_table_lookup(table, name);
 | |
|   if (!sym) {
 | |
|     fprintf(stderr, "Error: Undefined label '%s'\n", name);
 | |
|     exit(1);
 | |
|   }
 | |
|   return sym->address;
 | |
| }
 | |
| 
 | |
| int get_instruction_byte_size(ExprNode *node) {
 | |
|   const char *opname = node->token;
 | |
| 
 | |
|   // Simple opcodes (1 byte)
 | |
|   if (strcmp(opname, "halt") == 0) {
 | |
|     return 1;
 | |
|   }
 | |
| 
 | |
|   // Return (1 + 1)
 | |
|   if (strcmp(opname, "return") == 0) {
 | |
|     return 2;  // 1 byte opcode + 1 byte return register
 | |
|   }
 | |
| 
 | |
|   if (strcmp(opname, "int-to-string") == 0 ||
 | |
|       strcmp(opname, "load-indirect-8") == 0 ||
 | |
|       strcmp(opname, "nat-to-string") == 0 ||
 | |
|       strcmp(opname, "load-indirect-16") == 0 ||
 | |
|       strcmp(opname, "real-to-string") == 0 ||
 | |
|       strcmp(opname, "load-indirect-32") == 0 ||
 | |
|       strcmp(opname, "int-to-real") == 0 ||
 | |
|       strcmp(opname, "store-indirect-8") == 0 ||
 | |
|       strcmp(opname, "nat-to-real") == 0 ||
 | |
|       strcmp(opname, "store-indirect-16") == 0 ||
 | |
|       strcmp(opname, "real-to-int") == 0 ||
 | |
|       strcmp(opname, "store-indirect-32") == 0 ||
 | |
|       strcmp(opname, "real-to-nat") == 0 || strcmp(opname, "nat-to-int") == 0 ||
 | |
|       strcmp(opname, "int-to-nat") == 0 ||
 | |
|       strcmp(opname, "string-length") == 0 ||
 | |
|       strcmp(opname, "store-absolute-32") == 0 ||
 | |
|       strcmp(opname, "store-absolute-8") == 0 ||
 | |
|       strcmp(opname, "store-absolute-16") == 0 ||
 | |
|       strcmp(opname, "memset") == 0 || strcmp(opname, "memset") == 0 ||
 | |
|       strcmp(opname, "memset-8") == 0 || strcmp(opname, "memset-16") == 0 ||
 | |
|       strcmp(opname, "register-move") == 0 || strcmp(opname, "malloc") == 0) {
 | |
|     return 3;
 | |
|   }
 | |
| 
 | |
|   // Register-register-register opcodes (4 bytes: 1 + 3)
 | |
|   if (strcmp(opname, "add-int") == 0 || strcmp(opname, "sub-int") == 0 ||
 | |
|       strcmp(opname, "mul-int") == 0 || strcmp(opname, "div-int") == 0 ||
 | |
|       strcmp(opname, "add-nat") == 0 || strcmp(opname, "sub-nat") == 0 ||
 | |
|       strcmp(opname, "mul-nat") == 0 || strcmp(opname, "div-nat") == 0 ||
 | |
|       strcmp(opname, "add-real") == 0 || strcmp(opname, "sub-real") == 0 ||
 | |
|       strcmp(opname, "bit-shift-left") == 0 ||
 | |
|       strcmp(opname, "bit-shift-right") == 0 ||
 | |
|       strcmp(opname, "bit-and") == 0 || strcmp(opname, "bit-or") == 0 ||
 | |
|       strcmp(opname, "bit-xor") == 0 || strcmp(opname, "mul-real") == 0 ||
 | |
|       strcmp(opname, "div-real") == 0) {
 | |
|     return 4;
 | |
|   }
 | |
| 
 | |
|   // (5 bytes: 1 + 4)
 | |
|   if (strcmp(opname, "jump-if-flag") == 0 || strcmp(opname, "jump") == 0) {
 | |
|     return 5;
 | |
|   }
 | |
| 
 | |
|   // Load, Load-immediate (6 bytes: 1 + 1 + 4)
 | |
|   if (strcmp(opname, "load-absolute-32") == 0 ||
 | |
|       strcmp(opname, "load-immediate") == 0 ||
 | |
|       strcmp(opname, "load-absolute-16") == 0 ||
 | |
|       strcmp(opname, "load-absolute-8") == 0) {
 | |
|     return 6;
 | |
|   }
 | |
| 
 | |
|   // jump compare (7 bytes: 1 + 4 + 1 + 1)
 | |
|   if (strcmp(opname, "jump-eq-int") == 0 ||
 | |
|       strcmp(opname, "jump-neq-int") == 0 ||
 | |
|       strcmp(opname, "jump-gt-int") == 0 ||
 | |
|       strcmp(opname, "jump-lt-int") == 0 ||
 | |
|       strcmp(opname, "jump-le-int") == 0 ||
 | |
|       strcmp(opname, "jump-ge-int") == 0 ||
 | |
|       strcmp(opname, "jump-eq-nat") == 0 ||
 | |
|       strcmp(opname, "jump-neq-nat") == 0 ||
 | |
|       strcmp(opname, "jump-gt-nat") == 0 ||
 | |
|       strcmp(opname, "jump-lt-nat") == 0 ||
 | |
|       strcmp(opname, "jump-le-nat") == 0 ||
 | |
|       strcmp(opname, "jump-ge-nat") == 0 ||
 | |
|       strcmp(opname, "jump-eq-real") == 0 ||
 | |
|       strcmp(opname, "jump-neq-real") == 0 ||
 | |
|       strcmp(opname, "jump-gt-real") == 0 ||
 | |
|       strcmp(opname, "jump-lt-real") == 0 ||
 | |
|       strcmp(opname, "jump-le-real") == 0 ||
 | |
|       strcmp(opname, "jump-ge-real") == 0 ||
 | |
|       strcmp(opname, "store-offset-8") == 0 ||
 | |
|       strcmp(opname, "store-offset-16") == 0 ||
 | |
|       strcmp(opname, "store-offset-32") == 0 ||
 | |
|       strcmp(opname, "load-offset-8") == 0 ||
 | |
|       strcmp(opname, "load-offset-16") == 0 ||
 | |
|       strcmp(opname, "load-offset-32") == 0) {
 | |
|     return 7;
 | |
|   }
 | |
| 
 | |
|   // Call (1 + 4 + 1 + args + 1)
 | |
|   if (strcmp(opname, "call") == 0) {
 | |
|     ExprNode *args_node = node->children[1];
 | |
|     u32 args_count;
 | |
| 
 | |
|     if (strcmp(args_node->token, "nil") == 0) {
 | |
|       args_count = 0;
 | |
|     } else {
 | |
|       args_count = 1 + args_node->child_count;
 | |
|     }
 | |
| 
 | |
|     return 1 + 1 + 1 + 4 + args_count;
 | |
|   }
 | |
| 
 | |
|   // Syscall (1 + syscall_id (4) + args)
 | |
|   if (strcmp(opname, "syscall") == 0) {
 | |
|     return 1 + 4 + (node->child_count > 0 ? node->child_count - 1 : 0);
 | |
|   }
 | |
| 
 | |
|   fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
 | |
|   exit(-1);
 | |
| }
 | |
| 
 | |
| int calculate_instruction_size(ExprNode *node) {
 | |
|   if (node->child_count == 0)
 | |
|     return 0;
 | |
| 
 | |
|   return get_instruction_byte_size(node);
 | |
| }
 | |
| 
 | |
| void collect_symbols_in_node(SymbolTable *table, ExprNode *node,
 | |
|                              u32 *current_addr, int depth) {
 | |
|   char indent[32] = "";
 | |
|   for (int i = 0; i < depth; i++)
 | |
|     strcat(indent, "  ");
 | |
| 
 | |
| #ifdef ASM_DEBUG
 | |
|   printf("%s%d %s ", indent, *current_addr, node->token);
 | |
| #endif
 | |
| 
 | |
|   if (strcmp(node->token, "label") == 0) {
 | |
|     if (node->child_count >= 1) {
 | |
|       const char *name = node->children[0]->token;
 | |
| #ifdef ASM_DEBUG
 | |
|       printf(" %s -> %d\n", name, *current_addr);
 | |
| #endif
 | |
|       symbol_table_add(table, name, *current_addr, SYMBOL_CODE);
 | |
|     }
 | |
| 
 | |
|     for (size_t i = 1; i < node->child_count; i++) {
 | |
|       collect_symbols_in_node(table, node->children[i], current_addr,
 | |
|                               depth + 1);
 | |
|     }
 | |
|   } else {
 | |
|     int size = get_instruction_byte_size(node);
 | |
|     *current_addr += size;
 | |
| #ifdef ASM_DEBUG
 | |
|     printf(" +%d bytes -> %d\n", size, *current_addr);
 | |
| #endif
 | |
|   }
 | |
| }
 | |
| 
 | |
| void collect_symbols(SymbolTable *table, ExprNode *program) {
 | |
|   // First, collect all data labels (with placeholder address)
 | |
|   for (size_t i = 0; i < program->child_count; ++i) {
 | |
|     ExprNode *section = program->children[i];
 | |
|     if (strcmp(section->token, "data") == 0) {
 | |
|       for (size_t j = 0; j < section->child_count; ++j) {
 | |
|         ExprNode *item = section->children[j];
 | |
|         if (strcmp(item->token, "label") == 0 && item->child_count >= 2) {
 | |
|           const char *name = item->children[0]->token;
 | |
|           symbol_table_add(table, name, 0, SYMBOL_DATA);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Second, collect all code labels with proper nesting
 | |
|   u32 code_addr = 0;
 | |
|   for (size_t i = 0; i < program->child_count; ++i) {
 | |
|     ExprNode *section = program->children[i];
 | |
|     if (strcmp(section->token, "code") == 0) {
 | |
|       for (size_t j = 0; j < section->child_count; ++j) {
 | |
|         collect_symbols_in_node(table, section->children[j], &code_addr, 0);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| u32 allocate_data(VM *vm, SymbolTable *table, const char *name, u32 size) {
 | |
|   u32 addr = vm->mp;
 | |
|   vm->mp += size;
 | |
|   vm->frames[vm->fp].end += size;
 | |
| 
 | |
|   // Update the symbol's address
 | |
|   Symbol *sym = symbol_table_lookup(table, name);
 | |
|   if (sym && sym->type == SYMBOL_DATA) {
 | |
|     sym->address = addr;
 | |
|     sym->size = size;
 | |
|   }
 | |
| 
 | |
|   return addr;
 | |
| }
 | |
| 
 | |
| void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; }
 | |
| 
 | |
| void emit_u32(VM *vm, u32 value) {
 | |
|   write_u32(vm, code, vm->cp, value);
 | |
|   vm->cp += 4;
 | |
| }
 | |
| 
 | |
| void emit_opcode(VM *vm, Opcode op) { emit_byte(vm, op); }
 | |
| 
 | |
// Parses a register operand of the form "$<decimal index>".
//
// reg_str: operand text, e.g. "$0" or "$12".
//
// Returns the register index (0..255), or -1 when the text is not a valid
// register: missing '$', no digits, any non-digit character, or an index
// above 255. Registers are emitted as a single byte, so larger indices
// cannot be encoded and are rejected instead of being silently truncated.
int parse_register(const char *reg_str) {
  if (!reg_str || reg_str[0] != '$')
    return -1;

  const char *digit = reg_str + 1;
  if (*digit == '\0')
    return -1; // "$" on its own names no register

  int index = 0;
  for (; *digit != '\0'; ++digit) {
    if (*digit < '0' || *digit > '9')
      return -1;
    index = index * 10 + (*digit - '0');
    // Checking inside the loop also keeps `index` from overflowing.
    if (index > 0xFF)
      return -1;
  }
  return index;
}
 | |
| 
 | |
| u32 resolve_symbol(SymbolTable *table, const char *ref) {
 | |
|   // Handle symbol references (e.g., &label)
 | |
|   if (ref[0] == '&') {
 | |
|     return find_label_in_table(table, ref + 1);
 | |
|   }
 | |
| 
 | |
|   // Handle fixed-point numbers (e.g., 0.5)
 | |
|   if (strchr(ref, '.')) {
 | |
|     return TO_FIXED(atof(ref));
 | |
|   }
 | |
| 
 | |
|   // Handle hexadecimal literals (e.g., 0x7)
 | |
|   if (ref[0] == '0' && (ref[1] == 'x' || ref[1] == 'X')) {
 | |
|     char *endptr;
 | |
|     u32 value = (u32)strtoul(ref + 2, &endptr, 16); // Skip "0x"
 | |
| 
 | |
|     if (endptr == ref + 2 || *endptr != '\0') {
 | |
|       fprintf(stderr, "Invalid hex literal: %s\n", ref);
 | |
|       exit(1);
 | |
|     }
 | |
|     return value;
 | |
|   }
 | |
| 
 | |
|   // Handle decimal literals (e.g., 7)
 | |
|   char *endptr;
 | |
|   u32 value = (u32)strtoul(ref, &endptr, 10);
 | |
| 
 | |
|   if (endptr == ref || *endptr != '\0') {
 | |
|     fprintf(stderr, "Invalid decimal literal: %s\n", ref);
 | |
|     exit(1);
 | |
|   }
 | |
|   return value;
 | |
| }
 | |
| 
 | |
// Returns a newly allocated copy of a string token with its surrounding
// double quotes removed and backslash escapes decoded.
//
// quoted_str: token text. If it is at least two characters long and both
//             starts and ends with '"', the quotes are stripped and the
//             escapes \n \t \r \\ \" \' are decoded. An unknown escape is
//             kept verbatim (backslash plus the following character), and a
//             backslash that is the last character inside the quotes is
//             copied as-is. Any other input is copied unchanged.
//
// Returns NULL when quoted_str is NULL or the allocation fails; otherwise
// the caller owns the result and must free() it.
//
// Decoding never produces more bytes than it consumes, so a buffer the size
// of the input is always large enough. This avoids a separate sizing pass
// that could disagree with the copy pass.
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str)
    return NULL;

  size_t len = strlen(quoted_str);
  int is_quoted =
      len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"';

  const char *src = is_quoted ? quoted_str + 1 : quoted_str;
  size_t src_len = is_quoted ? len - 2 : len;

  char *unwrapped = malloc(src_len + 1);
  if (!unwrapped)
    return NULL;

  if (!is_quoted) {
    // Not quoted, return copy (terminator included)
    memcpy(unwrapped, src, src_len + 1);
    return unwrapped;
  }

  size_t dst_idx = 0;
  for (size_t i = 0; i < src_len; ++i) {
    char current = src[i];

    // Ordinary character, or a backslash with nothing after it
    if (current != '\\' || i + 1 >= src_len) {
      unwrapped[dst_idx++] = current;
      continue;
    }

    char escaped = src[++i]; // consume the character after the backslash
    switch (escaped) {
    case 'n':
      unwrapped[dst_idx++] = '\n';
      break;
    case 't':
      unwrapped[dst_idx++] = '\t';
      break;
    case 'r':
      unwrapped[dst_idx++] = '\r';
      break;
    case '\\':
      unwrapped[dst_idx++] = '\\';
      break;
    case '"':
      unwrapped[dst_idx++] = '"';
      break;
    case '\'':
      unwrapped[dst_idx++] = '\'';
      break;
    default:
      // Unknown escape, keep both characters
      unwrapped[dst_idx++] = '\\';
      unwrapped[dst_idx++] = escaped;
      break;
    }
  }
  unwrapped[dst_idx] = '\0';
  return unwrapped;
}
 | |
| 
 | |
| void process_data_block(VM *vm, SymbolTable *table, ExprNode *block) {
 | |
|   for (size_t i = 0; i < block->child_count; ++i) {
 | |
|     ExprNode *item = block->children[i];
 | |
|     if (strcmp(item->token, "label") == 0 && item->child_count >= 2) {
 | |
|       const char *name = item->children[0]->token;
 | |
|       ExprNode *val = item->children[1];
 | |
| 
 | |
|       if (val->child_count == 0) {
 | |
|         const char *token = val->token;
 | |
| 
 | |
|         // Case 1: String literal (enclosed in quotes)
 | |
|         if (token[0] == '"' && token[strlen(token) - 1] == '"') {
 | |
|           char *unwrapped = unwrap_string(token);
 | |
|           int len = strlen(unwrapped) + 1;
 | |
|           u32 addr = allocate_data(vm, table, name, len + 4);
 | |
| 
 | |
|           write_u32(vm, memory, addr, len);
 | |
|           for (int i = 0; i < len; i++) {
 | |
|             write_u8(vm, memory, addr + 4 + i, unwrapped[i]);
 | |
|           }
 | |
|           free(unwrapped);
 | |
|         }
 | |
|         // Case 2: Hexadecimal integer (0x...)
 | |
|         else if (token[0] == '0' && (token[1] == 'x' || token[1] == 'X')) {
 | |
|           char *endptr;
 | |
|           u32 value = (u32)strtoul(token + 2, &endptr, 16);
 | |
| 
 | |
|           if (endptr != token + strlen(token)) {
 | |
|             fprintf(stderr, "Invalid hex in data block: %s\n", token);
 | |
|             exit(1);
 | |
|           }
 | |
| 
 | |
|           u32 addr = allocate_data(vm, table, name, 4);
 | |
|           write_u32(vm, memory, addr, value);
 | |
|         }
 | |
|         // Case 3: Floating-point (has decimal point)
 | |
|         else if (strchr(token, '.')) {
 | |
|           float f = atof(token);
 | |
|           u32 addr = allocate_data(vm, table, name, 4);
 | |
|           write_u32(vm, memory, addr, TO_FIXED(f));
 | |
|         }
 | |
|         // Case 4: Decimal integer
 | |
|         else {
 | |
|           char *endptr;
 | |
|           u32 value = (u32)strtoul(token, &endptr, 10);
 | |
| 
 | |
|           if (endptr != token + strlen(token)) {
 | |
|             fprintf(stderr, "Invalid decimal in data block: %s\n", token);
 | |
|             exit(1);
 | |
|           }
 | |
| 
 | |
|           u32 addr = allocate_data(vm, table, name, 4);
 | |
|           write_u32(vm, memory, addr, value);
 | |
|           vm->mp += 4;
 | |
|         }
 | |
|       } else {
 | |
|         fprintf(stderr, "Unsupported data item\n");
 | |
|         exit(1);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void process_code_expr(VM *vm, SymbolTable *table, ExprNode *node) {
 | |
|   const char *opname = node->token;
 | |
|   if (strcmp(opname, "label") == 0) {
 | |
|     for (size_t i = 1; i < node->child_count; i++) {
 | |
|       process_code_expr(vm, table, node->children[i]);
 | |
|     }
 | |
|   } else if (strcmp(opname, "halt") == 0) {
 | |
|     emit_opcode(vm, OP_HALT);
 | |
|   } else if (strcmp(opname, "jump") == 0) {
 | |
|     emit_opcode(vm, OP_JMP);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "jump-if-flag") == 0) {
 | |
|     emit_opcode(vm, OP_JMPF);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "call") == 0) {
 | |
|     emit_opcode(vm, OP_CALL);
 | |
| 
 | |
|     if (node->child_count < 3) {
 | |
|       fprintf(stderr, "Error: call requires (args) and return register\n");
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     // Parse function address (first child)
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     if (addr == (u32)-1) {
 | |
|       fprintf(stderr, "Error: undefined symbol '%s'\n",
 | |
|               node->children[0]->token);
 | |
|       return;
 | |
|     }
 | |
|     emit_u32(vm, addr);
 | |
| 
 | |
|     // Parse argument list (second child)
 | |
|     ExprNode *args_node = node->children[1];
 | |
|     u8 arg_count = 0;
 | |
| 
 | |
|     if (args_node->child_count > 0) {
 | |
|       // Multiple arguments case
 | |
|       arg_count = args_node->child_count + 1; // +1 for the token
 | |
|     } else {
 | |
|       // Single argument case - token is the argument
 | |
|       arg_count = (args_node->token[0] != '\0') ? 1 : 0;
 | |
|     }
 | |
|     emit_byte(vm, arg_count);
 | |
| 
 | |
|     // Emit arguments based on representation
 | |
|     if (arg_count > 0) {
 | |
|       // First argument is always the token
 | |
|       const char *reg_str = args_node->token;
 | |
|       int reg = parse_register(reg_str);
 | |
|       if (reg < 0) {
 | |
|         fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str);
 | |
|         return;
 | |
|       }
 | |
|       emit_byte(vm, (u8)reg);
 | |
| 
 | |
|       // Emit children if present
 | |
|       for (size_t i = 0; i < args_node->child_count; i++) {
 | |
|         reg_str = args_node->children[i]->token;
 | |
|         reg = parse_register(reg_str);
 | |
|         if (reg < 0) {
 | |
|           fprintf(stderr, "Error: invalid argument register '%s'\n", reg_str);
 | |
|           return;
 | |
|         }
 | |
|         emit_byte(vm, (u8)reg);
 | |
|       }
 | |
|     }
 | |
|     // Parse return register (third child)
 | |
|     const char *return_reg_str = node->children[2]->token;
 | |
|     int return_reg = parse_register(return_reg_str);
 | |
| 
 | |
|     if (return_reg < 0) {
 | |
|       if (strcmp(return_reg_str, "nil") == 0) {
 | |
|         return_reg = 0xFF;
 | |
|       } else {
 | |
|         fprintf(stderr, "Error: invalid return register '%s'\n",
 | |
|                 return_reg_str);
 | |
|         return;
 | |
|       }
 | |
|     }
 | |
|     emit_byte(vm, (u8)return_reg);
 | |
| 
 | |
| } else if (strcmp(opname, "return") == 0) {
 | |
|   emit_opcode(vm, OP_RETURN);
 | |
|   
 | |
|   if (node->child_count != 1) {
 | |
|     fprintf(stderr, "Error: return requires exactly one argument\n");
 | |
|     return;
 | |
|   }
 | |
|   
 | |
|   const char *reg_str = node->children[0]->token;
 | |
|   int reg = parse_register(reg_str);
 | |
|   
 | |
|   // Handle "nil" as special case (no return value)
 | |
|   if (reg < 0) {
 | |
|     if (strcmp(reg_str, "nil") == 0) {
 | |
|       reg = 0xFF;  // Special value for "no return"
 | |
|     } else {
 | |
|       fprintf(stderr, "Error: invalid return register '%s'\n", reg_str);
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
|   emit_byte(vm, (u8)reg);
 | |
|   } else if (strcmp(opname, "load-immediate") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_IMM);
 | |
|     int reg = parse_register(node->children[0]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[1]->token);
 | |
|     emit_byte(vm, reg);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-absolute-8") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_ABS_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-absolute-16") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_ABS_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-absolute-32") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_ABS_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-indirect-8") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_IND_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "load-indirect-16") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_IND_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "load-indirect-32") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_IND_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "malloc") == 0) {
 | |
|     emit_opcode(vm, OP_MALLOC);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "memset-8") == 0) {
 | |
|     emit_opcode(vm, OP_MEMSET_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int value = parse_register(node->children[1]->token);
 | |
|     int count = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, value);
 | |
|     emit_byte(vm, count);
 | |
|   } else if (strcmp(opname, "memset-16") == 0) {
 | |
|     emit_opcode(vm, OP_MEMSET_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int value = parse_register(node->children[1]->token);
 | |
|     int count = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, value);
 | |
|     emit_byte(vm, count);
 | |
|   } else if (strcmp(opname, "memset") == 0) {
 | |
|     emit_opcode(vm, OP_MEMSET_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int value = parse_register(node->children[1]->token);
 | |
|     int count = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, value);
 | |
|     emit_byte(vm, count);
 | |
|   } else if (strcmp(opname, "store-absolute-8") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_ABS_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "store-absolute-16") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_ABS_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "store-absolute-32") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_ABS_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "store-indirect-8") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_IND_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "store-indirect-16") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_IND_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "store-indirect-32") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_IND_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|   } else if (strcmp(opname, "store-offset-8") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_OFF_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "store-offset-16") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_OFF_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "store-offset-32") == 0) {
 | |
|     emit_opcode(vm, OP_STORE_OFF_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-offset-8") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_OFF_8);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-offset-16") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_OFF_16);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "load-offset-32") == 0) {
 | |
|     emit_opcode(vm, OP_LOAD_OFF_32);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     u32 addr = resolve_symbol(table, node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_u32(vm, addr);
 | |
|   } else if (strcmp(opname, "register-move") == 0) {
 | |
|     emit_opcode(vm, OP_REG_MOV);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "syscall") == 0) {
 | |
|     emit_opcode(vm, OP_SYSCALL);
 | |
| 
 | |
|     // Parse syscall ID
 | |
|     u32 syscall_id = 0;
 | |
|     const char *syscall_name = node->children[0]->token;
 | |
|     if (strcmp(syscall_name, "EXIT") == 0)
 | |
|       syscall_id = SYSCALL_EXIT;
 | |
|     else if (strcmp(syscall_name, "OPEN") == 0)
 | |
|       syscall_id = SYSCALL_DEVICE_OPEN;
 | |
|     else if (strcmp(syscall_name, "READ") == 0)
 | |
|       syscall_id = SYSCALL_DEVICE_READ;
 | |
|     else if (strcmp(syscall_name, "WRITE") == 0)
 | |
|       syscall_id = SYSCALL_DEVICE_WRITE;
 | |
|     else if (strcmp(syscall_name, "CLOSE") == 0)
 | |
|       syscall_id = SYSCALL_DEVICE_CLOSE;
 | |
|     else if (strcmp(syscall_name, "IOCTL") == 0)
 | |
|       syscall_id = SYSCALL_DEVICE_IOCTL;
 | |
|     else if (strcmp(syscall_name, "REFRESH") == 0)
 | |
|       syscall_id = SYSCALL_DEVICE_REFRESH;
 | |
| 
 | |
|     emit_u32(vm, syscall_id);
 | |
| 
 | |
|     // Emit register arguments
 | |
|     for (size_t i = 1; i < node->child_count; ++i) {
 | |
|       int reg = parse_register(node->children[i]->token);
 | |
|       emit_byte(vm, reg);
 | |
|     }
 | |
|   } else if (strcmp(opname, "bit-shift-left") == 0) {
 | |
|     emit_opcode(vm, OP_SLL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "bit-shift-right") == 0) {
 | |
|     emit_opcode(vm, OP_SRL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "bit-shift-re") == 0) {
 | |
|     emit_opcode(vm, OP_SRE);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "bit-and") == 0) {
 | |
|     emit_opcode(vm, OP_BAND);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "bit-or") == 0) {
 | |
|     emit_opcode(vm, OP_BOR);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "bit-xor") == 0) {
 | |
|     emit_opcode(vm, OP_BXOR);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "add-int") == 0) {
 | |
|     emit_opcode(vm, OP_ADD_INT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "sub-int") == 0) {
 | |
|     emit_opcode(vm, OP_SUB_INT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "mul-int") == 0) {
 | |
|     emit_opcode(vm, OP_MUL_INT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "div-int") == 0) {
 | |
|     emit_opcode(vm, OP_DIV_INT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "add-nat") == 0) {
 | |
|     emit_opcode(vm, OP_ADD_NAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "sub-nat") == 0) {
 | |
|     emit_opcode(vm, OP_SUB_NAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "mul-nat") == 0) {
 | |
|     emit_opcode(vm, OP_MUL_NAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "div-nat") == 0) {
 | |
|     emit_opcode(vm, OP_DIV_NAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "add-real") == 0) {
 | |
|     emit_opcode(vm, OP_ADD_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "sub-real") == 0) {
 | |
|     emit_opcode(vm, OP_SUB_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "mul-real") == 0) {
 | |
|     emit_opcode(vm, OP_MUL_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "div-real") == 0) {
 | |
|     emit_opcode(vm, OP_DIV_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "int-to-real") == 0) {
 | |
|     emit_opcode(vm, OP_INT_TO_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "nat-to-real") == 0) {
 | |
|     emit_opcode(vm, OP_NAT_TO_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "real-to-int") == 0) {
 | |
|     emit_opcode(vm, OP_REAL_TO_INT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "real-to-nat") == 0) {
 | |
|     emit_opcode(vm, OP_REAL_TO_NAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "jump-eq-int") == 0) {
 | |
|     emit_opcode(vm, OP_JEQ_INT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-neq-int") == 0) {
 | |
|     emit_opcode(vm, OP_JNEQ_INT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-gt-int") == 0) {
 | |
|     emit_opcode(vm, OP_JGT_INT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-lt-int") == 0) {
 | |
|     emit_opcode(vm, OP_JLT_INT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-le-int") == 0) {
 | |
|     emit_opcode(vm, OP_JLE_INT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-ge-int") == 0) {
 | |
|     emit_opcode(vm, OP_JGE_INT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-eq-nat") == 0) {
 | |
|     emit_opcode(vm, OP_JEQ_NAT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-neq-nat") == 0) {
 | |
|     emit_opcode(vm, OP_JNEQ_NAT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-gt-nat") == 0) {
 | |
|     emit_opcode(vm, OP_JGT_NAT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-lt-nat") == 0) {
 | |
|     emit_opcode(vm, OP_JLT_NAT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-le-nat") == 0) {
 | |
|     emit_opcode(vm, OP_JLE_NAT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-ge-nat") == 0) {
 | |
|     emit_opcode(vm, OP_JGE_NAT);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-eq-real") == 0) {
 | |
|     emit_opcode(vm, OP_JEQ_REAL);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-neq-real") == 0) {
 | |
|     emit_opcode(vm, OP_JNEQ_REAL);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-gt-real") == 0) {
 | |
|     emit_opcode(vm, OP_JGT_REAL);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-lt-real") == 0) {
 | |
|     emit_opcode(vm, OP_JLT_REAL);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-le-real") == 0) {
 | |
|     emit_opcode(vm, OP_JLE_REAL);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "jump-ge-real") == 0) {
 | |
|     emit_opcode(vm, OP_JGE_REAL);
 | |
|     u32 addr = resolve_symbol(table, node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_u32(vm, addr);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "string-length") == 0) {
 | |
|     emit_opcode(vm, OP_STRLEN);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "string-eq") == 0) {
 | |
|     emit_opcode(vm, OP_STREQ);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "string-concat") == 0) {
 | |
|     emit_opcode(vm, OP_STRCAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "string-get-char") == 0) {
 | |
|     emit_opcode(vm, OP_STR_GET_CHAR);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "string-find-char") == 0) {
 | |
|     emit_opcode(vm, OP_STR_FIND_CHAR);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|   } else if (strcmp(opname, "string-slice") == 0) {
 | |
|     emit_opcode(vm, OP_STR_SLICE);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src1 = parse_register(node->children[1]->token);
 | |
|     int src2 = parse_register(node->children[2]->token);
 | |
|     int src3 = parse_register(node->children[3]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src1);
 | |
|     emit_byte(vm, src2);
 | |
|     emit_byte(vm, src3);
 | |
|   } else if (strcmp(opname, "int-to-string") == 0) {
 | |
|     emit_opcode(vm, OP_INT_TO_STRING);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "nat-to-string") == 0) {
 | |
|     emit_opcode(vm, OP_NAT_TO_STRING);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "real-to-string") == 0) {
 | |
|     emit_opcode(vm, OP_REAL_TO_STRING);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "string-to-int") == 0) {
 | |
|     emit_opcode(vm, OP_STRING_TO_INT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "string-to-nat") == 0) {
 | |
|     emit_opcode(vm, OP_STRING_TO_NAT);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else if (strcmp(opname, "string-to-real") == 0) {
 | |
|     emit_opcode(vm, OP_STRING_TO_REAL);
 | |
|     int dest = parse_register(node->children[0]->token);
 | |
|     int src = parse_register(node->children[1]->token);
 | |
|     emit_byte(vm, dest);
 | |
|     emit_byte(vm, src);
 | |
|   } else {
 | |
|     fprintf(stderr, "Unknown opcode: %s\n", opname);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void assemble(VM *vm, ExprNode *program) {
 | |
|   SymbolTable table;
 | |
|   symbol_table_init(&table);
 | |
| 
 | |
|   // PASS 1: Collect all symbols (both code and data)
 | |
|   collect_symbols(&table, program);
 | |
| 
 | |
|   // PASS 2: Process data section using symbol table
 | |
|   for (size_t i = 0; i < program->child_count; ++i) {
 | |
|     ExprNode *section = program->children[i];
 | |
|     if (strcmp(section->token, "data") == 0) {
 | |
|       process_data_block(vm, &table, section);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // PASS 3: Process code section using complete symbol table
 | |
|   for (size_t i = 0; i < program->child_count; ++i) {
 | |
|     ExprNode *section = program->children[i];
 | |
|     if (strcmp(section->token, "code") == 0) {
 | |
|       for (size_t j = 0; j < section->child_count; ++j) {
 | |
|         process_code_expr(vm, &table, section->children[j]);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Cleanup symbol table
 | |
|   for (int i = 0; i < table.count; i++) {
 | |
| #ifdef ASM_DEBUG
 | |
|     Symbol s = table.symbols[i];
 | |
|     printf("%s[%d]\n", s.name, s.address);
 | |
| #endif
 | |
|     free(table.symbols[i].name);
 | |
|   }
 | |
|   free(table.symbols);
 | |
| }
 |