1
0
Fork 0
undar-lang-register/src/tools/assembler/assembler.c

388 lines
12 KiB
C

#include "assembler.h"
#include "../../vm/common.h"
#include "../../vm/fixed.h"
#include "../../vm/libc.h"
#include "../../vm/opcodes.h"
#include "lexer.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Allocate an empty symbol table with room for 16 symbols.
//
// Both the table and its `symbols` array come from malloc. If either
// allocation fails, a diagnostic is written to stderr and the process exits
// with status 1, so the returned pointer is never null.
SymbolTable *symbol_table_init() {
  SymbolTable *table = malloc(sizeof *table);
  if (!table) {
    fprintf(stderr, "Error: out of memory allocating symbol table\n");
    exit(1);
  }

  table->count = 0;
  table->capacity = 16;
  table->symbols = malloc(16 * sizeof(Symbol));
  if (!table->symbols) {
    fprintf(stderr, "Error: out of memory allocating symbol storage\n");
    exit(1);
  }
  return table;
}
// Allocate an empty names table with room for 16 name pointers.
//
// Both the table and its `names` array come from malloc. If either allocation
// fails, a diagnostic is written to stderr and the process exits with
// status 1, so the returned pointer is never null.
NamesTable *names_table_init() {
  NamesTable *table = malloc(sizeof *table);
  if (!table) {
    fprintf(stderr, "Error: out of memory allocating names table\n");
    exit(1);
  }

  table->count = 0;
  table->capacity = 16;
  table->names = malloc(16 * sizeof(char *));
  if (!table->names) {
    fprintf(stderr, "Error: out of memory allocating names storage\n");
    exit(1);
  }
  return table;
}
// Intern `name` in `table` and return its index.
//
// If an equal string (per strcmp) is already stored, its existing index is
// returned and nothing is allocated. Otherwise a private heap copy of `name`
// is appended, doubling the pointer array when it is full.
//
// `name` must be a NUL-terminated string. On allocation failure a diagnostic
// is written to stderr and the process exits with status 1.
u32 names_table_add(NamesTable *table, const char *name) {
  for (u32 i = 0; i < table->count; i++) {
    if (strcmp(table->names[i], name) == 0) {
      return i;
    }
  }

  if (table->count >= table->capacity) {
    // Grow through a temporary so the existing array is not lost if realloc
    // fails, and only commit the new capacity once the memory exists.
    u32 grown = table->capacity ? table->capacity * 2 : 16;
    char **resized = realloc(table->names, grown * sizeof(char *));
    if (!resized) {
      fprintf(stderr, "Error: out of memory growing names table\n");
      exit(1);
    }
    table->names = resized;
    table->capacity = grown;
  }

  size_t bytes = strlen(name) + 1; // include the terminating NUL
  char *copy = malloc(bytes);
  if (!copy) {
    fprintf(stderr, "Error: out of memory storing name '%s'\n", name);
    exit(1);
  }
  memcpy(copy, name, bytes);

  u32 index = table->count;
  table->names[index] = copy;
  table->count++;
  return index;
}
// Append a copy of `*s` to `table` and return the index it was stored at.
//
// The symbol is copied by value, so the caller keeps ownership of `s`. The
// backing array is doubled when full. On allocation failure a diagnostic is
// written to stderr and the process exits with status 1.
u32 symbol_table_add(SymbolTable *table, Symbol *s) {
  if (table->count >= table->capacity) {
    // Grow through a temporary so the existing array is not lost if realloc
    // fails, and only commit the new capacity once the memory exists.
    u32 grown = table->capacity ? table->capacity * 2 : 16;
    Symbol *resized = realloc(table->symbols, grown * sizeof(Symbol));
    if (!resized) {
      fprintf(stderr, "Error: out of memory growing symbol table\n");
      exit(1);
    }
    table->symbols = resized;
    table->capacity = grown;
  }

  u32 index = table->count;
  table->symbols[index] = *s;
  table->count++;
  return index;
}
// Find the symbol whose name string equals `name`.
//
// The names table is scanned for entries that compare equal to `name`; for
// each such entry the symbol table is scanned for the first symbol whose
// `name` field holds that entry's index. Returns a pointer into
// `table->symbols`, or nil when no symbol matches.
Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table,
                            const char *name) {
  for (u32 name_idx = 0; name_idx < nt->count; name_idx++) {
    if (strcmp(nt->names[name_idx], name) != 0) {
      continue;
    }

    u32 remaining = table->count;
    Symbol *candidate = table->symbols;
    while (remaining-- > 0) {
      if (candidate->name == name_idx) {
        return candidate;
      }
      candidate++;
    }
  }
  return nil;
}
// Return the `ref` field of the symbol called `name`.
//
// When symbol_table_lookup finds nothing, an "Undefined Symbol" error is
// written to stderr and the process exits with status 1.
u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) {
  Symbol *found = symbol_table_lookup(nt, table, name);
  if (found) {
    return found->ref;
  }

  fprintf(stderr, "Error: Undefined Symbol '%s'\n", name);
  exit(1);
}
// Store `byte` at the current code position `vm->cp`, then advance `vm->cp`
// by one. No bounds check is performed here.
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
  vm->cp++;
}
// Store `value` into the code buffer at `vm->cp` via write_u32, then advance
// `vm->cp` by 4. No bounds check is performed here.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp = vm->cp + 4;
}
// Emit `op` as a single byte through emit_byte.
void emit_opcode(VM *vm, Opcode op) {
  emit_byte(vm, (u8)op);
}
// Parse a register operand of the form "$<number>" and return the number.
//
// Returns -1 when `reg_str` is null, does not start with '$', has no digits
// after the '$', or names a value that is negative or does not fit in an int.
// Characters following the number are ignored, as they were with atoi.
int parse_register(const char *reg_str) {
  if (!reg_str || reg_str[0] != '$') {
    return -1;
  }

  const char *digits = reg_str + 1;
  char *end = NULL;
  long index = strtol(digits, &end, 10);

  // end == digits means strtol consumed nothing, e.g. "$" or "$abc"; atoi
  // would have silently reported register 0 for those.
  if (end == digits) {
    return -1;
  }
  if (index < 0 || index > INT_MAX) {
    return -1;
  }
  return (int)index;
}
// Turn an operand string into a 32-bit value.
//
//   "&name"          -> the `ref` of symbol `name` (get_ref exits if undefined)
//   contains a '.'   -> atof result converted with float_to_fixed
//   anything else    -> base-10 integer parsed with strtoul
//
// A string that is not entirely a decimal literal in the last case causes an
// error on stderr and exit(1).
u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) {
  char *stop;
  u32 parsed;

  if (*ref == '&') {
    // skip the '&' and resolve the label that follows it
    return get_ref(nt, table, &ref[1]);
  }

  if (strchr(ref, '.') != NULL) {
    return float_to_fixed(atof(ref));
  }

  parsed = (u32)strtoul(ref, &stop, 10);
  if (stop != ref && *stop == '\0') {
    return parsed;
  }

  fprintf(stderr, "Invalid decimal literal: %s\n", ref);
  exit(1);
}
// Copy `length` bytes of token text into a fresh NUL-terminated heap string.
// Exits the process with status 1 if the allocation fails. Caller frees.
static char *copy_token_text(const char *start, size_t length) {
  char *text = malloc(length + 1);
  if (!text) {
    fprintf(stderr, "Error: out of memory copying token text\n");
    exit(1);
  }
  memcpy(text, start, length);
  text[length] = '\0';
  return text;
}

// Assemble one global declaration, called after the `global` keyword has been
// consumed.
//
// Reads four tokens in this order: a type, '=', an identifier, and a literal
// value. The value is written into vm->memory at vm->mp, vm->mp and the
// current frame's `end` are advanced, and a symbol recording the name, type
// and address is appended to `st`.
// NOTE(review): the '=' is expected before the name; confirm this order
// against the language grammar.
//
// Numeric literals are stored with write_u32. Strings are stored as a 4-byte
// length, the unescaped bytes, and a trailing NUL.
//
// Returns false as soon as an unexpected token is seen. Tokens consumed up to
// that point are not pushed back, and a name that was already interned stays
// in `nt`.
bool global(VM *vm, NamesTable *nt, SymbolTable *st) {
  // symbol_table_add stores a copy, so a stack Symbol is enough and nothing
  // leaks on the early-return paths.
  Symbol sym;
  ValueType t;
  memset(&sym, 0, sizeof sym);
  // Zeroed so the fields the switch below does not assign (size for str,
  // everything for an identifier type) are not read uninitialized later.
  memset(&t, 0, sizeof t);

  Token type_token = nextToken();
  switch (type_token.type) {
  case TOKEN_TYPE_I8:
    t.type = I8;
    t.size = 1;
    break;
  case TOKEN_TYPE_U8:
    t.type = U8;
    t.size = 1;
    break;
  case TOKEN_TYPE_I16:
    t.type = I16;
    t.size = 2;
    break;
  case TOKEN_TYPE_U16:
    t.type = U16;
    t.size = 2;
    break;
  case TOKEN_TYPE_INT:
    t.type = I32;
    t.size = 4;
    break;
  case TOKEN_TYPE_NAT:
    t.type = U32;
    t.size = 4;
    break;
  case TOKEN_TYPE_REAL:
    t.type = F32;
    t.size = 4;
    break;
  case TOKEN_TYPE_STR:
    t.type = STR;
    break;
  case TOKEN_IDENTIFIER:
    // NOTE(review): a user-named type is accepted but not resolved; its
    // ValueType stays zeroed.
    break;
  default:
    return false;
  }

  Token eq = nextToken();
  if (eq.type != TOKEN_EQ) {
    return false;
  }

  Token name = nextToken();
  if (name.type != TOKEN_IDENTIFIER) {
    return false;
  }

  // Tokens are (start, length) slices, printed elsewhere with "%.*s", so make
  // a NUL-terminated copy before handing the text to strlen/strcmp-based code.
  char *name_text = copy_token_text(name.start, (size_t)name.length);
  sym.name = names_table_add(nt, name_text);
  free(name_text); // names_table_add keeps its own copy

  u32 addr = vm->mp;
  sym.ref = addr;

  Token value = nextToken();
  switch (value.type) {
  case TOKEN_LITERAL_INT:
  case TOKEN_LITERAL_NAT:
  case TOKEN_LITERAL_REAL: {
    char *literal = copy_token_text(value.start, (size_t)value.length);
    u32 result = resolve_symbol(nt, st, literal);
    free(literal);

    // NOTE(review): four bytes are always written, but the cursor advances by
    // t.size, so for 1- and 2-byte types the next global overlaps the upper
    // bytes. Confirm this is intended.
    write_u32(vm, memory, addr, result);
    vm->mp += t.size;
    vm->frames[vm->fp].end += t.size;
    break;
  }
  case TOKEN_LITERAL_STR: {
    const char *src = value.start;
    u32 len = 0;
    u32 i = 0;
    while (i < value.length) {
      char c = src[i++];
      if (c == '\\' && i < value.length) {
        char escaped = src[i++];
        switch (escaped) {
        case 'n':
          c = '\n';
          break;
        case 't':
          c = '\t';
          break;
        case 'r':
          c = '\r';
          break;
        case '\\':
        case '"':
        case '\'':
          // store the escaped character itself, not the backslash
          c = escaped;
          break;
        default:
          // unknown escape: keep the backslash and reprocess the next char
          i--;
          break;
        }
      }
      // increment outside the call: write_u8 is used like a macro and may
      // evaluate its address argument more than once
      write_u8(vm, memory, addr + 4 + len, c);
      len++;
    }
    u32 size = len + 5; // 4 (length prefix) + len bytes + 1 (NUL)
    vm->mp += size;
    vm->frames[vm->fp].end += size;
    write_u32(vm, memory, addr, len);
    write_u8(vm, memory, addr + 4 + len, '\0');
    break;
  }
  default:
    return false;
  }

  sym.type = t;
  symbol_table_add(st, &sym);
  return true;
}
// Placeholder invoked by assemble() for the `fn` keyword token.
// Nothing is parsed or emitted yet: the arguments are only marked as used and
// success is always reported.
bool function(VM *vm, NamesTable *nt, SymbolTable *st) {
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
// Placeholder invoked by assemble() for `plex` and built-in type tokens.
// Nothing is parsed or emitted yet: the arguments are only marked as used and
// success is always reported.
bool variable(VM *vm, NamesTable *nt, SymbolTable *st) {
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
// Placeholder invoked by assemble() for the `loop` and `else` keyword tokens.
// Nothing is parsed or emitted yet: the arguments are only marked as used and
// success is always reported.
bool label(VM *vm, NamesTable *nt, SymbolTable *st) {
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
void assemble(VM *vm, char *source) {
SymbolTable *st = symbol_table_init();
NamesTable *nt = names_table_init();
initLexer(source);
Token token;
do {
token = nextToken();
if (token.type == TOKEN_ERROR) {
printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
break;
}
if (token.type != TOKEN_EOF) {
printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type),
token.length, token.start);
if (token.type == TOKEN_KEYWORD_GLOBAL) {
if (!global(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
}
if (token.type == TOKEN_KEYWORD_FN) {
if (!function(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
}
if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 ||
token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT ||
token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 ||
token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL ||
token.type == TOKEN_TYPE_STR) {
if (!variable(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
}
if (token.type == TOKEN_KEYWORD_LOOP ||
token.type == TOKEN_KEYWORD_ELSE) {
if (!label(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
}
if (token.type == TOKEN_IDENTIFIER) {
// check to see if it is an opcode first
if (streq(token.start, "exit")) {
} else if (streq(token.start, "call")) {
} else if (streq(token.start, "syscall")) {
} else if (streq(token.start, "load_immediate")) {
} else if (streq(token.start, "load_indirect_8")) {
} else if (streq(token.start, "load_indirect_16")) {
} else if (streq(token.start, "load_indirect_32")) {
} else if (streq(token.start, "load_absolute_8")) {
} else if (streq(token.start, "load_absolute_16")) {
} else if (streq(token.start, "load_absolute_32")) {
} else if (streq(token.start, "load_offset_8")) {
} else if (streq(token.start, "load_offset_16")) {
} else if (streq(token.start, "load_offset_32")) {
} else if (streq(token.start, "store_absolute_8")) {
} else if (streq(token.start, "store_absolute_16")) {
} else if (streq(token.start, "store_absolute_32")) {
} else if (streq(token.start, "store_indirect_8")) {
} else if (streq(token.start, "store_indirect_16")) {
} else if (streq(token.start, "store_indirect_32")) {
} else if (streq(token.start, "store_offset_8")) {
} else if (streq(token.start, "store_offset_16")) {
} else if (streq(token.start, "store_offset_32")) {
} else if (streq(token.start, "malloc")) {
} else if (streq(token.start, "malloc_immediate")) {
} else if (streq(token.start, "memset_8")) {
} else if (streq(token.start, "memset_16")) {
} else if (streq(token.start, "memset_32")) {
} else if (streq(token.start, "register_move")) {
} else if (streq(token.start, "add_int")) {
} else if (streq(token.start, "sub_int")) {
} else if (streq(token.start, "mul_int")) {
} else if (streq(token.start, "div_int")) {
} else if (streq(token.start, "abs_int")) {
} else if (streq(token.start, "neg_int")) {
} else if (streq(token.start, "add_nat")) {
} else if (streq(token.start, "sub_nat")) {
} else if (streq(token.start, "mul_nat")) {
} else if (streq(token.start, "div_nat")) {
} else if (streq(token.start, "abs_nat")) {
} else if (streq(token.start, "neg_nat")) {
} else if (streq(token.start, "add_real")) {
} else if (streq(token.start, "sub_real")) {
} else if (streq(token.start, "mul_real")) {
} else if (streq(token.start, "div_real")) {
} else if (streq(token.start, "abs_real")) {
} else if (streq(token.start, "neg_real")) {
} else if (streq(token.start, "int_to_real")) {
} else if (streq(token.start, "nat_to_real")) {
} else if (streq(token.start, "real_to_int")) {
} else if (streq(token.start, "real_to_nat")) {
} else if (streq(token.start, "bit_shift_left")) {
} else if (streq(token.start, "bit_shift_right")) {
} else if (streq(token.start, "bit_shift_r_ext")) {
} else if (streq(token.start, "bit_and")) {
} else if (streq(token.start, "bit_or")) {
} else if (streq(token.start, "bit_xor")) {
} else if (streq(token.start, "jump")) {
} else if (streq(token.start, "jump_if_flag")) {
} else if (streq(token.start, "jump_eq_int")) {
} else if (streq(token.start, "jump_neq_int")) {
} else if (streq(token.start, "jump_gt_int")) {
} else if (streq(token.start, "jump_lt_int")) {
} else if (streq(token.start, "jump_le_int")) {
} else if (streq(token.start, "jump_ge_int")) {
} else if (streq(token.start, "jump_eq_nat")) {
} else if (streq(token.start, "jump_neq_nat")) {
} else if (streq(token.start, "jump_gt_nat")) {
} else if (streq(token.start, "jump_lt_nat")) {
} else if (streq(token.start, "jump_le_nat")) {
} else if (streq(token.start, "jump_ge_nat")) {
} else if (streq(token.start, "jump_eq_real")) {
} else if (streq(token.start, "jump_neq_real")) {
} else if (streq(token.start, "jump_ge_real")) {
} else if (streq(token.start, "jump_gt_real")) {
} else if (streq(token.start, "jump_lt_real")) {
} else if (streq(token.start, "jump_le_real")) {
} else if (streq(token.start, "string_length")) {
} else if (streq(token.start, "string_eq")) {
} else if (streq(token.start, "string_concat")) {
} else if (streq(token.start, "string_get_char")) {
} else if (streq(token.start, "string_find_char")) {
} else if (streq(token.start, "string_slice")) {
} else if (streq(token.start, "int_to_string")) {
} else if (streq(token.start, "nat_to_string")) {
} else if (streq(token.start, "real_to_string")) {
} else if (streq(token.start, "string_to_int")) {
} else if (streq(token.start, "string_to_nat")) {
} else if (streq(token.start, "string_to_real")) {
} else {
// some other identifier
}
}
}
} while (token.type != TOKEN_EOF);
}