/*
 * Source listing header (not part of the program):
 *   1 / 0 / Fork 0
 *   undar-lang-register/src/tools/assembler/assembler.c
 *   417 lines, 12 KiB, C
 */

#include "assembler.h"
#include "../../vm/common.h"
#include "../../vm/fixed.h"
#include "../../vm/libc.h"
#include "../../vm/opcodes.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Allocate an empty symbol table with room for 16 symbols.
//
// Running out of memory is treated as fatal, in the same way as the other
// unrecoverable errors in this file: a message is printed to stderr and the
// process exits with status 1. The function therefore never returns a table
// whose storage is missing.
//
// Returns a heap-allocated table (count 0, capacity 16) owned by the caller.
SymbolTable *symbol_table_init() {
  SymbolTable *table = malloc(sizeof *table);
  if (!table) {
    fprintf(stderr, "Error: out of memory allocating symbol table\n");
    exit(1);
  }
  table->symbols = malloc(16 * sizeof(Symbol));
  if (!table->symbols) {
    fprintf(stderr, "Error: out of memory allocating symbol table\n");
    exit(1);
  }
  table->count = 0;
  table->capacity = 16;
  return table;
}
// Allocate an empty names table with room for 16 name pointers.
//
// Running out of memory is treated as fatal, in the same way as the other
// unrecoverable errors in this file: a message is printed to stderr and the
// process exits with status 1.
//
// Returns a heap-allocated table (count 0, capacity 16) owned by the caller.
NamesTable *names_table_init() {
  NamesTable *table = malloc(sizeof *table);
  if (!table) {
    fprintf(stderr, "Error: out of memory allocating names table\n");
    exit(1);
  }
  table->names = malloc(16 * sizeof(char *));
  if (!table->names) {
    fprintf(stderr, "Error: out of memory allocating names table\n");
    exit(1);
  }
  table->count = 0;
  table->capacity = 16;
  return table;
}
// Intern `name` in the names table and return its index.
//
// If an equal string is already stored, its existing index is returned and
// nothing is allocated. Otherwise a private copy of `name` is appended,
// doubling the pointer array when it is full.
//
// `name` must be a NUL-terminated string; the table keeps its own copy, so
// the caller retains ownership of the argument.
//
// Allocation failure is fatal (message on stderr, exit status 1).
u32 names_table_add(NamesTable *table, const char *name) {
  for (u32 i = 0; i < table->count; i++) {
    if (strcmp(table->names[i], name) == 0) {
      return i;
    }
  }
  if (table->count >= table->capacity) {
    // A zero capacity would never grow by doubling, so restart from 16.
    u32 new_capacity = table->capacity ? table->capacity * 2 : 16;
    // Keep the old pointer until realloc is known to have succeeded;
    // assigning the result directly would lose the array on failure.
    void *grown = realloc(table->names, new_capacity * sizeof(char *));
    if (!grown) {
      fprintf(stderr, "Error: out of memory growing names table\n");
      exit(1);
    }
    table->names = grown;
    table->capacity = new_capacity;
  }
  size_t size = strlen(name) + 1;
  char *copy = malloc(size);
  if (!copy) {
    fprintf(stderr, "Error: out of memory storing name '%s'\n", name);
    exit(1);
  }
  memcpy(copy, name, size);
  u32 index = table->count;
  table->names[index] = copy;
  table->count++;
  return index;
}
// Append a copy of symbol `s` to the table and return the index it was
// stored at, doubling the symbol array when it is full.
//
// Allocation failure is fatal (message on stderr, exit status 1).
u32 symbol_table_add(SymbolTable *table, Symbol s) {
  if (table->count >= table->capacity) {
    // A zero capacity would never grow by doubling, so restart from 16.
    u32 new_capacity = table->capacity ? table->capacity * 2 : 16;
    // Keep the old pointer until realloc is known to have succeeded;
    // assigning the result directly would lose the array on failure.
    void *grown = realloc(table->symbols, new_capacity * sizeof(Symbol));
    if (!grown) {
      fprintf(stderr, "Error: out of memory growing symbol table\n");
      exit(1);
    }
    table->symbols = grown;
    table->capacity = new_capacity;
  }
  u32 index = table->count;
  table->symbols[index] = s;
  table->count++;
  return index;
}
// Find the symbol whose name equals `name`.
//
// Symbols store an index into the names table rather than the string itself,
// so the search first finds a names-table entry equal to `name` and then
// scans the symbols for one carrying that index.
//
// Returns a pointer into the table's symbol array, or nil when no symbol
// with that name exists.
Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table,
                            const char *name) {
  for (u32 name_idx = 0; name_idx < nt->count; name_idx++) {
    if (strcmp(nt->names[name_idx], name) != 0) {
      continue;
    }
    for (int sym_idx = 0; sym_idx < table->count; sym_idx++) {
      Symbol *candidate = &table->symbols[sym_idx];
      if (candidate->name == name_idx) {
        return candidate;
      }
    }
  }
  return nil;
}
// Return the `ref` value recorded for the symbol called `name`.
//
// An unknown name is a fatal error: it is reported on stderr and the process
// exits with status 1.
u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) {
  Symbol *found = symbol_table_lookup(nt, table, name);
  if (found) {
    return found->ref;
  }
  fprintf(stderr, "Error: Undefined Symbol '%s'\n", name);
  exit(1);
}
// Store `byte` at the current code position and advance that position by one.
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
  vm->cp++;
}
// Write `value` into the code buffer at the current code position using
// write_u32, then advance the code position by four bytes.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp = vm->cp + 4;
}
// Emit an opcode as a single byte via emit_byte.
void emit_opcode(VM *vm, Opcode op) {
  u8 encoded = op;
  emit_byte(vm, encoded);
}
// Parse a register operand of the form "$<digits>" and return its number.
//
// Returns -1 when `reg_str` is NULL, does not start with '$', has no decimal
// digit immediately after the '$' (e.g. "$", "$abc", "$-3"), or names a
// number that does not fit in an int.
//
// Parsing stops at the first non-digit, so text following the register
// (e.g. "$12 $13") is ignored, as it was with the previous atoi-based
// implementation.
int parse_register(const char *reg_str) {
  if (reg_str == NULL || reg_str[0] != '$')
    return -1;
  const char *p = reg_str + 1;
  // Require at least one digit so "$" and "$abc" are not read as register 0.
  if (*p < '0' || *p > '9')
    return -1;
  int value = 0;
  for (; *p >= '0' && *p <= '9'; p++) {
    int digit = *p - '0';
    // Reject before multiplying: signed overflow is undefined behaviour.
    if (value > (INT_MAX - digit) / 10)
      return -1;
    value = value * 10 + digit;
  }
  return value;
}
// Turn an operand string into the 32-bit value to emit.
//
//   "&name"  -> the `ref` of the named symbol (fatal if undefined, see get_ref)
//   has '.'  -> parsed as a real number and converted with float_to_fixed
//   else     -> parsed as a base-10 integer
//
// `ref` must be a NUL-terminated string containing only the operand.
// Malformed or out-of-range literals are fatal: a message is printed to
// stderr and the process exits with status 1.
u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) {
  // Handle symbol references (e.g., &label)
  if (ref[0] == '&') {
    return get_ref(nt, table, ref + 1);
  }
  char *endptr;
  // fixed-point numbers (e.g., 0.5)
  if (strchr(ref, '.')) {
    // strtod instead of atof so trailing garbage ("1.5x") or a string with
    // no number at all ("a.b") is rejected rather than silently read as 0.
    double real = strtod(ref, &endptr);
    if (endptr == ref || *endptr != '\0') {
      fprintf(stderr, "Invalid real literal: %s\n", ref);
      exit(1);
    }
    return float_to_fixed(real);
  }
  // decimal literals (e.g., 7)
  long long parsed = strtoll(ref, &endptr, 10);
  if (endptr == ref || *endptr != '\0') {
    fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    exit(1);
  }
  // Accept anything representable in 32 bits, signed or unsigned. strtoll
  // clamps to LLONG_MIN/LLONG_MAX on overflow, which this check also rejects.
  if (parsed < -2147483648LL || parsed > 4294967295LL) {
    fprintf(stderr, "Decimal literal out of range: %s\n", ref);
    exit(1);
  }
  // Negative values wrap to their two's-complement bit pattern.
  return (u32)parsed;
}
// Return a newly allocated copy of `quoted_str` with surrounding double
// quotes removed and backslash escapes decoded.
//
// Recognised escapes: \n \t \r \\ \" \'. An unrecognised escape is kept
// verbatim (both the backslash and the following character), and a backslash
// that is the last character inside the quotes is copied as-is.
//
// A string that is not wrapped in double quotes is copied unchanged.
//
// Returns NULL when `quoted_str` is NULL or when allocation fails. The
// caller owns the result and must free() it.
static char *unwrap_string(const char *quoted_str) {
  if (!quoted_str)
    return NULL;
  size_t len = strlen(quoted_str);
  int is_quoted =
      len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"';
  const char *src = is_quoted ? quoted_str + 1 : quoted_str;
  size_t src_len = is_quoted ? len - 2 : len;
  // Decoding never produces more characters than it consumes (a two-character
  // escape becomes one or two characters), so src_len + 1 bytes always
  // suffice. Sizing by a separate counting pass undercounted unknown escapes.
  char *unwrapped = malloc(src_len + 1);
  if (!unwrapped)
    return NULL;
  if (!is_quoted) {
    // Not quoted, return copy
    memcpy(unwrapped, src, src_len);
    unwrapped[src_len] = '\0';
    return unwrapped;
  }
  size_t dst_idx = 0;
  for (size_t i = 0; i < src_len; ++i) {
    if (src[i] != '\\' || i + 1 >= src_len) {
      unwrapped[dst_idx++] = src[i];
      continue;
    }
    switch (src[i + 1]) {
    case 'n':
      unwrapped[dst_idx++] = '\n';
      break;
    case 't':
      unwrapped[dst_idx++] = '\t';
      break;
    case 'r':
      unwrapped[dst_idx++] = '\r';
      break;
    case '\\':
      unwrapped[dst_idx++] = '\\';
      break;
    case '"':
      unwrapped[dst_idx++] = '"';
      break;
    case '\'':
      unwrapped[dst_idx++] = '\'';
      break;
    default:
      // Unknown escape, keep both characters
      unwrapped[dst_idx++] = src[i];
      unwrapped[dst_idx++] = src[i + 1];
      break;
    }
    i++; // Skip the escaped character
  }
  unwrapped[dst_idx] = '\0';
  return unwrapped;
}
Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) {
Symbol *s = (Symbol *)malloc(sizeof(Symbol));
ValueType t;
Token token_type = nextToken();
switch (token_type.type) {
case TOKEN_TYPE_I8:
t.type = I8;
t.size = 1;
break;
case TOKEN_TYPE_I16:
t.type = I16;
t.size = 2;
break;
case TOKEN_TYPE_U8:
t.type = U8;
t.size = 1;
break;
case TOKEN_TYPE_U16:
t.type = U16;
t.size = 2;
break;
case TOKEN_TYPE_INT:
t.type = I32;
t.size = 4;
break;
case TOKEN_TYPE_NAT:
t.type = U32;
t.size = 4;
break;
case TOKEN_TYPE_REAL:
t.type = F32;
t.size = 4;
break;
case TOKEN_TYPE_STR:
t.type = STR;
break;
case TOKEN_IDENTIFIER:
break;
default:
return nil;
}
Token eq = nextToken();
if (eq.type != TOKEN_EQ)
return nil;
Token name = nextToken();
if (name.type != TOKEN_IDENTIFIER)
return nil;
s->name = names_table_add(nt, name.start);
u32 addr = vm->mp;
s->ref = addr;
u32 result;
Token value = nextToken();
switch (value.type) {
case TOKEN_LITERAL_INT:
case TOKEN_LITERAL_NAT:
case TOKEN_LITERAL_REAL:
result = resolve_symbol(nt, st, value.start);
write_u32(vm, memory, addr, result);
vm->mp += t.size;
vm->frames[vm->fp].end += t.size;
break;
case TOKEN_LITERAL_STR: {
char *unwrapped = unwrap_string(value.start);
int len = strlen(unwrapped);
u32 addr = vm->mp;
u32 size = len + 1 + 4;
t.size = size;
vm->mp += size;
vm->frames[vm->fp].end += size;
write_u32(vm, memory, addr, len);
for (int i = 0; i < len; i++) {
write_u8(vm, memory, addr + 4 + i, unwrapped[i]);
}
write_u8(vm, memory, addr + 4 + len, '\0');
free(unwrapped);
break;
}
default:
return nil;
}
s->type = t;
return s;
}
// Placeholder for assembling a function definition: it consumes no tokens,
// emits nothing and always returns nil.
Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) {
  // Pass each parameter through USED (presumably to silence
  // unused-parameter warnings until this is implemented).
  USED(st);
  USED(nt);
  USED(vm);
  return nil;
}
// Assemble `source` into `vm`.
//
// The source is tokenised with the file's lexer and every token is echoed to
// stdout. `global` definitions are parsed by global(), which writes their
// initial values into VM memory; each resulting symbol is recorded in the
// symbol table so that later "&name" references can be resolved. Function
// definitions and instructions are recognised but not yet assembled: their
// branches below are empty.
//
// Processing stops at TOKEN_EOF or at the first TOKEN_ERROR. The symbol and
// names tables are local to this call and are released before returning.
void assemble(VM *vm, char *source) {
  SymbolTable *st = symbol_table_init();
  NamesTable *nt = names_table_init();
  initLexer(source);
  Token token;
  do {
    token = nextToken();
    if (token.type == TOKEN_ERROR) {
      printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
      break;
    }
    if (token.type != TOKEN_EOF) {
      printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type),
             token.length, token.start);
      if (token.type == TOKEN_KEYWORD_GLOBAL) {
        Symbol *sym = global(vm, nt, st);
        if (!sym) {
          printf("ERROR at line %d: %.*s\n", token.line, token.length,
                 token.start);
        } else {
          // The table stores a copy, so the heap object returned by global()
          // can be released straight away instead of being leaked.
          symbol_table_add(st, *sym);
          free(sym);
        }
      }
      if (token.type == TOKEN_KEYWORD_FN) {
        function(vm, nt, st);
      }
      if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 ||
          token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT ||
          token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 ||
          token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL ||
          token.type == TOKEN_TYPE_STR) {
        // Type declarations outside a global are not handled yet.
      }
      if (token.type == TOKEN_IDENTIFIER) {
        // check to see if it is an opcode first
        // NOTE(review): token.start is compared as a whole string here,
        // while tokens are printed using an explicit length - confirm that
        // streq sees only the token's own text.
        if (streq(token.start, "exit")) {
        } else if (streq(token.start, "call")) {
        } else if (streq(token.start, "syscall")) {
        } else if (streq(token.start, "load_immediate")) {
        } else if (streq(token.start, "load_indirect_8")) {
        } else if (streq(token.start, "load_indirect_16")) {
        } else if (streq(token.start, "load_indirect_32")) {
        } else if (streq(token.start, "load_absolute_8")) {
        } else if (streq(token.start, "load_absolute_16")) {
        } else if (streq(token.start, "load_absolute_32")) {
        } else if (streq(token.start, "load_offset_8")) {
        } else if (streq(token.start, "load_offset_16")) {
        } else if (streq(token.start, "load_offset_32")) {
        } else if (streq(token.start, "store_absolute_8")) {
        } else if (streq(token.start, "store_absolute_16")) {
        } else if (streq(token.start, "store_absolute_32")) {
        } else if (streq(token.start, "store_indirect_8")) {
        } else if (streq(token.start, "store_indirect_16")) {
        } else if (streq(token.start, "store_indirect_32")) {
        } else if (streq(token.start, "store_offset_8")) {
        } else if (streq(token.start, "store_offset_16")) {
        } else if (streq(token.start, "store_offset_32")) {
        } else if (streq(token.start, "malloc")) {
        } else if (streq(token.start, "malloc_immediate")) {
        } else if (streq(token.start, "memset_8")) {
        } else if (streq(token.start, "memset_16")) {
        } else if (streq(token.start, "memset_32")) {
        } else if (streq(token.start, "register_move")) {
        } else if (streq(token.start, "add_int")) {
        } else if (streq(token.start, "sub_int")) {
        } else if (streq(token.start, "mul_int")) {
        } else if (streq(token.start, "div_int")) {
        } else if (streq(token.start, "abs_int")) {
        } else if (streq(token.start, "neg_int")) {
        } else if (streq(token.start, "add_nat")) {
        } else if (streq(token.start, "sub_nat")) {
        } else if (streq(token.start, "mul_nat")) {
        } else if (streq(token.start, "div_nat")) {
        } else if (streq(token.start, "abs_nat")) {
        } else if (streq(token.start, "neg_nat")) {
        } else if (streq(token.start, "add_real")) {
        } else if (streq(token.start, "sub_real")) {
        } else if (streq(token.start, "mul_real")) {
        } else if (streq(token.start, "div_real")) {
        } else if (streq(token.start, "abs_real")) {
        } else if (streq(token.start, "neg_real")) {
        } else if (streq(token.start, "int_to_real")) {
        } else if (streq(token.start, "nat_to_real")) {
        } else if (streq(token.start, "real_to_int")) {
        } else if (streq(token.start, "real_to_nat")) {
        } else if (streq(token.start, "bit_shift_left")) {
        } else if (streq(token.start, "bit_shift_right")) {
        } else if (streq(token.start, "bit_shift_r_ext")) {
        } else if (streq(token.start, "bit_and")) {
        } else if (streq(token.start, "bit_or")) {
        } else if (streq(token.start, "bit_xor")) {
        } else if (streq(token.start, "jump")) {
        } else if (streq(token.start, "jump_if_flag")) {
        } else if (streq(token.start, "jump_eq_int")) {
        } else if (streq(token.start, "jump_neq_int")) {
        } else if (streq(token.start, "jump_gt_int")) {
        } else if (streq(token.start, "jump_lt_int")) {
        } else if (streq(token.start, "jump_le_int")) {
        } else if (streq(token.start, "jump_ge_int")) {
        } else if (streq(token.start, "jump_eq_nat")) {
        } else if (streq(token.start, "jump_neq_nat")) {
        } else if (streq(token.start, "jump_gt_nat")) {
        } else if (streq(token.start, "jump_lt_nat")) {
        } else if (streq(token.start, "jump_le_nat")) {
        } else if (streq(token.start, "jump_ge_nat")) {
        } else if (streq(token.start, "jump_eq_real")) {
        } else if (streq(token.start, "jump_neq_real")) {
        } else if (streq(token.start, "jump_ge_real")) {
        } else if (streq(token.start, "jump_gt_real")) {
        } else if (streq(token.start, "jump_lt_real")) {
        } else if (streq(token.start, "jump_le_real")) {
        } else if (streq(token.start, "string_length")) {
        } else if (streq(token.start, "string_eq")) {
        } else if (streq(token.start, "string_concat")) {
        } else if (streq(token.start, "string_get_char")) {
        } else if (streq(token.start, "string_find_char")) {
        } else if (streq(token.start, "string_slice")) {
        } else if (streq(token.start, "int_to_string")) {
        } else if (streq(token.start, "nat_to_string")) {
        } else if (streq(token.start, "real_to_string")) {
        } else if (streq(token.start, "string_to_int")) {
        } else if (streq(token.start, "string_to_nat")) {
        } else if (streq(token.start, "string_to_real")) {
        } else {
          // some other identifier
        }
      }
    }
  } while (token.type != TOKEN_EOF);

  // Both tables are private to this call; release them and the name copies
  // they own.
  for (u32 i = 0; i < nt->count; i++) {
    free(nt->names[i]);
  }
  free(nt->names);
  free(nt);
  free(st->symbols);
  free(st);
}