388 lines
12 KiB
C
388 lines
12 KiB
C
#include "assembler.h"
#include "../../vm/common.h"
#include "../../vm/fixed.h"
#include "../../vm/libc.h"
#include "../../vm/opcodes.h"
#include "lexer.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
|
|
SymbolTable *symbol_table_init() {
|
|
SymbolTable *table = malloc(sizeof(SymbolTable));
|
|
table->symbols = malloc(16 * sizeof(Symbol));
|
|
table->count = 0;
|
|
table->capacity = 16;
|
|
return table;
|
|
}
|
|
|
|
NamesTable *names_table_init() {
|
|
NamesTable *table = malloc(sizeof(NamesTable));
|
|
table->names = malloc(16 * sizeof(char *));
|
|
table->count = 0;
|
|
table->capacity = 16;
|
|
return table;
|
|
}
|
|
|
|
// Interns `name` in the table and returns its index.
//
// If an equal string (per strcmp) is already stored, its existing index is
// returned and nothing is added. Otherwise a private copy of `name` is
// appended, doubling the backing array when it is full.
//
// `name` must be a NUL-terminated string. Allocation failure is fatal: a
// message is written to stderr and the process exits.
u32 names_table_add(NamesTable *table, const char *name) {
  for (u32 i = 0; i < table->count; i++) {
    if (strcmp(table->names[i], name) == 0) {
      return i;
    }
  }

  if (table->count >= table->capacity) {
    // Grow through a temporary so the table is only updated once realloc has
    // succeeded; a zero capacity would otherwise never grow when doubled.
    u32 grown_capacity = table->capacity ? table->capacity * 2 : 16;
    char **grown = realloc(table->names, grown_capacity * sizeof(char *));
    if (!grown) {
      fprintf(stderr, "Error: Out of memory growing names table\n");
      exit(1);
    }
    table->names = grown;
    table->capacity = grown_capacity;
  }

  size_t bytes = strlen(name) + 1;
  char *copy = malloc(bytes);
  if (!copy) {
    fprintf(stderr, "Error: Out of memory storing name '%s'\n", name);
    exit(1);
  }
  memcpy(copy, name, bytes);

  u32 index = table->count;
  table->names[index] = copy;
  table->count++;
  return index;
}
|
|
|
|
// Appends a copy of `*s` to the table and returns the index it was stored at.
//
// The backing array is doubled when full. The symbol is copied by value, so
// the caller keeps ownership of `s`. Allocation failure is fatal: a message is
// written to stderr and the process exits.
u32 symbol_table_add(SymbolTable *table, Symbol *s) {
  if (table->count >= table->capacity) {
    // Grow through a temporary so the table is only updated once realloc has
    // succeeded; a zero capacity would otherwise never grow when doubled.
    u32 grown_capacity = table->capacity ? table->capacity * 2 : 16;
    Symbol *grown = realloc(table->symbols, grown_capacity * sizeof(Symbol));
    if (!grown) {
      fprintf(stderr, "Error: Out of memory growing symbol table\n");
      exit(1);
    }
    table->symbols = grown;
    table->capacity = grown_capacity;
  }

  u32 index = table->count;
  table->symbols[index] = *s;
  table->count++;
  return index;
}
|
|
|
|
// Finds the symbol whose name equals `name`.
//
// Symbols store an index into `nt` rather than the text itself, so the search
// first matches `name` against the names table and then looks for a symbol
// carrying that index. Returns a pointer into `table->symbols`, or nil when no
// symbol matches.
Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table,
                            const char *name) {
  for (u32 name_idx = 0; name_idx < nt->count; name_idx++) {
    if (strcmp(nt->names[name_idx], name) != 0) {
      continue;
    }

    for (u32 sym_idx = 0; sym_idx < table->count; sym_idx++) {
      Symbol *candidate = &table->symbols[sym_idx];
      if (candidate->name == name_idx) {
        return candidate;
      }
    }
  }
  return nil;
}
|
|
|
|
// Returns the `ref` field of the symbol called `name`.
//
// An unknown name is fatal: the error is written to stderr and the process
// exits with status 1.
u32 get_ref(NamesTable *nt, SymbolTable *table, const char *name) {
  Symbol *found = symbol_table_lookup(nt, table, name);
  if (found) {
    return found->ref;
  }

  fprintf(stderr, "Error: Undefined Symbol '%s'\n", name);
  exit(1);
}
|
|
|
|
// Stores `byte` at vm->code[vm->cp] and advances vm->cp by one.
// No bounds check is performed here.
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
  vm->cp++;
}
|
|
|
|
// Writes `value` into the code buffer at vm->cp via write_u32 and advances
// vm->cp by four bytes. No bounds check is performed here.
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp = vm->cp + 4;
}
|
|
|
|
// Emits `op` as a single byte through emit_byte.
void emit_opcode(VM *vm, Opcode op) {
  emit_byte(vm, (u8)op);
}
|
|
|
|
// Parses a register operand of the form "$<decimal digits>".
//
// Returns the register number, or -1 when `reg_str` is null, does not start
// with '$', has no digits after the '$', contains anything other than digits,
// or names a value that does not fit in an int. Signs and whitespace after the
// '$' are rejected.
int parse_register(const char *reg_str) {
  if (!reg_str || reg_str[0] != '$') {
    return -1;
  }

  const char *digits = reg_str + 1;
  // Insisting on a leading digit rejects the empty string and stops strtoll
  // from accepting leading whitespace or a sign.
  if (digits[0] < '0' || digits[0] > '9') {
    return -1;
  }

  char *end = NULL;
  // strtoll saturates at LLONG_MAX on overflow, which the INT_MAX check below
  // also rejects.
  long long index = strtoll(digits, &end, 10);
  if (*end != '\0' || index > INT_MAX) {
    return -1;
  }
  return (int)index;
}
|
|
|
|
// Converts operand text into a 32-bit value.
//
// Three forms are recognized, checked in this order:
//   "&name"  - the `ref` of the named symbol (fatal if undefined, via get_ref)
//   contains '.' - a real literal, converted with float_to_fixed
//   otherwise - a decimal integer literal
//
// Decimal literals must consume the whole string and fit in 32 bits: unsigned
// values up to 0xFFFFFFFF, or negative values down to -2147483648, which are
// returned in two's-complement form. Anything else is fatal: a message is
// written to stderr and the process exits with status 1.
u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) {
  // symbol references (e.g., &label)
  if (ref[0] == '&') {
    return get_ref(nt, table, ref + 1);
  }

  char *end = NULL;

  // fixed-point numbers
  if (strchr(ref, '.')) {
    double real = strtod(ref, &end);
    // Reject text with no numeric prefix instead of silently yielding 0.
    // Trailing characters after the number are still ignored, as before.
    if (end == ref) {
      fprintf(stderr, "Invalid real literal: %s\n", ref);
      exit(1);
    }
    return float_to_fixed(real);
  }

  // decimal literals
  u32 value;
  bool in_range;
  if (ref[0] == '-') {
    // A 64-bit parse lets the range be checked before narrowing; saturation
    // on overflow lands outside the accepted range as well.
    long long parsed = strtoll(ref, &end, 10);
    in_range = parsed >= -2147483648LL;
    value = (u32)parsed;
  } else {
    unsigned long long parsed = strtoull(ref, &end, 10);
    in_range = parsed <= 0xFFFFFFFFULL;
    value = (u32)parsed;
  }

  if (end == ref || *end != '\0' || !in_range) {
    fprintf(stderr, "Invalid decimal literal: %s\n", ref);
    exit(1);
  }
  return value;
}
|
|
|
|
// Returns a heap-allocated, NUL-terminated copy of `length` bytes starting at
// `start`. Exits on allocation failure; the caller frees the result.
static char *global_token_text(const char *start, size_t length) {
  char *copy = malloc(length + 1);
  if (!copy) {
    fprintf(stderr, "Error: Out of memory copying token text\n");
    exit(1);
  }
  memcpy(copy, start, length);
  copy[length] = '\0';
  return copy;
}

// Parses one global declaration from the lexer and stores its initial value
// in VM memory.
//
// Tokens are read in this order: a type token (or an identifier naming a
// type), TOKEN_EQ, the identifier being declared, then a literal value.
//
// Numeric literals are written at vm->mp with write_u32 and reserve `t.size`
// bytes. String literals are stored as a 4-byte length, the unescaped bytes,
// and a trailing NUL. In both cases vm->mp and the current frame's `end` are
// advanced by the reserved size, and a symbol recording the name, address and
// type is added to `st`.
//
// Returns false when the token sequence does not match, or when a numeric
// literal is given for a type with no known size; true otherwise.
bool global(VM *vm, NamesTable *nt, SymbolTable *st) {
  // Both live on the stack and start zeroed: symbol_table_add copies the
  // symbol by value, and `str`/identifier types never assign every field.
  Symbol s = {0};
  ValueType t = {0};

  Token token_type = nextToken();
  switch (token_type.type) {
  case TOKEN_TYPE_I8:
    t.type = I8;
    t.size = 1;
    break;
  case TOKEN_TYPE_U8:
    t.type = U8;
    t.size = 1;
    break;
  case TOKEN_TYPE_I16:
    t.type = I16;
    t.size = 2;
    break;
  case TOKEN_TYPE_U16:
    t.type = U16;
    t.size = 2;
    break;
  case TOKEN_TYPE_INT:
    t.type = I32;
    t.size = 4;
    break;
  case TOKEN_TYPE_NAT:
    t.type = U32;
    t.size = 4;
    break;
  case TOKEN_TYPE_REAL:
    t.type = F32;
    t.size = 4;
    break;
  case TOKEN_TYPE_STR:
    t.type = STR;
    break;
  case TOKEN_IDENTIFIER:
    break;
  default:
    return false;
  }

  Token eq = nextToken();
  if (eq.type != TOKEN_EQ) {
    return false;
  }

  Token name = nextToken();
  if (name.type != TOKEN_IDENTIFIER) {
    return false;
  }

  u32 addr = vm->mp;

  Token value = nextToken();
  switch (value.type) {
  case TOKEN_LITERAL_INT:
  case TOKEN_LITERAL_NAT:
  case TOKEN_LITERAL_REAL: {
    // A numeric literal needs a sized type; without one no space would be
    // reserved and the next global would overwrite this value.
    if (t.size == 0) {
      return false;
    }

    // Token text is delimited by `length` (see the string branch below), so
    // hand resolve_symbol a terminated copy of exactly this literal.
    char *text = global_token_text(value.start, (size_t)value.length);
    u32 result = resolve_symbol(nt, st, text);
    free(text);

    // NOTE(review): this always stores four bytes even when t.size is 1 or 2;
    // only t.size bytes are reserved. Confirm whether narrower stores are
    // wanted here.
    write_u32(vm, memory, addr, result);

    vm->mp += t.size;
    vm->frames[vm->fp].end += t.size;
    break;
  }
  case TOKEN_LITERAL_STR: {
    const char *src = value.start;
    u32 src_len = (u32)value.length;
    u32 len = 0;
    u32 i = 0;

    while (i < src_len) {
      char c = src[i++];
      if (c == '\\' && i < src_len) {
        char escaped = src[i++];
        switch (escaped) {
        case 'n':
          c = '\n';
          break;
        case 't':
          c = '\t';
          break;
        case 'r':
          c = '\r';
          break;
        case '\\':
        case '"':
        case '\'':
          // Store the escaped character itself, not the backslash.
          c = escaped;
          break;
        default:
          // Unknown escape: keep the backslash and re-read the following
          // character on the next iteration.
          i--;
          break;
        }
      }
      write_u8(vm, memory, addr + 4 + len, c);
      len++;
    }

    u32 size = len + 5; // 4 (len) + dst_len + 1 (null)
    vm->mp += size;
    vm->frames[vm->fp].end += size;
    write_u32(vm, memory, addr, len);
    write_u8(vm, memory, addr + 4 + len, '\0');
    break;
  }
  default:
    return false;
  }

  // Intern only the identifier's own characters, and only once the
  // declaration has been accepted.
  char *ident = global_token_text(name.start, (size_t)name.length);
  s.name = names_table_add(nt, ident);
  free(ident);

  s.ref = addr;
  s.type = t;
  symbol_table_add(st, &s);
  return true;
}
|
|
|
|
// Placeholder for function declarations: reads no tokens, touches none of its
// arguments, and always reports success.
bool function(VM *vm, NamesTable *nt, SymbolTable *st) {
  // Mark every parameter as used until the body is written.
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
|
|
|
|
// Placeholder for variable declarations: reads no tokens, touches none of its
// arguments, and always reports success.
bool variable(VM *vm, NamesTable *nt, SymbolTable *st) {
  // Mark every parameter as used until the body is written.
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
|
|
|
|
// Placeholder for label handling: reads no tokens, touches none of its
// arguments, and always reports success.
bool label(VM *vm, NamesTable *nt, SymbolTable *st) {
  // Mark every parameter as used until the body is written.
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
|
|
|
|
void assemble(VM *vm, char *source) {
|
|
SymbolTable *st = symbol_table_init();
|
|
NamesTable *nt = names_table_init();
|
|
|
|
initLexer(source);
|
|
Token token;
|
|
do {
|
|
token = nextToken();
|
|
if (token.type == TOKEN_ERROR) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
|
|
break;
|
|
}
|
|
if (token.type != TOKEN_EOF) {
|
|
printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type),
|
|
token.length, token.start);
|
|
|
|
if (token.type == TOKEN_KEYWORD_GLOBAL) {
|
|
if (!global(vm, nt, st)) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length,
|
|
token.start);
|
|
}
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_FN) {
|
|
if (!function(vm, nt, st)) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length,
|
|
token.start);
|
|
}
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 ||
|
|
token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT ||
|
|
token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 ||
|
|
token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL ||
|
|
token.type == TOKEN_TYPE_STR) {
|
|
if (!variable(vm, nt, st)) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length,
|
|
token.start);
|
|
}
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_LOOP ||
|
|
token.type == TOKEN_KEYWORD_ELSE) {
|
|
if (!label(vm, nt, st)) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length,
|
|
token.start);
|
|
}
|
|
}
|
|
|
|
if (token.type == TOKEN_IDENTIFIER) {
|
|
// check to see if it is an opcode first
|
|
if (streq(token.start, "exit")) {
|
|
} else if (streq(token.start, "call")) {
|
|
} else if (streq(token.start, "syscall")) {
|
|
} else if (streq(token.start, "load_immediate")) {
|
|
} else if (streq(token.start, "load_indirect_8")) {
|
|
} else if (streq(token.start, "load_indirect_16")) {
|
|
} else if (streq(token.start, "load_indirect_32")) {
|
|
} else if (streq(token.start, "load_absolute_8")) {
|
|
} else if (streq(token.start, "load_absolute_16")) {
|
|
} else if (streq(token.start, "load_absolute_32")) {
|
|
} else if (streq(token.start, "load_offset_8")) {
|
|
} else if (streq(token.start, "load_offset_16")) {
|
|
} else if (streq(token.start, "load_offset_32")) {
|
|
} else if (streq(token.start, "store_absolute_8")) {
|
|
} else if (streq(token.start, "store_absolute_16")) {
|
|
} else if (streq(token.start, "store_absolute_32")) {
|
|
} else if (streq(token.start, "store_indirect_8")) {
|
|
} else if (streq(token.start, "store_indirect_16")) {
|
|
} else if (streq(token.start, "store_indirect_32")) {
|
|
} else if (streq(token.start, "store_offset_8")) {
|
|
} else if (streq(token.start, "store_offset_16")) {
|
|
} else if (streq(token.start, "store_offset_32")) {
|
|
} else if (streq(token.start, "malloc")) {
|
|
} else if (streq(token.start, "malloc_immediate")) {
|
|
} else if (streq(token.start, "memset_8")) {
|
|
} else if (streq(token.start, "memset_16")) {
|
|
} else if (streq(token.start, "memset_32")) {
|
|
} else if (streq(token.start, "register_move")) {
|
|
} else if (streq(token.start, "add_int")) {
|
|
} else if (streq(token.start, "sub_int")) {
|
|
} else if (streq(token.start, "mul_int")) {
|
|
} else if (streq(token.start, "div_int")) {
|
|
} else if (streq(token.start, "abs_int")) {
|
|
} else if (streq(token.start, "neg_int")) {
|
|
} else if (streq(token.start, "add_nat")) {
|
|
} else if (streq(token.start, "sub_nat")) {
|
|
} else if (streq(token.start, "mul_nat")) {
|
|
} else if (streq(token.start, "div_nat")) {
|
|
} else if (streq(token.start, "abs_nat")) {
|
|
} else if (streq(token.start, "neg_nat")) {
|
|
} else if (streq(token.start, "add_real")) {
|
|
} else if (streq(token.start, "sub_real")) {
|
|
} else if (streq(token.start, "mul_real")) {
|
|
} else if (streq(token.start, "div_real")) {
|
|
} else if (streq(token.start, "abs_real")) {
|
|
} else if (streq(token.start, "neg_real")) {
|
|
} else if (streq(token.start, "int_to_real")) {
|
|
} else if (streq(token.start, "nat_to_real")) {
|
|
} else if (streq(token.start, "real_to_int")) {
|
|
} else if (streq(token.start, "real_to_nat")) {
|
|
} else if (streq(token.start, "bit_shift_left")) {
|
|
} else if (streq(token.start, "bit_shift_right")) {
|
|
} else if (streq(token.start, "bit_shift_r_ext")) {
|
|
} else if (streq(token.start, "bit_and")) {
|
|
} else if (streq(token.start, "bit_or")) {
|
|
} else if (streq(token.start, "bit_xor")) {
|
|
} else if (streq(token.start, "jump")) {
|
|
} else if (streq(token.start, "jump_if_flag")) {
|
|
} else if (streq(token.start, "jump_eq_int")) {
|
|
} else if (streq(token.start, "jump_neq_int")) {
|
|
} else if (streq(token.start, "jump_gt_int")) {
|
|
} else if (streq(token.start, "jump_lt_int")) {
|
|
} else if (streq(token.start, "jump_le_int")) {
|
|
} else if (streq(token.start, "jump_ge_int")) {
|
|
} else if (streq(token.start, "jump_eq_nat")) {
|
|
} else if (streq(token.start, "jump_neq_nat")) {
|
|
} else if (streq(token.start, "jump_gt_nat")) {
|
|
} else if (streq(token.start, "jump_lt_nat")) {
|
|
} else if (streq(token.start, "jump_le_nat")) {
|
|
} else if (streq(token.start, "jump_ge_nat")) {
|
|
} else if (streq(token.start, "jump_eq_real")) {
|
|
} else if (streq(token.start, "jump_neq_real")) {
|
|
} else if (streq(token.start, "jump_ge_real")) {
|
|
} else if (streq(token.start, "jump_gt_real")) {
|
|
} else if (streq(token.start, "jump_lt_real")) {
|
|
} else if (streq(token.start, "jump_le_real")) {
|
|
} else if (streq(token.start, "string_length")) {
|
|
} else if (streq(token.start, "string_eq")) {
|
|
} else if (streq(token.start, "string_concat")) {
|
|
} else if (streq(token.start, "string_get_char")) {
|
|
} else if (streq(token.start, "string_find_char")) {
|
|
} else if (streq(token.start, "string_slice")) {
|
|
} else if (streq(token.start, "int_to_string")) {
|
|
} else if (streq(token.start, "nat_to_string")) {
|
|
} else if (streq(token.start, "real_to_string")) {
|
|
} else if (streq(token.start, "string_to_int")) {
|
|
} else if (streq(token.start, "string_to_nat")) {
|
|
} else if (streq(token.start, "string_to_real")) {
|
|
} else {
|
|
// some other identifier
|
|
}
|
|
}
|
|
}
|
|
} while (token.type != TOKEN_EOF);
|
|
}
|