/* Two-pass compiler: pass 1 builds the symbol table, pass 2 emits bytecode. */
#include "../../vm/common.h"
|
|
#include "../../vm/fixed.h"
|
|
#include "../../vm/libc.h"
|
|
#include "../../vm/opcodes.h"
|
|
|
|
#include "parser.h"
|
|
#include "compiler.h"
|
|
|
|
/* FIXME: remove these and replace with libc.h instead */
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
|
|
/**
 * Store an opcode byte at the current code position.
 *
 * Only the byte is written; `vm->cp` is left untouched, so the caller has to
 * advance it afterwards.
 *
 * @param vm   VM whose code buffer is written
 * @param byte opcode value to store at `vm->code[vm->cp]`
 */
void emit_op(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
}
|
|
|
|
/**
 * Store a raw operand byte at the current code position.
 *
 * Only the byte is written; `vm->cp` is left untouched, so the caller has to
 * advance it afterwards.
 *
 * @param vm   VM whose code buffer is written
 * @param byte value to store at `vm->code[vm->cp]`
 */
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
}
|
|
|
|
/**
 * Store a 32-bit value in the code buffer at the current code position.
 *
 * The work is delegated to write_u32(); this function does not touch
 * `vm->cp` itself.
 * NOTE(review): whether write_u32() advances the position is not visible
 * here - confirm before relying on it.
 *
 * @param vm    VM whose code buffer is written
 * @param value 32-bit value to store
 */
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
}
|
|
|
|
Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length,
|
|
i32 scope_ref) {
|
|
SymbolTable st = table->scopes[scope_ref];
|
|
for (u32 i = 0; i < st.count; i++) {
|
|
if (st.symbols[i].name_length == length) {
|
|
if (strleq(st.symbols[i].name, name, length)) {
|
|
return &table->scopes[scope_ref].symbols[i];
|
|
}
|
|
}
|
|
}
|
|
if (st.parent < 0)
|
|
return nil;
|
|
return symbol_table_lookup(table, name, length, st.parent);
|
|
}
|
|
|
|
/**
 * Add a symbol to the scope that `table->scope_ref` currently selects.
 *
 * The process is terminated with an error message when the name is already
 * visible, when the scope already holds 255 symbols, or when table_realloc()
 * reports failure.
 *
 * Symbols whose scope is not LOCAL get `ref` set to their slot index. LOCAL
 * symbols keep the `ref` supplied by the caller (define_function and
 * define_branch store the code position there).
 *
 * @param table scope table to add to
 * @param s     symbol to store; it is copied by value into the table
 * @return slot index of the new symbol inside the current scope
 */
u8 symbol_table_add(ScopeTable *table, Symbol s) {
  /* NOTE(review): the lookup also walks the parent scopes, so a name defined
   * in an enclosing scope is rejected as well, although the message speaks of
   * "this scope" - confirm whether shadowing is meant to be allowed. */
  if (symbol_table_lookup(table, s.name, s.name_length, table->scope_ref) !=
      nil) {
    /* the '*' precision of %.*s must be an int */
    fprintf(stderr,
            "Error: Symbol '%.*s' already defined, in this scope"
            " please pick a different variable name or create a new scope.\n",
            (int)s.name_length, s.name);
    exit(1);
  }

  /* Check the limit on the stored count before narrowing it to u8. */
  if (table->scopes[table->scope_ref].count >= 255) {
    fprintf(stderr, "Error: Only 255 symbols are allowed per scope"
                    " first off: impressive; secondly:"
                    " just create a new scope and keep going.\n");
    exit(1);
  }
  u8 current_index = (u8)table->scopes[table->scope_ref].count;

  if (!table_realloc(table)) {
    fprintf(stderr,
            "Error: Symbol table is out of memory! This is likely because you "
            " built the assembler in static mode, increase the static size."
            " if you built using malloc, that means your computer is out of"
            " memory. Close a few tabs in your web browser and try again."
            " Count was %d, while capacity was %d\n",
            (int)table->count, (int)table->capacity);
    exit(1);
  }

  /* Variables are addressed by their slot; LOCAL symbols already carry the
   * code position chosen by the caller and must not lose it. */
  if (s.scope != LOCAL) {
    s.ref = current_index;
  }

#ifdef DEBUG_COMPILER
  if (s.scope == VAR) {
    printf("$%d = %s\n", (int)s.ref, s.name);
  } else if (s.scope == GLOBAL) {
    printf("memory[%d] = %s\n", (int)s.ref, s.name);
  } else {
    printf("code[%d] = %s\n", (int)s.ref, s.name);
  }
#endif

  /* Take the scope pointer only after table_realloc() has run. */
  SymbolTable *scope = &table->scopes[table->scope_ref];
  scope->symbols[current_index] = s;
  scope->count++;
  return current_index;
}
|
|
|
|
/**
 * Resolve a name to the `ref` stored in its symbol.
 *
 * The lookup starts in the scope selected by `st->scope_ref` and continues
 * through its parents. An unknown name is fatal: an error is written to
 * stderr and the process exits with status 1.
 *
 * @param st     scope table to search
 * @param name   name to resolve
 * @param length length of `name`
 * @return the `ref` field of the symbol that was found
 */
u32 get_ref(ScopeTable *st, const char *name, u32 length) {
  Symbol *sym = symbol_table_lookup(st, name, length, st->scope_ref);
  if (!sym) {
    /* the '*' precision of %.*s must be an int, not a u32 */
    fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n",
            (int)length, name);
    exit(1);
  }
  return sym->ref;
}
|
|
|
|
/**
 * Read the next token and require it to have the given type.
 *
 * On a mismatch the offending token is reported on stdout together with its
 * line number and the process exits with status 1.
 *
 * @param type token type that must come next
 * @return the token that was read
 */
Token next_token_is(TokenType type) {
  Token got = next_token();
  if (got.type == type) {
    return got;
  }
  printf("ERROR at line %d: %.*s\n", got.line, got.length, got.start);
  exit(1);
}
|
|
|
|
/**
|
|
* Var .
|
|
*/
|
|
void define_var(ScopeTable *st, Token regType) {
|
|
Symbol s;
|
|
s.scope = (st->depth) ? VAR : GLOBAL;
|
|
switch (regType.type) {
|
|
case TOKEN_KEYWORD_PLEX: {
|
|
s.type = PLEX;
|
|
s.size = 4; /* not really this type, pointer alias which is 4 */
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_I8: {
|
|
s.type = I8;
|
|
s.size = 1;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_I16: {
|
|
s.type = I16;
|
|
s.size = 2;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_INT: {
|
|
s.type = I32;
|
|
s.size = 4;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_U8: {
|
|
s.type = U8;
|
|
s.size = 1;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_U16: {
|
|
s.type = U16;
|
|
s.size = 2;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_NAT: {
|
|
s.type = U32;
|
|
s.size = 4;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_REAL: {
|
|
s.type = F32;
|
|
s.size = 4;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_BOOL: {
|
|
s.type = BOOL;
|
|
s.size = 1;
|
|
break;
|
|
}
|
|
case TOKEN_TYPE_STR: {
|
|
s.type = STR;
|
|
s.size = 4; /* not really this type, pointer alias which is 4 */
|
|
break;
|
|
}
|
|
default:
|
|
printf("ERROR at line %d: %.*s\n", regType.line, regType.length,
|
|
regType.start);
|
|
exit(1);
|
|
}
|
|
|
|
Token name = next_token_is(TOKEN_IDENTIFIER);
|
|
if (name.length > MAX_SYMBOL_NAME_LENGTH) {
|
|
printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", regType.line,
|
|
regType.length, regType.start);
|
|
exit(1);
|
|
}
|
|
|
|
memcpy(s.name, name.start, name.length);
|
|
s.name[name.length] = '\0';
|
|
s.name_length = name.length;
|
|
|
|
symbol_table_add(st, s);
|
|
}
|
|
|
|
|
|
/**
|
|
* Plex .
|
|
*/
|
|
void define_plex(VM *vm, ScopeTable *st) {
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* Function .
|
|
*/
|
|
void define_function(VM *vm, ScopeTable *st) {
|
|
Symbol s;
|
|
s.scope = LOCAL;
|
|
s.type = FUNCTION;
|
|
|
|
Token name = next_token_is(TOKEN_IDENTIFIER);
|
|
if (name.length > MAX_SYMBOL_NAME_LENGTH) {
|
|
printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
|
|
name.start);
|
|
exit(1);
|
|
}
|
|
memcpy(s.name, name.start, name.length);
|
|
s.name[name.length] = '\0';
|
|
s.name_length = name.length;
|
|
|
|
next_token_is(TOKEN_LPAREN);
|
|
|
|
i32 temp = st->scope_ref;
|
|
|
|
st->count++;
|
|
st->scopes[st->count].parent = st->scope_ref;
|
|
st->scope_ref = (i32)st->count;
|
|
|
|
Token next = next_token();
|
|
while (next.type != TOKEN_RPAREN) {
|
|
define_var(st, next);
|
|
next = next_token();
|
|
if (next.type == TOKEN_COMMA) {
|
|
next = next_token();
|
|
continue;
|
|
} else if (next.type == TOKEN_RPAREN) {
|
|
break;
|
|
} else {
|
|
printf("ERROR at line %d: %.*s\n", next.line, next.length, next.start);
|
|
exit(1);
|
|
}
|
|
}
|
|
s.ref = vm->cp;
|
|
|
|
st->scope_ref = temp; // need to add to the parents scope
|
|
symbol_table_add(st, s);
|
|
st->scope_ref = (i32)st->count;
|
|
}
|
|
|
|
/**
|
|
* Branch.
|
|
*/
|
|
void define_branch(VM *vm, ScopeTable *st) {
|
|
Symbol s;
|
|
s.scope = LOCAL;
|
|
s.type = VOID;
|
|
|
|
Token name = next_token_is(TOKEN_IDENTIFIER);
|
|
if (name.length > MAX_SYMBOL_NAME_LENGTH) {
|
|
printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
|
|
name.start);
|
|
exit(1);
|
|
}
|
|
memcpy(s.name, name.start, name.length);
|
|
s.name_length = name.length;
|
|
s.name[name.length] = '\0';
|
|
|
|
s.ref = vm->cp;
|
|
symbol_table_add(st, s);
|
|
}
|
|
|
|
/**
 * Look up the encoded size, in bytes, of an instruction by mnemonic.
 *
 * An unknown mnemonic is fatal: a message is written to stderr and the
 * process exits with status -1.
 *
 * NOTE(review): "memset" appears once in the table; the strcmp chain this
 * table replaces tested it twice, which may have been a typo for another
 * mnemonic - confirm.
 *
 * @param opname NUL-terminated mnemonic
 * @return size of the instruction in bytes (2 to 7)
 */
int get_instruction_byte_size(const char *opname) {
  static const struct {
    const char *name;
    int bytes;
  } table[] = {
      {"return", 2},

      {"neg_int", 3},
      {"abs_int", 3},
      {"neg_nat", 3},
      {"abs_nat", 3},
      {"neg_real", 3},
      {"abs_real", 3},
      {"int_to_string", 3},
      {"load_indirect_8", 3},
      {"nat_to_string", 3},
      {"load_indirect_16", 3},
      {"real_to_string", 3},
      {"load_indirect_32", 3},
      {"int_to_real", 3},
      {"store_indirect_8", 3},
      {"nat_to_real", 3},
      {"store_indirect_16", 3},
      {"real_to_int", 3},
      {"store_indirect_32", 3},
      {"real_to_nat", 3},
      {"nat_to_int", 3},
      {"int_to_nat", 3},
      {"string_length", 3},
      {"memset", 3},
      {"memset_8", 3},
      {"memset_16", 3},
      {"register_move", 3},
      {"malloc", 3},

      {"add_int", 4},
      {"sub_int", 4},
      {"mul_int", 4},
      {"div_int", 4},
      {"add_nat", 4},
      {"sub_nat", 4},
      {"mul_nat", 4},
      {"div_nat", 4},
      {"add_real", 4},
      {"sub_real", 4},
      {"mul_real", 4},
      {"div_real", 4},
      {"bit_shift_left", 4},
      {"bit_shift_right", 4},
      {"bit_shift_r_ext", 4},
      {"bit_and", 4},
      {"bit_or", 4},
      {"bit_xor", 4},

      {"halt", 5},
      {"jump_if_flag", 5},
      {"jump", 5},

      {"load_absolute_32", 6},
      {"load_immediate", 6},
      {"load_address", 6},
      {"load_absolute_16", 6},
      {"load_absolute_8", 6},
      {"store_absolute_32", 6},
      {"store_absolute_8", 6},
      {"store_absolute_16", 6},

      {"jump_eq_int", 7},
      {"jump_neq_int", 7},
      {"jump_gt_int", 7},
      {"jump_lt_int", 7},
      {"jump_le_int", 7},
      {"jump_ge_int", 7},
      {"jump_eq_nat", 7},
      {"jump_neq_nat", 7},
      {"jump_gt_nat", 7},
      {"jump_lt_nat", 7},
      {"jump_le_nat", 7},
      {"jump_ge_nat", 7},
      {"jump_eq_real", 7},
      {"jump_neq_real", 7},
      {"jump_gt_real", 7},
      {"jump_lt_real", 7},
      {"jump_le_real", 7},
      {"jump_ge_real", 7},
      {"store_offset_8", 7},
      {"store_offset_16", 7},
      {"store_offset_32", 7},
      {"load_offset_8", 7},
      {"load_offset_16", 7},
      {"load_offset_32", 7},
  };
  const size_t entries = sizeof table / sizeof table[0];

  for (size_t i = 0; i < entries; i++) {
    if (strcmp(opname, table[i].name) == 0) {
      return table[i].bytes;
    }
  }

  fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
  exit(-1);
}
|
|
|
|
/**
|
|
* Build the symbol table and calculate the types/size/offsets of all values.
|
|
*/
|
|
void build_symbol_table(VM *vm, char *source, ScopeTable *st) {
|
|
Token token;
|
|
init_parser(source);
|
|
do {
|
|
token = next_token();
|
|
if (token.type == TOKEN_ERROR) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
|
|
exit(1);
|
|
}
|
|
|
|
if (token.type != TOKEN_EOF) {
|
|
|
|
if (token.type == TOKEN_LBRACE) {
|
|
st->count++;
|
|
st->scopes[st->count].parent = st->scope_ref;
|
|
st->scope_ref = (i32)st->count;
|
|
st->depth++;
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_RBRACE) {
|
|
i32 current_scope = st->scope_ref;
|
|
i32 parent = st->scopes[current_scope].parent;
|
|
if (parent < 0) parent = 0;
|
|
st->scope_ref = parent;
|
|
st->depth--;
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_FN) {
|
|
define_function(vm, st);
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_PLEX) {
|
|
define_plex(vm, st);
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_TYPE_I8 ||
|
|
token.type == TOKEN_TYPE_I16 ||
|
|
token.type == TOKEN_TYPE_INT ||
|
|
token.type == TOKEN_TYPE_U8 ||
|
|
token.type == TOKEN_TYPE_U16 ||
|
|
token.type == TOKEN_TYPE_NAT ||
|
|
token.type == TOKEN_TYPE_REAL ||
|
|
token.type == TOKEN_TYPE_STR ||
|
|
token.type == TOKEN_TYPE_BOOL) {
|
|
define_var(st, token);
|
|
next_token_is(TOKEN_SEMICOLON);
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_LOOP ||
|
|
token.type == TOKEN_KEYWORD_IF ||
|
|
token.type == TOKEN_KEYWORD_ELSE ||
|
|
token.type == TOKEN_KEYWORD_DO ||
|
|
token.type == TOKEN_KEYWORD_FOR) {
|
|
define_branch(vm, st);
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_RETURN) {
|
|
vm->cp++;
|
|
|
|
Token next = next_token();
|
|
if (next.type == TOKEN_SEMICOLON) {
|
|
/* put 0xFF as return register */
|
|
vm->cp++;
|
|
continue;
|
|
}
|
|
|
|
get_ref(st, next.start, next.length);
|
|
vm->cp++;
|
|
next_token_is(TOKEN_SEMICOLON);
|
|
continue;
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT
|
|
printf("-- %.*s --\n", token.length, token.start);
|
|
#endif
|
|
if (token.type == TOKEN_IDENTIFIER) {
|
|
/* check to see if it is an opcode first */
|
|
if (false) {
|
|
} else {
|
|
/* some other identifier */
|
|
printf("Unknown id at line %d: %.*s\n", token.line, token.length,
|
|
token.start);
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
} while (token.type != TOKEN_EOF);
|
|
}
|
|
|
|
/**
|
|
* 2nd pass, emit the bytecode
|
|
*/
|
|
void emit_bytecode(VM *vm, char *source, ScopeTable *st) {
|
|
Token token;
|
|
init_parser(source);
|
|
do {
|
|
token = next_token();
|
|
if (token.type == TOKEN_ERROR) {
|
|
printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
|
|
break;
|
|
}
|
|
if (token.type != TOKEN_EOF) {
|
|
|
|
if (token.type == TOKEN_LBRACE) {
|
|
st->count++;
|
|
st->scopes[st->count].parent = st->scope_ref;
|
|
st->scope_ref = (i32)st->count;
|
|
st->depth++;
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_RBRACE) {
|
|
i32 current_scope = st->scope_ref;
|
|
i32 parent = st->scopes[current_scope].parent;
|
|
if (parent < 0) parent = 0;
|
|
st->scope_ref = parent;
|
|
st->depth--;
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_FN) {
|
|
/* ignore, already processed */
|
|
Token next = next_token();
|
|
while (next.type != TOKEN_RPAREN) {
|
|
next = next_token();
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_PLEX) {
|
|
/* ignore, already processed */
|
|
/* FIXME: consume all tokens for this plex */
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_TYPE_I8 ||
|
|
token.type == TOKEN_TYPE_I16 ||
|
|
token.type == TOKEN_TYPE_INT ||
|
|
token.type == TOKEN_TYPE_U8 ||
|
|
token.type == TOKEN_TYPE_U16 ||
|
|
token.type == TOKEN_TYPE_NAT ||
|
|
token.type == TOKEN_TYPE_REAL ||
|
|
token.type == TOKEN_TYPE_STR) {
|
|
/* ignore, already processed */
|
|
next_token(); /* type */
|
|
next_token(); /* var */
|
|
next_token(); /* reg */
|
|
next_token(); /* ; */
|
|
continue;
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_LOOP ||
|
|
token.type == TOKEN_KEYWORD_IF ||
|
|
token.type == TOKEN_KEYWORD_ELSE ||
|
|
token.type == TOKEN_KEYWORD_DO ||
|
|
token.type == TOKEN_KEYWORD_FOR) {
|
|
/* ignore, already processed */
|
|
next_token(); /* id */
|
|
}
|
|
|
|
if (token.type == TOKEN_KEYWORD_RETURN) {
|
|
emit_op(vm, OP_RETURN);
|
|
vm->cp++;
|
|
|
|
Token next = next_token();
|
|
if (next.type == TOKEN_SEMICOLON) {
|
|
/* put 0xFF as return register */
|
|
emit_byte(vm, 0xFF);
|
|
vm->cp++;
|
|
continue;
|
|
}
|
|
|
|
u32 reg = get_ref(st, next.start, next.length);
|
|
emit_byte(vm, reg);
|
|
vm->cp++;
|
|
next_token_is(TOKEN_SEMICOLON);
|
|
continue;
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT
|
|
printf("-- %.*s --\n", token.length, token.start);
|
|
#endif
|
|
if (token.type == TOKEN_IDENTIFIER) {
|
|
if (false) {
|
|
|
|
} else {
|
|
/* some other identifier */
|
|
printf("Unknown id at line %d: %.*s\n", token.line, token.length,
|
|
token.start);
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
} while (token.type != TOKEN_EOF);
|
|
}
|
|
|
|
/**
|
|
* Compile.
|
|
*/
|
|
bool compile(VM *vm, ScopeTable *st, char *source) {
|
|
build_symbol_table(vm, source, st);
|
|
vm->cp = 0; /* actually start emitting code */
|
|
st->count = 0;
|
|
emit_bytecode(vm, source, st);
|
|
return true;
|
|
}
|