/*
 * undar-lang-fixed-length/tools/compiler.c
 *
 * 729 lines, 17 KiB, C
 */

#include "compiler.h"
#include "parser.h"
#include <stdio.h>
#include <stdlib.h>
#define DEBUG_COMPILER
/**
 * Store a single byte in the code buffer at the current position `cp`.
 *
 * With DEBUG_COMPILER defined, the slot index and the value are traced to
 * stdout before the store.
 *
 * NOTE(review): `cp` is not advanced here, so back-to-back calls write the
 * same slot -- confirm callers are expected to move `cp` themselves.
 */
void emit_byte(u8 byte) {
#ifdef DEBUG_COMPILER
  printf("code[%d] = %d\n", cp, byte);
#endif
  *(code + cp) = byte;
}
/**
 * Store a 32-bit value in the code buffer.
 *
 * The destination is the address of `code[cp / 4]`, reinterpreted as a
 * `u32 *`. `cp` itself is left untouched. With DEBUG_COMPILER defined the
 * range `cp .. cp + 3` and the value are traced to stdout first.
 *
 * NOTE(review): the trace prints a byte range starting at `cp` while the
 * store indexes with `cp / 4` -- confirm which unit `cp` is counted in.
 */
void emit_u32(u32 value) {
#ifdef DEBUG_COMPILER
  printf("code[%d..%d] = %d\n", cp, cp + 3, value);
#endif
  u32 *slot = (u32 *)(&code[cp / 4]);
  *slot = value;
}
/**
 * Find a symbol by name, starting in scope `scope_ref` and walking outwards
 * through the chain of parent scopes.
 *
 * @param table     scope table that owns every scope
 * @param name      symbol name (compared over `length` characters)
 * @param length    number of characters in `name`
 * @param scope_ref index of the scope where the search begins
 * @return pointer to the matching entry inside `table`, or nil when the
 *         outermost scope (parent < 0) has been searched without a match
 */
Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length,
                            i32 scope_ref) {
  i32 ref = scope_ref;

  for (;;) {
    SymbolTable *scope = &table->scopes[ref];

    for (u32 i = 0; i < scope->count; i++) {
      Symbol *candidate = &scope->symbols[i];
      /* cheap length test first, full comparison only when it matches */
      if (candidate->name_length != length) {
        continue;
      }
      if (sleq(candidate->name, name, length)) {
        return candidate;
      }
    }

    if (scope->parent < 0) {
      return nil;
    }
    ref = scope->parent;
  }
}
/**
 * Append a symbol to the current scope (`table->scope_ref`).
 *
 * The process exits with an error message when:
 *  - a symbol with the same name is already visible from the current scope
 *    (the lookup also walks the parent scopes),
 *  - the current scope already holds 255 symbols,
 *  - `table_realloc` reports that no room is left.
 *
 * For VAR symbols `ref` is set to the slot index inside the scope. For all
 * other symbols the `ref` supplied by the caller (memory address for
 * globals, code position for functions/branches) is kept as is; previously
 * it was overwritten with the slot index, losing that address.
 *
 * @param table scope table to add to
 * @param s     symbol to store (copied into the table)
 * @return index of the new symbol inside the current scope
 */
u8 symbol_table_add(ScopeTable *table, Symbol s) {
  Symbol *sym =
      symbol_table_lookup(table, s.name, s.name_length, table->scope_ref);
  if (sym != nil) {
    fprintf(stderr,
            "Error: Symbol '%.*s' already defined, in this scope"
            " please pick a different variable name or create a new scope.\n",
            s.name_length, s.name);
    exit(1);
  }

  /* check the limit on the full-width count before narrowing to u8 */
  u32 used = table->scopes[table->scope_ref].count;
  if (used >= 255) {
    fprintf(stderr, "Error: Only 255 symbols are allowed per scope"
                    " first off: impressive; secondly:"
                    " just create a new scope and keep going.\n");
    exit(1);
  }
  u8 current_index = (u8)used;

  if (!table_realloc(table)) {
    fprintf(stderr,
            "Error: Symbol table is out of memory! This is likely because you "
            " built the assembler in static mode, increase the static size."
            " if you built using malloc, that means your computer is out of"
            " memory. Close a few tabs in your web browser and try again."
            " Count was %d, while capacity was %d\n",
            table->count, table->capacity);
    exit(1);
  }

  /* only variables are addressed by their slot; globals and code symbols
   * already carry the address their definer computed */
  if (s.scope == VAR) {
    s.ref = current_index;
  }

#ifdef DEBUG_COMPILER
  if (s.scope == VAR) {
    printf("$%d = %s\n", s.ref, s.name);
  } else if (s.scope == GLOBAL) {
    printf("memory[%d] = %s\n", s.ref, s.name);
  } else {
    printf("code[%d] = %s\n", s.ref, s.name);
  }
#endif

  table->scopes[table->scope_ref].symbols[current_index] = s;
  table->scopes[table->scope_ref].count++;
  return current_index;
}
/**
 * Resolve a name to the `ref` stored in its symbol.
 *
 * The search starts in the current scope of `st`. An unknown name is fatal:
 * a message is written to stderr and the process exits.
 *
 * @param st     scope table to search
 * @param name   symbol name
 * @param length number of characters in `name`
 * @return the `ref` field of the symbol that was found
 */
u32 get_ref(ScopeTable *st, const char *name, u32 length) {
  Symbol *found = symbol_table_lookup(st, name, length, st->scope_ref);
  if (found) {
    return found->ref;
  }

  fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n",
          length, name);
  exit(1);
  return 0;
}
/**
 * Turn a token into a pointer value.
 *
 *  - identifier: the `ref` of the symbol it names (see get_ref)
 *  - int literal: parsed with atoi
 *  - nat literal: parsed with strtoul, base 10
 *
 * Any other token type is fatal (message on stderr, exit).
 *
 * Tokens are slices of the source text and are not NUL-terminated, so a nat
 * literal is validated against the token bounds: at least one digit must be
 * consumed and parsing must not run past the end of the token. Values that
 * do not fit in 32 bits are rejected as well.
 *
 * @param token token to convert
 * @param st    scope table used to resolve identifiers
 * @return the pointer value
 */
u32 get_ptr(Token token, ScopeTable *st) {
  if (token.type == TOKEN_IDENTIFIER) {
    return get_ref(st, token.start, token.length);
  }

  if (token.type == TOKEN_LITERAL_INT) {
    return atoi(token.start);
  }

  if (token.type == TOKEN_LITERAL_NAT) {
    char *endptr;
    unsigned long parsed = strtoul(token.start, &endptr, 10);
    if (endptr == token.start || endptr > token.start + token.length ||
        parsed > 0xFFFFFFFFUL) {
      fprintf(stderr, "Invalid decimal literal at line %d: %.*s\n", token.line,
              token.length, token.start);
      exit(1);
    }
    return (u32)parsed;
  }

  fprintf(stderr, "Error: Not a pointer or symbol at line %d: %.*s\n",
          token.line, token.length, token.start);
  exit(1);
}
/**
 * Turn a token into a register number.
 *
 * An identifier is resolved through the symbol table. A TOKEN_BIG_MONEY
 * token makes the function read the following token and convert its text
 * with atoi. Anything else is fatal (message on stderr, exit).
 *
 * @param token token to convert
 * @param st    scope table used to resolve identifiers
 * @return the register number
 */
u32 get_reg(Token token, ScopeTable *st) {
  switch (token.type) {
  case TOKEN_IDENTIFIER:
    return get_ref(st, token.start, token.length);
  case TOKEN_BIG_MONEY: {
    Token number = next_token();
    return atoi(number.start);
  }
  default:
    break;
  }

  fprintf(stderr, "Error: Not a register or symbol at line %d: %.*s\n",
          token.line, token.length, token.start);
  exit(1);
}
/**
 * Read the next token and require an identifier or a register reference.
 *
 * For a register reference the TOKEN_BIG_MONEY marker is consumed and the
 * token after it is returned. Any other token prints a message and exits.
 *
 * @return the identifier token, or the token following TOKEN_BIG_MONEY
 */
Token next_id_or_reg() {
  Token first = next_token();

  if (first.type == TOKEN_BIG_MONEY) {
    return next_token();
  }

  if (first.type != TOKEN_IDENTIFIER) {
    printf("Not an ID or register at line %d: %.*s\n", first.line, first.length,
           first.start);
    exit(1);
  }
  return first;
}
/**
 * Read the next token and require an identifier or a numeric literal
 * (nat, int or real). Any other token prints a message and exits.
 *
 * @return the accepted token
 */
Token next_id_or_ptr() {
  Token token = next_token();
  if (token.type != TOKEN_IDENTIFIER && token.type != TOKEN_LITERAL_NAT &&
      token.type != TOKEN_LITERAL_INT && token.type != TOKEN_LITERAL_REAL) {
    /* the message used to say "register", copied from next_id_or_reg */
    printf("Not an ID or pointer at line %d: %.*s\n", token.line, token.length,
           token.start);
    exit(1);
  }
  return token;
}
/**
 * Read the next token and require it to have the given type.
 *
 * A mismatch prints the offending token with its line number and exits.
 *
 * @param type expected token type
 * @return the token that was read
 */
Token next_token_is(TokenType type) {
  Token got = next_token();
  if (got.type == type) {
    return got;
  }

  printf("ERROR at line %d: %.*s\n", got.line, got.length, got.start);
  exit(1);
  return got;
}
/**
 * Parse a global definition of the form `<type> <name> = <value> ;`.
 *
 * The type token has already been read by the caller. The initial value is
 * written to memory at the current `mp`, `mp` is advanced by the symbol's
 * size, and the symbol (scope GLOBAL, ref = address of the value) is added
 * to the current scope.
 *
 * Strings are stored as a 4-byte length, the decoded characters and a
 * terminating NUL. The escapes \n, \t, \r, \\, \" and \' are decoded; for an
 * unknown escape the backslash is stored and the following character is
 * processed normally.
 *
 * @param st      scope table receiving the symbol
 * @param regType the type keyword token
 * @return false when the type is not supported, the name is longer than
 *         MAX_SYMBOL_NAME_LENGTH, or the value token is not a supported
 *         literal; true otherwise. Missing `=` / `;` tokens and malformed
 *         nat literals are fatal.
 *
 * NOTE(review): the value is not checked against the declared type, and
 * numeric literals are always written as 32 bits even for 1- and 2-byte
 * types -- confirm this is intended.
 */
bool define_global(ScopeTable *st, Token regType) {
  /* NOTE(review): not referenced directly below; kept in case the WRITE_*
   * macros rely on it -- confirm and remove if they do not. */
  u32 *globals = (u32 *)(mem);
  Symbol s;

  switch (regType.type) {
  case TOKEN_TYPE_BOOL:
    s.type = BOOL;
    s.size = 1;
    break;
  case TOKEN_TYPE_I8:
    s.type = I8;
    s.size = 1;
    break;
  case TOKEN_TYPE_U8:
    s.type = U8;
    s.size = 1;
    break;
  case TOKEN_TYPE_I16:
    s.type = I16;
    s.size = 2;
    break;
  case TOKEN_TYPE_U16:
    s.type = U16;
    s.size = 2;
    break;
  case TOKEN_TYPE_INT:
    s.type = I32;
    s.size = 4;
    break;
  case TOKEN_TYPE_NAT:
    s.type = U32;
    s.size = 4;
    break;
  case TOKEN_TYPE_REAL:
    s.type = F32;
    s.size = 4;
    break;
  case TOKEN_TYPE_STR:
    s.type = STR;
    /* the real size is only known once the literal has been decoded; start
     * from 0 so `mp += s.size` never reads an indeterminate value */
    s.size = 0;
    break;
  default:
    return false;
  }

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (name.length > MAX_SYMBOL_NAME_LENGTH) {
    return false;
  }
  mcpy(s.name, (char *)name.start, name.length);
  s.name_length = name.length;
  s.name[name.length] = '\0';

  /* the value lives at the current end of global memory */
  u32 addr = mp;
  s.ref = addr;
  s.scope = GLOBAL;

  next_token_is(TOKEN_EQ);
  Token value = next_token();
  switch (value.type) {
  case TOKEN_KEYWORD_TRUE: {
    WRITE_U8(addr, 1);
    mp += s.size;
    break;
  }
  case TOKEN_KEYWORD_FALSE: {
    WRITE_U8(addr, 0);
    mp += s.size;
    break;
  }
  case TOKEN_LITERAL_INT: {
    i32 out = atoi(value.start);
    WRITE_U32(addr, out);
    mp += s.size;
    break;
  }
  case TOKEN_LITERAL_NAT: {
    /* tokens are slices of the source, not NUL-terminated strings, so the
     * parse is validated against the token bounds instead of '\0' */
    char *endptr;
    unsigned long parsed = strtoul(value.start, &endptr, 10);
    if (endptr == value.start || endptr > value.start + value.length ||
        parsed > 0xFFFFFFFFUL) {
      fprintf(stderr, "Invalid decimal literal: %.*s\n", value.length,
              value.start);
      exit(1);
    }
    u32 out = (u32)parsed;
    WRITE_U32(addr, out);
    mp += s.size;
    break;
  }
  case TOKEN_LITERAL_REAL: {
    i32 out = FLOAT_TO_REAL(atof(value.start));
    WRITE_U32(addr, out);
    mp += s.size;
    break;
  }
  case TOKEN_LITERAL_STR: {
    const char *src = value.start;
    i32 len = 0;
    i32 i = 0;
    while (i < value.length) {
      char c = src[i++];
      if (c == '"') {
        /* unescaped quotes are skipped, never stored */
        continue;
      }
      if (c == '\\' && i < value.length) {
        char escaped = src[i++];
        switch (escaped) {
        case 'n':
          c = '\n';
          break;
        case 't':
          c = '\t';
          break;
        case 'r':
          c = '\r';
          break;
        case '\\':
        case '"':
        case '\'':
          /* store the escaped character itself, not the backslash */
          c = escaped;
          break;
        default:
          i--; /* Rewind for unknown escapes */
        }
      }
      WRITE_U8(addr + 4 + len, c);
      len++;
    }
    u32 size = len + 5; /* 4 (len) + dst_len + 1 (null) */
    s.size = size;
    mp += size;
    WRITE_U32(addr, len);
    WRITE_U8(addr + 4 + len, '\0');
    break;
  }
  default:
    return false;
  }

  next_token_is(TOKEN_SEMICOLON);
  symbol_table_add(st, s);
  return true;
}
/**
 * Parse the name of a variable whose type keyword has already been read and
 * add it to the current scope as a VAR symbol.
 *
 * Only the identifier is consumed; whatever follows it (`,`, `)`, `;`) is
 * left for the caller. An unsupported type token or a name longer than
 * MAX_SYMBOL_NAME_LENGTH prints a message and exits.
 *
 * @param st      scope table receiving the symbol
 * @param regType the type keyword token
 */
void define_var(ScopeTable *st, Token regType) {
  Symbol s;
  s.scope = VAR;

  switch (regType.type) {
  case TOKEN_TYPE_I8: {
    s.type = I8;
    s.size = 1;
    break;
  }
  case TOKEN_TYPE_I16: {
    s.type = I16;
    s.size = 2;
    break;
  }
  case TOKEN_TYPE_INT: {
    s.type = I32;
    s.size = 4;
    break;
  }
  case TOKEN_TYPE_U8: {
    s.type = U8;
    s.size = 1;
    break;
  }
  case TOKEN_TYPE_U16: {
    s.type = U16;
    s.size = 2;
    break;
  }
  case TOKEN_TYPE_NAT: {
    s.type = U32;
    s.size = 4;
    break;
  }
  case TOKEN_TYPE_REAL: {
    s.type = F32;
    s.size = 4;
    break;
  }
  case TOKEN_TYPE_BOOL: {
    s.type = BOOL;
    s.size = 1;
    break;
  }
  case TOKEN_TYPE_STR: {
    s.type = STR;
    s.size = 4; /* not really this type, pointer alias which is 4 */
    break;
  }
  default:
    printf("ERROR at line %d: %.*s\n", regType.line, regType.length,
           regType.start);
    exit(1);
  }

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (name.length > MAX_SYMBOL_NAME_LENGTH) {
    /* report the offending name, not the type keyword in front of it */
    printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }
  mcpy(s.name, (void *)name.start, name.length);
  s.name[name.length] = '\0';
  s.name_length = name.length;

  /* symbol_table_add assigns the slot index as `ref` */
  symbol_table_add(st, s);
}
/**
 * Parse a function header `<name> ( <type> <param>, ... ) {` (the `fn`
 * keyword has already been read) and register it.
 *
 * A new scope is opened for the function, each parameter is added to it via
 * define_var, and the function symbol itself (scope LOCAL, type FUNCTION,
 * ref = current code position `cp`) is added to the enclosing scope. On
 * return the function's scope is the current scope and the opening `{` has
 * been consumed.
 *
 * Syntax errors and an over-long name print a message and exit.
 *
 * NOTE(review): the `{` is consumed here without touching `st->depth`,
 * while the matching `}` is handled by the caller's generic brace logic
 * which decrements it -- confirm the depth bookkeeping is intended.
 *
 * @param st scope table receiving the function and its parameters
 */
void define_function(ScopeTable *st) {
  Symbol s;
  s.scope = LOCAL;
  s.type = FUNCTION;

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (name.length > MAX_SYMBOL_NAME_LENGTH) {
    printf("FUNCTION NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }
  mcpy(s.name, (void *)name.start, name.length);
  s.name[name.length] = '\0';
  s.name_length = name.length;

  next_token_is(TOKEN_LPAREN);

  /* open the function's own scope so the parameters land inside it */
  i32 temp = st->scope_ref;
  st->count++;
  st->scopes[st->count].parent = st->scope_ref;
  st->scope_ref = (i32)st->count;

  Token next = next_token();
  while (next.type != TOKEN_RPAREN) {
    define_var(st, next);
    next = next_token();
    if (next.type == TOKEN_COMMA) {
      next = next_token();
      continue;
    } else if (next.type == TOKEN_RPAREN) {
      break;
    } else {
      printf("ERROR at line %d: %.*s\n", next.line, next.length, next.start);
      exit(1);
    }
  }

  s.ref = cp;
  next_token_is(TOKEN_LBRACE);

  st->scope_ref = temp; /* the function itself belongs to the parent scope */
  symbol_table_add(st, s);
  st->scope_ref = (i32)st->count; /* back into the function's scope */
}
/**
 * Parse the start of a plex definition: `<name> (` (the `plex` keyword has
 * already been read).
 *
 * A GLOBAL symbol of type PLEX is filled in locally, but it is not added to
 * the scope table here. An over-long name or a missing `(` prints a message
 * and exits.
 *
 * @param st scope table (currently unused by this function)
 */
void define_plex(ScopeTable *st) {
  Token ident = next_token_is(TOKEN_IDENTIFIER);
  if (ident.length > MAX_SYMBOL_NAME_LENGTH) {
    printf("PLEX NAME TOO LONG at line %d: %.*s\n", ident.line, ident.length,
           ident.start);
    exit(1);
  }

  Symbol s;
  s.type = PLEX;
  s.scope = GLOBAL;
  s.name_length = ident.length;
  mcpy(s.name, (void *)ident.start, ident.length);
  s.name[ident.length] = '\0';

  next_token_is(TOKEN_LPAREN);
}
/**
 * Read an identifier and add it to the current scope as a label: a LOCAL
 * symbol of type VOID whose `ref` is the current code position `cp`.
 *
 * An over-long name prints a message and exits.
 *
 * @param st scope table receiving the label
 */
void define_branch(ScopeTable *st) {
  Token label = next_token_is(TOKEN_IDENTIFIER);
  if (label.length > MAX_SYMBOL_NAME_LENGTH) {
    printf("BRANCH NAME TOO LONG at line %d: %.*s\n", label.line, label.length,
           label.start);
    exit(1);
  }

  Symbol s;
  s.type = VOID;
  s.scope = LOCAL;
  s.ref = cp;
  s.name_length = label.length;
  mcpy(s.name, (void *)label.start, label.length);
  s.name[label.length] = '\0';

  symbol_table_add(st, s);
}
/**
 * Placeholder for loop definitions; intentionally does nothing yet.
 *
 * @param st scope table (unused)
 */
void define_loop(ScopeTable *st) {
  (void)st;
}
/**
 * First pass: scan the whole source and fill the symbol table.
 *
 * Token handling:
 *  - `{` opens a new scope, `}` returns to the parent scope (or scope 0),
 *  - `plex` / `fn` are handed to define_plex / define_function,
 *  - `const` skips one following token (constants are not implemented yet),
 *  - a type keyword defines a global at depth 0, otherwise a variable that
 *    must be followed by `;`,
 *  - `if` calls define_loop; `loop` / `do` / `for` call define_branch,
 *  - `return` advances `cp` by one so later code positions line up.
 *
 * Lexer errors, a function declared at depth != 0, and a global definition
 * that define_global rejects are fatal (message, exit). The last case used
 * to be ignored silently, which dropped the symbol without any diagnostic.
 *
 * @param source program text
 * @param st     scope table to populate
 */
void build_symbol_table(char *source, ScopeTable *st) {
  init_lexer(source);

  for (;;) {
    Token token = next_token();

    if (token.type == TOKEN_ERROR) {
      printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
      exit(1);
    }
    if (token.type == TOKEN_EOF) {
      break;
    }

    if (token.type == TOKEN_LBRACE) {
      st->count++;
      st->scopes[st->count].parent = st->scope_ref;
      st->scope_ref = (i32)st->count;
      st->depth++;
      continue;
    }

    if (token.type == TOKEN_RBRACE) {
      i32 parent = st->scopes[st->scope_ref].parent;
      if (parent < 0) {
        parent = 0; /* never leave the outermost scope */
      }
      st->scope_ref = parent;
      st->depth--;
      continue;
    }

    if (token.type == TOKEN_KEYWORD_PLEX) {
      if (st->depth != 0) {
        printf("I'm letting it slide, but generally plexes are declared "
               "outside of a scope %d: %.*s\n",
               token.line, token.length, token.start);
      }
      define_plex(st);
      continue;
    }

    if (token.type == TOKEN_KEYWORD_FN) {
      if (st->depth != 0) {
        printf("Functions can only be declared outside of a scope %d: %.*s\n",
               token.line, token.length, token.start);
        exit(1);
      }
      define_function(st);
      continue;
    }

    if (token.type == TOKEN_KEYWORD_CONST) {
      /* FIXME: add consts, for now just make everything */
      next_token();
      continue;
    }

    if (token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 ||
        token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 ||
        token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT ||
        token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR ||
        token.type == TOKEN_TYPE_BOOL) {
      if (st->depth == 0) {
        /* a rejected definition leaves the symbol undefined; stop here
         * instead of failing later with a confusing lookup error */
        if (!define_global(st, token)) {
          printf("Invalid global definition at line %d: %.*s\n", token.line,
                 token.length, token.start);
          exit(1);
        }
        continue;
      }
      define_var(st, token);
      next_token_is(TOKEN_SEMICOLON);
      continue;
    }

    if (token.type == TOKEN_KEYWORD_IF) {
      define_loop(st);
      continue;
    }

    if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_DO ||
        token.type == TOKEN_KEYWORD_FOR) {
      define_branch(st);
      continue;
    }

    if (token.type == TOKEN_KEYWORD_RETURN) {
      Token next = next_token();
      if (next.type == TOKEN_SEMICOLON) {
        /* put 0xFF as return register */
        cp++;
        continue;
      }
      /* only validates the operand here; the instruction is emitted in the
       * second pass */
      get_reg(next, st);
      cp++;
      next_token_is(TOKEN_SEMICOLON);
      continue;
    }

#ifdef DEBUG_COMPILER
    printf("-- %.*s --\n", token.length, token.start);
#endif
  }
}
/**
 * Second pass: scan the source again and emit instructions into `code`.
 *
 * Scopes are re-entered in the same order as in the first pass by counting
 * `{` and `}`. Declarations are skipped by consuming a fixed number of
 * tokens; only `return` produces an instruction at the moment. A lexer
 * error prints a message and stops the pass (it does not exit).
 *
 * NOTE(review): the list of skipped type keywords does not include
 * TOKEN_TYPE_BOOL, unlike the first pass -- confirm whether that is
 * intended.
 *
 * @param source program text
 * @param st     scope table filled by build_symbol_table
 */
void emit_bytecode(char *source, ScopeTable *st) {
  init_lexer(source);

  for (;;) {
    Token token = next_token();

    if (token.type == TOKEN_ERROR) {
      printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
      break;
    }
    if (token.type == TOKEN_EOF) {
      break;
    }

    if (token.type == TOKEN_LBRACE) {
      st->count++;
      st->scopes[st->count].parent = st->scope_ref;
      st->scope_ref = (i32)st->count;
      st->depth++;
      continue;
    }

    if (token.type == TOKEN_RBRACE) {
      i32 enclosing = st->scopes[st->scope_ref].parent;
      if (enclosing < 0) {
        enclosing = 0;
      }
      st->scope_ref = enclosing;
      st->depth--;
      continue;
    }

    if (token.type == TOKEN_KEYWORD_FN || token.type == TOKEN_KEYWORD_PLEX) {
      /* already handled in the first pass: skip up to and including `)` */
      Token skipped;
      do {
        skipped = next_token();
      } while (skipped.type != TOKEN_RPAREN);
      continue;
    }

    if (token.type == TOKEN_KEYWORD_CONST) {
      /* already handled in the first pass */
      next_token(); /* type */
      next_token(); /* var */
      next_token(); /* reg */
      next_token(); /* ; */
      continue;
    }

    bool is_declaration =
        token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 ||
        token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 ||
        token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT ||
        token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR;
    if (is_declaration) {
      /* already handled in the first pass */
      next_token(); /* var */
      next_token(); /* reg */
      next_token(); /* ; */
      continue;
    }

    if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF ||
        token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO ||
        token.type == TOKEN_KEYWORD_FOR) {
      /* already handled in the first pass; no `continue`, so the keyword
       * still reaches the debug trace below */
      next_token(); /* id */
    }

    if (token.type == TOKEN_KEYWORD_RETURN) {
      Token operand = next_token();
      if (operand.type == TOKEN_SEMICOLON) {
        /* put 0xFF as return register */
        code[cp++] = ENCODE_B(OP_RETURN, 255, 0);
        continue;
      }
      u32 reg = get_reg(operand, st);
      code[cp++] = ENCODE_B(OP_RETURN, reg, 0);
      next_token_is(TOKEN_SEMICOLON);
      continue;
    }

#ifdef DEBUG_COMPILER
    printf("-- %.*s --\n", token.length, token.start);
#endif

    /* TODO: identifiers are not handled yet; an "Unknown id" error for
     * unrecognised ones is planned here. */
  }
}
/**
 * Compile `source` in two passes: build the symbol table, then emit the
 * bytecode.
 *
 * Between the passes the code position `cp` and the scope counter
 * `st->count` are reset to 0 so the second pass starts from the beginning.
 *
 * @param st     scope table used by both passes
 * @param source program text
 * @return always true; errors inside the passes terminate the process
 */
bool compile(ScopeTable *st, char *source) {
  /* pass 1: symbols, sizes and code positions */
  build_symbol_table(source, st);

  /* rewind before actually emitting code */
  st->count = 0;
  cp = 0;

  /* pass 2: instructions */
  emit_bytecode(source, st);
  return true;
}