undar-lang-fixed-length/tools/compiler/compiler.c

506 lines
13 KiB
C

#include "compiler.h"
#include "parser.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/* Global parser state used by the parsing routines in this file: advance()
 * moves its `current` token into `previous` and then fetches a new `current`. */
Parser parser;
/**
 * Find a symbol by name, starting in scope `scope_ref` and walking outward
 * through each scope's `parent` link.
 *
 * @param table     scope table to search
 * @param name      symbol name (compared over `length` bytes via sleq)
 * @param length    number of bytes in `name`
 * @param scope_ref index of the scope where the search starts
 * @return pointer to the matching symbol stored inside `table`, or nil when
 *         no scope on the parent chain defines the name
 */
Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length,
                            i32 scope_ref) {
  i32 ref = scope_ref;
  for (;;) {
    SymbolTable *scope = &table->scopes[ref];
    for (u32 slot = 0; slot < scope->count; slot++) {
      Symbol *candidate = &scope->symbols[slot];
      /* Cheap length comparison first; only then compare the bytes. */
      if (candidate->name_length != length) {
        continue;
      }
      if (sleq(candidate->name, name, length)) {
        return candidate;
      }
    }
    /* A negative parent marks the outermost scope: nothing left to search. */
    if (scope->parent < 0) {
      return nil;
    }
    ref = scope->parent;
  }
}
/**
 * Append symbol `s` to the scope selected by `table->scope_ref`.
 *
 * Prints a message to stderr and exits the process when the name is already
 * visible, when the scope already holds 255 symbols, or when table_realloc()
 * reports failure.
 *
 * NOTE(review): the duplicate check uses symbol_table_lookup(), which also
 * searches parent scopes, while the message speaks of "this scope" and
 * suggests creating a new scope. Confirm whether shadowing an outer symbol
 * is meant to be rejected.
 *
 * @return index of the newly stored symbol within its scope
 */
u8 symbol_table_add(ScopeTable *table, Symbol s) {
  if (symbol_table_lookup(table, s.name, s.name_length, table->scope_ref) !=
      nil) {
    fprintf(stderr,
            "Error: Symbol '%.*s' already defined, in this scope"
            " please pick a different variable name or create a new scope.\n",
            s.name_length, s.name);
    exit(1);
  }

  if (table->scopes[table->scope_ref].count + 1 > 255) {
    fprintf(stderr, "Error: Only 255 symbols are allowed per scope"
                    " first off: impressive; secondly:"
                    " just create a new scope and keep going.\n");
    exit(1);
  }

  if (!table_realloc(table)) {
    fprintf(stderr,
            "Error: Symbol table is out of memory! This is likely because you "
            " built the assembler in static mode, increase the static size."
            " if you built using malloc, that means your computer is out of"
            " memory. Close a few tabs in your web browser and try again."
            " Count was %d, while capacity was %d\n",
            table->count, table->capacity);
    exit(1);
  }

  /* The slot is read only after table_realloc() has run, as in the original
   * statement order. */
  u8 index = table->scopes[table->scope_ref].count;
  table->scopes[table->scope_ref].symbols[index] = s;
  table->scopes[table->scope_ref].count++;
  return index;
}
/**
 * Resolve a symbol name to its `ref` value, searching from the current scope
 * (`st->scope_ref`) outward.
 *
 * Prints an error to stderr and exits the process when the name is unknown.
 *
 * @param st     scope table to search
 * @param name   symbol name, `length` bytes long
 * @param length number of bytes in `name`
 * @return the `ref` field of the matching symbol
 */
u32 get_ref(ScopeTable *st, const char *name, u32 length) {
  Symbol *sym = symbol_table_lookup(st, name, length, st->scope_ref);
  if (sym == nil) {
    /* The `*` precision of %.*s consumes an int argument, so convert the
     * u32 length explicitly. */
    fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n",
            (int)length, name);
    exit(1);
  }
  return sym->ref;
}
/**
 * Turn a token into a pointer value.
 *
 * - identifier:      the symbol's `ref`, resolved through get_ref()
 * - integer literal: the parsed value, converted to u32
 * - natural literal: the parsed value
 *
 * Any other token type, or a literal that cannot be parsed or does not fit
 * in 32 bits, prints an error to stderr and exits the process.
 *
 * Tokens are treated as (start, length) slices: the text after a literal is
 * whatever follows it in the buffer, not necessarily a NUL terminator.
 */
u32 get_ptr(Token token, ScopeTable *st) {
  if (token.type == TOKEN_IDENTIFIER) {
    return get_ref(st, token.start, token.length);
  }

  if (token.type == TOKEN_LITERAL_INT) {
    char *endptr;
    errno = 0;
    long value = strtol(token.start, &endptr, 10);
    /* strtol (unlike atoi) reports overflow; also insist on a 32-bit fit. */
    if (endptr == token.start || errno == ERANGE || value > 2147483647L ||
        value < -2147483647L - 1) {
      fprintf(stderr, "Invalid integer literal at line %d: %.*s\n", token.line,
              token.length, token.start);
      exit(1);
    }
    return (u32)value;
  }

  if (token.type == TOKEN_LITERAL_NAT) {
    char *endptr;
    errno = 0;
    unsigned long value = strtoul(token.start, &endptr, 10);
    /* The digits must cover the whole token. Comparing against the token end
     * (not against '\0') keeps literals valid when more source follows. */
    if (endptr == token.start || endptr != token.start + token.length) {
      fprintf(stderr, "Invalid decimal literal at line %d: %.*s\n", token.line,
              token.length, token.start);
      exit(1);
    }
    if (errno == ERANGE || value > 0xFFFFFFFFUL) {
      fprintf(stderr, "Decimal literal out of range at line %d: %.*s\n",
              token.line, token.length, token.start);
      exit(1);
    }
    return (u32)value;
  }

  fprintf(stderr, "Error: Not a pointer or symbol at line %d: %.*s\n",
          token.line, token.length, token.start);
  exit(1);
}
/**
 * Turn a token into a register number.
 *
 * An identifier is resolved through get_ref(). A TOKEN_BIG_MONEY token is
 * followed by fetching the next token from the lexer and converting its text
 * with atoi(). Any other token type prints an error to stderr and exits.
 */
u32 get_reg(Token token, ScopeTable *st) {
  switch (token.type) {
  case TOKEN_IDENTIFIER:
    return get_ref(st, token.start, token.length);
  case TOKEN_BIG_MONEY:
    /* The register number is the token that follows the sigil. */
    token = next_token();
    return atoi(token.start);
  default:
    break;
  }

  fprintf(stderr, "Error: Not a register or symbol at line %d: %.*s\n",
          token.line, token.length, token.start);
  exit(1);
}
/* Step the parser one token forward: the token that was current becomes
 * `parser.previous`, and a fresh token from next_token() becomes
 * `parser.current`. */
void advance() {
parser.previous = parser.current;
parser.current = next_token();
}
/**
 * Require the current token to be of type `type` and step past it.
 *
 * When the current token has a different type, the line, the token text and
 * `err_msg` are reported on stderr (like the other fatal diagnostics in this
 * file) and the process exits with status 1.
 */
static void consume(TokenType type, char *err_msg) {
  if (parser.current.type != type) {
    fprintf(stderr, "ERROR at line %d: %.*s %s\n", parser.current.line,
            parser.current.length, parser.current.start, err_msg);
    exit(1);
  }
  advance();
}
/**
 * Advance to the next token and require it to be an identifier or a
 * register reference.
 *
 * On TOKEN_BIG_MONEY the parser advances once more, so that
 * `parser.current` is the token following the sigil. Any other token type
 * is reported on stderr and the process exits with status 1.
 */
void next_id_or_reg() {
  advance();
  if (parser.current.type == TOKEN_IDENTIFIER) {
    return;
  }
  if (parser.current.type == TOKEN_BIG_MONEY) {
    advance();
    return;
  }
  /* Fatal diagnostics go to stderr, matching the rest of this file. */
  fprintf(stderr, "Not an ID or register at line %d: %.*s\n",
          parser.current.line, parser.current.length, parser.current.start);
  exit(1);
}
/**
 * Advance to the next token and require it to be an identifier or a numeric
 * literal (int, nat or real).
 *
 * Any other token type is reported on stderr and the process exits with
 * status 1.
 */
void next_id_or_ptr() {
  advance();
  switch (parser.current.type) {
  case TOKEN_IDENTIFIER:
  case TOKEN_LITERAL_NAT:
  case TOKEN_LITERAL_INT:
  case TOKEN_LITERAL_REAL:
    return;
  default:
    break;
  }
  /* This routine checks for an ID or a pointer literal, so say so (the old
   * text mentioned a register). */
  fprintf(stderr, "Not an ID or pointer at line %d: %.*s\n",
          parser.current.line, parser.current.length, parser.current.start);
  exit(1);
}
/* Forward declarations: the expression handlers below call each other
 * (e.g. grouping() -> expression() -> parsePrecedence() -> handlers), so
 * these must be visible before their definitions. */
static void expression();
static ParseRule *getRule(TokenType type);
static void parsePrecedence(Precedence precedence);
static void number() {
switch (parser.previous.type) {
case TOKEN_LITERAL_INT: {
i32 out = atoi(parser.previous.start);
if (out <= I8_MAX && out >= I8_MIN) {
code[cp++] = OP_PUSH_8;
code[cp++] = (out) & 0xFF;
return;
}
if (out <= I16_MAX && out >= I16_MIN) {
code[cp++] = OP_PUSH_16;
code[cp++] = (out) & 0xFF;
code[cp++] = ((out) >> 8) & 0xFF;
return;
}
code[cp++] = OP_PUSH_32;
code[cp++] = (out) & 0xFF;
code[cp++] = ((out) >> 8) & 0xFF;
code[cp++] = ((out) >> 16) & 0xFF;
code[cp++] = ((out) >> 24) & 0xFF;
return;
}
case TOKEN_LITERAL_NAT: {
char *endptr;
u32 out = (u32)strtoul(parser.previous.start, &endptr, 10);
if (endptr == parser.previous.start || *endptr != '\0') {
fprintf(stderr, "Invalid 'real' number: '%.*s'\n", parser.previous.length,
parser.previous.start);
exit(1);
}
if (out <= U8_MAX) {
code[cp++] = OP_PUSH_8;
code[cp++] = (out) & 0xFF;
return;
}
if (out <= U16_MAX) {
code[cp++] = OP_PUSH_16;
code[cp++] = (out) & 0xFF;
code[cp++] = ((out) >> 8) & 0xFF;
return;
}
code[cp++] = OP_PUSH_32;
code[cp++] = (out) & 0xFF;
code[cp++] = ((out) >> 8) & 0xFF;
code[cp++] = ((out) >> 16) & 0xFF;
code[cp++] = ((out) >> 24) & 0xFF;
return;
}
case TOKEN_LITERAL_REAL: {
i32 out = FLOAT_TO_REAL(atof(parser.previous.start));
code[cp++] = OP_PUSH_32;
code[cp++] = (out) & 0xFF;
code[cp++] = ((out) >> 8) & 0xFF;
code[cp++] = ((out) >> 16) & 0xFF;
code[cp++] = ((out) >> 24) & 0xFF;
return;
}
default: {
fprintf(stderr, "Unknown immediate: '%.*s'\n", parser.previous.length,
parser.previous.start);
exit(1);
}
}
}
/* Parse one full expression: everything at PREC_ASSIGNMENT or tighter. */
static void expression() { parsePrecedence(PREC_ASSIGNMENT); }
/* Prefix handler registered for TOKEN_LPAREN in the rules table: parses the
 * parenthesized expression, then requires the closing ')' (consume() exits
 * with an error message when it is missing). */
static void grouping() {
expression();
consume(TOKEN_RPAREN, "Expected ')'.");
}
static void unary() {
TokenType operatorType = parser.previous.type;
parsePrecedence(PREC_UNARY);
switch (operatorType) {
case TOKEN_MINUS: {
code[cp++] = OP_NEG;
break;
}
case TOKEN_BANG: {
code[cp++] = OP_NOT;
break;
}
default:
return;
}
}
static void binary() {
TokenType operatorType = parser.previous.type;
ParseRule *rule = getRule(operatorType);
parsePrecedence((Precedence)(rule->precedence + 1));
switch (operatorType) {
case TOKEN_PLUS: {
switch (parser.previous.type) {
case TOKEN_LITERAL_INT:
code[cp++] = OP_ADD_INT;
break;
case TOKEN_LITERAL_NAT:
code[cp++] = OP_ADD_NAT;
break;
case TOKEN_LITERAL_REAL:
code[cp++] = OP_ADD_REAL;
break;
case TOKEN_IDENTIFIER:
printf("FIXME: find the identifier's type for add\n");
break;
default:
printf("Unknown Add Arg=%d\n", parser.previous.type);
return; // Unreachable.
}
break;
}
case TOKEN_MINUS: {
switch (parser.previous.type) {
case TOKEN_LITERAL_INT:
code[cp++] = OP_SUB_INT;
break;
case TOKEN_LITERAL_NAT:
code[cp++] = OP_SUB_NAT;
break;
case TOKEN_LITERAL_REAL:
code[cp++] = OP_SUB_REAL;
break;
case TOKEN_IDENTIFIER:
printf("FIXME: find the identifier's type for sub\n");
break;
default:
printf("Unknown Sub Arg=%d\n", parser.previous.type);
return; // Unreachable.
}
break;
}
case TOKEN_STAR: {
switch (parser.previous.type) {
case TOKEN_LITERAL_INT:
code[cp++] = OP_MUL_INT;
break;
case TOKEN_LITERAL_NAT:
code[cp++] = OP_MUL_NAT;
break;
case TOKEN_LITERAL_REAL:
code[cp++] = OP_MUL_REAL;
break;
case TOKEN_IDENTIFIER:
printf("FIXME: find the identifier's type for mul\n");
break;
default:
printf("Unknown Mul Arg=%d\n", parser.previous.type);
return; // Unreachable.
}
break;
}
case TOKEN_SLASH: {
switch (parser.previous.type) {
case TOKEN_LITERAL_INT:
code[cp++] = OP_DIV_INT;
break;
case TOKEN_LITERAL_NAT:
code[cp++] = OP_DIV_NAT;
break;
case TOKEN_LITERAL_REAL:
code[cp++] = OP_DIV_REAL;
break;
case TOKEN_IDENTIFIER:
printf("FIXME: find the identifier's type for div\n");
break;
default:
printf("Unknown Div Arg=%d\n", parser.previous.type);
return; // Unreachable.
}
break;
}
case TOKEN_EQ_EQ: {
code[cp++] = OP_EQ;
break;
}
case TOKEN_GT: {
code[cp++] = OP_GT;
break;
}
case TOKEN_GTE: {
code[cp++] = OP_GE;
break;
}
case TOKEN_LT: {
code[cp++] = OP_LT;
break;
}
case TOKEN_LTE: {
code[cp++] = OP_LE;
break;
}
default:
return; // Unreachable.
}
}
/**
 * Prefix handler for the keyword literals: emits OP_PUSH_8 followed by 1 for
 * `true`, or by 0 for `false` and `nil`. Any other token emits nothing.
 */
static void literal() {
  int value;

  switch (parser.previous.type) {
  case TOKEN_KEYWORD_TRUE:
    value = 1;
    break;
  case TOKEN_KEYWORD_NIL:
  case TOKEN_KEYWORD_FALSE:
    value = 0;
    break;
  default:
    return; /* Not a keyword literal: nothing to emit. */
  }

  code[cp++] = OP_PUSH_8;
  code[cp++] = value;
}
static void string() {
u32 addr = mp;
const char *src = parser.previous.start + 1;
i32 len = 0;
i32 i = 0;
while (i < parser.previous.length - 2) {
char c = src[i++];
if (c == '\\' && i < parser.previous.length - 2) {
switch (src[i++]) {
case 'n':
c = '\n';
break;
case 't':
c = '\t';
break;
case 'r':
c = '\r';
break;
case '\\':
case '"':
case '\'':
break;
default:
i--; /* Rewind for unknown escapes */
}
}
WRITE_U8(addr + 4 + len, c);
len++;
}
u32 size = len + 5; /* 4 (len) + dst_len + 1 (null) */
mp += size;
WRITE_U32(addr, len);
WRITE_U8(addr + 4 + len, '\0');
// TODO: this really should always be tied to a global or local variable
// we can fake it for now
WRITE_U32(fp + lp, addr);
lp++;
}
/*
 * Rule table for the precedence-driven expression parser, indexed by token
 * type (see getRule). parsePrecedence() reads each entry's `prefix`, `infix`
 * and `precedence` fields; the positional initializers below are presumably
 * in that order — confirm against the ParseRule declaration.
 *
 * A NULL handler means the token cannot appear in that position. Token types
 * with no entry below but an index inside the array are zero-initialized.
 */
ParseRule rules[] = {
[TOKEN_LPAREN] = {grouping, NULL, PREC_NONE},
[TOKEN_RPAREN] = {NULL, NULL, PREC_NONE},
[TOKEN_LBRACE] = {NULL, NULL, PREC_NONE},
[TOKEN_RBRACE] = {NULL, NULL, PREC_NONE},
[TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
[TOKEN_DOT] = {NULL, NULL, PREC_NONE},
[TOKEN_MINUS] = {unary, binary, PREC_TERM},
[TOKEN_PLUS] = {NULL, binary, PREC_TERM},
[TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
[TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
[TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
[TOKEN_BANG] = {unary, NULL, PREC_NONE},
[TOKEN_BANG_EQ] = {NULL, binary, PREC_EQUALITY},
[TOKEN_EQ] = {NULL, NULL, PREC_NONE},
[TOKEN_EQ_EQ] = {NULL, binary, PREC_EQUALITY},
[TOKEN_GT] = {NULL, binary, PREC_COMPARISON},
[TOKEN_GTE] = {NULL, binary, PREC_COMPARISON},
[TOKEN_LT] = {NULL, binary, PREC_COMPARISON},
[TOKEN_LTE] = {NULL, binary, PREC_COMPARISON},
[TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE},
[TOKEN_LITERAL_STR] = {string, NULL, PREC_NONE},
[TOKEN_LITERAL_INT] = {number, NULL, PREC_NONE},
[TOKEN_LITERAL_NAT] = {number, NULL, PREC_NONE},
[TOKEN_LITERAL_REAL] = {number, NULL, PREC_NONE},
[TOKEN_AND] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_PLEX] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_ELSE] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_FALSE] = {literal, NULL, PREC_NONE},
[TOKEN_KEYWORD_FOR] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_FN] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_IF] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_NIL] = {literal, NULL, PREC_NONE},
[TOKEN_OPERATOR_OR] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_RETURN] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_TRUE] = {literal, NULL, PREC_NONE},
[TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
[TOKEN_EOF] = {NULL, NULL, PREC_NONE},
};
/**
 * Look up the parse rule for a token type.
 *
 * The rules table only extends to its highest listed token. A token type
 * outside the table yields an empty rule (no prefix handler, no infix
 * handler, PREC_NONE) instead of reading past the end of the array.
 *
 * @return pointer to the rule; never NULL
 */
ParseRule *getRule(TokenType type) {
  static ParseRule no_rule = {NULL, NULL, PREC_NONE};
  size_t rule_count = sizeof rules / sizeof rules[0];

  /* The unsigned conversion also catches a negative enum value. */
  if ((size_t)type >= rule_count) {
    return &no_rule;
  }
  return &rules[type];
}
/**
 * Core of the expression parser: compile one operand, then keep folding in
 * infix operators whose precedence is at least `precedence`.
 *
 * The token consumed first must have a prefix handler; otherwise
 * error("Expect expression.") is called and the function returns.
 */
void parsePrecedence(Precedence precedence) {
  advance();

  ParseFn prefixRule = getRule(parser.previous.type)->prefix;
  if (prefixRule == NULL) {
    error("Expect expression.");
    return;
  }
  prefixRule();

  for (;;) {
    /* Stop at the first token that binds more loosely than requested. */
    if (precedence > getRule(parser.current.type)->precedence) {
      break;
    }
    advance();
    ParseFn infixRule = getRule(parser.previous.type)->infix;
    infixRule();
  }
}
/**
 * Compile `source` as a single expression.
 *
 * Initializes the lexer on `source`, reads the first token, parses one
 * expression, and then requires the next token to be TOKEN_EOF (consume()
 * prints an error and exits the process otherwise). An OP_HALT byte is
 * appended to `code` afterwards.
 *
 * `st` is not used yet (it is only passed to USED()).
 *
 * @return true whenever the function returns
 */
bool compile(ScopeTable *st, char *source) {
USED(st);
initLexer(source);
advance();
expression();
consume(TOKEN_EOF, "Cannot find end of expression.");
// technically should not need, but just in case
code[cp++] = OP_HALT;
return true;
}