/* reality-engine/src/compiler.c */

#include "compiler.h"
#include "vm.h"
#include <stdio.h>
#include <stdlib.h>
/* Parser state shared by the parsing routines in this file. */
typedef struct {
Token current; /* token examined by check()/match()/consume() */
Token previous; /* value `current` held before the most recent advance() */
bool hadError; /* set by errorAt(); compile() returns its negation */
bool panicMode; /* set by errorAt(); while set, errorAt() prints nothing */
} Parser;
/*
 * Operator precedence levels, declared from lowest to highest.
 * parsePrecedence() compares these values numerically, and binary() uses
 * `precedence + 1` to parse its right-hand operand.
 */
typedef enum {
PREC_NONE,
PREC_ASSIGNMENT, /* = */
PREC_OR, /* or */
PREC_AND, /* and */
PREC_EQUALITY, /* == != */
PREC_COMPARISON, /* < > <= >= */
PREC_TERM, /* + - */
PREC_FACTOR, /* * / */
PREC_UNARY, /* not */
PREC_CALL, /* . () */
PREC_PRIMARY
} Precedence;
/* Signature shared by the prefix and infix handlers stored in a ParseRule. */
typedef void (*ParseFn)(VM *vm);
/* One row of the `rules` table: how a token type is handled by parsePrecedence(). */
typedef struct {
ParseFn prefix; /* called when the token starts an expression; NULL if it cannot */
ParseFn infix; /* called when the token follows a parsed operand */
Precedence precedence; /* compared against the level passed to parsePrecedence() */
} ParseRule;
/* Parser state for the compilation in progress; compile() clears its error flags. */
Parser parser;
/* Symbol table filled in by intDeclaration(); compile() resets its symbol count. */
SymbolTable st;
/* Message text for internal compiler faults; not referenced by the code visible in this file. */
const char *internalErrorMsg = "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes.";
/*
 * Print a compile error for `token` to stderr and mark the parser as failed.
 *
 * The report has the form "[line N] Error<location>: <message>", where the
 * location is " at end" for TOKEN_EOF, empty for TOKEN_ERROR, and the quoted
 * lexeme for every other token. If the parser is already in panic mode the
 * call does nothing, so only the first error is printed.
 */
void errorAt(Token *token, const char *message) {
  if (parser.panicMode) {
    return;
  }
  parser.panicMode = true;

  fprintf(stderr, "[line %d] Error", token->line);
  switch (token->type) {
  case TOKEN_EOF:
    fprintf(stderr, " at end");
    break;
  case TOKEN_ERROR:
    /* No location text is printed for error tokens. */
    break;
  default:
    fprintf(stderr, " at '%.*s'", token->length, token->start);
    break;
  }
  fprintf(stderr, ": %s\n", message);

  parser.hadError = true;
}
void error(const char *message) { errorAt(&parser.previous, message); }
void errorAtCurrent(const char *message) { errorAt(&parser.current, message); }
/*
 * Move the parser forward by one token.
 *
 * The old `current` becomes `previous`, then tokens are pulled from
 * nextToken() until one that is not TOKEN_ERROR arrives. Each error token is
 * reported through errorAtCurrent(), using the token's `start` as the message.
 */
void advance() {
  parser.previous = parser.current;
  parser.current = nextToken();
  while (parser.current.type == TOKEN_ERROR) {
    errorAtCurrent(parser.current.start);
    parser.current = nextToken();
  }
}
/*
 * Require the current token to be of `type`.
 *
 * On a match the token is consumed with advance(); otherwise `message` is
 * reported at the current token and nothing is consumed.
 */
void consume(TokenType type, const char *message) {
  if (parser.current.type != type) {
    errorAtCurrent(message);
    return;
  }
  advance();
}
/* Return true when the current (not yet consumed) token has type `type`. */
static bool check(TokenType type) {
  TokenType upcoming = parser.current.type;
  return upcoming == type;
}
/*
 * Consume the current token if it has type `type`.
 * Returns true when a token was consumed, false when the parser did not move.
 */
static bool match(TokenType type) {
  bool matched = check(type);
  if (matched) {
    advance();
  }
  return matched;
}
/*
 * Append one instruction word to the VM's code array.
 * The word is built by OP() from the opcode and the three operand fields and
 * stored at index `vm->cp`, which is then incremented.
 */
void emitOp(VM *vm, uint8_t opcode, uint8_t dest, uint8_t src1, uint8_t src2) {
  vm->code[vm->cp].u = OP(opcode, dest, src1, src2);
  vm->cp++;
}
/*
 * Forward declarations: the expression handlers below call these functions
 * before their definitions appear later in the file.
 */
void expression(VM *vm);
void statement(VM *vm);
void declaration(VM *vm);
ParseRule *getRule(TokenType type);
void parsePrecedence(VM *vm, Precedence precedence);
/*
 * Prefix handler for numeric literals (the literal is `parser.previous`).
 *
 * Emits an OP_LOAD into the frame's next free register, followed by one extra
 * code word holding the value returned by int_alloc()/nat_alloc()/real_alloc()
 * for the parsed literal.
 *
 * Conversions use the strto* family: the unsigned literal is parsed with
 * strtoul() so values above LONG_MAX are not pushed through a signed `long`,
 * and the float literal is parsed directly as a float with strtof().
 * Parsing stops at the first character that is not part of the number.
 *
 * If the token is not one of the three numeric literal kinds, an error is
 * reported at that token.
 */
void number(VM *vm) {
  const char *text = parser.previous.start;

  switch (parser.previous.type) {
  case TOKEN_INT_LITERAL: {
    int32_t value = (int32_t)strtol(text, NULL, 10);
    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].u = int_alloc(vm, value);
    return;
  }
  case TOKEN_UINT_LITERAL: {
    unsigned long value = strtoul(text, NULL, 10);
    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].u = nat_alloc(vm, value);
    return;
  }
  case TOKEN_FLOAT_LITERAL: {
    float value = strtof(text, NULL);
    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].u = real_alloc(vm, value);
    return;
  }
  default:
    break;
  }

  /* The offending token is the one just consumed, not the upcoming one. */
  error("Invalid number format");
}
/*
 * Prefix handler for string literals (the literal is `parser.previous`).
 *
 * Layout written to VM memory starting at the current `vm->mp`:
 *   word 0      : character count (the lexeme without its two quote characters)
 *   words 1..n  : the characters, packed four per word through the `.c` member
 *
 * `vm->mp` is advanced past every word that received data, including a
 * partially filled last word, so a later allocation cannot overwrite the tail
 * of the string. An OP_LOAD into the next free register is then emitted,
 * followed by a code word holding the address of word 0.
 */
void string(VM *vm) {
  uint32_t length = parser.previous.length - 2;
  const char *chars = parser.previous.start + 1; /* skip the opening quote */
  uint32_t str_addr = vm->mp;
  uint32_t i;

  vm->memory[vm->mp++].u = length;
  for (i = 0; i < length; i++) {
    vm->memory[vm->mp + i / 4].c[i % 4] = chars[i];
  }
  /* Round up so a trailing word holding 1-3 characters is claimed too. */
  vm->mp += (length + 3) / 4;

  /*
   * NOTE(review): this count leaves out the length word and any partial
   * trailing word. Kept as in the original because the meaning of
   * `allocated.end` is not visible here - confirm whether it should track mp.
   */
  vm->frames[vm->fp].allocated.end += length / 4;

  emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
  vm->code[vm->cp++].u = str_addr;
}
/*
 * Prefix handler for '(': parse a full expression, then require the
 * closing ')'.
 */
void grouping(VM *vm) {
  parsePrecedence(vm, PREC_ASSIGNMENT);
  consume(TOKEN_RPAREN, "Expect ')' after expression.");
}
/*
 * Prefix handler for unary operators: parses the operand at PREC_UNARY.
 * No instruction is emitted for the operator itself; the operator token is
 * captured but currently has no effect on the generated code.
 */
void unary(VM *vm) {
  TokenType operatorType = parser.previous.type;

  parsePrecedence(vm, PREC_UNARY);

  /* No operator has code generation yet. */
  (void)operatorType;
}
/*
 * Prefix handler for the keyword literals nil, false and true.
 * Emits an OP_LOAD into the next free register followed by one code word:
 * 1 for true, 0 for nil and false. Any other token emits nothing.
 */
static void literal(VM *vm) {
  TokenType keyword = parser.previous.type;
  uint32_t word;

  if (keyword == TOKEN_KEYWORD_TRUE) {
    word = 1;
  } else if (keyword == TOKEN_KEYWORD_NIL || keyword == TOKEN_KEYWORD_FALSE) {
    word = 0;
  } else {
    return;
  }

  emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
  vm->code[vm->cp++].u = word;
}
void binary(VM *vm) {
TokenType operatorType = parser.previous.type;
ParseRule *rule = getRule(operatorType);
parsePrecedence(vm, (Precedence)(rule->precedence + 1));
TokenType operandType = parser.previous.type;
Frame f = vm->frames[vm->fp];
uint32_t src1 = f.rp--;
uint32_t src2 = f.rp--;
uint32_t dest = f.rp++;
switch (operatorType) {
case TOKEN_PLUS:
if (operandType == TOKEN_UINT_LITERAL) {
emitOp(vm, OP_ADD_UINT, dest, src1, src2);
} else if (operandType == TOKEN_INT_LITERAL) {
emitOp(vm, OP_ADD_INT, dest, src1, src2);
} else if (operandType == TOKEN_FLOAT_LITERAL) {
emitOp(vm, OP_ADD_REAL, dest, src1, src2);
} else {
error("not numeric");
}
break;
case TOKEN_MINUS:
if (operandType == TOKEN_UINT_LITERAL) {
emitOp(vm, OP_SUB_UINT, dest, src1, src2);
} else if (operandType == TOKEN_INT_LITERAL) {
emitOp(vm, OP_SUB_INT, dest, src1, src2);
} else if (operandType == TOKEN_FLOAT_LITERAL) {
emitOp(vm, OP_SUB_REAL, dest, src1, src2);
} else {
error("not numeric");
}
break;
case TOKEN_STAR:
if (operandType == TOKEN_UINT_LITERAL) {
emitOp(vm, OP_MUL_UINT, dest, src1, src2);
} else if (operandType == TOKEN_INT_LITERAL) {
emitOp(vm, OP_MUL_INT, dest, src1, src2);
} else if (operandType == TOKEN_FLOAT_LITERAL) {
emitOp(vm, OP_MUL_REAL, dest, src1, src2);
} else {
error("not numeric");
}
break;
case TOKEN_SLASH:
if (operandType == TOKEN_UINT_LITERAL) {
emitOp(vm, OP_DIV_UINT, dest, src1, src2);
} else if (operandType == TOKEN_INT_LITERAL) {
emitOp(vm, OP_DIV_INT, dest, src1, src2);
} else if (operandType == TOKEN_FLOAT_LITERAL) {
emitOp(vm, OP_DIV_REAL, dest, src1, src2);
} else {
error("not numeric");
}
break;
default:
return; /* Unreachable. */
}
}
/*
 * Parse table indexed by token type: {prefix handler, infix handler,
 * infix precedence}. Token types without a designator here that fall inside
 * the array are zero-initialized, i.e. {NULL, NULL, PREC_NONE}.
 */
ParseRule rules[] = {
[TOKEN_LPAREN] = {grouping, NULL, PREC_NONE},
[TOKEN_RPAREN] = {NULL, NULL, PREC_NONE},
[TOKEN_LBRACE] = {NULL, NULL, PREC_NONE},
[TOKEN_RBRACE] = {NULL, NULL, PREC_NONE},
[TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
[TOKEN_DOT] = {NULL, NULL, PREC_NONE},
/* Arithmetic operators: infix only (there is no prefix minus). */
[TOKEN_MINUS] = {NULL, binary, PREC_TERM},
[TOKEN_PLUS] = {NULL, binary, PREC_TERM},
[TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
[TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
[TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
/* Comparison and equality tokens have no handlers yet. */
[TOKEN_BANG] = {NULL, NULL, PREC_NONE},
[TOKEN_BANG_EQ] = {NULL, NULL, PREC_NONE},
[TOKEN_EQ] = {NULL, NULL, PREC_NONE},
[TOKEN_EQ_EQ] = {NULL, NULL, PREC_NONE},
[TOKEN_GT] = {NULL, NULL, PREC_NONE},
[TOKEN_GTE] = {NULL, NULL, PREC_NONE},
[TOKEN_LT] = {NULL, NULL, PREC_NONE},
[TOKEN_LTE] = {NULL, NULL, PREC_NONE},
[TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE},
/* Literals: prefix handlers only. */
[TOKEN_STRING_LITERAL] = {string, NULL, PREC_NONE},
[TOKEN_INT_LITERAL] = {number, NULL, PREC_NONE},
[TOKEN_UINT_LITERAL] = {number, NULL, PREC_NONE},
[TOKEN_FLOAT_LITERAL] = {number, NULL, PREC_NONE},
[TOKEN_KEYWORD_ELSE] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_FOR] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_FN] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_IF] = {NULL, NULL, PREC_NONE},
/*
 * NOTE(review): `and`/`or` name binary() as their infix handler but carry
 * PREC_NONE, so the loop in parsePrecedence() never dispatches to it.
 */
[TOKEN_OPERATOR_AND] = {NULL, binary, PREC_NONE},
[TOKEN_OPERATOR_OR] = {NULL, binary, PREC_NONE},
[TOKEN_OPERATOR_NOT] = {unary, NULL, PREC_NONE},
[TOKEN_KEYWORD_NIL] = {literal, NULL, PREC_NONE},
[TOKEN_KEYWORD_TRUE] = {literal, NULL, PREC_NONE},
[TOKEN_KEYWORD_FALSE] = {literal, NULL, PREC_NONE},
[TOKEN_KEYWORD_PRINT] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_RETURN] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_THIS] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_LET] = {NULL, NULL, PREC_NONE},
[TOKEN_KEYWORD_WHILE] = {NULL, NULL, PREC_NONE},
[TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
[TOKEN_EOF] = {NULL, NULL, PREC_NONE},
};
/*
 * Look up the parse rule for a token type.
 *
 * The `rules` array is only as long as its highest designator, and not every
 * token type used by this file has an entry in it. A type outside the array
 * yields an empty rule (no handlers, PREC_NONE) instead of reading past the
 * end of the table.
 */
ParseRule *getRule(TokenType type) {
  static ParseRule noRule = {NULL, NULL, PREC_NONE};
  size_t count = sizeof rules / sizeof rules[0];

  if ((size_t)type >= count) {
    return &noRule;
  }
  return &rules[type];
}
/*
 * Core of the precedence-climbing parser.
 *
 * Consumes one token and runs its prefix handler; a token without one is
 * reported as "Expect expression.". Then, for as long as the upcoming token's
 * precedence is at least `precedence`, that token is consumed and its infix
 * handler is run.
 */
void parsePrecedence(VM *vm, Precedence precedence) {
  advance();

  ParseRule *lead = getRule(parser.previous.type);
  if (lead->prefix == NULL) {
    error("Expect expression.");
    return;
  }
  lead->prefix(vm);

  for (;;) {
    ParseRule *next = getRule(parser.current.type);
    if (next->precedence < precedence) {
      break;
    }
    /* After advance() the operator is `parser.previous`, i.e. `next`'s token. */
    advance();
    next->infix(vm);
  }
}
/* Parse one expression, starting at the lowest binding precedence level. */
void expression(VM *vm) {
  const Precedence lowest = PREC_ASSIGNMENT;
  parsePrecedence(vm, lowest);
}
/*
 * Compile a print statement; the `print` keyword has already been consumed.
 *
 * Parses the expression, requires the terminating ';', then emits
 * OP_DBG_PRINT_STRING with the expression's result register as its first
 * source operand.
 *
 * Values are loaded with `rp++`, so the result is in register rp-1. That
 * register is released on the VM's frame itself (not on a copy) so it can be
 * reused by the next statement.
 */
void printStatement(VM *vm) {
  expression(vm);
  consume(TOKEN_SEMICOLON, "Expect ';' after value.");

  Frame *frame = &vm->frames[vm->fp];
  if (frame->rp == 0) {
    /* The expression failed to compile, so there is no register to print. */
    error("Expect value to print.");
    return;
  }

  uint32_t valueReg = --frame->rp;
  emitOp(vm, OP_DBG_PRINT_STRING, 0, valueReg, 0);
}
/* Compile an expression used as a statement and require its closing ';'. */
static void expressionStatement(VM *vm) {
  parsePrecedence(vm, PREC_ASSIGNMENT);
  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
}
/*
 * Compile `int <name> [= <expression>] ;`. The `int` keyword has already been
 * consumed by the caller.
 *
 * The identifier is consumed first, so the name recorded in the symbol table
 * is the variable's own lexeme. The symbol is bound to the frame's next free
 * register, which is the register the initializer (or the default zero load)
 * writes its value to.
 *
 * Errors are reported when the identifier is missing or when its name is
 * longer than SYMBOL_NAME_SIZE characters.
 *
 * NOTE(review): `st.sc` is not checked against the capacity of `st.symbols`,
 * which is not visible in this file.
 */
static void intDeclaration(VM *vm) {
  uint32_t length;
  uint32_t i;

  if (!match(TOKEN_IDENTIFIER)) {
    errorAtCurrent("Expect variable name.");
    return;
  }

  /* insert variable name in symbol table */
  length = parser.previous.length;
  if (length > SYMBOL_NAME_SIZE) {
    error("Variable names cannot be longer than 24 characters.");
    return;
  }
  st.symbols[st.sc].type = INT;
  st.symbols[st.sc].frame = vm->fp;
  st.symbols[st.sc].reg = vm->frames[vm->fp].rp;
  for (i = 0; i < length; i++) {
    st.symbols[st.sc].name[i] = parser.previous.start[i];
  }
  if (length < SYMBOL_NAME_SIZE) {
    /* Terminate so bytes left in a reused slot are not read as part of the name. */
    st.symbols[st.sc].name[length] = '\0';
  }
  st.sc++;

  if (match(TOKEN_EQ)) {
    expression(vm);
  } else {
    /* initialize as zero/null */
    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].i = 0;
  }
  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
}
/*
 * Compile one statement, chosen by its leading token: a print statement,
 * an int declaration, or otherwise an expression statement.
 */
void statement(VM *vm) {
  if (match(TOKEN_KEYWORD_PRINT)) {
    printStatement(vm);
    return;
  }
  if (match(TOKEN_TYPE_INT)) {
    intDeclaration(vm);
    return;
  }
  expressionStatement(vm);
}
/* Compile one top-level item; currently this simply forwards to statement(). */
void declaration(VM *vm) {
  statement(vm);
}
/*
 * Compile `source` into `vm`'s code array.
 *
 * Initializes the lexer, clears the parser's error flags, resets the symbol
 * count and writes the four characters "main" into the symbol table's name.
 * Declarations are then compiled until the end-of-file token is consumed,
 * and a final OP_HALT is emitted.
 *
 * Returns true when no compile error was reported.
 */
bool compile(const char *source, VM *vm) {
  static const char scopeName[4] = {'m', 'a', 'i', 'n'};
  int k;

  initLexer(source);

  parser.hadError = false;
  parser.panicMode = false;

  st.sc = 0;
  for (k = 0; k < 4; k++) {
    st.name[k] = scopeName[k];
  }

  advance();
  for (;;) {
    if (match(TOKEN_EOF)) {
      break;
    }
    declaration(vm);
  }

  emitOp(vm, OP_HALT, 0, 0, 0);
  return !parser.hadError;
}