378 lines
10 KiB
C
378 lines
10 KiB
C
#include "compiler.h"
#include "vm.h"

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
|
|
|
|
typedef struct {
|
|
Token current;
|
|
Token previous;
|
|
bool hadError;
|
|
bool panicMode;
|
|
} Parser;
|
|
|
|
/*
 * Operator binding strengths, listed from loosest to tightest.
 * parsePrecedence() compares these numerically, so the order matters.
 */
typedef enum {
  PREC_NONE = 0,
  PREC_ASSIGNMENT = 1, /* = */
  PREC_OR = 2,         /* or */
  PREC_AND = 3,        /* and */
  PREC_EQUALITY = 4,   /* == != */
  PREC_COMPARISON = 5, /* < > <= >= */
  PREC_TERM = 6,       /* + - */
  PREC_FACTOR = 7,     /* * / */
  PREC_UNARY = 8,      /* not */
  PREC_CALL = 9,       /* . () */
  PREC_PRIMARY = 10
} Precedence;
|
|
|
|
typedef void (*ParseFn)(VM *vm);
|
|
|
|
typedef struct {
|
|
ParseFn prefix;
|
|
ParseFn infix;
|
|
Precedence precedence;
|
|
} ParseRule;
|
|
|
|
Parser parser;
|
|
SymbolTable st;
|
|
|
|
const char *internalErrorMsg = "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes.";
|
|
|
|
/*
 * Reports `message` on stderr, prefixed with the line of `token` and, where it
 * makes sense, the token's text.
 *
 * Only the first call after panicMode was cleared prints anything: the
 * function sets panicMode on entry and returns immediately while it is set.
 * A reported error also sets parser.hadError.
 */
void errorAt(Token *token, const char *message) {
  if (parser.panicMode) {
    return;
  }
  parser.panicMode = true;

  fprintf(stderr, "[line %d] Error", token->line);

  switch (token->type) {
  case TOKEN_EOF:
    fprintf(stderr, " at end");
    break;
  case TOKEN_ERROR:
    /* No location text is printed for error tokens. */
    break;
  default:
    fprintf(stderr, " at '%.*s'", token->length, token->start);
    break;
  }

  fprintf(stderr, ": %s\n", message);
  parser.hadError = true;
}
|
|
|
|
void error(const char *message) { errorAt(&parser.previous, message); }
|
|
|
|
void errorAtCurrent(const char *message) { errorAt(&parser.current, message); }
|
|
|
|
void advance() {
|
|
parser.previous = parser.current;
|
|
|
|
for (;;) {
|
|
parser.current = nextToken();
|
|
if (parser.current.type != TOKEN_ERROR)
|
|
break;
|
|
|
|
errorAtCurrent(parser.current.start);
|
|
}
|
|
}
|
|
|
|
/*
 * Consumes the current token when it has the expected `type`; otherwise
 * reports `message` at the current token and consumes nothing.
 */
void consume(TokenType type, const char *message) {
  if (parser.current.type != type) {
    errorAtCurrent(message);
    return;
  }

  advance();
}
|
|
|
|
/* Returns true when the current (unconsumed) token has the given type. */
static bool check(TokenType type) {
  const bool same = (parser.current.type == type);
  return same;
}
|
|
|
|
/*
 * Consumes the current token if it has the given type.
 * Returns true when a token was consumed, false when nothing happened.
 */
static bool match(TokenType type) {
  if (check(type)) {
    advance();
    return true;
  }

  return false;
}
|
|
|
|
/*
 * Appends one instruction word to vm->code at index vm->cp and advances
 * vm->cp by one. The word is built by OP() from the opcode and the three
 * operand fields.
 */
void emitOp(VM *vm, uint8_t opcode, uint8_t dest, uint8_t src1, uint8_t src2) {
  vm->code[vm->cp].u = OP(opcode, dest, src1, src2);
  vm->cp++;
}
|
|
|
|
void expression(VM *vm);
|
|
void statement(VM *vm);
|
|
void declaration(VM *vm);
|
|
ParseRule *getRule(TokenType type);
|
|
void parsePrecedence(VM *vm, Precedence precedence);
|
|
|
|
/*
 * Prefix handler for numeric literals (int, uint and float tokens).
 *
 * Converts the text of the just-consumed token, emits an OP_LOAD whose
 * destination is the frame's next register (rp is post-incremented), and
 * follows it with one code word holding the result of int_alloc / nat_alloc /
 * real_alloc for the value.
 *
 * Conversions use strtol / strtoul / strtof so that malformed or out-of-range
 * literals are reported instead of being silently truncated. Parsing starts at
 * the token's first character and stops at the first character that cannot be
 * part of the number, so the token text does not need to be NUL-terminated at
 * its end (presumably it is a slice of the source buffer -- confirm in the
 * lexer).
 *
 * After reporting an error the value 0 is still emitted so that register
 * bookkeeping stays consistent; compile() fails anyway because hadError is set.
 */
void number(VM *vm) {
  const char *text = parser.previous.start;
  char *end = NULL;

  if (parser.previous.type == TOKEN_INT_LITERAL) {
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);
    if (end == text) {
      error("Invalid number format");
      parsed = 0;
    } else if (errno == ERANGE || parsed < INT32_MIN || parsed > INT32_MAX) {
      /* The value is stored as int32_t; reject anything that would not fit. */
      error("Integer literal out of range.");
      parsed = 0;
    }

    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].u = int_alloc(vm, (int32_t)parsed);
    return;
  }

  if (parser.previous.type == TOKEN_UINT_LITERAL) {
    unsigned long parsed;

    errno = 0;
    parsed = strtoul(text, &end, 10);
    if (end == text) {
      error("Invalid number format");
      parsed = 0;
    } else if (errno == ERANGE || parsed > UINT32_MAX) {
      /* NOTE(review): assumes naturals are 32 bits wide like the other VM
       * words used in this file -- confirm against nat_alloc(). */
      error("Unsigned literal out of range.");
      parsed = 0;
    }

    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].u = nat_alloc(vm, (uint32_t)parsed);
    return;
  }

  if (parser.previous.type == TOKEN_FLOAT_LITERAL) {
    /* strtof converts straight to float; on overflow it returns HUGE_VALF
     * rather than invoking the undefined double-to-float narrowing. */
    float value = strtof(text, &end);

    if (end == text) {
      error("Invalid number format");
      value = 0.0f;
    }

    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].u = real_alloc(vm, value);
    return;
  }

  /* Reached only if the rule table routes a non-numeric token here; the
   * offending token is the one just consumed. */
  error("Invalid number format");
}
|
|
|
|
/*
 * Prefix handler for string literals.
 *
 * Writes the string into VM memory starting at vm->mp: first one word holding
 * the character count, then the characters packed four per word. Afterwards
 * emits an OP_LOAD into the frame's next register followed by a code word
 * holding the address of the length word.
 *
 * The token text includes the surrounding quote characters, which are dropped
 * (length - 2, characters taken from start + 1).
 */
void string(VM *vm) {
  enum { CHARS_PER_WORD = 4 };

  uint32_t length = parser.previous.length - 2;
  const char *chars = parser.previous.start + 1;
  uint32_t str_addr = vm->mp;
  uint32_t i;

  vm->memory[vm->mp++].u = length;

  for (i = 0; i < length; i++) {
    vm->memory[vm->mp].c[i % CHARS_PER_WORD] = chars[i];
    /* Move on once the current word holds four characters. */
    if (i % CHARS_PER_WORD == CHARS_PER_WORD - 1) {
      vm->mp++;
    }
  }

  /* A trailing, partially filled word still belongs to this string: step past
   * it so the next allocation does not overwrite the string's last
   * characters. */
  if (length % CHARS_PER_WORD != 0) {
    vm->mp++;
  }

  /* NOTE(review): this advances allocated.end by fewer words than vm->mp was
   * advanced above (the length word and any partial word are not counted).
   * Left unchanged because the meaning of allocated.end is not visible here --
   * confirm against the VM's allocation helpers. */
  vm->frames[vm->fp].allocated.end += length / CHARS_PER_WORD;

  emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
  vm->code[vm->cp++].u = str_addr;
}
|
|
|
|
/*
 * Prefix handler for '(': compiles the enclosed expression and then requires
 * the closing ')'. The parentheses themselves emit no code.
 */
void grouping(VM *vm) {
  /* The '(' has already been consumed by parsePrecedence(). */
  expression(vm);

  consume(TOKEN_RPAREN, "Expect ')' after expression.");
}
|
|
|
|
/*
 * Prefix handler for unary operators.
 *
 * Compiles the operand at PREC_UNARY. No operator has code generation yet, so
 * nothing is emitted for the operator itself.
 */
void unary(VM *vm) {
  /* Read the operator before the operand parse overwrites parser.previous. */
  const TokenType operatorType = parser.previous.type;

  parsePrecedence(vm, PREC_UNARY);

  /* Placeholder: no operator type is handled so far. */
  (void)operatorType;
}
|
|
|
|
/*
 * Prefix handler for the keyword literals nil, false and true.
 *
 * Emits an OP_LOAD into the frame's next register followed by one code word:
 * 0 for nil and false, 1 for true. Any other token type emits nothing.
 */
static void literal(VM *vm) {
  uint32_t word;

  switch (parser.previous.type) {
  case TOKEN_KEYWORD_NIL:
  case TOKEN_KEYWORD_FALSE:
    word = 0;
    break;
  case TOKEN_KEYWORD_TRUE:
    word = 1;
    break;
  default:
    return;
  }

  emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
  vm->code[vm->cp++].u = word;
}
|
|
|
|
void binary(VM *vm) {
|
|
TokenType operatorType = parser.previous.type;
|
|
ParseRule *rule = getRule(operatorType);
|
|
parsePrecedence(vm, (Precedence)(rule->precedence + 1));
|
|
TokenType operandType = parser.previous.type;
|
|
|
|
Frame f = vm->frames[vm->fp];
|
|
uint32_t src1 = f.rp--;
|
|
uint32_t src2 = f.rp--;
|
|
uint32_t dest = f.rp++;
|
|
|
|
switch (operatorType) {
|
|
case TOKEN_PLUS:
|
|
if (operandType == TOKEN_UINT_LITERAL) {
|
|
emitOp(vm, OP_ADD_UINT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_INT_LITERAL) {
|
|
emitOp(vm, OP_ADD_INT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_FLOAT_LITERAL) {
|
|
emitOp(vm, OP_ADD_REAL, dest, src1, src2);
|
|
} else {
|
|
error("not numeric");
|
|
}
|
|
break;
|
|
case TOKEN_MINUS:
|
|
if (operandType == TOKEN_UINT_LITERAL) {
|
|
emitOp(vm, OP_SUB_UINT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_INT_LITERAL) {
|
|
emitOp(vm, OP_SUB_INT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_FLOAT_LITERAL) {
|
|
emitOp(vm, OP_SUB_REAL, dest, src1, src2);
|
|
} else {
|
|
error("not numeric");
|
|
}
|
|
break;
|
|
case TOKEN_STAR:
|
|
if (operandType == TOKEN_UINT_LITERAL) {
|
|
emitOp(vm, OP_MUL_UINT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_INT_LITERAL) {
|
|
emitOp(vm, OP_MUL_INT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_FLOAT_LITERAL) {
|
|
emitOp(vm, OP_MUL_REAL, dest, src1, src2);
|
|
} else {
|
|
error("not numeric");
|
|
}
|
|
break;
|
|
case TOKEN_SLASH:
|
|
if (operandType == TOKEN_UINT_LITERAL) {
|
|
emitOp(vm, OP_DIV_UINT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_INT_LITERAL) {
|
|
emitOp(vm, OP_DIV_INT, dest, src1, src2);
|
|
} else if (operandType == TOKEN_FLOAT_LITERAL) {
|
|
emitOp(vm, OP_DIV_REAL, dest, src1, src2);
|
|
} else {
|
|
error("not numeric");
|
|
}
|
|
break;
|
|
default:
|
|
return; /* Unreachable. */
|
|
}
|
|
}
|
|
|
|
ParseRule rules[] = {
|
|
[TOKEN_LPAREN] = {grouping, NULL, PREC_NONE},
|
|
[TOKEN_RPAREN] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_LBRACE] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_RBRACE] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_DOT] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_MINUS] = {NULL, binary, PREC_TERM},
|
|
[TOKEN_PLUS] = {NULL, binary, PREC_TERM},
|
|
[TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
|
|
[TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
|
|
[TOKEN_BANG] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_BANG_EQ] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_EQ] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_EQ_EQ] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_GT] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_GTE] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_LT] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_LTE] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_STRING_LITERAL] = {string, NULL, PREC_NONE},
|
|
[TOKEN_INT_LITERAL] = {number, NULL, PREC_NONE},
|
|
[TOKEN_UINT_LITERAL] = {number, NULL, PREC_NONE},
|
|
[TOKEN_FLOAT_LITERAL] = {number, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_ELSE] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_FOR] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_FN] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_IF] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_OPERATOR_AND] = {NULL, binary, PREC_NONE},
|
|
[TOKEN_OPERATOR_OR] = {NULL, binary, PREC_NONE},
|
|
[TOKEN_OPERATOR_NOT] = {unary, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_NIL] = {literal, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_TRUE] = {literal, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_FALSE] = {literal, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_PRINT] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_RETURN] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_THIS] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_LET] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_KEYWORD_WHILE] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
|
|
[TOKEN_EOF] = {NULL, NULL, PREC_NONE},
|
|
};
|
|
|
|
/*
 * Returns the parse rule for a token type.
 *
 * The table only extends as far as its highest designated index, and this file
 * uses at least one token type (TOKEN_TYPE_INT) that has no entry. A type that
 * lies beyond the table therefore gets an empty rule (no handlers, PREC_NONE)
 * instead of an out-of-bounds read.
 */
ParseRule *getRule(TokenType type) {
  static ParseRule noRule = {NULL, NULL, PREC_NONE};
  const size_t ruleCount = sizeof rules / sizeof rules[0];

  /* The cast also maps a negative enum value to an out-of-range index. */
  if ((size_t)type >= ruleCount) {
    return &noRule;
  }

  return &rules[type];
}
|
|
|
|
/*
 * Core of the Pratt parser: compiles an expression whose operators bind at
 * least as tightly as `precedence`.
 *
 * Consumes one token and runs its prefix handler (reporting
 * "Expect expression." when it has none), then keeps consuming infix
 * operators and running their handlers for as long as the upcoming token's
 * precedence is not lower than `precedence`.
 */
void parsePrecedence(VM *vm, Precedence precedence) {
  ParseFn parsePrefix;

  advance();
  parsePrefix = getRule(parser.previous.type)->prefix;
  if (parsePrefix == NULL) {
    error("Expect expression.");
    return;
  }
  parsePrefix(vm);

  for (;;) {
    ParseFn parseInfix;

    if (getRule(parser.current.type)->precedence < precedence) {
      break;
    }

    advance();
    parseInfix = getRule(parser.previous.type)->infix;
    parseInfix(vm);
  }
}
|
|
|
|
/* Compiles a full expression, starting at the loosest real precedence level. */
void expression(VM *vm) {
  const Precedence loosest = PREC_ASSIGNMENT;
  parsePrecedence(vm, loosest);
}
|
|
|
|
/*
 * Compiles the rest of a print statement (the `print` keyword has already
 * been consumed): an expression, a ';', then an OP_DBG_PRINT_STRING
 * instruction whose first source operand is the register holding the
 * expression's value.
 *
 * Operand handlers load into register rp and then increment rp, so the value
 * lives in rp-1. The register is popped through a pointer to the frame so the
 * frame's rp is really updated and the temporary is released.
 */
void printStatement(VM *vm) {
  Frame *frame;

  expression(vm);
  consume(TOKEN_SEMICOLON, "Expect ';' after value.");

  frame = &vm->frames[vm->fp];
  if (frame->rp == 0) {
    /* No value register exists: either the expression failed to compile
     * (already reported; panic mode mutes this message) or the compiler
     * itself is broken. */
    error(internalErrorMsg);
    return;
  }

  frame->rp--;
  emitOp(vm, OP_DBG_PRINT_STRING, 0, frame->rp, 0);
}
|
|
|
|
/*
 * Compiles a statement that consists of a single expression followed by ';'.
 * Nothing is emitted beyond the code for the expression itself.
 */
static void expressionStatement(VM *vm) {
  expression(vm);

  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
}
|
|
|
|
/*
 * Compiles the rest of an int declaration (the type keyword has already been
 * consumed by statement()):
 *
 *     int NAME ;          -- NAME is initialized to 0
 *     int NAME = EXPR ;
 *
 * Records NAME in the symbol table together with the current frame index and
 * the register that will receive the value (the frame's next free register),
 * then compiles the initializer or emits a load of 0.
 *
 * The name is read from the identifier token itself, which this function
 * consumes; it is copied verbatim (identifiers carry no quote characters to
 * strip).
 */
static void intDeclaration(VM *vm) {
  uint32_t length;
  uint32_t i;

  if (!match(TOKEN_IDENTIFIER)) {
    errorAtCurrent("Expect variable name.");
    return;
  }

  length = parser.previous.length;
  if (length > SYMBOL_NAME_SIZE) {
    error("Variable names cannot be longer than 24 characters.");
    return;
  }

  /* NOTE(review): st.sc is not checked against the capacity of st.symbols;
   * the capacity is not visible from this file. */
  st.symbols[st.sc].type = INT;
  st.symbols[st.sc].frame = vm->fp;
  st.symbols[st.sc].reg = vm->frames[vm->fp].rp;

  for (i = 0; i < length; i++) {
    st.symbols[st.sc].name[i] = parser.previous.start[i];
  }
  /* Clear the rest of the slot: the table is reused across compile() calls and
   * would otherwise keep the tail of an older, longer name.
   * Assumes name holds SYMBOL_NAME_SIZE characters, as the length check above
   * implies. */
  for (; i < SYMBOL_NAME_SIZE; i++) {
    st.symbols[st.sc].name[i] = '\0';
  }
  st.sc++;

  if (match(TOKEN_EQ)) {
    expression(vm);
  } else {
    /* initialize as zero/null */
    emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
    vm->code[vm->cp++].i = 0;
  }

  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
}
|
|
|
|
/*
 * Compiles one statement, chosen by its leading token:
 * `print` -> print statement, the int type keyword -> int declaration,
 * anything else -> expression statement.
 */
void statement(VM *vm) {
  if (match(TOKEN_KEYWORD_PRINT)) {
    printStatement(vm);
    return;
  }

  if (match(TOKEN_TYPE_INT)) {
    intDeclaration(vm);
    return;
  }

  expressionStatement(vm);
}
|
|
|
|
/* Compiles one top-level item; for now every item is handled as a statement. */
void declaration(VM *vm) {
  statement(vm);
}
|
|
|
|
/*
 * Compiles `source` into bytecode appended to vm->code.
 *
 * Resets the lexer, the parser's error flags and the symbol count, writes the
 * four characters "main" (no terminator) into st.name, then compiles
 * declarations until end of input and finishes with an OP_HALT instruction.
 *
 * Returns true when no error was reported. Nothing in this file clears
 * panicMode again after an error, so only the first error of a compilation is
 * printed.
 */
bool compile(const char *source, VM *vm) {
  static const char scopeName[] = "main";
  unsigned i;

  initLexer(source);

  parser.hadError = false;
  parser.panicMode = false;

  st.sc = 0;
  for (i = 0; i < sizeof scopeName - 1; i++) {
    st.name[i] = scopeName[i];
  }

  /* Prime the parser with the first token. */
  advance();

  while (!match(TOKEN_EOF)) {
    declaration(vm);
  }

  emitOp(vm, OP_HALT, 0, 0, 0);

  return !parser.hadError;
}
|