/*
 * Single-pass compiler: a Pratt (precedence-climbing) parser that pulls
 * tokens from the lexer and emits register-machine instructions straight
 * into vm->code.
 *
 * Register convention used throughout this file: vm->frames[vm->fp].rp is the
 * index of the next free register in the current frame. Every value-producing
 * expression stores its result in the register at the old rp and bumps rp.
 */
#include "compiler.h"
#include "vm.h"

#include <stdio.h>
#include <stdlib.h>

/* Parser state: one token of lookahead plus error bookkeeping. */
typedef struct {
  Token current;
  Token previous;
  bool hadError;
  bool panicMode;
} Parser;

/* Binding strengths, weakest first. */
typedef enum {
  PREC_NONE,
  PREC_ASSIGNMENT, /* = */
  PREC_OR,         /* or */
  PREC_AND,        /* and */
  PREC_EQUALITY,   /* == != */
  PREC_COMPARISON, /* < > <= >= */
  PREC_TERM,       /* + - */
  PREC_FACTOR,     /* * / */
  PREC_UNARY,      /* not */
  PREC_CALL,       /* . () */
  PREC_PRIMARY
} Precedence;

typedef void (*ParseFn)(VM *vm);

/* One row of the Pratt table: how a token parses in prefix and infix position. */
typedef struct {
  ParseFn prefix;
  ParseFn infix;
  Precedence precedence;
} ParseRule;

Parser parser;
SymbolTable st;

const char *internalErrorMsg =
    "FLAGRANT COMPILER ERROR\n\nCompiler over.\nBug = Very Yes.";

/*
 * Report a compile error at `token` on stderr and mark the parse as failed.
 * While panicMode is set, further errors are suppressed; nothing in this file
 * clears panicMode again except compile().
 */
void errorAt(Token *token, const char *message) {
  if (parser.panicMode)
    return;
  parser.panicMode = true;

  fprintf(stderr, "[line %d] Error", token->line);
  if (token->type == TOKEN_EOF) {
    fprintf(stderr, " at end");
  } else if (token->type == TOKEN_ERROR) {
    /* No lexeme to show: advance() passes the token text as the message. */
  } else {
    /* %.*s requires an int precision argument. */
    fprintf(stderr, " at '%.*s'", (int)token->length, token->start);
  }
  fprintf(stderr, ": %s\n", message);

  parser.hadError = true;
}

/* Report an error at the token that was just consumed. */
void error(const char *message) { errorAt(&parser.previous, message); }

/* Report an error at the lookahead token. */
void errorAtCurrent(const char *message) {
  errorAt(&parser.current, message);
}

/*
 * Shift the lookahead into `previous` and fetch the next token, reporting and
 * skipping any TOKEN_ERROR tokens the lexer produces.
 */
void advance(void) {
  parser.previous = parser.current;
  for (;;) {
    parser.current = nextToken();
    if (parser.current.type != TOKEN_ERROR)
      break;
    errorAtCurrent(parser.current.start);
  }
}

/* Consume the lookahead if it has the given type; otherwise report `message`. */
void consume(TokenType type, const char *message) {
  if (parser.current.type == type) {
    advance();
    return;
  }
  errorAtCurrent(message);
}

/* True when the lookahead token has the given type. */
static bool check(TokenType type) { return parser.current.type == type; }

/* Consume the lookahead and return true if it has the given type. */
static bool match(TokenType type) {
  if (!check(type))
    return false;
  advance();
  return true;
}

/* Append one encoded instruction word to the code stream. */
void emitOp(VM *vm, uint8_t opcode, uint8_t dest, uint8_t src1, uint8_t src2) {
  vm->code[vm->cp++].u = OP(opcode, dest, src1, src2);
}

/*
 * Emit an OP_LOAD whose destination is the next free register of the current
 * frame, and claim that register. The caller must append the operand word.
 */
static void emitLoadNextReg(VM *vm) {
  emitOp(vm, OP_LOAD, vm->frames[vm->fp].rp++, 0, 0);
}

void expression(VM *vm);
void statement(VM *vm);
void declaration(VM *vm);
ParseRule *getRule(TokenType type);
void parsePrecedence(VM *vm, Precedence precedence);

/*
 * Prefix rule for numeric literals: allocate the constant through the VM's
 * int/nat/real allocator and load the returned word into a fresh register.
 */
void number(VM *vm) {
  const char *text = parser.previous.start;

  switch (parser.previous.type) {
  case TOKEN_INT_LITERAL: {
    int32_t value = (int32_t)strtol(text, NULL, 10);
    emitLoadNextReg(vm);
    vm->code[vm->cp++].u = int_alloc(vm, value);
    return;
  }
  case TOKEN_UINT_LITERAL: {
    long value = strtol(text, NULL, 10);
    emitLoadNextReg(vm);
    vm->code[vm->cp++].u = nat_alloc(vm, value);
    return;
  }
  case TOKEN_FLOAT_LITERAL: {
    float value = (float)strtod(text, NULL);
    emitLoadNextReg(vm);
    vm->code[vm->cp++].u = real_alloc(vm, value);
    return;
  }
  default:
    break;
  }

  /* The offending token is the one just consumed, not the lookahead. */
  error("Invalid number format");
}

/*
 * Prefix rule for string literals. Writes a length word followed by the
 * characters (without the surrounding quotes) packed four per memory word,
 * then loads the address of the length word into a fresh register.
 */
void string(VM *vm) {
  uint32_t length = parser.previous.length - 2; /* drop both quotes */
  const char *chars = parser.previous.start + 1;
  uint32_t str_addr = vm->mp;

  vm->memory[vm->mp++].u = length;
  for (uint32_t i = 0; i < length; i++) {
    vm->memory[vm->mp].c[i % 4] = chars[i];
    if (i % 4 == 3)
      vm->mp++;
  }

  /*
   * A trailing partial word must be closed off too; otherwise the next
   * allocation starts at the same word and overwrites the string's tail.
   */
  if (length % 4 != 0) {
    for (uint32_t k = length % 4; k < 4; k++)
      vm->memory[vm->mp].c[k] = 0;
    vm->mp++;
  }

  /*
   * NOTE(review): mp advanced by 1 + ceil(length / 4) words but only
   * length / 4 is added here, as in the original. Confirm what
   * allocated.end is meant to track before changing it.
   */
  vm->frames[vm->fp].allocated.end += length / 4;

  emitLoadNextReg(vm);
  vm->code[vm->cp++].u = str_addr;
}

/* Prefix rule for '(': parse the inner expression and require ')'. */
void grouping(VM *vm) {
  expression(vm);
  consume(TOKEN_RPAREN, "Expect ')' after expression.");
}

/*
 * Prefix rule for unary operators. The operand is compiled, but no operator
 * emits code yet (the switch has no cases).
 */
void unary(VM *vm) {
  TokenType operatorType = parser.previous.type;
  parsePrecedence(vm, PREC_UNARY);
  switch (operatorType) {
  default:
    return;
  }
}

/* Prefix rule for nil/false/true: load 0, 0 or 1 into a fresh register. */
static void literal(VM *vm) {
  uint32_t word;

  switch (parser.previous.type) {
  case TOKEN_KEYWORD_NIL:
  case TOKEN_KEYWORD_FALSE:
    word = 0;
    break;
  case TOKEN_KEYWORD_TRUE:
    word = 1;
    break;
  default:
    return;
  }

  emitLoadNextReg(vm);
  vm->code[vm->cp++].u = word;
}

/*
 * Choose the opcode variant matching the operand's literal type.
 * Returns false (leaving *opcode untouched) when the operand is not numeric.
 */
static bool pickTypedOp(TokenType operandType, uint8_t uintOp, uint8_t intOp,
                        uint8_t realOp, uint8_t *opcode) {
  switch (operandType) {
  case TOKEN_UINT_LITERAL:
    *opcode = uintOp;
    return true;
  case TOKEN_INT_LITERAL:
    *opcode = intOp;
    return true;
  case TOKEN_FLOAT_LITERAL:
    *opcode = realOp;
    return true;
  default:
    return false;
  }
}

/*
 * Infix rule for + - * /. The left operand has already been compiled; this
 * compiles the right operand and emits the typed arithmetic instruction.
 *
 * The operand type is taken from the last token of the right operand, so only
 * expressions ending in a numeric literal are accepted as numeric.
 *
 * Registers: the operands sit in the two most recently claimed registers
 * (rp - 2 and rp - 1). The result overwrites the left operand's register and
 * the right operand's register is released.
 */
void binary(VM *vm) {
  TokenType operatorType = parser.previous.type;
  ParseRule *rule = getRule(operatorType);
  parsePrecedence(vm, (Precedence)(rule->precedence + 1));
  TokenType operandType = parser.previous.type;

  uint8_t opcode = 0;
  bool numeric;
  switch (operatorType) {
  case TOKEN_PLUS:
    numeric = pickTypedOp(operandType, OP_ADD_UINT, OP_ADD_INT, OP_ADD_REAL,
                          &opcode);
    break;
  case TOKEN_MINUS:
    numeric = pickTypedOp(operandType, OP_SUB_UINT, OP_SUB_INT, OP_SUB_REAL,
                          &opcode);
    break;
  case TOKEN_STAR:
    numeric = pickTypedOp(operandType, OP_MUL_UINT, OP_MUL_INT, OP_MUL_REAL,
                          &opcode);
    break;
  case TOKEN_SLASH:
    numeric = pickTypedOp(operandType, OP_DIV_UINT, OP_DIV_INT, OP_DIV_REAL,
                          &opcode);
    break;
  default:
    return; /* Unreachable. */
  }

  /* Work on the live frame: a by-value copy would drop the rp update. */
  Frame *frame = &vm->frames[vm->fp];
  if (frame->rp < 2) {
    /* An operand failed to compile; error() is silent if already reported. */
    error(internalErrorMsg);
    return;
  }
  uint32_t left = frame->rp - 2;
  uint32_t right = frame->rp - 1;
  frame->rp--; /* two operands in, one result out */

  if (!numeric) {
    error("not numeric");
    return;
  }
  emitOp(vm, opcode, left, left, right);
}

/*
 * Pratt table, indexed by token type.
 * and/or name `binary` as their infix rule but carry PREC_NONE, so
 * parsePrecedence never dispatches to them.
 */
ParseRule rules[] = {
    [TOKEN_LPAREN] = {grouping, NULL, PREC_NONE},
    [TOKEN_RPAREN] = {NULL, NULL, PREC_NONE},
    [TOKEN_LBRACE] = {NULL, NULL, PREC_NONE},
    [TOKEN_RBRACE] = {NULL, NULL, PREC_NONE},
    [TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
    [TOKEN_DOT] = {NULL, NULL, PREC_NONE},
    [TOKEN_MINUS] = {NULL, binary, PREC_TERM},
    [TOKEN_PLUS] = {NULL, binary, PREC_TERM},
    [TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
    [TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
    [TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
    [TOKEN_BANG] = {NULL, NULL, PREC_NONE},
    [TOKEN_BANG_EQ] = {NULL, NULL, PREC_NONE},
    [TOKEN_EQ] = {NULL, NULL, PREC_NONE},
    [TOKEN_EQ_EQ] = {NULL, NULL, PREC_NONE},
    [TOKEN_GT] = {NULL, NULL, PREC_NONE},
    [TOKEN_GTE] = {NULL, NULL, PREC_NONE},
    [TOKEN_LT] = {NULL, NULL, PREC_NONE},
    [TOKEN_LTE] = {NULL, NULL, PREC_NONE},
    [TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE},
    [TOKEN_STRING_LITERAL] = {string, NULL, PREC_NONE},
    [TOKEN_INT_LITERAL] = {number, NULL, PREC_NONE},
    [TOKEN_UINT_LITERAL] = {number, NULL, PREC_NONE},
    [TOKEN_FLOAT_LITERAL] = {number, NULL, PREC_NONE},
    [TOKEN_TYPE_INT] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_ELSE] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_FOR] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_FN] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_IF] = {NULL, NULL, PREC_NONE},
    [TOKEN_OPERATOR_AND] = {NULL, binary, PREC_NONE},
    [TOKEN_OPERATOR_OR] = {NULL, binary, PREC_NONE},
    [TOKEN_OPERATOR_NOT] = {unary, NULL, PREC_NONE},
    [TOKEN_KEYWORD_NIL] = {literal, NULL, PREC_NONE},
    [TOKEN_KEYWORD_TRUE] = {literal, NULL, PREC_NONE},
    [TOKEN_KEYWORD_FALSE] = {literal, NULL, PREC_NONE},
    [TOKEN_KEYWORD_PRINT] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_RETURN] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_THIS] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_LET] = {NULL, NULL, PREC_NONE},
    [TOKEN_KEYWORD_WHILE] = {NULL, NULL, PREC_NONE},
    [TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
    [TOKEN_EOF] = {NULL, NULL, PREC_NONE},
};

/*
 * Look up the parse rule for a token type. Token types that fall outside the
 * table get an empty rule instead of an out-of-bounds read.
 */
ParseRule *getRule(TokenType type) {
  static ParseRule noRule = {NULL, NULL, PREC_NONE};
  size_t index = (size_t)type;

  if (index >= sizeof rules / sizeof rules[0])
    return &noRule;
  return &rules[index];
}

/*
 * Core of the Pratt parser: compile one prefix expression, then keep folding
 * in infix operators for as long as they bind at least as tightly as
 * `precedence`.
 */
void parsePrecedence(VM *vm, Precedence precedence) {
  advance();
  ParseFn prefixRule = getRule(parser.previous.type)->prefix;
  if (prefixRule == NULL) {
    error("Expect expression.");
    return;
  }
  prefixRule(vm);

  while (precedence <= getRule(parser.current.type)->precedence) {
    advance();
    ParseFn infixRule = getRule(parser.previous.type)->infix;
    if (infixRule == NULL) {
      /* Table row has a precedence but no handler; do not call through NULL. */
      error("Expect expression.");
      return;
    }
    infixRule(vm);
  }
}

/* Compile a full expression (lowest binding level above PREC_NONE). */
void expression(VM *vm) { parsePrecedence(vm, PREC_ASSIGNMENT); }

/*
 * `print <expr>;` : compile the expression, then emit OP_DBG_PRINT_STRING on
 * the register holding its result and release that register.
 */
void printStatement(VM *vm) {
  expression(vm);
  consume(TOKEN_SEMICOLON, "Expect ';' after value.");

  /* Use the live frame so the register is really released. */
  Frame *frame = &vm->frames[vm->fp];
  if (frame->rp == 0) {
    /* The expression produced no value; silent if already reported. */
    error(internalErrorMsg);
    return;
  }
  /* rp is one past the value, so step back before naming the register. */
  emitOp(vm, OP_DBG_PRINT_STRING, 0, --frame->rp, 0);
}

/*
 * `<expr>;` : compile the expression and require the terminator.
 * NOTE(review): the result register stays claimed afterwards; confirm whether
 * expression statements should release it.
 */
static void expressionStatement(VM *vm) {
  expression(vm);
  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
}

/*
 * `int <name> [= <expr>];` : record the variable in the symbol table, bound to
 * the next free register of the current frame, then compile the initializer
 * (or a load of 0) so that its value lands in that register.
 * Called after the `int` keyword has been consumed.
 */
static void intDeclaration(VM *vm) {
  /* The name is the identifier after the keyword, not the keyword itself. */
  if (!match(TOKEN_IDENTIFIER)) {
    errorAtCurrent("Expect variable name.");
    return;
  }

  uint32_t length = parser.previous.length;
  if (length > SYMBOL_NAME_SIZE) {
    error("Variable names cannot be longer than 24 characters.");
    return;
  }

  /* insert variable name in symbol table */
  /* NOTE(review): st.sc is not checked against the table's capacity. */
  st.symbols[st.sc].type = INT;
  st.symbols[st.sc].frame = vm->fp;
  st.symbols[st.sc].reg = vm->frames[vm->fp].rp;
  for (uint32_t i = 0; i < length; i++) {
    st.symbols[st.sc].name[i] = parser.previous.start[i];
  }
  if (length < SYMBOL_NAME_SIZE) {
    /* Terminate when there is room so stale bytes never extend the name. */
    st.symbols[st.sc].name[length] = '\0';
  }
  st.sc++;

  if (match(TOKEN_EQ)) {
    expression(vm);
  } else {
    /* initialize as zero/null */
    emitLoadNextReg(vm);
    vm->code[vm->cp++].i = 0;
  }
  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
}

/* Dispatch on the leading token: print, int declaration, or expression. */
void statement(VM *vm) {
  if (match(TOKEN_KEYWORD_PRINT)) {
    printStatement(vm);
  } else if (match(TOKEN_TYPE_INT)) {
    intDeclaration(vm);
  } else {
    expressionStatement(vm);
  }
}

/* Top-level grammar entry; currently every declaration is a statement. */
void declaration(VM *vm) { statement(vm); }

/*
 * Compile `source` into vm->code, ending with OP_HALT.
 * Resets the lexer, the parser flags and the symbol table first.
 * Returns true when no compile error was reported.
 */
bool compile(const char *source, VM *vm) {
  initLexer(source);
  parser.hadError = false;
  parser.panicMode = false;

  st.sc = 0;
  st.name[0] = 'm';
  st.name[1] = 'a';
  st.name[2] = 'i';
  st.name[3] = 'n';

  advance();
  while (!match(TOKEN_EOF)) {
    declaration(vm);
  }
  emitOp(vm, OP_HALT, 0, 0, 0);

  return !parser.hadError;
}