# Add a scanner, a single-pass expression compiler, and a REPL / file runner.
#
# What this patch changes:
#   src/common.h    Defines DEBUG_PRINT_CODE.
#   src/scanner.h   New. Declares TokenType, Token, newScanner(), scanToken().
#   src/scanner.c   New. Hand-written scanner over a NUL-terminated source
#                   string; skips whitespace and "//" line comments, counts
#                   lines, and recognises keywords with a first-letter switch
#                   plus memcmp (checkKeyword).
#   src/compiler.h  New. Declares compile(source, chunk).
#   src/compiler.c  New. Table-driven precedence parser: rules[] maps a token
#                   type to a prefix function, an infix function and a
#                   precedence. Only grouping, numbers, unary '-' and binary
#                   + - * / have parse functions so far. compile() emits
#                   bytecode straight into the Chunk, appends OP_RETURN, and
#                   returns false if any error was reported. With
#                   DEBUG_PRINT_CODE defined, an error-free chunk is
#                   disassembled.
#   src/main.c      Replaces the hard-coded test chunk with repl() (no
#                   argument) and runFile() (one argument). Exit codes used:
#                   64 usage, 65 compile error, 70 runtime error, 74 I/O or
#                   allocation failure.
#   src/vm.c/.h     interpret() now takes source text, compiles it into a
#                   local Chunk, runs it, and frees the Chunk on every path.
#
# NOTE(review): this text was rebuilt from a copy whose line breaks had been
# collapsed. Hunk line counts were used to place every line, including blank
# ones, and they add up for all hunks. Three things could not be recovered
# and should be confirmed against the repository:
#   - Indentation (two spaces are used here); apply with
#     "git apply --ignore-whitespace" if the tree differs.
#   - The operands of the angle-bracket #include lines. They are filled in
#     from the library calls each file makes (fprintf/strtod, fgets/malloc/
#     exit, strlen/memcmp); <stdint.h> in common.h is presumed from the use
#     of uint8_t.
#   - The exact position of blank context lines in main.c, vm.c and vm.h.
#
# NOTE(review): code-level observations left unchanged so the patch still
# matches its commit:
#   - readFile() stores the long returned by ftell() in a size_t without
#     checking for -1L, and does not check fseek().
#   - parsePrecedence() calls infixRule() without a NULL check; this is safe
#     only while every rule with a precedence above PREC_NONE has an infix
#     function, which holds for the table in this patch.
#   - The usage message in main() prints "clox".

diff --git a/src/common.h b/src/common.h
index e5224fd..98d48b6 100644
--- a/src/common.h
+++ b/src/common.h
@@ -6,5 +6,6 @@
 #include <stdint.h>
 
 #define DEBUG_TRACE_EXECUTION
+#define DEBUG_PRINT_CODE
 
 #endif
diff --git a/src/compiler.c b/src/compiler.c
new file mode 100644
index 0000000..82c316d
--- /dev/null
+++ b/src/compiler.c
@@ -0,0 +1,250 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "compiler.h"
+#include "scanner.h"
+
+#ifdef DEBUG_PRINT_CODE
+#include "debug.h"
+#endif
+
+typedef struct {
+  Token current;
+  Token previous;
+  bool hadError;
+  bool panicMode;
+} Parser;
+
+typedef enum {
+  PREC_NONE,
+  PREC_ASSIGNMENT,  // =
+  PREC_OR,          // or
+  PREC_AND,         // and
+  PREC_EQUALITY,    // == !=
+  PREC_COMPARISON,  // < > <= >=
+  PREC_TERM,        // + -
+  PREC_FACTOR,      // * /
+  PREC_UNARY,       // ! -
+  PREC_CALL,        // . ()
+  PREC_PRIMARY
+} Precedence;
+
+typedef void (*ParseFn)();
+
+typedef struct {
+  ParseFn prefix;
+  ParseFn infix;
+  Precedence precedence;
+} ParseRule;
+
+Parser parser;
+Chunk* compilingChunk;
+
+static Chunk* currentChunk() {
+  return compilingChunk;
+}
+
+static void errorAt(Token* token, const char* message) {
+  if (parser.panicMode) return;
+  parser.panicMode = true;
+  fprintf(stderr, "[line %d] Error", token->line);
+
+  if (token->type == TOKEN_EOF) {
+    fprintf(stderr, " at end");
+  } else if (token->type == TOKEN_ERROR) {
+    // Nothing.
+  } else {
+    fprintf(stderr, " at '%.*s'", token->length, token->start);
+  }
+
+  fprintf(stderr, ": %s\n", message);
+  parser.hadError = true;
+}
+
+static void error(const char* message) {
+  errorAt(&parser.previous, message);
+}
+
+static void errorAtCurrent(const char* message) {
+  errorAt(&parser.current, message);
+}
+
+static void advance() {
+  parser.previous = parser.current;
+
+  for (;;) {
+    parser.current = scanToken();
+    if (parser.current.type != TOKEN_ERROR) break;
+
+    errorAtCurrent(parser.current.start);
+  }
+}
+
+static void consume(TokenType type, const char* message) {
+  if (parser.current.type == type) {
+    advance();
+    return;
+  }
+
+  errorAtCurrent(message);
+}
+
+static void emitByte(uint8_t byte) {
+  writeChunk(currentChunk(), byte, parser.previous.line);
+}
+
+static void emitBytes(uint8_t byte1, uint8_t byte2) {
+  emitByte(byte1);
+  emitByte(byte2);
+}
+
+static void emitReturn() {
+  emitByte(OP_RETURN);
+}
+
+static uint8_t makeConstant(Value value) {
+  int constant = addConstant(currentChunk(), value);
+  if (constant > UINT8_MAX) {
+    error("Too many constants in one chunk.");
+    return 0;
+  }
+
+  return (uint8_t)constant;
+}
+
+static void emitConstant(Value value) {
+  emitBytes(OP_CONSTANT, makeConstant(value));
+}
+
+static void endCompiler() {
+  emitReturn();
+#ifdef DEBUG_PRINT_CODE
+  if (!parser.hadError) {
+    disassembleChunk(currentChunk(), "code");
+  }
+#endif
+}
+
+static void expression();
+static ParseRule* getRule(TokenType type);
+static void parsePrecedence(Precedence precedence);
+
+static void expression() {
+  parsePrecedence(PREC_ASSIGNMENT);
+}
+
+static void grouping() {
+  expression();
+  consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
+}
+
+static void number() {
+  double value = strtod(parser.previous.start, NULL);
+  emitConstant(value);
+}
+
+static void binary() {
+  TokenType operatorType = parser.previous.type;
+  ParseRule* rule = getRule(operatorType);
+  parsePrecedence((Precedence)(rule->precedence + 1));
+
+  switch (operatorType) {
+    case TOKEN_PLUS: emitByte(OP_ADD); break;
+    case TOKEN_MINUS: emitByte(OP_SUBTRACT); break;
+    case TOKEN_STAR: emitByte(OP_MULTIPLY); break;
+    case TOKEN_SLASH: emitByte(OP_DIVIDE); break;
+    default: return; // Unreachable.
+  }
+}
+
+static void unary() {
+  TokenType operatorType = parser.previous.type;
+
+  // Compile the operand.
+  parsePrecedence(PREC_UNARY);
+
+  // Emit the operator instruction.
+  switch (operatorType) {
+    case TOKEN_MINUS: emitByte(OP_NEGATE); break;
+    default: return; // Unreachable.
+  }
+}
+
+ParseRule rules[] = {
+  [TOKEN_LEFT_PAREN] = {grouping, NULL, PREC_NONE},
+  [TOKEN_RIGHT_PAREN] = {NULL, NULL, PREC_NONE},
+  [TOKEN_LEFT_BRACE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_RIGHT_BRACE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
+  [TOKEN_DOT] = {NULL, NULL, PREC_NONE},
+  [TOKEN_MINUS] = {unary, binary, PREC_TERM},
+  [TOKEN_PLUS] = {NULL, binary, PREC_TERM},
+  [TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
+  [TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
+  [TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
+  [TOKEN_BANG] = {NULL, NULL, PREC_NONE},
+  [TOKEN_BANG_EQUAL] = {NULL, NULL, PREC_NONE},
+  [TOKEN_EQUAL] = {NULL, NULL, PREC_NONE},
+  [TOKEN_EQUAL_EQUAL] = {NULL, NULL, PREC_NONE},
+  [TOKEN_GREATER] = {NULL, NULL, PREC_NONE},
+  [TOKEN_GREATER_EQUAL] = {NULL, NULL, PREC_NONE},
+  [TOKEN_LESS] = {NULL, NULL, PREC_NONE},
+  [TOKEN_LESS_EQUAL] = {NULL, NULL, PREC_NONE},
+  [TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE},
+  [TOKEN_STRING] = {NULL, NULL, PREC_NONE},
+  [TOKEN_NUMBER] = {number, NULL, PREC_NONE},
+  [TOKEN_AND] = {NULL, NULL, PREC_NONE},
+  [TOKEN_TYPE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_ELSE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_FALSE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_FOR] = {NULL, NULL, PREC_NONE},
+  [TOKEN_FN] = {NULL, NULL, PREC_NONE},
+  [TOKEN_IF] = {NULL, NULL, PREC_NONE},
+  [TOKEN_NIL] = {NULL, NULL, PREC_NONE},
+  [TOKEN_OR] = {NULL, NULL, PREC_NONE},
+  [TOKEN_PRINT] = {NULL, NULL, PREC_NONE},
+  [TOKEN_RETURN] = {NULL, NULL, PREC_NONE},
+  [TOKEN_SUPER] = {NULL, NULL, PREC_NONE},
+  [TOKEN_THIS] = {NULL, NULL, PREC_NONE},
+  [TOKEN_TRUE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_LET] = {NULL, NULL, PREC_NONE},
+  [TOKEN_WHILE] = {NULL, NULL, PREC_NONE},
+  [TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
+  [TOKEN_EOF] = {NULL, NULL, PREC_NONE},
+};
+
+static ParseRule* getRule(TokenType type) {
+  return &rules[type];
+}
+
+static void parsePrecedence(Precedence precedence) {
+  advance();
+  ParseFn prefixRule = getRule(parser.previous.type)->prefix;
+  if (prefixRule == NULL) {
+    error("Expect expression.");
+    return;
+  }
+
+  prefixRule();
+
+  while (precedence <= getRule(parser.current.type)->precedence) {
+    advance();
+    ParseFn infixRule = getRule(parser.previous.type)->infix;
+    infixRule();
+  }
+}
+
+bool compile(const char* source, Chunk* chunk) {
+  newScanner(source);
+  compilingChunk = chunk;
+
+  parser.hadError = false;
+  parser.panicMode = false;
+
+  advance();
+  expression();
+  consume(TOKEN_EOF, "Expect end of expression.");
+  endCompiler();
+  return !parser.hadError;
+}
diff --git a/src/compiler.h b/src/compiler.h
new file mode 100644
index 0000000..67eb4d0
--- /dev/null
+++ b/src/compiler.h
@@ -0,0 +1,9 @@
+#ifndef ztl_compiler_h
+#define ztl_compiler_h
+
+#include "vm.h"
+
+bool compile(const char* source, Chunk* chunk);
+
+#endif
+
diff --git a/src/main.c b/src/main.c
index ec32b79..5f73ee6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,36 +1,77 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
 #include "common.h"
 #include "chunk.h"
 #include "debug.h"
 #include "vm.h"
 
-int main(int argc, const char** argv) {
+static void repl() {
+  char line[1024];
+  for (;;) {
+    printf("> ");
+
+    if (!fgets(line, sizeof(line), stdin)) {
+      printf("\n");
+      break;
+    }
+
+    interpret(line);
+  }
+}
+
+static char *readFile(const char *path) {
+  FILE *file = fopen(path, "rb");
+  if (file == NULL) {
+    fprintf(stderr, "Could not open file \"%s\".\n", path);
+    exit(74);
+  }
+
+  fseek(file, 0L, SEEK_END);
+  size_t fileSize = ftell(file);
+  rewind(file);
+
+  char *buffer = (char*)malloc(fileSize + 1);
+  if (buffer == NULL) {
+    fprintf(stderr, "Not enough memory to read \"%s\".\n", path);
+    exit(74);
+  }
+
+  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
+  if (bytesRead < fileSize) {
+    fprintf(stderr, "Could not read file \"%s\".\n", path);
+    exit(74);
+  }
+
+  buffer[bytesRead] = '\0';
+
+  fclose(file);
+  return buffer;
+}
+
+static void runFile(const char *path) {
+  char *source = readFile(path);
+  InterpretResult result = interpret(source);
+  free(source);
+
+  if (result == INTERPRET_COMPILE_ERROR) exit(65);
+  if (result == INTERPRET_RUNTIME_ERROR) exit(70);
+}
+
+int main(int argc, const char* *argv) {
   newVM();
 
-  Chunk chunk;
-  newChunk(&chunk);
+  if (argc == 1) {
+    repl();
+  } else if (argc == 2) {
+    runFile(argv[1]);
+  } else {
+    fprintf(stderr, "Usage: clox [path]\n");
+    exit(64);
+  }
 
-  int constant = addConstant(&chunk, 1.2);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
-
-  constant = addConstant(&chunk, 3.4);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
-
-  writeChunk(&chunk, OP_ADD, 123);
-
-  constant = addConstant(&chunk, 5.6);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
-
-  writeChunk(&chunk, OP_DIVIDE, 123);
-  writeChunk(&chunk, OP_NEGATE, 123);
-
-  writeChunk(&chunk, OP_RETURN, 1);
-
-  disassembleChunk(&chunk, "test chunk");
   freeVM();
-  freeChunk(&chunk);
   return 0;
 }
 
diff --git a/src/scanner.c b/src/scanner.c
new file mode 100644
index 0000000..505167d
--- /dev/null
+++ b/src/scanner.c
@@ -0,0 +1,240 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "common.h"
+#include "scanner.h"
+
+typedef struct {
+  const char *start;
+  const char *current;
+  int line;
+} Scanner;
+
+Scanner scanner;
+
+void newScanner(const char* source) {
+  scanner.start = source;
+  scanner.current = source;
+  scanner.line = 1;
+}
+
+static Token newToken(TokenType type) {
+  Token token;
+  token.type = type;
+  token.start = scanner.start;
+  token.length = (int)(scanner.current - scanner.start);
+  token.line = scanner.line;
+  return token;
+}
+
+static Token errorToken(const char* message) {
+  Token token;
+  token.type = TOKEN_ERROR;
+  token.start = message;
+  token.length = (int)strlen(message);
+  token.line = scanner.line;
+  return token;
+}
+
+static bool isAtEnd() {
+  return *scanner.current == '\0';
+}
+
+static char peek() {
+  return *scanner.current;
+}
+
+static char peekNext() {
+  if (isAtEnd()) return '\0';
+  return scanner.current[1];
+}
+
+static char advance() {
+  scanner.current++;
+  return scanner.current[-1];
+}
+
+static bool match(char expected) {
+  if (isAtEnd()) return false;
+  if (*scanner.current != expected) return false;
+  scanner.current++;
+  return true;
+}
+
+static void skipWhitespace() {
+  for (;;) {
+    char c = peek();
+    switch (c) {
+      case ' ':
+      case '\r':
+      case '\t':
+        advance();
+        break;
+      case '\n':
+        scanner.line++;
+        advance();
+        break;
+      case '/':
+        if (peekNext() == '/') {
+          // A comment goes until the end of the line.
+          while (peek() != '\n' && !isAtEnd()) advance();
+        } else {
+          return;
+        }
+        break;
+      default:
+        return;
+    }
+  }
+}
+
+static bool isDigit(char c) {
+  return c >= '0' && c <= '9';
+}
+
+static bool isAlpha(char c) {
+  return (c >= 'a' && c <= 'z') ||
+         (c >= 'A' && c <= 'Z') ||
+          c == '_';
+}
+
+static Token number() {
+  while (isDigit(peek())) advance();
+
+  // Look for a fractional part.
+  if (peek() == '.' && isDigit(peekNext())) {
+    // Consume the ".".
+    advance();
+
+    while (isDigit(peek())) advance();
+  }
+
+  return newToken(TOKEN_NUMBER);
+}
+
+static Token string() {
+  while (peek() != '"' && !isAtEnd()) {
+    if (peek() == '\n') scanner.line++;
+    advance();
+  }
+
+  if (isAtEnd()) return errorToken("Unterminated string.");
+
+  // The closing quote.
+  advance();
+  return newToken(TOKEN_STRING);
+}
+
+static TokenType checkKeyword(int start, int length, const char *rest,
+                              TokenType type) {
+  if (scanner.current - scanner.start == start + length &&
+      memcmp(scanner.start + start, rest, length) == 0) {
+    return type;
+  }
+
+  return TOKEN_IDENTIFIER;
+}
+
+static TokenType identifierType() {
+  switch (scanner.start[0]) {
+    case 'a':
+      return checkKeyword(1, 2, "nd", TOKEN_AND);
+    case 'e':
+      return checkKeyword(1, 3, "lse", TOKEN_ELSE);
+    case 'f':
+      if (scanner.current - scanner.start > 1) {
+        switch (scanner.start[1]) {
+          case 'a':
+            return checkKeyword(2, 3, "lse", TOKEN_FALSE);
+          case 'o':
+            return checkKeyword(2, 1, "r", TOKEN_FOR);
+        }
+        return checkKeyword(1, 1, "n", TOKEN_FN);
+      }
+      break;
+    case 'i':
+      return checkKeyword(1, 1, "f", TOKEN_IF);
+    case 'n':
+      return checkKeyword(1, 2, "il", TOKEN_NIL);
+    case 'o':
+      return checkKeyword(1, 1, "r", TOKEN_OR);
+    case 'p':
+      return checkKeyword(1, 4, "rint", TOKEN_PRINT);
+    case 'r':
+      return checkKeyword(1, 5, "eturn", TOKEN_RETURN);
+    case 's':
+      return checkKeyword(1, 4, "uper", TOKEN_SUPER);
+    case 't':
+      if (scanner.current - scanner.start > 1) {
+        switch (scanner.start[1]) {
+          case 'h':
+            return checkKeyword(2, 2, "is", TOKEN_THIS);
+          case 'r':
+            return checkKeyword(2, 2, "ue", TOKEN_TRUE);
+          case 'y':
+            return checkKeyword(2, 2, "pe", TOKEN_TYPE);
+        }
+      }
+      break;
+    case 'l':
+      return checkKeyword(1, 2, "et", TOKEN_LET);
+    case 'w':
+      return checkKeyword(1, 4, "hile", TOKEN_WHILE);
+  }
+  return TOKEN_IDENTIFIER;
+}
+
+static Token identifier() {
+  while (isAlpha(peek()) || isDigit(peek())) advance();
+  return newToken(identifierType());
+}
+
+Token scanToken() {
+  skipWhitespace();
+  scanner.start = scanner.current;
+
+  if (isAtEnd()) return newToken(TOKEN_EOF);
+
+  char c = advance();
+  if (isAlpha(c)) return identifier();
+  if (isDigit(c)) return number();
+
+  switch (c) {
+    case '(': return newToken(TOKEN_LEFT_PAREN);
+    case ')': return newToken(TOKEN_RIGHT_PAREN);
+    case '{': return newToken(TOKEN_LEFT_BRACE);
+    case '}': return newToken(TOKEN_RIGHT_BRACE);
+    case '[': return newToken(TOKEN_LEFT_BRACKET);
+    case ']': return newToken(TOKEN_RIGHT_BRACKET);
+    case ';': return newToken(TOKEN_SEMICOLON);
+    case ':': return newToken(TOKEN_COLON);
+    case '#': return newToken(TOKEN_MESH);
+    case '$': return newToken(TOKEN_DOLLAR);
+    case '%': return newToken(TOKEN_PERCENT);
+    case '&': return newToken(TOKEN_AMPERSAND);
+    case '@': return newToken(TOKEN_AT);
+    case ',': return newToken(TOKEN_COMMA);
+    case '.': return newToken(TOKEN_DOT);
+    case '-': return newToken(TOKEN_MINUS);
+    case '+': return newToken(TOKEN_PLUS);
+    case '/': return newToken(TOKEN_SLASH);
+    case '*': return newToken(TOKEN_STAR);
+    case '!':
+      return newToken(
+          match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
+    case '=':
+      return newToken(
+          match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
+    case '<':
+      return newToken(
+          match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
+    case '>':
+      return newToken(
+          match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
+
+    case '"': return string();
+  }
+
+  return errorToken("Unexpected character.");
+}
+
diff --git a/src/scanner.h b/src/scanner.h
new file mode 100644
index 0000000..70c9f4f
--- /dev/null
+++ b/src/scanner.h
@@ -0,0 +1,70 @@
+#ifndef ztl_scanner_h
+#define ztl_scanner_h
+
+typedef enum {
+  // End of file
+  TOKEN_EOF,
+  // Single-character tokens.
+  TOKEN_LEFT_PAREN,
+  TOKEN_RIGHT_PAREN,
+  TOKEN_LEFT_BRACE,
+  TOKEN_RIGHT_BRACE,
+  TOKEN_LEFT_BRACKET,
+  TOKEN_RIGHT_BRACKET,
+  TOKEN_COMMA,
+  TOKEN_DOT,
+  TOKEN_MINUS,
+  TOKEN_PLUS,
+  TOKEN_COLON,
+  TOKEN_SEMICOLON,
+  TOKEN_SLASH,
+  TOKEN_STAR,
+  TOKEN_MESH,
+  TOKEN_DOLLAR,
+  TOKEN_AT,
+  TOKEN_AMPERSAND,
+  TOKEN_PERCENT,
+  // One or two character tokens.
+  TOKEN_BANG,
+  TOKEN_BANG_EQUAL,
+  TOKEN_EQUAL,
+  TOKEN_EQUAL_EQUAL,
+  TOKEN_GREATER,
+  TOKEN_GREATER_EQUAL,
+  TOKEN_LESS,
+  TOKEN_LESS_EQUAL,
+  // Literals.
+  TOKEN_IDENTIFIER,
+  TOKEN_STRING,
+  TOKEN_NUMBER,
+  // Keywords.
+  TOKEN_AND,
+  TOKEN_TYPE,
+  TOKEN_ELSE,
+  TOKEN_FALSE,
+  TOKEN_FOR,
+  TOKEN_FN,
+  TOKEN_IF,
+  TOKEN_NIL,
+  TOKEN_OR,
+  TOKEN_PRINT,
+  TOKEN_RETURN,
+  TOKEN_SUPER,
+  TOKEN_THIS,
+  TOKEN_TRUE,
+  TOKEN_LET,
+  TOKEN_WHILE,
+  TOKEN_ERROR,
+} TokenType;
+
+typedef struct {
+  TokenType type;
+  const char *start;
+  int length;
+  int line;
+} Token;
+
+void newScanner(const char *source);
+Token scanToken();
+
+#endif
diff --git a/src/vm.c b/src/vm.c
index 715446c..bd909b9 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -1,4 +1,5 @@
 #include "common.h"
+#include "compiler.h"
 #include "debug.h"
 #include "vm.h"
 
@@ -76,9 +77,22 @@ static InterpretResult run() {
 #undef BINARY_OP
 }
 
-InterpretResult interpret(Chunk* chunk) {
-  vm.chunk = chunk;
+InterpretResult interpret(const char* source) {
+  Chunk chunk;
+  newChunk(&chunk);
+
+  if (!compile(source, &chunk)) {
+    freeChunk(&chunk);
+    return INTERPRET_COMPILE_ERROR;
+  }
+
+  vm.chunk = &chunk;
   vm.ip = vm.chunk->code;
-  return run();
+
+  InterpretResult result = run();
+
+  freeChunk(&chunk);
+  return result;
 }
 
+
diff --git a/src/vm.h b/src/vm.h
index aeec3f2..50cd09b 100644
--- a/src/vm.h
+++ b/src/vm.h
@@ -21,7 +21,7 @@ typedef enum {
 
 void newVM();
 void freeVM();
-InterpretResult interpret(Chunk *chunk);
+InterpretResult interpret(const char* source);
 void push(Value value);
 Value pop();
 