add scanner, compiler

This commit is contained in:
zongor 2025-05-04 12:55:28 -04:00
parent fb6cc45f83
commit bf38431165
8 changed files with 653 additions and 28 deletions

View File

@ -6,5 +6,6 @@
#include <stdint.h> #include <stdint.h>
#define DEBUG_TRACE_EXECUTION #define DEBUG_TRACE_EXECUTION
#define DEBUG_PRINT_CODE
#endif #endif

250
src/compiler.c Normal file
View File

@ -0,0 +1,250 @@
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#include "compiler.h"
#include "scanner.h"
#ifdef DEBUG_PRINT_CODE
#include "debug.h"
#endif
// Parser state: the lookahead token, the most recently consumed token, and
// the error-reporting flags.
typedef struct {
Token current; // Token produced by the latest scanToken() call in advance().
Token previous; // The token that was `current` before the latest advance().
bool hadError; // Set by errorAt(); compile() returns its negation.
bool panicMode; // Set by errorAt(); while set, errorAt() reports nothing.
} Parser;
// Operator precedence levels in ascending order (PREC_NONE is the lowest).
// parsePrecedence() compares these values numerically, and binary() uses
// "precedence + 1" to parse its right operand at the next-higher level.
typedef enum {
PREC_NONE,
PREC_ASSIGNMENT, // =
PREC_OR, // or
PREC_AND, // and
PREC_EQUALITY, // == !=
PREC_COMPARISON, // < > <= >=
PREC_TERM, // + -
PREC_FACTOR, // * /
PREC_UNARY, // ! -
PREC_CALL, // . ()
PREC_PRIMARY
} Precedence;
// Signature shared by every prefix/infix handler stored in the rule table.
typedef void (*ParseFn)();
// One row of the rule table; rows are looked up by token type in getRule().
typedef struct {
ParseFn prefix; // Called by parsePrecedence() when the token starts an expression; NULL if it cannot.
ParseFn infix; // Called by parsePrecedence() when the token follows a parsed operand.
Precedence precedence; // Level compared in parsePrecedence() before the infix handler runs.
} ParseRule;
// The single parser instance read and written by the functions in this file.
Parser parser;
// Chunk that emitted bytes and constants are written to; assigned in compile().
Chunk* compilingChunk;
// Returns the chunk that bytecode is currently being written into.
static Chunk* currentChunk() {
  Chunk* target = compilingChunk;
  return target;
}
// Prints a compile error for `token` to stderr and records that an error
// occurred. While parser.panicMode is already set, the call does nothing.
//
// token:   token whose line (and, for ordinary tokens, lexeme) is reported.
// message: description printed after the location.
static void errorAt(Token* token, const char* message) {
  if (parser.panicMode) return;
  parser.panicMode = true;
  parser.hadError = true;

  fprintf(stderr, "[line %d] Error", token->line);
  switch (token->type) {
    case TOKEN_EOF:
      fprintf(stderr, " at end");
      break;
    case TOKEN_ERROR:
      // Nothing to show: no lexeme is printed for error tokens.
      break;
    default:
      fprintf(stderr, " at '%.*s'", token->length, token->start);
      break;
  }
  fprintf(stderr, ": %s\n", message);
}
static void error(const char* message) {
errorAt(&parser.previous, message);
}
static void errorAtCurrent(const char* message) {
errorAt(&parser.current, message);
}
// Moves the parser forward by one token: the old lookahead becomes
// parser.previous and the next non-error token becomes parser.current.
// Every error token met on the way is reported, using its text as the message.
static void advance() {
  parser.previous = parser.current;

  parser.current = scanToken();
  while (parser.current.type == TOKEN_ERROR) {
    errorAtCurrent(parser.current.start);
    parser.current = scanToken();
  }
}
// Consumes the lookahead token if it has the expected `type`; otherwise
// reports `message` at the lookahead token and consumes nothing.
static void consume(TokenType type, const char* message) {
  if (parser.current.type != type) {
    errorAtCurrent(message);
    return;
  }
  advance();
}
static void emitByte(uint8_t byte) {
writeChunk(currentChunk(), byte, parser.previous.line);
}
// Appends two bytes to the current chunk, `byte1` first and `byte2` second.
static void emitBytes(uint8_t byte1, uint8_t byte2) {
  const uint8_t pair[2] = {byte1, byte2};
  for (int i = 0; i < 2; i++) {
    emitByte(pair[i]);
  }
}
// Appends an OP_RETURN instruction to the current chunk.
static void emitReturn() {
  const uint8_t opcode = OP_RETURN;
  emitByte(opcode);
}
// Adds `value` to the current chunk's constants and returns its index as a
// single byte. If the index is larger than UINT8_MAX, an error is reported
// and 0 is returned instead.
static uint8_t makeConstant(Value value) {
  int index = addConstant(currentChunk(), value);
  if (index <= UINT8_MAX) {
    return (uint8_t)index;
  }
  error("Too many constants in one chunk.");
  return 0;
}
// Stores `value` as a constant and emits OP_CONSTANT followed by its index.
static void emitConstant(Value value) {
  uint8_t slot = makeConstant(value);
  emitBytes(OP_CONSTANT, slot);
}
// Finishes the chunk by emitting a return instruction. When
// DEBUG_PRINT_CODE is defined and no error was recorded, the chunk is also
// disassembled under the name "code".
static void endCompiler() {
  emitReturn();
#ifdef DEBUG_PRINT_CODE
  if (parser.hadError) return;
  disassembleChunk(currentChunk(), "code");
#endif
}
// Forward declarations: the parse handlers below call these before their
// definitions appear in the file.
static void expression();
static ParseRule* getRule(TokenType type);
static void parsePrecedence(Precedence precedence);
static void expression() {
parsePrecedence(PREC_ASSIGNMENT);
}
static void grouping() {
expression();
consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
}
// Prefix handler for number tokens: converts the text starting at the
// just-consumed token with strtod() and emits the result as a constant.
static void number() {
  const char* lexeme = parser.previous.start;
  emitConstant(strtod(lexeme, NULL));
}
static void binary() {
TokenType operatorType = parser.previous.type;
ParseRule* rule = getRule(operatorType);
parsePrecedence((Precedence)(rule->precedence + 1));
switch (operatorType) {
case TOKEN_PLUS: emitByte(OP_ADD); break;
case TOKEN_MINUS: emitByte(OP_SUBTRACT); break;
case TOKEN_STAR: emitByte(OP_MULTIPLY); break;
case TOKEN_SLASH: emitByte(OP_DIVIDE); break;
default: return; // Unreachable.
}
}
// Prefix handler for unary minus: compiles the operand at PREC_UNARY, then
// emits the negate instruction.
static void unary() {
  const TokenType op = parser.previous.type;

  // The operand's code must come before the operator's instruction.
  parsePrecedence(PREC_UNARY);

  if (op == TOKEN_MINUS) {
    emitByte(OP_NEGATE);
  }
  // Any other operator type emits nothing (not expected to occur).
}
ParseRule rules[] = {
[TOKEN_LEFT_PAREN] = {grouping, NULL, PREC_NONE},
[TOKEN_RIGHT_PAREN] = {NULL, NULL, PREC_NONE},
[TOKEN_LEFT_BRACE] = {NULL, NULL, PREC_NONE},
[TOKEN_RIGHT_BRACE] = {NULL, NULL, PREC_NONE},
[TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
[TOKEN_DOT] = {NULL, NULL, PREC_NONE},
[TOKEN_MINUS] = {unary, binary, PREC_TERM},
[TOKEN_PLUS] = {NULL, binary, PREC_TERM},
[TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE},
[TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
[TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
[TOKEN_BANG] = {NULL, NULL, PREC_NONE},
[TOKEN_BANG_EQUAL] = {NULL, NULL, PREC_NONE},
[TOKEN_EQUAL] = {NULL, NULL, PREC_NONE},
[TOKEN_EQUAL_EQUAL] = {NULL, NULL, PREC_NONE},
[TOKEN_GREATER] = {NULL, NULL, PREC_NONE},
[TOKEN_GREATER_EQUAL] = {NULL, NULL, PREC_NONE},
[TOKEN_LESS] = {NULL, NULL, PREC_NONE},
[TOKEN_LESS_EQUAL] = {NULL, NULL, PREC_NONE},
[TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE},
[TOKEN_STRING] = {NULL, NULL, PREC_NONE},
[TOKEN_NUMBER] = {number, NULL, PREC_NONE},
[TOKEN_AND] = {NULL, NULL, PREC_NONE},
[TOKEN_TYPE] = {NULL, NULL, PREC_NONE},
[TOKEN_ELSE] = {NULL, NULL, PREC_NONE},
[TOKEN_FALSE] = {NULL, NULL, PREC_NONE},
[TOKEN_FOR] = {NULL, NULL, PREC_NONE},
[TOKEN_FN] = {NULL, NULL, PREC_NONE},
[TOKEN_IF] = {NULL, NULL, PREC_NONE},
[TOKEN_NIL] = {NULL, NULL, PREC_NONE},
[TOKEN_OR] = {NULL, NULL, PREC_NONE},
[TOKEN_PRINT] = {NULL, NULL, PREC_NONE},
[TOKEN_RETURN] = {NULL, NULL, PREC_NONE},
[TOKEN_SUPER] = {NULL, NULL, PREC_NONE},
[TOKEN_THIS] = {NULL, NULL, PREC_NONE},
[TOKEN_TRUE] = {NULL, NULL, PREC_NONE},
[TOKEN_LET] = {NULL, NULL, PREC_NONE},
[TOKEN_WHILE] = {NULL, NULL, PREC_NONE},
[TOKEN_ERROR] = {NULL, NULL, PREC_NONE},
[TOKEN_EOF] = {NULL, NULL, PREC_NONE},
};
// Returns the rule table row for the given token type.
static ParseRule* getRule(TokenType type) {
  return rules + type;
}
// Parses an expression whose operators all have at least the given
// precedence. The first token must have a prefix handler; otherwise
// "Expect expression." is reported. After the prefix part, infix handlers are
// run for as long as the lookahead token's precedence is not lower than
// `precedence`.
static void parsePrecedence(Precedence precedence) {
  advance();
  ParseFn parsePrefix = getRule(parser.previous.type)->prefix;
  if (parsePrefix == NULL) {
    error("Expect expression.");
    return;
  }
  parsePrefix();

  for (;;) {
    if (getRule(parser.current.type)->precedence < precedence) break;
    advance();
    ParseFn parseInfix = getRule(parser.previous.type)->infix;
    parseInfix();
  }
}
// Compiles a single expression from `source` into `chunk`.
//
// source: NUL-terminated program text handed to the scanner.
// chunk:  destination for the emitted bytecode.
// Returns true when no compile error was reported, false otherwise.
bool compile(const char* source, Chunk* chunk) {
  newScanner(source);
  compilingChunk = chunk;
  parser.panicMode = false;
  parser.hadError = false;

  advance();  // Load the first token into parser.current.
  expression();
  consume(TOKEN_EOF, "Expect end of expression.");
  endCompiler();

  if (parser.hadError) {
    return false;
  }
  return true;
}

9
src/compiler.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef ztl_compiler_h
#define ztl_compiler_h
#include "vm.h"
// Compiles `source` into `chunk`; returns true when no compile error was reported.
bool compile(const char* source, Chunk* chunk);
#endif

View File

@ -1,36 +1,77 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "common.h" #include "common.h"
#include "chunk.h" #include "chunk.h"
#include "debug.h" #include "debug.h"
#include "vm.h" #include "vm.h"
int main(int argc, const char** argv) { static void repl() {
char line[1024];
for (;;) {
printf("> ");
if (!fgets(line, sizeof(line), stdin)) {
printf("\n");
break;
}
interpret(line);
}
}
// Reads the whole file at `path` into a freshly allocated, NUL-terminated
// buffer.
//
// path: file to read; it is opened in binary mode.
// Returns the buffer; the caller owns it and must free() it.
// On any failure (open, seek/size query, allocation, short read) a message is
// printed to stderr and the process exits with status 74.
static char *readFile(const char *path) {
  FILE *file = fopen(path, "rb");
  if (file == NULL) {
    fprintf(stderr, "Could not open file \"%s\".\n", path);
    exit(74);
  }

  // ftell() reports failure as -1L; check it while it is still a signed long
  // so the error is not turned into a huge size_t.
  long endOffset = -1L;
  if (fseek(file, 0L, SEEK_END) == 0) {
    endOffset = ftell(file);
  }
  if (endOffset < 0L || fseek(file, 0L, SEEK_SET) != 0) {
    fprintf(stderr, "Could not read file \"%s\".\n", path);
    fclose(file);
    exit(74);
  }
  size_t fileSize = (size_t)endOffset;

  // One extra byte for the terminating NUL.
  char *buffer = (char*)malloc(fileSize + 1);
  if (buffer == NULL) {
    fprintf(stderr, "Not enough memory to read \"%s\".\n", path);
    fclose(file);
    exit(74);
  }

  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
  if (bytesRead < fileSize) {
    fprintf(stderr, "Could not read file \"%s\".\n", path);
    free(buffer);
    fclose(file);
    exit(74);
  }
  buffer[bytesRead] = '\0';

  fclose(file);
  return buffer;
}
// Reads the file at `path`, interprets its contents, and exits the process
// with status 65 on a compile error or 70 on a runtime error. Returns
// normally for any other result.
static void runFile(const char *path) {
  char *source = readFile(path);
  InterpretResult result = interpret(source);
  free(source);

  switch (result) {
    case INTERPRET_COMPILE_ERROR:
      exit(65);
    case INTERPRET_RUNTIME_ERROR:
      exit(70);
    default:
      break;
  }
}
int main(int argc, const char* *argv) {
newVM(); newVM();
Chunk chunk; if (argc == 1) {
newChunk(&chunk); repl();
} else if (argc == 2) {
runFile(argv[1]);
} else {
fprintf(stderr, "Usage: clox [path]\n");
exit(64);
}
int constant = addConstant(&chunk, 1.2);
writeChunk(&chunk, OP_CONSTANT, 123);
writeChunk(&chunk, constant, 123);
constant = addConstant(&chunk, 3.4);
writeChunk(&chunk, OP_CONSTANT, 123);
writeChunk(&chunk, constant, 123);
writeChunk(&chunk, OP_ADD, 123);
constant = addConstant(&chunk, 5.6);
writeChunk(&chunk, OP_CONSTANT, 123);
writeChunk(&chunk, constant, 123);
writeChunk(&chunk, OP_DIVIDE, 123);
writeChunk(&chunk, OP_NEGATE, 123);
writeChunk(&chunk, OP_RETURN, 1);
disassembleChunk(&chunk, "test chunk");
freeVM(); freeVM();
freeChunk(&chunk);
return 0; return 0;
} }

240
src/scanner.c Normal file
View File

@ -0,0 +1,240 @@
#include <stdio.h>
#include <string.h>
#include "common.h"
#include "scanner.h"
// Scanner state over a NUL-terminated source string.
typedef struct {
const char *start; // First character of the token currently being scanned.
const char *current; // Next character to be read.
int line; // Line number, starting at 1; incremented when a '\n' is passed.
} Scanner;
// The single scanner instance read and written by the functions in this file.
Scanner scanner;
void newScanner(const char* source) {
scanner.start = source;
scanner.current = source;
scanner.line = 1;
}
// Builds a token of the given type that spans the text from scanner.start
// up to (not including) scanner.current, on the current line.
static Token newToken(TokenType type) {
  Token token = {
    .type = type,
    .start = scanner.start,
    .length = (int)(scanner.current - scanner.start),
    .line = scanner.line,
  };
  return token;
}
static Token errorToken(const char* message) {
Token token;
token.type = TOKEN_ERROR;
token.start = message;
token.length = (int)strlen(message);
token.line = scanner.line;
return token;
}
// True when the next unread character is the terminating NUL.
static bool isAtEnd() {
  return scanner.current[0] == '\0';
}
// Returns the next unread character without consuming it.
static char peek() {
  return scanner.current[0];
}
// Returns the character after the next unread one without consuming
// anything, or '\0' when the scanner is already at the end.
static char peekNext() {
  return isAtEnd() ? '\0' : *(scanner.current + 1);
}
// Consumes the next character and returns it.
static char advance() {
  return *scanner.current++;
}
// Consumes the next character only if it equals `expected`.
// Returns true when a character was consumed; always false at end of input.
static bool match(char expected) {
  if (!isAtEnd() && *scanner.current == expected) {
    scanner.current++;
    return true;
  }
  return false;
}
// Advances past spaces, tabs, carriage returns, newlines (counting lines)
// and `//` comments, stopping at the first character that is none of these.
static void skipWhitespace() {
  for (;;) {
    char c = peek();

    if (c == ' ' || c == '\r' || c == '\t') {
      advance();
      continue;
    }
    if (c == '\n') {
      scanner.line++;
      advance();
      continue;
    }
    if (c == '/' && peekNext() == '/') {
      // A comment runs to the end of the line; the newline itself is left
      // for the next iteration so the line counter is updated there.
      while (peek() != '\n' && !isAtEnd()) advance();
      continue;
    }
    return;
  }
}
// True for the ASCII decimal digits '0' through '9'.
static bool isDigit(char c) {
  return '0' <= c && c <= '9';
}
// True for ASCII letters (either case) and the underscore.
static bool isAlpha(char c) {
  if (c == '_') return true;
  if ('a' <= c && c <= 'z') return true;
  return 'A' <= c && c <= 'Z';
}
// Scans the rest of a number literal (the first digit is already consumed):
// a run of digits, optionally followed by '.' and at least one more digit.
static Token number() {
  while (isDigit(peek())) advance();

  // A '.' only belongs to the number when a digit follows it.
  bool hasFraction = peek() == '.' && isDigit(peekNext());
  if (hasFraction) {
    // The first advance() consumes the '.', the rest consume the digits.
    do {
      advance();
    } while (isDigit(peek()));
  }

  return newToken(TOKEN_NUMBER);
}
// Scans the rest of a string literal (the opening quote is already
// consumed). Newlines inside the string are allowed and counted. Returns an
// error token if the input ends before the closing quote.
static Token string() {
  for (;;) {
    if (isAtEnd()) return errorToken("Unterminated string.");
    if (peek() == '"') break;
    if (peek() == '\n') scanner.line++;
    advance();
  }

  // Consume the closing quote so it is part of the token.
  advance();
  return newToken(TOKEN_STRING);
}
// Decides whether the current lexeme is a particular keyword.
//
// start:  offset into the lexeme where `rest` must begin.
// length: number of characters in `rest`.
// rest:   expected tail of the keyword.
// type:   token type to return on a match.
// Returns `type` when the lexeme is exactly start + length characters long
// and its tail equals `rest`; otherwise TOKEN_IDENTIFIER.
static TokenType checkKeyword(int start, int length, const char *rest,
                              TokenType type) {
  if (scanner.current - scanner.start != start + length) {
    return TOKEN_IDENTIFIER;
  }
  if (memcmp(scanner.start + start, rest, length) != 0) {
    return TOKEN_IDENTIFIER;
  }
  return type;
}
// Classifies the current lexeme as a keyword or a plain identifier by
// branching on its first (and, for 'f' and 't', second) character and then
// comparing the remainder with checkKeyword().
static TokenType identifierType() {
  const char *lexeme = scanner.start;
  const bool hasSecond = scanner.current - scanner.start > 1;

  switch (lexeme[0]) {
    case 'a': return checkKeyword(1, 2, "nd", TOKEN_AND);
    case 'e': return checkKeyword(1, 3, "lse", TOKEN_ELSE);
    case 'f':
      if (!hasSecond) break;
      if (lexeme[1] == 'a') return checkKeyword(2, 3, "lse", TOKEN_FALSE);
      if (lexeme[1] == 'o') return checkKeyword(2, 1, "r", TOKEN_FOR);
      // Any other second character: the only remaining candidate is "fn".
      return checkKeyword(1, 1, "n", TOKEN_FN);
    case 'i': return checkKeyword(1, 1, "f", TOKEN_IF);
    case 'l': return checkKeyword(1, 2, "et", TOKEN_LET);
    case 'n': return checkKeyword(1, 2, "il", TOKEN_NIL);
    case 'o': return checkKeyword(1, 1, "r", TOKEN_OR);
    case 'p': return checkKeyword(1, 4, "rint", TOKEN_PRINT);
    case 'r': return checkKeyword(1, 5, "eturn", TOKEN_RETURN);
    case 's': return checkKeyword(1, 4, "uper", TOKEN_SUPER);
    case 't':
      if (!hasSecond) break;
      if (lexeme[1] == 'h') return checkKeyword(2, 2, "is", TOKEN_THIS);
      if (lexeme[1] == 'r') return checkKeyword(2, 2, "ue", TOKEN_TRUE);
      if (lexeme[1] == 'y') return checkKeyword(2, 2, "pe", TOKEN_TYPE);
      break;
    case 'w': return checkKeyword(1, 4, "hile", TOKEN_WHILE);
    default: break;
  }
  return TOKEN_IDENTIFIER;
}
static Token identifier() {
while (isAlpha(peek()) || isDigit(peek())) advance();
return newToken(identifierType());
}
Token scanToken() {
skipWhitespace();
scanner.start = scanner.current;
if (isAtEnd()) return newToken(TOKEN_EOF);
char c = advance();
if (isAlpha(c)) return identifier();
if (isDigit(c)) return number();
switch (c) {
case '(': return newToken(TOKEN_LEFT_PAREN);
case ')': return newToken(TOKEN_RIGHT_PAREN);
case '{': return newToken(TOKEN_LEFT_BRACE);
case '}': return newToken(TOKEN_RIGHT_BRACE);
case '[': return newToken(TOKEN_LEFT_BRACKET);
case ']': return newToken(TOKEN_RIGHT_BRACKET);
case ';': return newToken(TOKEN_SEMICOLON);
case ':': return newToken(TOKEN_COLON);
case '#': return newToken(TOKEN_MESH);
case '$': return newToken(TOKEN_DOLLAR);
case '%': return newToken(TOKEN_PERCENT);
case '&': return newToken(TOKEN_AMPERSAND);
case '@': return newToken(TOKEN_AT);
case ',': return newToken(TOKEN_COMMA);
case '.': return newToken(TOKEN_DOT);
case '-': return newToken(TOKEN_MINUS);
case '+': return newToken(TOKEN_PLUS);
case '/': return newToken(TOKEN_SLASH);
case '*': return newToken(TOKEN_STAR);
case '!':
return newToken(
match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
case '=':
return newToken(
match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
case '<':
return newToken(
match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
case '>':
return newToken(
match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
case '"': return string();
}
return errorToken("Unexpected character.");
}

70
src/scanner.h Normal file
View File

@ -0,0 +1,70 @@
#ifndef ztl_scanner_h
#define ztl_scanner_h
// Kinds of token produced by scanToken(). The compiler's rule table is
// indexed by these values.
typedef enum {
// End of file
TOKEN_EOF,
// Single-character tokens.
TOKEN_LEFT_PAREN,
TOKEN_RIGHT_PAREN,
TOKEN_LEFT_BRACE,
TOKEN_RIGHT_BRACE,
TOKEN_LEFT_BRACKET,
TOKEN_RIGHT_BRACKET,
TOKEN_COMMA,
TOKEN_DOT,
TOKEN_MINUS,
TOKEN_PLUS,
TOKEN_COLON,
TOKEN_SEMICOLON,
TOKEN_SLASH,
TOKEN_STAR,
TOKEN_MESH, // '#'
TOKEN_DOLLAR,
TOKEN_AT,
TOKEN_AMPERSAND,
TOKEN_PERCENT,
// One or two character tokens.
TOKEN_BANG,
TOKEN_BANG_EQUAL,
TOKEN_EQUAL,
TOKEN_EQUAL_EQUAL,
TOKEN_GREATER,
TOKEN_GREATER_EQUAL,
TOKEN_LESS,
TOKEN_LESS_EQUAL,
// Literals.
TOKEN_IDENTIFIER,
TOKEN_STRING,
TOKEN_NUMBER,
// Keywords.
TOKEN_AND,
TOKEN_TYPE,
TOKEN_ELSE,
TOKEN_FALSE,
TOKEN_FOR,
TOKEN_FN,
TOKEN_IF,
TOKEN_NIL,
TOKEN_OR,
TOKEN_PRINT,
TOKEN_RETURN,
TOKEN_SUPER,
TOKEN_THIS,
TOKEN_TRUE,
TOKEN_LET,
TOKEN_WHILE,
// Scan error; the token's text is the error message, not source text.
TOKEN_ERROR,
} TokenType;
// A single token. The text is referenced, not copied: `start` points into
// the scanned source, or at the error message for TOKEN_ERROR tokens.
typedef struct {
TokenType type; // Kind of token.
const char *start; // First character of the token's text (not NUL-terminated at `length`).
int length; // Number of characters in the token's text.
int line; // Scanner line number when the token was created.
} Token;
// Resets the scanner to the beginning of `source`, at line 1.
void newScanner(const char *source);
// Skips whitespace and `//` comments, then returns the next token
// (TOKEN_EOF once the end of the source is reached).
Token scanToken();
#endif

View File

@ -1,4 +1,5 @@
#include "common.h" #include "common.h"
#include "compiler.h"
#include "debug.h" #include "debug.h"
#include "vm.h" #include "vm.h"
@ -76,9 +77,22 @@ static InterpretResult run() {
#undef BINARY_OP #undef BINARY_OP
} }
InterpretResult interpret(Chunk* chunk) { InterpretResult interpret(const char* source) {
vm.chunk = chunk; Chunk chunk;
newChunk(&chunk);
if (!compile(source, &chunk)) {
freeChunk(&chunk);
return INTERPRET_COMPILE_ERROR;
}
vm.chunk = &chunk;
vm.ip = vm.chunk->code; vm.ip = vm.chunk->code;
return run();
InterpretResult result = run();
freeChunk(&chunk);
return result;
} }

View File

@ -21,7 +21,7 @@ typedef enum {
void newVM(); void newVM();
void freeVM(); void freeVM();
InterpretResult interpret(Chunk *chunk); InterpretResult interpret(const char* source);
void push(Value value); void push(Value value);
Value pop(); Value pop();