From c348ea3fdd8e194f3c11f4626a0fbf0caed4a712 Mon Sep 17 00:00:00 2001 From: zongor Date: Sun, 3 Aug 2025 16:03:25 -0400 Subject: [PATCH] add initial compiler --- .gitignore | 4 +- src/arch/linux/main.c | 73 ++++---- src/compiler.c | 305 +++++++++++++++++++++++++++++++ src/compiler.h | 10 ++ src/compiler.org | 407 ++++++++++++++++++++++++++++++++++++++++++ src/debug.c | 37 ++-- src/lexer.c | 378 +++++++++++++++++---------------------- src/lexer.h | 32 ++-- src/opcodes.h | 1 + src/vm.c | 30 ++-- test/add.zrl | 4 +- test/fib.zrl | 2 +- 12 files changed, 986 insertions(+), 297 deletions(-) create mode 100644 src/compiler.c create mode 100644 src/compiler.h create mode 100644 src/compiler.org diff --git a/.gitignore b/.gitignore index 87b3ffe..6880352 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,6 @@ dkms.conf zre zre.wasm memory_dump.bin -src/build/ \ No newline at end of file +src/build/ +.gdb_history +.vscode \ No newline at end of file diff --git a/src/arch/linux/main.c b/src/arch/linux/main.c index 7576c2a..26dd857 100644 --- a/src/arch/linux/main.c +++ b/src/arch/linux/main.c @@ -1,27 +1,15 @@ -#include "../../vm.h" +#include "../../compiler.h" #include "../../debug.h" -#include "../../test.h" -#include "../../lexer.h" +#include "../../vm.h" #include #define MAX_SRC_SIZE 16384 -int main(int argc, char **argv) { - VM vm = {0}; - vm.frames_size = FRAMES_SIZE; - vm.return_stack_size = STACK_SIZE; - vm.stack_size = STACK_SIZE; - vm.memory_size = MEMORY_SIZE; - - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - - FILE *f = fopen(argv[1], "rb"); +void compileFile(const char *path, VM *vm) { + FILE *f = fopen(path, "rb"); if (!f) { perror("fopen"); - return 1; + exit(1); } static char source[MAX_SRC_SIZE + 1]; @@ -31,32 +19,51 @@ int main(int argc, char **argv) { fseek(f, 0, SEEK_SET); if (len >= MAX_SRC_SIZE) { perror("source is larget than buffer"); - return 1; + exit(1); } size_t read = fread(source, 1, len, f); source[read] = 
'\0'; fclose(f); - init_lexer(source); + compile(source, vm); +} +static void repl(VM *vm) { + char line[1024]; for (;;) { - Token token = next_token(); - printf("[%d] %-18s: '%.*s'\n", token.line, token_type_name(token.type), token.length, token.start); - if (token.type == TOKEN_EOF) break; + printf("> "); + + if (!fgets(line, sizeof(line), stdin)) { + printf("\n"); + break; + } + /* reset the code counter to 0 */ + vm->cp = 0; + vm->sp = 0; + vm->pc = 0; + + compile(line, vm); + core_dump(vm); + while (step_vm(vm)); } +} - /* return 0; */ +int main(int argc, char **argv) { + VM vm = {0}; + vm.frames_size = FRAMES_SIZE; + vm.return_stack_size = STACK_SIZE; + vm.stack_size = STACK_SIZE; + vm.memory_size = MEMORY_SIZE; - - test_hello_world_compile(&vm); - /* test_add_compile(&vm); */ - /* test_add_function_compile(&vm); */ - /* test_loop_compile(&vm); */ - /* test_recursive_function_compile(&vm); */ - - while(step_vm(&vm)); - core_dump(&vm); - return 0; + if (argc == 1) { + repl(&vm); + } else if (argc == 2) { + compileFile(argv[1], &vm); + return 1; + } else { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 64; + } uint32_t buffer_size = 640 * 480 * sizeof(uint32_t); diff --git a/src/compiler.c b/src/compiler.c new file mode 100644 index 0000000..9db8655 --- /dev/null +++ b/src/compiler.c @@ -0,0 +1,305 @@ +#include "compiler.h" + +typedef struct { + Token current; + Token previous; + bool hadError; + bool panicMode; +} Parser; + +typedef enum { + PREC_NONE, + PREC_ASSIGNMENT, /* = */ + PREC_OR, /* or */ + PREC_AND, /* and */ + PREC_EQUALITY, /* == != */ + PREC_COMPARISON, /* < > <= >= */ + PREC_TERM, /* + - */ + PREC_FACTOR, /* * / */ + PREC_UNARY, /* not */ + PREC_CALL, /* . 
() */ + PREC_PRIMARY +} Precedence; + +typedef void (*ParseFn)(VM *vm); + +typedef struct { + ParseFn prefix; + ParseFn infix; + Precedence precedence; +} ParseRule; + +Parser parser; + +void errorAt(Token *token, const char *message) { + if (parser.panicMode) + return; + parser.panicMode = true; + fprintf(stderr, "[line %d] Error", token->line); + + if (token->type == TOKEN_EOF) { + fprintf(stderr, " at end"); + } else if (token->type == TOKEN_ERROR) { + } else { + fprintf(stderr, " at '%.*s'", token->length, token->start); + } + + fprintf(stderr, ": %s\n", message); + parser.hadError = true; +} + +void error(const char *message) { errorAt(&parser.previous, message); } + +void errorAtCurrent(const char *message) { + errorAt(&parser.current, message); +} + +void advance() { + parser.previous = parser.current; + + for (;;) { + parser.current = nextToken(); + if (parser.current.type != TOKEN_ERROR) + break; + + errorAtCurrent(parser.current.start); + } +} + +void consume(TokenType type, const char *message) { + if (parser.current.type == type) { + advance(); + return; + } + + errorAtCurrent(message); +} + +void emitOp(VM *vm, uint8_t opcode, uint8_t dest, uint8_t src1, + uint8_t src2) { + vm->code[vm->cp++].u = OP(opcode, dest, src1, src2); +} + +void expression(VM *vm); +ParseRule *getRule(TokenType type); +void parsePrecedence(VM *vm, Precedence precedence); + +void number(VM *vm) { + if (parser.previous.type == TOKEN_INT_LITERAL) { + char *endptr; + int32_t value = (int32_t)strtol(parser.previous.start, &endptr, 10); + emitOp(vm, OP_LOADI, vm->frames[vm->fp].rp++, 0, 0); + vm->code[vm->cp++].i = value; + return; + } else if (parser.previous.type == TOKEN_UINT_LITERAL) { + long value = atol(parser.previous.start); + emitOp(vm, OP_LOADU, vm->frames[vm->fp].rp++, 0, 0); + vm->code[vm->cp++].u = value; + return; + } else if (parser.previous.type == TOKEN_FLOAT_LITERAL) { + float value = atof(parser.previous.start); + emitOp(vm, OP_LOADF, vm->frames[vm->fp].rp++, 0, 
0); + vm->code[vm->cp++].f = value; + return; + } + errorAtCurrent("Invalid number format"); +} + +void grouping(VM *vm) { + expression(vm); + consume(TOKEN_RPAREN, "Expect ')' after expression."); +} + +void unary(VM *vm) { + TokenType operatorType = parser.previous.type; + + parsePrecedence(vm, PREC_UNARY); + + switch (operatorType) { + default: + return; + } +} + +void binary(VM *vm) { + TokenType operatorType = parser.previous.type; + ParseRule *rule = getRule(operatorType); + parsePrecedence(vm, (Precedence)(rule->precedence + 1)); + TokenType operandType = parser.previous.type; + + switch (operatorType) { + case TOKEN_PLUS: + if (operandType == TOKEN_UINT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_ADD_UINT, dest, src1, src2); + } else if (operandType == TOKEN_INT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_ADD_INT, dest, src1, src2); + } else if (operandType == TOKEN_FLOAT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_ADD_REAL, dest, src1, src2); + } else { + error("not numeric"); + } + break; + case TOKEN_MINUS: + if (operandType == TOKEN_UINT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_SUB_UINT, dest, src1, src2); + } else if (operandType == TOKEN_INT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_SUB_INT, dest, src1, src2); + } else if (operandType == TOKEN_FLOAT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_SUB_REAL, dest, src1, src2); + } else { + error("not numeric"); + } + break; + case TOKEN_STAR: + if 
(operandType == TOKEN_UINT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_MUL_UINT, dest, src1, src2); + } else if (operandType == TOKEN_INT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_MUL_INT, dest, src1, src2); + } else if (operandType == TOKEN_FLOAT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_MUL_REAL, dest, src1, src2); + } else { + error("not numeric"); + } + break; + case TOKEN_SLASH: + if (operandType == TOKEN_UINT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_DIV_UINT, dest, src1, src2); + } else if (operandType == TOKEN_INT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_DIV_INT, dest, src1, src2); + } else if (operandType == TOKEN_FLOAT_LITERAL) { + Frame f = vm->frames[vm->fp]; + uint32_t src1 = f.rp--; + uint32_t src2 = f.rp--; + uint32_t dest = f.rp++; + emitOp(vm, OP_DIV_REAL, dest, src1, src2); + } else { + error("not numeric"); + } + break; + default: + return; /* Unreachable. 
*/ + } +} + +ParseRule rules[] = { + [TOKEN_LPAREN] = {grouping, NULL, PREC_NONE}, + [TOKEN_RPAREN] = {NULL, NULL, PREC_NONE}, + [TOKEN_LBRACE] = {NULL, NULL, PREC_NONE}, + [TOKEN_RBRACE] = {NULL, NULL, PREC_NONE}, + [TOKEN_COMMA] = {NULL, NULL, PREC_NONE}, + [TOKEN_DOT] = {NULL, NULL, PREC_NONE}, + [TOKEN_MINUS] = {NULL, binary, PREC_TERM}, + [TOKEN_PLUS] = {NULL, binary, PREC_TERM}, + [TOKEN_SEMICOLON] = {NULL, NULL, PREC_NONE}, + [TOKEN_SLASH] = {NULL, binary, PREC_FACTOR}, + [TOKEN_STAR] = {NULL, binary, PREC_FACTOR}, + [TOKEN_BANG] = {NULL, NULL, PREC_NONE}, + [TOKEN_BANG_EQ] = {NULL, NULL, PREC_NONE}, + [TOKEN_EQ] = {NULL, NULL, PREC_NONE}, + [TOKEN_EQ_EQ] = {NULL, NULL, PREC_NONE}, + [TOKEN_GT] = {NULL, NULL, PREC_NONE}, + [TOKEN_GTE] = {NULL, NULL, PREC_NONE}, + [TOKEN_LT] = {NULL, NULL, PREC_NONE}, + [TOKEN_LTE] = {NULL, NULL, PREC_NONE}, + [TOKEN_IDENTIFIER] = {NULL, NULL, PREC_NONE}, + [TOKEN_STRING_LITERAL] = {NULL, NULL, PREC_NONE}, + [TOKEN_INT_LITERAL] = {number, NULL, PREC_NONE}, + [TOKEN_UINT_LITERAL] = {number, NULL, PREC_NONE}, + [TOKEN_FLOAT_LITERAL] = {number, NULL, PREC_NONE}, + [TOKEN_KEYWORD_ELSE] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_FOR] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_FN] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_IF] = {NULL, NULL, PREC_NONE}, + [TOKEN_OPERATOR_AND] = {NULL, binary, PREC_NONE}, + [TOKEN_OPERATOR_OR] = {NULL, binary, PREC_NONE}, + [TOKEN_OPERATOR_NOT] = {unary, NULL, PREC_NONE}, + [TOKEN_KEYWORD_NIL] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_TRUE] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_FALSE] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_PRINT] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_RETURN] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_THIS] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_LET] = {NULL, NULL, PREC_NONE}, + [TOKEN_KEYWORD_WHILE] = {NULL, NULL, PREC_NONE}, + [TOKEN_ERROR] = {NULL, NULL, PREC_NONE}, + [TOKEN_EOF] = {NULL, NULL, PREC_NONE}, +}; + +ParseRule *getRule(TokenType type) { 
return &rules[type]; } + +void parsePrecedence(VM *vm, Precedence precedence) { + advance(); + ParseFn prefixRule = getRule(parser.previous.type)->prefix; + if (prefixRule == NULL) { + error("Expect expression."); + return; + } + + prefixRule(vm); + + while (precedence <= getRule(parser.current.type)->precedence) { + advance(); + ParseFn infixRule = getRule(parser.previous.type)->infix; + infixRule(vm); + } +} + +void expression(VM *vm) { parsePrecedence(vm, PREC_ASSIGNMENT); } + +bool compile(const char *source, VM *vm) { + initLexer(source); + + parser.hadError = false; + parser.panicMode = false; + + advance(); + expression(vm); + consume(TOKEN_EOF, "end of file"); + emitOp(vm, OP_HALT, 0, 0, 0); + + return !parser.hadError; +} diff --git a/src/compiler.h b/src/compiler.h new file mode 100644 index 0000000..198a357 --- /dev/null +++ b/src/compiler.h @@ -0,0 +1,10 @@ +#ifndef ZRL_COMPILER_H +#define ZRL_COMPILER_H + +#include "lexer.h" +#include "opcodes.h" + + +bool compile(const char* source, VM* vm); + +#endif diff --git a/src/compiler.org b/src/compiler.org new file mode 100644 index 0000000..55ddce2 --- /dev/null +++ b/src/compiler.org @@ -0,0 +1,407 @@ +I am creating a new programming language in C89, this is for retrocomputing and confined platforms so I am using preallocated buffers for everything without malloc/free. 
+For reference here are my opcodes and vm defs: +```c +/* defines a uint32 opcode */ +#define OP(opcode, a, b, c) ((opcode << 24) | (a << 16) | (b << 8) | c) +typedef enum { + OP_HALT, /* halt : terminate execution */ + OP_LOADI, /* multiple byte: lodi : dest = next memory location as int */ + OP_LOADU, /* multiple byte: lodu : dest = next memory location as uint */ + OP_LOADF, /* multiple byte: lodf : dest = next memory location as float */ + OP_STOREI, /* multiple byte: stri : next memory location = src1 as int */ + OP_STOREU, /*multiple byte: stru : next memory location = src1 as uint */ + OP_STOREF, /* multiple byte: strf : next memory location = src1 as float */ + OP_PUSHI, /* pshi : push int from register onto the stack */ + OP_PUSHU, /* pshu : push uint from register onto the stack */ + OP_PUSHF, /* pshf : push float from register onto the stack */ + OP_PUSHS, /* pshs : push str ref from register onto the stack and copy str */ + OP_POPI, /* popi : pop int from stack onto the register */ + OP_POPU, /* popu : pop uint from stack onto the register */ + OP_POPF, /* popf : pop float from stack onto the register */ + OP_POPS, /* pops : pop str ref from stack and move/copy to register */ + OP_ADD_INT, /* addi : dest = src1 + src2 */ + OP_SUB_INT, /* subi : dest = src1 - src2 */ + OP_MUL_INT, /* muli : dest = src1 * src2 */ + OP_DIV_INT, /* divi : dest = src1 / src2 */ + OP_JEQ_INT, /* jeqi : jump to address dest if src1 as int == src2 as int */ + OP_JGT_INT, /* jgti : jump to address dest if src1 as int > src2 as int*/ + OP_JLT_INT, /* jlti : jump to address dest if src1 as int < src2 as int */ + OP_JLE_INT, /* jlei : jump to address dest if src1 as int <= src2 as int */ + OP_JGE_INT, /* jgei : jump to address dest if src1 as int >= src2 as int*/ + OP_INT_TO_REAL, /* itor : dest = src1 as f32 */ + OP_ADD_UINT, /* addu : dest = src1 + src2 */ + OP_SUB_UINT, /* subu : dest = src1 - src2 */ + OP_MUL_UINT, /* mulu : dest = src1 * src2 */ + OP_DIV_UINT, /* divu : dest =
src1 / src2 */ + OP_JEQ_UINT, /* jequ : jump to address dest if src1 as int == src2 as uint */ + OP_JGT_UINT, /* jgtu : jump to address dest if src1 as int > src2 as uint*/ + OP_JLT_UINT, /* jltu : jump to address dest if src1 as int < src2 as uint */ + OP_JLE_UINT, /* jleu : jump to address dest if src1 as int <= src2 as uint */ + OP_JGE_UINT, /* jgeu : jump to address dest if src1 as int >= src2 as uint*/ + OP_UINT_TO_REAL, /* utor : dest = src1 as f32 */ + OP_ADD_REAL, /* addr : dest = src1 + src2 */ + OP_SUB_REAL, /* subr : dest = src1 - src2 */ + OP_MUL_REAL, /* mulr : dest = src1 * src2 */ + OP_DIV_REAL, /* divr : dest = src1 / src2 */ + OP_JEQ_REAL, /* jeqr : jump to address dest if src1 as real == src2 as real */ + OP_JGE_REAL, /* jger : jump to address dest if src1 as real >= src2 as real */ + OP_JGT_REAL, /* jgtr : jump to address dest if src1 as real > src2 as real */ + OP_JLT_REAL, /* jltr : jump to address dest if src1 as real < src2 as real */ + OP_JLE_REAL, /* jler : jump to address dest if src1 as real <= src2 as real */ + OP_REAL_TO_INT, /* rtoi : dest = src1 as int */ + OP_REAL_TO_UINT, /* rtou : dest = src1 as uint */ + OP_MOV, /* move : dest = src1 */ + OP_JMP, /* jump : jump to address src1 unconditionally */ + OP_CALL, /* call : creates a new frame */ + OP_RETURN, /* retn : returns from a frame to the parent frame */ + OP_INT_TO_STRING, /* itos : dest = src1 as str */ + OP_UINT_TO_STRING, /* utos : dest = src1 as str */ + OP_REAL_TO_STRING, /* rtos : dest = src1 as str */ + OP_READ_STRING, /* gets : dest = gets as str */ + OP_PRINT_STRING, /* puts : write src1 to stdout */ + OP_CMP_STRING, /* cmps : dest = (str == src2) as bool */ +} Opcode; +typedef union value_u { + int32_t i; /* Integers */ + float f; /* Float */ + uint32_t u; /* Unsigned integers, also used for pointer address */ + char c[4]; /* 4 Byte char array for string packing */ +} Value; +typedef struct slice_s { + uint32_t start; + uint32_t end; +} Slice; +#define MAX_REGS 32
+typedef struct frame_s { + Value registers[MAX_REGS]; /* R0-R31 */ + uint32_t rp; /* register pointer (last unused) */ + Slice allocated; /* start and end of global allocated block */ +} Frame; +typedef struct screen_t { + uint8_t width; + uint8_t height; + Slice allocated; + Value *buffer; +} Screen; +typedef struct mouse_t { + uint32_t x; + uint32_t y; + uint8_t btn1; + uint8_t btn2; + uint8_t btn3; +} Mouse; +typedef struct keyboard_t { + uint32_t length; + const uint8_t *keys; +} Keyboard; +typedef union device_u { + uint8_t type; + Screen s; + Mouse m; + Keyboard k; +} Device; +#define MEMORY_SIZE 65536 +#define CODE_SIZE 8192 +#define FRAMES_SIZE 128 +#define STACK_SIZE 256 +#define DEVICES_SIZE 8 +typedef struct vm_s { + uint32_t pc; /* program counter */ + uint32_t cp; /* code pointer (last allocated opcode) */ + uint32_t fp; /* frame pointer (current frame) */ + uint32_t sp; /* stack pointer (top of stack) */ + uint32_t rp; /* return stack pointer (top of stack) */ + uint32_t mp; /* memory pointer (last allocated value) */ + uint32_t dp; /* device pointer (last allocated device) */ + uint8_t devices_size; + Device devices[DEVICES_SIZE]; + uint32_t frames_size; + Frame frames[FRAMES_SIZE]; /* function call frames */ + uint32_t stack_size; + Value stack[STACK_SIZE]; /* main stack */ + uint32_t return_stack_size; + Value return_stack[STACK_SIZE]; /* return stack (for recursion) */ + uint32_t code_size; + Value code[CODE_SIZE]; /* code block */ + uint32_t memory_size; + Value memory[MEMORY_SIZE]; /* memory block */ +} VM; +/** +* Embeds a string into the VM +*/ +uint32_t str_alloc(VM *vm, const char *str, uint32_t length) { + if (!length) length = strlen(str); + uint32_t str_addr = vm->mp; + vm->memory[vm->mp++].u = length; + uint32_t i, j = 0; + for (i = 0; i < length; i++) { + vm->memory[vm->mp].c[i % 4] = str[i]; + if (++j == 4) { + j = 0; + vm->mp++; + } + } + vm->frames[vm->fp].allocated.end += length / 4; + return str_addr; +} + +/** + * Step to the
next opcode in the vm. + */ +bool step_vm(VM *vm) { + /* Get current instruction & Advance to next instruction */ + uint32_t instruction = vm->code[vm->pc++].u; + + uint8_t opcode = (instruction >> 24) & 0xFF; + uint8_t dest = (instruction >> 16) & 0xFF; + uint8_t src1 = (instruction >> 8) & 0xFF; + uint8_t src2 = instruction & 0xFF; + + switch (opcode) { + case OP_HALT: + return false; + case OP_CALL:; /* what's up with this semicolon? ANSI C does not allow a declaration directly after a case label, so this no-op is here */ + uint32_t jmp = vm->code[vm->pc++].u; /* location of function in code */ + vm->return_stack[vm->rp++].u = vm->pc; /* set return address */ + vm->fp++; /* increment to the next free frame */ + vm->frames[vm->fp].allocated.start = vm->mp; /* set start of new memory block */ + vm->pc = jmp; + return true; + case OP_RETURN: + vm->pc = vm->return_stack[--vm->rp].u; /* set pc to return address */ + vm->mp = vm->frames[vm->fp--].allocated.start; /* reset memory pointer to start of old slice, pop the frame */ + return true; + case OP_LOADI: + vm->frames[vm->fp].registers[dest].i = vm->code[vm->pc++].i; + return true; + ...
+``` +Here is my lexer: +```c +typedef enum { + TOKEN_EOF, + TOKEN_IDENTIFIER, + TOKEN_INT_LITERAL, + TOKEN_FLOAT_LITERAL, + TOKEN_STRING_LITERAL, + TOKEN_TYPE_INT, + TOKEN_TYPE_NAT, + TOKEN_TYPE_REAL, + TOKEN_TYPE_STR, + TOKEN_KEYWORD_TYPE, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_LET, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_INIT, + TOKEN_KEYWORD_THIS, + TOKEN_OPERATOR_IS, + TOKEN_BANG, + TOKEN_BANG_EQ, + TOKEN_EQ, + TOKEN_EQ_EQ, + TOKEN_GT, + TOKEN_LT, + TOKEN_GTE, + TOKEN_LTE, + TOKEN_DOT, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_ERROR +} TokenType; +typedef struct { + TokenType type; + const char *start; + int length; + int line; +} Token; +typedef struct { + const char *keyword; + TokenType token; +} Keyword; +typedef struct { + const char *start; + const char *current; + int line; +} Lexer; +void init_lexer(const char *source); +const char *token_type_name(TokenType type); +Token next_token(); +``` +This is something like my grammar: +``` +type «token» { + init() { + // values + } +} +! example +type Vec3 { + init(x real, y real, z real) { + this.x = x; + this.y = y; + this.z = z; + } +} +- real + - 32 bit floats +- int + - 32 bit integer +- nat + - 32 bit unsigned integer (for loop counting and indexing) +- str + - "" +- bool (uint32, 0 = false, anything else = true) + - true / false +- ! + - comment +- ?? + - unwrap or +- .? + - null check or return error +- + + - addition +- - + - subtraction + - negation +- * + - multiplication +- / + - division +- ^ + - power +- == + - equals +- < + - less than +- > + - greater than +- >= + - greater than or equals +- <= + - less than or equals +- .
+ - accessor +- ++ + - inline add 1 +- -- + - inline subtract 1 +- += + - inline add n +- -= + - inline subtract n +- *= + - inline multiply n +- \= + - inline divide n +- mod + - modulo +- not + - logical not +- and + - logical and +- or + - logical or +- xor + - logical xor +- band + - bitwise and +- bor + - bitwise or +- bxor + - bitwise xor +- srl + - bit shift right +- sll + - bit shift left +«simple_type» «variable» = val; ! similar to c +«complex_type» «variable»(«fields», …); ! similar to c++ +«type»[«length»] «variable» = [val1, val2, ...]; ! similar to c/c++ +if («boolean expression») { +} else if («boolean expression») { +} else { +} +if («token» is real) { + print("hello yes self is a real?"); +} +switch (value) { + case A: + case B: + case C: + default: +} +for («token» in «collection») { «body» } +while («boolean expression») { «body» } +do («variable» = initial_value, end_value, increment) { «body» } +fn «token» («type» «parameter», ...) «return_type» { + «body» +} +``` +Here is an example compile test program that I did by hand for testing: +```zrl +fn fib(int n) int { + if (n < 2) return n; + return fib(n - 2) + fib(n - 1); +} + +print fib(35); +``` +```c +bool test_recursive_function_compile(VM *vm) { + /* fn main() */ + vm->code[vm->cp++].u = OP(OP_LOADI, 0, 0, 0); /* 35 */ + vm->code[vm->cp++].i = 35; + vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0); + vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* ); */ + vm->code[vm->cp++].u = 9; + vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* get return value */ + vm->code[vm->cp++].u = OP(OP_INT_TO_STRING, 1, 0, 0); + vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print(fib(35).toS()); */ + vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0); + /* fn fib() */ + vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* n int */ + vm->code[vm->cp++].u = OP(OP_LOADI, 1, 0, 0); /* 2 */ + vm->code[vm->cp++].i = 2; + vm->code[vm->cp++].u = OP(OP_LOADI, 2, 0, 0); /* &fib */ + vm->code[vm->cp++].i = 32; + 
vm->code[vm->cp++].u = OP(OP_JLT_INT, 2, 0, 1); + vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 2 */ + vm->code[vm->cp++].i = 2; + vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3); + vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0); + vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 2) */ + vm->code[vm->cp++].u = 9; + vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 1 */ + vm->code[vm->cp++].i = 1; + vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3); + vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0); + vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 1) */ + vm->code[vm->cp++].u = 9; + vm->code[vm->cp++].u = OP(OP_POPI, 4, 0, 0); + vm->code[vm->cp++].u = OP(OP_POPI, 5, 0, 0); + vm->code[vm->cp++].u = OP(OP_ADD_INT, 6, 5, 4); + vm->code[vm->cp++].u = OP(OP_PUSHI, 6, 0, 0); + vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0); + vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0); + vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0); + return true; +} +``` +I am at the point where I have a 32bit VM that can do math and store strings and a lexer but I need a way to compile the lexer output into a program using my opcodes. + diff --git a/src/debug.c b/src/debug.c index 0bb12eb..0895737 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,27 +1,34 @@ #include "debug.h" /** - * Dumps the vm memory to a file. + * Dumps the vm memory and code to a file. 
*/ int core_dump(VM *vm) { - FILE *file = fopen("memory_dump.bin", "wb"); - if (!file) { - perror("Failed to open file"); - return EXIT_FAILURE; - } + FILE *file = fopen("memory_dump.bin", "wb"); + if (!file) { + perror("Failed to open file"); + return EXIT_FAILURE; + } + + size_t code_written = fwrite(vm->code, 1, vm->code_size, file); + if (code_written != vm->code_size) { + fprintf(stderr, "Incomplete code write: %zu bytes written out of %u\n", code_written, vm->code_size); + fclose(file); + return EXIT_FAILURE; + } + + size_t memory_written = fwrite(vm->memory, 1, vm->memory_size, file); + if (memory_written != vm->memory_size) { + fprintf(stderr, "Incomplete memory write: %zu bytes written out of %u\n", memory_written, vm->memory_size); + fclose(file); + return EXIT_FAILURE; + } - size_t written = fwrite(vm->memory, 1, vm->memory_size, file); - if (written != vm->memory_size) { - fprintf(stderr, "Incomplete write: %zu bytes written out of %u\n", written, - vm->memory_size); fclose(file); - return EXIT_FAILURE; - } - - fclose(file); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } + /** * Print opcode. 
*/ diff --git a/src/lexer.c b/src/lexer.c index bf0a340..d67c8a2 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,33 +1,71 @@ +#include + +#include "common.h" #include "lexer.h" +typedef struct { + const char *start; + const char *current; + int line; +} Lexer; + Lexer lexer; -void init_lexer(const char *source) { +void initLexer(const char *source) { lexer.start = source; lexer.current = source; lexer.line = 1; } -int is_at_end() { return *lexer.current == '\0'; } +static bool isAlpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} -char advance() { return *lexer.current++; } +static bool isDigit(char c) { return c >= '0' && c <= '9'; } -char peek() { return *lexer.current; } +static bool isAtEnd() { return *lexer.current == '\0'; } -char peek_next() { - if (is_at_end()) +static char advance() { + lexer.current++; + return lexer.current[-1]; +} + +static char peek() { return *lexer.current; } + +static char peekNext() { + if (isAtEnd()) return '\0'; return lexer.current[1]; } -int match(char expected) { +static bool match(char expected) { + if (isAtEnd()) + return false; if (*lexer.current != expected) - return 0; + return false; lexer.current++; - return 1; + return true; } -void skip_whitespace() { +static Token makeToken(TokenType type) { + Token token; + token.type = type; + token.start = lexer.start; + token.length = (int)(lexer.current - lexer.start); + token.line = lexer.line; + return token; +} + +static Token errorToken(const char *message) { + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (int)strlen(message); + token.line = lexer.line; + return token; +} + +static void skipWhitespace() { for (;;) { char c = peek(); switch (c) { @@ -40,13 +78,13 @@ void skip_whitespace() { lexer.line++; advance(); break; - case '!': - if (peek_next() == '!') { - while (peek() != '\n' && !is_at_end()) + case '/': + if (peekNext() == '/') { + /* A comment goes until the end of the line. 
*/ + while (peek() != '\n' && !isAtEnd()) advance(); } else { - while (peek() != '\n' && !is_at_end()) - advance(); + return; } break; default: @@ -55,240 +93,150 @@ void skip_whitespace() { } } -Token make_token(TokenType type) { - Token token; - token.type = type; - token.start = lexer.start; - token.length = (int)(lexer.current - lexer.start); - token.line = lexer.line; - return token; -} - -Token error_token(const char *message) { - Token token; - token.type = TOKEN_ERROR; - token.start = message; - token.length = (int)strlen(message); - token.line = lexer.line; - return token; -} - -int is_alpha(char c) { return isalpha(c) || c == '_'; } - -int is_digit(char c) { return isdigit(c); } - -Token number() { - while (is_digit(peek())) - advance(); - - if (peek() == '.' && is_digit(peek_next())) { - advance(); - while (is_digit(peek())) - advance(); - return make_token(TOKEN_FLOAT_LITERAL); +static TokenType checkKeyword(int start, int length, const char *rest, + TokenType type) { + if (lexer.current - lexer.start == start + length && + memcmp(lexer.start + start, rest, length) == 0) { + return type; } - return make_token(TOKEN_INT_LITERAL); + return TOKEN_IDENTIFIER; } -Token string() { - while (peek() != '"' && !is_at_end()) { +static TokenType identifierType() { + switch (lexer.start[0]) { + case 'a': + return checkKeyword(1, 2, "nd", TOKEN_OPERATOR_AND); + case 'e': + return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); + case 'f': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'a': + return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); + case 'o': + return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); + } + return checkKeyword(1, 1, "n", TOKEN_KEYWORD_FN); + } + break; + case 'i': + return checkKeyword(1, 1, "f", TOKEN_KEYWORD_IF); + case 'n': + return checkKeyword(1, 2, "il", TOKEN_KEYWORD_NIL); + case 'o': + return checkKeyword(1, 1, "r", TOKEN_OPERATOR_OR); + case 'p': + return checkKeyword(1, 4, "rint", TOKEN_KEYWORD_PRINT); + 
case 'r': + return checkKeyword(1, 5, "eturn", TOKEN_KEYWORD_RETURN); + case 't': + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'h': + return checkKeyword(2, 2, "is", TOKEN_KEYWORD_THIS); + case 'r': + return checkKeyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); + case 'y': + return checkKeyword(2, 2, "pe", TOKEN_KEYWORD_TYPE); + } + } + break; + case 'l': + return checkKeyword(1, 2, "et", TOKEN_KEYWORD_LET); + case 'w': + return checkKeyword(1, 4, "hile", TOKEN_KEYWORD_WHILE); + } + + return TOKEN_IDENTIFIER; +} + +static Token identifier() { + while (isAlpha(peek()) || isDigit(peek())) + advance(); + return makeToken(identifierType()); +} + +static Token number() { + while (isDigit(peek())) + advance(); + + /* Look for a fractional part. */ + if (peek() == '.' && isDigit(peekNext())) { + /* Consume the ".". */ + advance(); + + while (isDigit(peek())) + advance(); + + return makeToken(TOKEN_FLOAT_LITERAL); + } + + return makeToken(TOKEN_INT_LITERAL); +} + +static Token string() { + while (peek() != '"' && !isAtEnd()) { if (peek() == '\n') lexer.line++; advance(); } - if (is_at_end()) - return error_token("Unterminated string."); + if (isAtEnd()) + return errorToken("Unterminated string."); + /* The closing quote. 
*/ advance(); - return make_token(TOKEN_STRING_LITERAL); + return makeToken(TOKEN_STRING_LITERAL); } -Token identifier() { - while (is_alpha(peek()) || is_digit(peek())) - advance(); - - int length = (int)(lexer.current - lexer.start); - const char *text = lexer.start; - - if (length == 4 && strncmp(text, "init", 4) == 0) - return make_token(TOKEN_KEYWORD_INIT); - if (length == 4 && strncmp(text, "this", 4) == 0) - return make_token(TOKEN_KEYWORD_THIS); - if (length == 4 && strncmp(text, "type", 4) == 0) - return make_token(TOKEN_KEYWORD_TYPE); - if (length == 2 && strncmp(text, "fn", 2) == 0) - return make_token(TOKEN_KEYWORD_FN); - if (length == 3 && strncmp(text, "let", 3) == 0) - return make_token(TOKEN_KEYWORD_LET); - if (length == 5 && strncmp(text, "const", 5) == 0) - return make_token(TOKEN_KEYWORD_CONST); - if (length == 2 && strncmp(text, "if", 2) == 0) - return make_token(TOKEN_KEYWORD_IF); - if (length == 4 && strncmp(text, "else", 4) == 0) - return make_token(TOKEN_KEYWORD_ELSE); - if (length == 5 && strncmp(text, "while", 5) == 0) - return make_token(TOKEN_KEYWORD_WHILE); - if (length == 3 && strncmp(text, "for", 3) == 0) - return make_token(TOKEN_KEYWORD_FOR); - if (length == 6 && strncmp(text, "return", 6) == 0) - return make_token(TOKEN_KEYWORD_RETURN); - if (length == 3 && strncmp(text, "use", 3) == 0) - return make_token(TOKEN_KEYWORD_USE); - if (length == 2 && strncmp(text, "is", 2) == 0) - return make_token(TOKEN_OPERATOR_IS); - if (length == 3 && strncmp(text, "int", 3) == 0) - return make_token(TOKEN_TYPE_INT); - if (length == 3 && strncmp(text, "nat", 3) == 0) - return make_token(TOKEN_TYPE_NAT); - if (length == 3 && strncmp(text, "str", 3) == 0) - return make_token(TOKEN_TYPE_STR); - if (length == 3 && strncmp(text, "real", 4) == 0) - return make_token(TOKEN_TYPE_REAL); - - return make_token(TOKEN_IDENTIFIER); -} - -Token next_token() { - skip_whitespace(); +Token nextToken() { + skipWhitespace(); lexer.start = lexer.current; - if 
(is_at_end()) - return make_token(TOKEN_EOF); + if (isAtEnd()) + return makeToken(TOKEN_EOF); char c = advance(); - - if (is_alpha(c)) + if (isAlpha(c)) return identifier(); - if (is_digit(c)) + if (isDigit(c)) return number(); switch (c) { case '(': - return make_token(TOKEN_LPAREN); + return makeToken(TOKEN_LPAREN); case ')': - return make_token(TOKEN_RPAREN); + return makeToken(TOKEN_RPAREN); case '{': - return make_token(TOKEN_LBRACE); + return makeToken(TOKEN_LBRACE); case '}': - return make_token(TOKEN_RBRACE); - case '[': - return make_token(TOKEN_LBRACKET); - case ']': - return make_token(TOKEN_RBRACKET); - case ',': - return make_token(TOKEN_COMMA); - case '.': - return make_token(TOKEN_DOT); - case ':': - return make_token(TOKEN_COLON); + return makeToken(TOKEN_RBRACE); case ';': - return make_token(TOKEN_SEMICOLON); - case '+': - return make_token(TOKEN_PLUS); + return makeToken(TOKEN_SEMICOLON); + case ',': + return makeToken(TOKEN_COMMA); + case '.': + return makeToken(TOKEN_DOT); case '-': - return make_token(TOKEN_MINUS); - case '*': - return make_token(TOKEN_STAR); + return makeToken(TOKEN_MINUS); + case '+': + return makeToken(TOKEN_PLUS); case '/': - return make_token(TOKEN_SLASH); + return makeToken(TOKEN_SLASH); + case '*': + return makeToken(TOKEN_STAR); case '!': - return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + return makeToken(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); case '=': - return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + return makeToken(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); case '<': - return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); + return makeToken(match('=') ? TOKEN_LTE : TOKEN_LT); case '>': - return make_token(match('=') ? TOKEN_GTE : TOKEN_GT); + return makeToken(match('=') ? 
TOKEN_GTE : TOKEN_GT); case '"': return string(); } - return error_token("Unexpected character."); -} - -const char *token_type_name(TokenType type) { - switch (type) { - case TOKEN_IDENTIFIER: - return "identifier"; - case TOKEN_INT_LITERAL: - return "int literal"; - case TOKEN_FLOAT_LITERAL: - return "real literal"; - case TOKEN_STRING_LITERAL: - return "string literal"; - case TOKEN_TYPE_INT: - return "int"; - case TOKEN_TYPE_REAL: - return "real"; - case TOKEN_TYPE_STR: - return "str"; - case TOKEN_TYPE_NAT: - return "nat"; - case TOKEN_KEYWORD_THIS: - return "this"; - case TOKEN_KEYWORD_TYPE: - return "type"; - case TOKEN_KEYWORD_FN: - return "fn"; - case TOKEN_KEYWORD_LET: - return "let"; - case TOKEN_KEYWORD_CONST: - return "const"; - case TOKEN_KEYWORD_IF: - return "if"; - case TOKEN_KEYWORD_ELSE: - return "else"; - case TOKEN_KEYWORD_WHILE: - return "while"; - case TOKEN_KEYWORD_FOR: - return "for"; - case TOKEN_KEYWORD_RETURN: - return "return"; - case TOKEN_KEYWORD_INIT: - return "init"; - case TOKEN_KEYWORD_USE: - return "use"; - case TOKEN_OPERATOR_IS: - return "is"; - case TOKEN_BANG: - return "!"; - case TOKEN_EQ: - return "="; - case TOKEN_DOT: - return "."; - case TOKEN_COMMA: - return ","; - case TOKEN_COLON: - return ":"; - case TOKEN_SEMICOLON: - return ";"; - case TOKEN_PLUS: - return "+"; - case TOKEN_MINUS: - return "-"; - case TOKEN_STAR: - return "*"; - case TOKEN_SLASH: - return "/"; - case TOKEN_LPAREN: - return "("; - case TOKEN_RPAREN: - return ")"; - case TOKEN_LBRACE: - return "{"; - case TOKEN_RBRACE: - return "}"; - case TOKEN_LBRACKET: - return "["; - case TOKEN_RBRACKET: - return "]"; - case TOKEN_EOF: - return "eof"; - case TOKEN_ERROR: - return "error"; - default: - return "unknown"; - } + return errorToken("Unexpected character."); } diff --git a/src/lexer.h b/src/lexer.h index 55c2c02..d001a65 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -1,14 +1,11 @@ -#ifndef ZRL_LEXER_H -#define ZRL_LEXER_H - -#include -#include -#include 
+#ifndef zre_lexer_h +#define zre_lexer_h typedef enum { TOKEN_EOF, TOKEN_IDENTIFIER, TOKEN_INT_LITERAL, + TOKEN_UINT_LITERAL, TOKEN_FLOAT_LITERAL, TOKEN_STRING_LITERAL, TOKEN_TYPE_INT, @@ -27,7 +24,14 @@ typedef enum { TOKEN_KEYWORD_USE, TOKEN_KEYWORD_INIT, TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_PRINT, + TOKEN_KEYWORD_NIL, + TOKEN_KEYWORD_TRUE, + TOKEN_KEYWORD_FALSE, TOKEN_OPERATOR_IS, + TOKEN_OPERATOR_NOT, + TOKEN_OPERATOR_AND, + TOKEN_OPERATOR_OR, TOKEN_BANG, TOKEN_BANG_EQ, TOKEN_EQ, @@ -60,19 +64,7 @@ typedef struct { int line; } Token; -typedef struct { - const char *keyword; - TokenType token; -} Keyword; - -typedef struct { - const char *start; - const char *current; - int line; -} Lexer; - -void init_lexer(const char *source); -const char *token_type_name(TokenType type); -Token next_token(); +void initLexer(const char *source); +Token nextToken(); #endif diff --git a/src/opcodes.h b/src/opcodes.h index 223699c..63f7523 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -133,6 +133,7 @@ typedef enum { OP_READ_STRING, /* gets : dest = gets as str */ OP_PRINT_STRING, /* puts : write src1 to stdout */ OP_CMP_STRING, /* cmps : dest = (str == src2) as bool */ + OP_NOT, } Opcode; typedef enum { diff --git a/src/vm.c b/src/vm.c index aea8fa0..2c489a4 100644 --- a/src/vm.c +++ b/src/vm.c @@ -27,7 +27,8 @@ * Embeds a string into the VM */ uint32_t str_alloc(VM *vm, const char *str, uint32_t length) { - if (!length) length = strlen(str); + if (!length) + length = strlen(str); uint32_t str_addr = vm->mp; vm->memory[vm->mp++].u = length; uint32_t i, j = 0; @@ -57,16 +58,21 @@ bool step_vm(VM *vm) { switch (opcode) { case OP_HALT: return false; - case OP_CALL:; /* whats up with this semicolon? 
ANSI C does not allow you to create a variabel after a case, so this noop is here */ - uint32_t jmp = vm->code[vm->pc++].u; /* location of function in code */ + case OP_CALL: { + uint32_t jmp = vm->code[vm->pc++].u; /* location of function in code */ vm->return_stack[vm->rp++].u = vm->pc; /* set return address */ vm->fp++; /* increment to the next free frame */ - vm->frames[vm->fp].allocated.start = vm->mp; /* set start of new memory block */ + vm->frames[vm->fp].allocated.start = + vm->mp; /* set start of new memory block */ vm->pc = jmp; return true; + } case OP_RETURN: - vm->pc = vm->return_stack[--vm->rp].u; /* set pc to return address */ - vm->mp = vm->frames[vm->fp--].allocated.start; /* reset memory pointer to start of old slice, pop the frame */ + vm->frames[vm->fp].rp = 0; /* reset register ptr */ + vm->pc = vm->return_stack[--vm->rp].u; /* set pc to return address */ + vm->mp = + vm->frames[vm->fp--].allocated.start; /* reset memory pointer to start + of old slice, pop the frame */ return true; case OP_LOADI: vm->frames[vm->fp].registers[dest].i = vm->code[vm->pc++].i; @@ -195,6 +201,9 @@ bool step_vm(VM *vm) { case OP_JLE_REAL: { COMPARE_AND_JUMP(float, u, <=); } + case OP_NOT: { + /* TODO implement not; until then this case falls through into OP_INT_TO_STRING */ + } case OP_INT_TO_STRING: { int32_t a = (int32_t)vm->frames[vm->fp].registers[src1].i; /* get value */ char buffer[32]; @@ -256,7 +265,8 @@ bool step_vm(VM *vm) { uint32_t addr2 = (uint32_t)vm->frames[vm->fp].registers[src2].u; uint32_t length1 = vm->memory[addr1 - 1].u; uint32_t length2 = vm->memory[addr2 - 1].u; - uint32_t equal = 1; /* we dont have a native boolean type so we use uint32_t */ + uint32_t equal = + 1; /* we don't have a native boolean type so we use uint32_t */ if (length1 != length2) { equal = 0; @@ -270,9 +280,9 @@ bool step_vm(VM *vm) { break; } if ((char1 & 0xFF) == '\0' && (char2 & 0xFF) == '\0') { - equal = 1; - break; - } + equal = 1; + break; + } } } vm->memory[dest].u = equal; diff --git a/test/add.zrl b/test/add.zrl index
dd23e5c..aa7b913 100644 --- a/test/add.zrl +++ b/test/add.zrl @@ -1,6 +1,6 @@ -fn add(i8 a, i8 b) i8 { +fn add(int a, int b) int { return a + b; } -i8 sum = add(1, 1); +int sum = add(1, 1); print(sum.toS()); diff --git a/test/fib.zrl b/test/fib.zrl index 04b9114..6fe3786 100644 --- a/test/fib.zrl +++ b/test/fib.zrl @@ -1,4 +1,4 @@ -fn fib(i32 n) i32 { +fn fib(int n) int { if (n < 2) return n; return fib(n - 2) + fib(n - 1); }