/*
 * VM definitions for a small language aimed at retrocomputing and other
 * confined platforms.  Everything lives in preallocated, fixed-size buffers;
 * nothing here uses malloc/free.
 */

/*
 * Packs an opcode and three 8-bit operands into one 32-bit instruction word:
 *   bits 31..24 = opcode, 23..16 = a, 15..8 = b, 7..0 = c.
 * Every argument is parenthesised and widened to uint32_t before shifting so
 * that expression arguments expand correctly and the shift never happens on a
 * signed int.  Operands are not masked; callers must pass values in 0..255.
 */
#define OP(opcode, a, b, c)                                     \
  (((uint32_t)(opcode) << 24) | ((uint32_t)(a) << 16) |         \
   ((uint32_t)(b) << 8) | (uint32_t)(c))

/*
 * Instruction set.  The numeric value of each enumerator is the opcode byte,
 * so the ORDER BELOW MUST NOT CHANGE.
 *
 * NOTE(review): the unsigned jump comments say "src1 as int"; presumably both
 * operands are compared as uint - confirm against the interpreter.
 */
typedef enum {
  OP_HALT,           /* halt : terminate execution */
  OP_LOADI,          /* multiple byte: lodi : dest = next memory location as int */
  OP_LOADU,          /* multiple byte: lodu : dest = next memory location as uint */
  OP_LOADF,          /* multiple byte: lodf : dest = next memory location as float */
  OP_STOREI,         /* multiple byte: stri : next memory location = src1 as int */
  OP_STOREU,         /* multiple byte: stru : next memory location = src1 as uint */
  OP_STOREF,         /* multiple byte: strf : next memory location = src1 as float */
  OP_PUSHI,          /* pshi : push int from register onto the stack */
  OP_PUSHU,          /* pshu : push uint from register onto the stack */
  OP_PUSHF,          /* pshf : push float from register onto the stack */
  OP_PUSHS,          /* pshs : push str ref from register onto the stack and copy str */
  OP_POPI,           /* popi : pop int from stack onto the register */
  OP_POPU,           /* popu : pop uint from stack onto the register */
  OP_POPF,           /* popf : pop float from stack onto the register */
  OP_POPS,           /* pops : pop str ref from stack and move/copy to register */
  OP_ADD_INT,        /* addi : dest = src1 + src2 */
  OP_SUB_INT,        /* subi : dest = src1 - src2 */
  OP_MUL_INT,        /* muli : dest = src1 * src2 */
  OP_DIV_INT,        /* divi : dest = src1 / src2 */
  OP_JEQ_INT,        /* jeqi : jump to address dest if src1 as int == src2 as int */
  OP_JGT_INT,        /* jgti : jump to address dest if src1 as int > src2 as int */
  OP_JLT_INT,        /* jlti : jump to address dest if src1 as int < src2 as int */
  OP_JLE_INT,        /* jlei : jump to address dest if src1 as int <= src2 as int */
  OP_JGE_INT,        /* jgei : jump to address dest if src1 as int >= src2 as int */
  OP_INT_TO_REAL,    /* itor : dest = src1 as f32 */
  OP_ADD_UINT,       /* addu : dest = src1 + src2 */
  OP_SUB_UINT,       /* subu : dest = src1 - src2 */
  OP_MUL_UINT,       /* mulu : dest = src1 * src2 */
  OP_DIV_UINT,       /* divu : dest = src1 / src2 */
  OP_JEQ_UINT,       /* jequ : jump to address dest if src1 as int == src2 as uint */
  OP_JGT_UINT,       /* jgtu : jump to address dest if src1 as int > src2 as uint */
  OP_JLT_UINT,       /* jltu : jump to address dest if src1 as int < src2 as uint */
  OP_JLE_UINT,       /* jleu : jump to address dest if src1 as int <= src2 as uint */
  OP_JGE_UINT,       /* jgeu : jump to address dest if src1 as int >= src2 as uint */
  OP_UINT_TO_REAL,   /* utor : dest = src1 as f32 */
  OP_ADD_REAL,       /* addr : dest = src1 + src2 */
  OP_SUB_REAL,       /* subr : dest = src1 - src2 */
  OP_MUL_REAL,       /* mulr : dest = src1 * src2 */
  OP_DIV_REAL,       /* divr : dest = src1 / src2 */
  OP_JEQ_REAL,       /* jeqr : jump to address dest if src1 as real == src2 as real */
  OP_JGE_REAL,       /* jger : jump to address dest if src1 as real >= src2 as real */
  OP_JGT_REAL,       /* jgtr : jump to address dest if src1 as real > src2 as real */
  OP_JLT_REAL,       /* jltr : jump to address dest if src1 as real < src2 as real */
  OP_JLE_REAL,       /* jler : jump to address dest if src1 as real <= src2 as real */
  OP_REAL_TO_INT,    /* rtoi : dest = src1 as int */
  OP_REAL_TO_UINT,   /* rtou : dest = src1 as uint */
  OP_MOV,            /* move : dest = src1 */
  OP_JMP,            /* jump : jump to address src1 unconditionally */
  OP_CALL,           /* call : creates a new frame */
  OP_RETURN,         /* retn : returns from a frame to the parent frame */
  OP_INT_TO_STRING,  /* itos : dest = src1 as str */
  OP_UINT_TO_STRING, /* utos : dest = src1 as str */
  OP_REAL_TO_STRING, /* rtos : dest = src1 as str */
  OP_READ_STRING,    /* gets : dest = gets as str */
  OP_PRINT_STRING,   /* puts : write src1 to stdout */
  OP_CMP_STRING      /* cmps : dest = (str == src2) as bool */
} Opcode;            /* no trailing comma: C89 does not allow one */

/* One 32-bit VM cell, viewed as whichever type the instruction needs. */
typedef union value_u {
  int32_t i;  /* Integers */
  float f;    /* Float */
  uint32_t u; /* Unsigned integers, also used for pointer address */
  char c[4];  /* 4 Byte char array for string packing */
} Value;

/* A [start, end) style pair of indices into VM memory. */
typedef struct slice_s {
  uint32_t start;
  uint32_t end;
} Slice;

#define MAX_REGS 32

/* Per-call state: a private register file plus the memory the call owns. */
typedef struct frame_s {
  Value registers[MAX_REGS]; /* R0-R31 */
  uint32_t rp;               /* register pointer (last unused) */
  Slice allocated;           /* start and end of global allocated block */
} Frame;

typedef struct screen_t {
  uint8_t width;
  uint8_t height;
  Slice allocated;
  Value *buffer;
} Screen;

typedef struct mouse_t {
  uint32_t x;
  uint32_t y;
  uint8_t btn1;
  uint8_t btn2;
  uint8_t btn3;
} Mouse;

typedef struct keyboard_t {
  uint32_t length;
  const uint8_t *keys;
} Keyboard;

/*
 * NOTE(review): `type` shares storage with the first byte of s/m/k because
 * this is a union, so writing a device payload overwrites the tag.  If `type`
 * is meant to be a discriminator it probably belongs in an enclosing struct -
 * confirm intent before relying on it.
 */
typedef union device_u {
  uint8_t type;
  Screen s;
  Mouse m;
  Keyboard k;
} Device;

#define MEMORY_SIZE 65536
#define CODE_SIZE 8192
#define FRAMES_SIZE 128
#define STACK_SIZE 256
#define DEVICES_SIZE 8

/* Complete machine state; every buffer is a fixed-size member. */
typedef struct vm_s {
  uint32_t pc; /* program counter */
  uint32_t cp; /* code pointer (last allocated opcode) */
  uint32_t fp; /* frame pointer (current frame) */
  uint32_t sp; /* stack pointer (top of stack) */
  uint32_t rp; /* return stack pointer (top of stack) */
  uint32_t mp; /* memory pointer (last allocated value) */
  uint32_t dp; /* device pointer (last allocated device) */
  uint8_t devices_size;
  Device devices[DEVICES_SIZE];
  uint32_t frames_size;
  Frame frames[FRAMES_SIZE]; /* function call frames */
  uint32_t stack_size;
  Value stack[STACK_SIZE]; /* main stack */
  uint32_t return_stack_size;
  Value return_stack[STACK_SIZE]; /* return stack (for recursion) */
  uint32_t code_size;
  Value code[CODE_SIZE]; /* code block */
  uint32_t memory_size;
  Value memory[MEMORY_SIZE]; /* memory block */
} VM;

/* Returned by str_alloc when the string does not fit in vm->memory. */
#define STR_ALLOC_FAILED 0xFFFFFFFFu

/**
 * Embeds a string into the VM.
 *
 * Layout written at the returned address: one word holding the byte length,
 * followed by the characters packed four per word.  Unused bytes of the last
 * word are zeroed.
 *
 * @param vm     machine whose memory receives the string
 * @param str    source bytes; must not be NULL
 * @param length number of bytes to copy, or 0 to use strlen(str)
 * @return index of the length word in vm->memory, or STR_ALLOC_FAILED when
 *         the string would run past MEMORY_SIZE (nothing is written then)
 */
uint32_t str_alloc(VM *vm, const char *str, uint32_t length) {
  uint32_t str_addr;
  uint32_t words;
  uint32_t i;

  if (!length) length = (uint32_t)strlen(str);

  /* Length word + payload rounded UP to whole words (written so it cannot
     overflow for large lengths). */
  words = 1 + length / 4 + (length % 4 != 0 ? 1 : 0);
  if (vm->mp >= MEMORY_SIZE || words > MEMORY_SIZE - vm->mp) {
    return STR_ALLOC_FAILED;
  }

  str_addr = vm->mp;
  vm->memory[str_addr].u = length;
  for (i = 0; i < length; i++) {
    Value *word = &vm->memory[str_addr + 1 + i / 4];
    if (i % 4 == 0) word->u = 0; /* clear so tail padding is deterministic */
    word->c[i % 4] = str[i];
  }

  /* Advance past the partial last word too, otherwise the next allocation
     would overwrite the end of this string. */
  vm->mp = str_addr + words;
  vm->frames[vm->fp].allocated.end += words;
  return str_addr;
}
/* Every kind of token the lexer can hand to the compiler. */
typedef enum {
  TOKEN_EOF,
  TOKEN_IDENTIFIER,
  /* literals */
  TOKEN_INT_LITERAL,
  TOKEN_FLOAT_LITERAL,
  TOKEN_STRING_LITERAL,
  /* built-in type names */
  TOKEN_TYPE_INT,
  TOKEN_TYPE_NAT,
  TOKEN_TYPE_REAL,
  TOKEN_TYPE_STR,
  /* keywords */
  TOKEN_KEYWORD_TYPE,
  TOKEN_KEYWORD_FN,
  TOKEN_KEYWORD_LET,
  TOKEN_KEYWORD_CONST,
  TOKEN_KEYWORD_IF,
  TOKEN_KEYWORD_ELSE,
  TOKEN_KEYWORD_WHILE,
  TOKEN_KEYWORD_FOR,
  TOKEN_KEYWORD_RETURN,
  TOKEN_KEYWORD_USE,
  TOKEN_KEYWORD_INIT,
  TOKEN_KEYWORD_THIS,
  TOKEN_OPERATOR_IS,
  /* comparison / assignment */
  TOKEN_BANG,
  TOKEN_BANG_EQ,
  TOKEN_EQ,
  TOKEN_EQ_EQ,
  TOKEN_GT,
  TOKEN_LT,
  TOKEN_GTE,
  TOKEN_LTE,
  /* punctuation */
  TOKEN_DOT,
  TOKEN_COMMA,
  TOKEN_COLON,
  TOKEN_SEMICOLON,
  /* arithmetic */
  TOKEN_PLUS,
  TOKEN_MINUS,
  TOKEN_STAR,
  TOKEN_SLASH,
  /* brackets */
  TOKEN_LPAREN,
  TOKEN_RPAREN,
  TOKEN_LBRACE,
  TOKEN_RBRACE,
  TOKEN_LBRACKET,
  TOKEN_RBRACKET,
  TOKEN_ERROR
} TokenType;

/* One lexeme. */
typedef struct {
  TokenType type;
  const char *start; /* presumably points into the source text, not
                        NUL-terminated at `length` - confirm in the lexer */
  int length;        /* number of chars starting at `start` */
  int line;          /* source line the token was found on */
} Token;

/* Maps a reserved word's spelling to its token type. */
typedef struct {
  const char *keyword;
  TokenType token;
} Keyword;

/* Scanner cursor state. */
typedef struct {
  const char *start;   /* presumably the start of the lexeme being scanned */
  const char *current; /* presumably the next unread character */
  int line;
} Lexer;

void init_lexer(const char *source);
const char *token_type_name(TokenType type);
/* `(void)` makes this a real prototype; in C89 an empty `()` leaves the
   parameters unchecked. */
Token next_token(void);
- - accessor -- ++ - - inline add 1 -- -- - - inline subtract 1 -- += - - inline add n -- -= - - inline subtract n -- *= - - inline multiply n -- \= - - inline divide n -- mod - - modulo -- not - - logical not -- and - - logical and -- or - - logical or -- xor - - logical xor -- band - - bitwise and -- bor - - bitwise or -- bxor - - bitwise xor -- srl - - bit shift right -- sll - - bit shift left -«simple_type» «variable» = val; ! similar to c -«complex_type» «variable»(«fields», …); ! similar to c++ -«type»[«length»] «variable» = [val1, val2, ...]; ! similar to c/c++ -if («boolean expression») { -} else if («boolean expression») { -} else { -} -if («token» is real) { - print("hello yes self is a real?"); -} -switch (value) { - case A: - case B: - case C: - default: -} -for («token» in «collection») { «body» } -while («boolean expression») { «body» } -do («variable» = initial_value, end_value, increment) { «body» } -fn «token» («type» «parameter», ...) «return_type» { - «body» -} -``` -Here is an example compile test program that I did by hand for testing: -```zrl -fn fib(int n) int { - if (n < 2) return n; - return fib(n - 2) + fib(n - 1); -} - -print fib(35); -``` -```c -bool test_recursive_function_compile(VM *vm) { - /* fn main() */ - vm->code[vm->cp++].u = OP(OP_LOADI, 0, 0, 0); /* 35 */ - vm->code[vm->cp++].i = 35; - vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0); - vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* ); */ - vm->code[vm->cp++].u = 9; - vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* get return value */ - vm->code[vm->cp++].u = OP(OP_INT_TO_STRING, 1, 0, 0); - vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print(fib(35).toS()); */ - vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0); - /* fn fib() */ - vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* n int */ - vm->code[vm->cp++].u = OP(OP_LOADI, 1, 0, 0); /* 2 */ - vm->code[vm->cp++].i = 2; - vm->code[vm->cp++].u = OP(OP_LOADI, 2, 0, 0); /* &fib */ - vm->code[vm->cp++].i = 32; - 
vm->code[vm->cp++].u = OP(OP_JLT_INT, 2, 0, 1); - vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 2 */ - vm->code[vm->cp++].i = 2; - vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3); - vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0); - vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 2) */ - vm->code[vm->cp++].u = 9; - vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 1 */ - vm->code[vm->cp++].i = 1; - vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3); - vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0); - vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 1) */ - vm->code[vm->cp++].u = 9; - vm->code[vm->cp++].u = OP(OP_POPI, 4, 0, 0); - vm->code[vm->cp++].u = OP(OP_POPI, 5, 0, 0); - vm->code[vm->cp++].u = OP(OP_ADD_INT, 6, 5, 4); - vm->code[vm->cp++].u = OP(OP_PUSHI, 6, 0, 0); - vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0); - vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0); - vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0); - return true; -} -``` -I am at the point where I have a 32bit VM that can do math and store strings and a lexer but I need a way to compile the lexer output into a program using my opcodes. -