remove doc
This commit is contained in:
parent
c348ea3fdd
commit
881c6c7740
407
src/compiler.org
407
src/compiler.org
|
@ -1,407 +0,0 @@
|
|||
I am creating a new programming language in C89. It targets retrocomputing and constrained platforms, so I use preallocated buffers for everything, with no malloc/free.
|
||||
For reference, here are my opcodes and VM definitions:
|
||||
```c
|
||||
/* defines a uint32 opcode */
|
||||
#define OP(opcode, a, b, c) ((opcode << 24) | (a << 16) | (b << 8) | c)
|
||||
typedef enum {
|
||||
OP_HALT, /* halt : terminate execution */
|
||||
OP_LOADI, /* multiple byte: lodi : dest = next memory location as int */
|
||||
OP_LOADU, /* multiple byte: lodu : dest = next memory location as uint */
|
||||
OP_LOADF, /* multiple byte: lodf : dest = next memory location as float */
|
||||
OP_STOREI, /* multiple byte: stri : next memory location = src1 as int */
|
||||
OP_STOREU, /*multiple byte: stru : next memory location = src1 as uint */
|
||||
OP_STOREF, /* multiple byte: strf : next memory location = src1 as float */
|
||||
OP_PUSHI, /* pshi : push int from register onto the stack */
|
||||
OP_PUSHU, /* pshu : push uint from register onto the stack */
|
||||
OP_PUSHF, /* pshf : push float from register onto the stack */
|
||||
OP_PUSHS, /* pshs : push str ref from register onto the stack and copy str */
|
||||
OP_POPI, /* popi : pop int from stack onto the register */
|
||||
OP_POPU, /* popu : pop uint from stack onto the register */
|
||||
OP_POPF, /* popf : pop float from stack onto the register */
|
||||
OP_POPS, /* pops : pop str ref from stack and move/copy to register */
|
||||
OP_ADD_INT, /* addi : dest = src1 + src2 */
|
||||
OP_SUB_INT, /* subi : dest = src1 - src2 */
|
||||
OP_MUL_INT, /* muli : dest = src1 * src2 */
|
||||
OP_DIV_INT, /* divi : dest = src1 / src2 */
|
||||
OP_JEQ_INT, /* jeqi : jump to address dest if src1 as int == src2 as int */
|
||||
OP_JGT_INT, /* jgti : jump to address dest if src1 as int > src2 as int*/
|
||||
OP_JLT_INT, /* jlti : jump to address dest if src1 as int < src2 as int */
|
||||
OP_JLE_INT, /* jlei : jump to address dest if src1 as int <= src2 as int */
|
||||
OP_JGE_INT, /* jgei : jump to address dest if src1 as int >= src2 as int*/
|
||||
OP_INT_TO_REAL, /* itor : dest = src1 as f32 */
|
||||
OP_ADD_UINT, /* addu : dest = src1 + src2 */
|
||||
OP_SUB_UINT, /* subu : dest = src1 - src2 */
|
||||
OP_MUL_UINT, /* mulu : dest = src1 * src2 */
|
||||
OP_DIV_UINT, /* divu : dest = src1 / src2 */
|
||||
OP_JEQ_UINT, /* jequ : jump to address dest if src1 as int == src2 as uint */
|
||||
OP_JGT_UINT, /* jgtu : jump to address dest if src1 as int > src2 as uint*/
|
||||
OP_JLT_UINT, /* jltu : jump to address dest if src1 as int < src2 as uint */
|
||||
OP_JLE_UINT, /* jleu : jump to address dest if src1 as int <= src2 as uint */
|
||||
OP_JGE_UINT, /* jgeu : jump to address dest if src1 as int >= src2 as uint*/
|
||||
OP_UINT_TO_REAL, /* utor : dest = src1 as f32 */
|
||||
OP_ADD_REAL, /* addr : dest = src1 + src2 */
|
||||
OP_SUB_REAL, /* subr : dest = src1 - src2 */
|
||||
OP_MUL_REAL, /* mulr : dest = src1 * src2 */
|
||||
OP_DIV_REAL, /* divr : dest = src1 / src2 */
|
||||
OP_JEQ_REAL, /* jeqr : jump to address dest if src1 as real == src2 as real */
|
||||
OP_JGE_REAL, /* jger : jump to address dest if src1 as real >= src2 as real */
|
||||
OP_JGT_REAL, /* jgtr : jump to address dest if src1 as real > src2 as real */
|
||||
OP_JLT_REAL, /* jltr : jump to address dest if src1 as real < src2 as real */
|
||||
OP_JLE_REAL, /* jler : jump to address dest if src1 as real <= src2 as real */
|
||||
OP_REAL_TO_INT, /* rtoi : dest = src1 as int */
|
||||
OP_REAL_TO_UINT, /* rtou : dest = src1 as uint */
|
||||
OP_MOV, /* move : dest = src1 */
|
||||
OP_JMP, /* jump : jump to address src1 unconditionally */
|
||||
OP_CALL, /* call : creates a new frame */
|
||||
OP_RETURN, /* retn : returns from a frame to the parent frame */
|
||||
OP_INT_TO_STRING, /* itos : dest = src1 as str */
|
||||
OP_UINT_TO_STRING, /* utos : dest = src1 as str */
|
||||
OP_REAL_TO_STRING, /* rtos : dest = src1 as str */
|
||||
OP_READ_STRING, /* gets : dest = gets as str */
|
||||
OP_PRINT_STRING, /* puts : write src1 to stdout */
|
||||
OP_CMP_STRING, /* cmps : dest = (str == src2) as bool */
|
||||
} Opcode;
|
||||
typedef union value_u {
|
||||
int32_t i; /* Integers */
|
||||
float f; /* Float */
|
||||
uint32_t u; /* Unsigned integers, also used for pointer address */
|
||||
char c[4]; /* 4 Byte char array for string packing */
|
||||
} Value;
|
||||
typedef struct slice_s {
|
||||
uint32_t start;
|
||||
uint32_t end;
|
||||
} Slice;
|
||||
#define MAX_REGS 32
|
||||
typedef struct frame_s {
|
||||
Value registers[MAX_REGS]; /* R0-R31 */
|
||||
uint32_t rp; /* register pointer (last unused) */
|
||||
Slice allocated; /* start and end of global allocated block */
|
||||
} Frame;
|
||||
typedef struct screen_t {
|
||||
uint8_t width;
|
||||
uint8_t height;
|
||||
Slice allocated;
|
||||
Value *buffer;
|
||||
} Screen;
|
||||
typedef struct mouse_t {
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
uint8_t btn1;
|
||||
uint8_t btn2;
|
||||
uint8_t btn3;
|
||||
} Mouse;
|
||||
typedef struct keyboard_t {
|
||||
uint32_t length;
|
||||
const uint8_t *keys;
|
||||
} Keyboard;
|
||||
typedef union device_u {
|
||||
uint8_t type;
|
||||
Screen s;
|
||||
Mouse m;
|
||||
Keyboard k;
|
||||
} Device;
|
||||
#define MEMORY_SIZE 65536
|
||||
#define CODE_SIZE 8192
|
||||
#define FRAMES_SIZE 128
|
||||
#define STACK_SIZE 256
|
||||
#define DEVICES_SIZE 8
|
||||
typedef struct vm_s {
|
||||
uint32_t pc; /* program counter */
|
||||
uint32_t cp; /* code pointer (last allocated opcode) */
|
||||
uint32_t fp; /* frame pointer (current frame) */
|
||||
uint32_t sp; /* stack pointer (top of stack) */
|
||||
uint32_t rp; /* return stack pointer (top of stack) */
|
||||
uint32_t mp; /* memory pointer (last allocated value) */
|
||||
uint32_t dp; /* device pointer (last allocated device) */
|
||||
uint8_t devices_size;
|
||||
Device devices[DEVICES_SIZE];
|
||||
uint32_t frames_size;
|
||||
Frame frames[FRAMES_SIZE]; /* function call frames */
|
||||
uint32_t stack_size;
|
||||
Value stack[STACK_SIZE]; /* main stack */
|
||||
uint32_t return_stack_size;
|
||||
Value return_stack[STACK_SIZE]; /* return stack (for recursion) */
|
||||
uint32_t code_size;
|
||||
Value code[CODE_SIZE]; /* code block */
|
||||
uint32_t memory_size;
|
||||
Value memory[MEMORY_SIZE]; /* memory block */
|
||||
} VM;
|
||||
/**
|
||||
* Embeds a string into the VM
|
||||
*/
|
||||
uint32_t str_alloc(VM *vm, const char *str, uint32_t length) {
|
||||
if (!length) length = strlen(str);
|
||||
uint32_t str_addr = vm->mp;
|
||||
vm->memory[vm->mp++].u = length;
|
||||
uint32_t i, j = 0;
|
||||
for (i = 0; i < length; i++) {
|
||||
vm->memory[vm->mp].c[i % 4] = str[i];
|
||||
if (++j == 4) {
|
||||
j = 0;
|
||||
vm->mp++;
|
||||
}
|
||||
}
|
||||
vm->frames[vm->fp].allocated.end += length / 4;
|
||||
return str_addr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Step to the next opcode in the vm.
|
||||
*/
|
||||
bool step_vm(VM *vm) {
|
||||
/* Get current instruction & Advance to next instruction */
|
||||
uint32_t instruction = vm->code[vm->pc++].u;
|
||||
|
||||
uint8_t opcode = (instruction >> 24) & 0xFF;
|
||||
uint8_t dest = (instruction >> 16) & 0xFF;
|
||||
uint8_t src1 = (instruction >> 8) & 0xFF;
|
||||
uint8_t src2 = instruction & 0xFF;
|
||||
|
||||
switch (opcode) {
|
||||
case OP_HALT:
|
||||
return false;
|
||||
case OP_CALL:; /* what's up with this semicolon? ANSI C does not allow you to create a variable after a case, so this noop is here */
|
||||
uint32_t jmp = vm->code[vm->pc++].u; /* location of function in code */
|
||||
vm->return_stack[vm->rp++].u = vm->pc; /* set return address */
|
||||
vm->fp++; /* increment to the next free frame */
|
||||
vm->frames[vm->fp].allocated.start = vm->mp; /* set start of new memory block */
|
||||
vm->pc = jmp;
|
||||
return true;
|
||||
case OP_RETURN:
|
||||
vm->pc = vm->return_stack[--vm->rp].u; /* set pc to return address */
|
||||
vm->mp = vm->frames[vm->fp--].allocated.start; /* reset memory pointer to start of old slice, pop the frame */
|
||||
return true;
|
||||
case OP_LOADI:
|
||||
vm->frames[vm->fp].registers[dest].i = vm->code[vm->pc++].i;
|
||||
return true;
|
||||
...
|
||||
```
|
||||
Here is my lexer:
|
||||
```c
|
||||
typedef enum {
|
||||
TOKEN_EOF,
|
||||
TOKEN_IDENTIFIER,
|
||||
TOKEN_INT_LITERAL,
|
||||
TOKEN_FLOAT_LITERAL,
|
||||
TOKEN_STRING_LITERAL,
|
||||
TOKEN_TYPE_INT,
|
||||
TOKEN_TYPE_NAT,
|
||||
TOKEN_TYPE_REAL,
|
||||
TOKEN_TYPE_STR,
|
||||
TOKEN_KEYWORD_TYPE,
|
||||
TOKEN_KEYWORD_FN,
|
||||
TOKEN_KEYWORD_LET,
|
||||
TOKEN_KEYWORD_CONST,
|
||||
TOKEN_KEYWORD_IF,
|
||||
TOKEN_KEYWORD_ELSE,
|
||||
TOKEN_KEYWORD_WHILE,
|
||||
TOKEN_KEYWORD_FOR,
|
||||
TOKEN_KEYWORD_RETURN,
|
||||
TOKEN_KEYWORD_USE,
|
||||
TOKEN_KEYWORD_INIT,
|
||||
TOKEN_KEYWORD_THIS,
|
||||
TOKEN_OPERATOR_IS,
|
||||
TOKEN_BANG,
|
||||
TOKEN_BANG_EQ,
|
||||
TOKEN_EQ,
|
||||
TOKEN_EQ_EQ,
|
||||
TOKEN_GT,
|
||||
TOKEN_LT,
|
||||
TOKEN_GTE,
|
||||
TOKEN_LTE,
|
||||
TOKEN_DOT,
|
||||
TOKEN_COMMA,
|
||||
TOKEN_COLON,
|
||||
TOKEN_SEMICOLON,
|
||||
TOKEN_PLUS,
|
||||
TOKEN_MINUS,
|
||||
TOKEN_STAR,
|
||||
TOKEN_SLASH,
|
||||
TOKEN_LPAREN,
|
||||
TOKEN_RPAREN,
|
||||
TOKEN_LBRACE,
|
||||
TOKEN_RBRACE,
|
||||
TOKEN_LBRACKET,
|
||||
TOKEN_RBRACKET,
|
||||
TOKEN_ERROR
|
||||
} TokenType;
|
||||
typedef struct {
|
||||
TokenType type;
|
||||
const char *start;
|
||||
int length;
|
||||
int line;
|
||||
} Token;
|
||||
typedef struct {
|
||||
const char *keyword;
|
||||
TokenType token;
|
||||
} Keyword;
|
||||
typedef struct {
|
||||
const char *start;
|
||||
const char *current;
|
||||
int line;
|
||||
} Lexer;
|
||||
void init_lexer(const char *source);
|
||||
const char *token_type_name(TokenType type);
|
||||
Token next_token();
|
||||
```
|
||||
This is something like my grammar:
|
||||
```
|
||||
type «token» {
|
||||
init() {
|
||||
// values
|
||||
}
|
||||
}
|
||||
! example
|
||||
type Vec3 {
|
||||
init(x real, y real, z real) {
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
this.z = z;
|
||||
}
|
||||
}
|
||||
- real
|
||||
- 32 bit floats
|
||||
- int
|
||||
- 32 bit integer
|
||||
- nat
|
||||
- 32 bit unsigned integer (for loop counting and indexing)
|
||||
- str
|
||||
- ""
|
||||
- bool (uint32, 0 = false, anything else = true)
|
||||
- true / false
|
||||
- !
|
||||
- comment
|
||||
- ??
|
||||
- unwrap or
|
||||
- .?
|
||||
- null check or return error
|
||||
- +
|
||||
- addition
|
||||
- -
|
||||
- subtraction
|
||||
- negation
|
||||
- *
|
||||
- multiplication
|
||||
- /
|
||||
- division
|
||||
- ^
|
||||
- power
|
||||
- ==
|
||||
- equals
|
||||
- <
|
||||
- less than
|
||||
- >
|
||||
- greater than
|
||||
- >=
|
||||
- greater than or equals
|
||||
- <=
|
||||
- less than or equals
|
||||
- .
|
||||
- accessor
|
||||
- ++
|
||||
- inline add 1
|
||||
- --
|
||||
- inline subtract 1
|
||||
- +=
|
||||
- inline add n
|
||||
- -=
|
||||
- inline subtract n
|
||||
- *=
|
||||
- inline multiply n
|
||||
- /=
|
||||
- inline divide n
|
||||
- mod
|
||||
- modulo
|
||||
- not
|
||||
- logical not
|
||||
- and
|
||||
- logical and
|
||||
- or
|
||||
- logical or
|
||||
- xor
|
||||
- logical xor
|
||||
- band
|
||||
- bitwise and
|
||||
- bor
|
||||
- bitwise or
|
||||
- bxor
|
||||
- bitwise xor
|
||||
- srl
|
||||
- bit shift right
|
||||
- sll
|
||||
- bit shift left
|
||||
«simple_type» «variable» = val; ! similar to c
|
||||
«complex_type» «variable»(«fields», …); ! similar to c++
|
||||
«type»[«length»] «variable» = [val1, val2, ...]; ! similar to c/c++
|
||||
if («boolean expression») {
|
||||
} else if («boolean expression») {
|
||||
} else {
|
||||
}
|
||||
if («token» is real) {
|
||||
print("hello yes self is a real?");
|
||||
}
|
||||
switch (value) {
|
||||
case A:
|
||||
case B:
|
||||
case C:
|
||||
default:
|
||||
}
|
||||
for («token» in «collection») { «body» }
|
||||
while («boolean expression») { «body» }
|
||||
do («variable» = initial_value, end_value, increment) { «body» }
|
||||
fn «token» («type» «parameter», ...) «return_type» {
|
||||
«body»
|
||||
}
|
||||
```
|
||||
Here is an example program, followed by the bytecode I compiled by hand to test it:
|
||||
```zrl
|
||||
fn fib(int n) int {
|
||||
if (n < 2) return n;
|
||||
return fib(n - 2) + fib(n - 1);
|
||||
}
|
||||
|
||||
print fib(35);
|
||||
```
|
||||
```c
|
||||
bool test_recursive_function_compile(VM *vm) {
|
||||
/* fn main() */
|
||||
vm->code[vm->cp++].u = OP(OP_LOADI, 0, 0, 0); /* 35 */
|
||||
vm->code[vm->cp++].i = 35;
|
||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* ); */
|
||||
vm->code[vm->cp++].u = 9;
|
||||
vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* get return value */
|
||||
vm->code[vm->cp++].u = OP(OP_INT_TO_STRING, 1, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print(fib(35).toS()); */
|
||||
vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0);
|
||||
/* fn fib() */
|
||||
vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* n int */
|
||||
vm->code[vm->cp++].u = OP(OP_LOADI, 1, 0, 0); /* 2 */
|
||||
vm->code[vm->cp++].i = 2;
|
||||
vm->code[vm->cp++].u = OP(OP_LOADI, 2, 0, 0); /* &fib */
|
||||
vm->code[vm->cp++].i = 32;
|
||||
vm->code[vm->cp++].u = OP(OP_JLT_INT, 2, 0, 1);
|
||||
vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 2 */
|
||||
vm->code[vm->cp++].i = 2;
|
||||
vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3);
|
||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 2) */
|
||||
vm->code[vm->cp++].u = 9;
|
||||
vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 1 */
|
||||
vm->code[vm->cp++].i = 1;
|
||||
vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3);
|
||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 1) */
|
||||
vm->code[vm->cp++].u = 9;
|
||||
vm->code[vm->cp++].u = OP(OP_POPI, 4, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_POPI, 5, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_ADD_INT, 6, 5, 4);
|
||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 6, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0);
|
||||
vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0);
|
||||
return true;
|
||||
}
|
||||
```
|
||||
I am at the point where I have a 32-bit VM that can do math and store strings, plus a lexer, but I still need a way to compile the lexer output into a program that uses my opcodes.
|
||||
|
Loading…
Reference in New Issue