remove doc
This commit is contained in:
parent
c348ea3fdd
commit
881c6c7740
407
src/compiler.org
407
src/compiler.org
|
@ -1,407 +0,0 @@
|
||||||
I am creating a new programming language in C89. It targets retrocomputing and constrained platforms, so I use preallocated buffers for everything, with no malloc/free.
|
|
||||||
For reference here are my opcodes and vm defs:
|
|
||||||
```c
|
|
||||||
/* defines a uint32 opcode */
|
|
||||||
#define OP(opcode, a, b, c) ((opcode << 24) | (a << 16) | (b << 8) | c)
|
|
||||||
typedef enum {
|
|
||||||
OP_HALT, /* halt : terminate execution */
|
|
||||||
OP_LOADI, /* multiple byte: lodi : dest = next memory location as int */
|
|
||||||
OP_LOADU, /* multiple byte: lodu : dest = next memory location as uint */
|
|
||||||
OP_LOADF, /* multiple byte: lodf : dest = next memory location as float */
|
|
||||||
OP_STOREI, /* multiple byte: stri : next memory location = src1 as int */
|
|
||||||
OP_STOREU, /* multiple byte: stru : next memory location = src1 as uint */
|
|
||||||
OP_STOREF, /* multiple byte: strf : next memory location = src1 as float */
|
|
||||||
OP_PUSHI, /* pshi : push int from register onto the stack */
|
|
||||||
OP_PUSHU, /* pshu : push uint from register onto the stack */
|
|
||||||
OP_PUSHF, /* pshf : push float from register onto the stack */
|
|
||||||
OP_PUSHS, /* pshs : push str ref from register onto the stack and copy str */
|
|
||||||
OP_POPI, /* popi : pop int from stack onto the register */
|
|
||||||
OP_POPU, /* popu : pop uint from stack onto the register */
|
|
||||||
OP_POPF, /* popf : pop float from stack onto the register */
|
|
||||||
OP_POPS, /* pops : pop str ref from stack and move/copy to register */
|
|
||||||
OP_ADD_INT, /* addi : dest = src1 + src2 */
|
|
||||||
OP_SUB_INT, /* subi : dest = src1 - src2 */
|
|
||||||
OP_MUL_INT, /* muli : dest = src1 * src2 */
|
|
||||||
OP_DIV_INT, /* divi : dest = src1 / src2 */
|
|
||||||
OP_JEQ_INT, /* jeqi : jump to address dest if src1 as int == src2 as int */
|
|
||||||
OP_JGT_INT, /* jgti : jump to address dest if src1 as int > src2 as int*/
|
|
||||||
OP_JLT_INT, /* jlti : jump to address dest if src1 as int < src2 as int */
|
|
||||||
OP_JLE_INT, /* jlei : jump to address dest if src1 as int <= src2 as int */
|
|
||||||
OP_JGE_INT, /* jgei : jump to address dest if src1 as int >= src2 as int*/
|
|
||||||
OP_INT_TO_REAL, /* itor : dest = src1 as f32 */
|
|
||||||
OP_ADD_UINT, /* addu : dest = src1 + src2 */
|
|
||||||
OP_SUB_UINT, /* subu : dest = src1 - src2 */
|
|
||||||
OP_MUL_UINT, /* mulu : dest = src1 * src2 */
|
|
||||||
OP_DIV_UINT, /* divu : dest = src1 / src2 */
|
|
||||||
OP_JEQ_UINT, /* jequ : jump to address dest if src1 as uint == src2 as uint */
|
|
||||||
OP_JGT_UINT, /* jgtu : jump to address dest if src1 as uint > src2 as uint */
|
|
||||||
OP_JLT_UINT, /* jltu : jump to address dest if src1 as uint < src2 as uint */
|
|
||||||
OP_JLE_UINT, /* jleu : jump to address dest if src1 as uint <= src2 as uint */
|
|
||||||
OP_JGE_UINT, /* jgeu : jump to address dest if src1 as uint >= src2 as uint */
|
|
||||||
OP_UINT_TO_REAL, /* utor : dest = src1 as f32 */
|
|
||||||
OP_ADD_REAL, /* addr : dest = src1 + src2 */
|
|
||||||
OP_SUB_REAL, /* subr : dest = src1 - src2 */
|
|
||||||
OP_MUL_REAL, /* mulr : dest = src1 * src2 */
|
|
||||||
OP_DIV_REAL, /* divr : dest = src1 / src2 */
|
|
||||||
OP_JEQ_REAL, /* jeqr : jump to address dest if src1 as real == src2 as real */
|
|
||||||
OP_JGE_REAL, /* jger : jump to address dest if src1 as real >= src2 as real */
|
|
||||||
OP_JGT_REAL, /* jgtr : jump to address dest if src1 as real > src2 as real */
|
|
||||||
OP_JLT_REAL, /* jltr : jump to address dest if src1 as real < src2 as real */
|
|
||||||
OP_JLE_REAL, /* jler : jump to address dest if src1 as real <= src2 as real */
|
|
||||||
OP_REAL_TO_INT, /* rtoi : dest = src1 as int */
|
|
||||||
OP_REAL_TO_UINT, /* rtou : dest = src1 as uint */
|
|
||||||
OP_MOV, /* move : dest = src1 */
|
|
||||||
OP_JMP, /* jump : jump to address src1 unconditionally */
|
|
||||||
OP_CALL, /* call : creates a new frame */
|
|
||||||
OP_RETURN, /* retn : returns from a frame to the parent frame */
|
|
||||||
OP_INT_TO_STRING, /* itos : dest = src1 as str */
|
|
||||||
OP_UINT_TO_STRING, /* utos : dest = src1 as str */
|
|
||||||
OP_REAL_TO_STRING, /* rtos : dest = src1 as str */
|
|
||||||
OP_READ_STRING, /* gets : dest = gets as str */
|
|
||||||
OP_PRINT_STRING, /* puts : write src1 to stdout */
|
|
||||||
OP_CMP_STRING, /* cmps : dest = (str == src2) as bool */
|
|
||||||
} Opcode;
|
|
||||||
typedef union value_u {
|
|
||||||
int32_t i; /* Integers */
|
|
||||||
float f; /* Float */
|
|
||||||
uint32_t u; /* Unsigned integers, also used for pointer address */
|
|
||||||
char c[4]; /* 4 Byte char array for string packing */
|
|
||||||
} Value;
|
|
||||||
typedef struct slice_s {
|
|
||||||
uint32_t start;
|
|
||||||
uint32_t end;
|
|
||||||
} Slice;
|
|
||||||
#define MAX_REGS 32
|
|
||||||
typedef struct frame_s {
|
|
||||||
Value registers[MAX_REGS]; /* R0-R31 */
|
|
||||||
uint32_t rp; /* register pointer (last unused) */
|
|
||||||
Slice allocated; /* start and end of global allocated block */
|
|
||||||
} Frame;
|
|
||||||
typedef struct screen_t {
|
|
||||||
uint8_t width;
|
|
||||||
uint8_t height;
|
|
||||||
Slice allocated;
|
|
||||||
Value *buffer;
|
|
||||||
} Screen;
|
|
||||||
typedef struct mouse_t {
|
|
||||||
uint32_t x;
|
|
||||||
uint32_t y;
|
|
||||||
uint8_t btn1;
|
|
||||||
uint8_t btn2;
|
|
||||||
uint8_t btn3;
|
|
||||||
} Mouse;
|
|
||||||
typedef struct keyboard_t {
|
|
||||||
uint32_t length;
|
|
||||||
const uint8_t *keys;
|
|
||||||
} Keyboard;
|
|
||||||
typedef union device_u {
|
|
||||||
uint8_t type;
|
|
||||||
Screen s;
|
|
||||||
Mouse m;
|
|
||||||
Keyboard k;
|
|
||||||
} Device;
|
|
||||||
#define MEMORY_SIZE 65536
|
|
||||||
#define CODE_SIZE 8192
|
|
||||||
#define FRAMES_SIZE 128
|
|
||||||
#define STACK_SIZE 256
|
|
||||||
#define DEVICES_SIZE 8
|
|
||||||
typedef struct vm_s {
|
|
||||||
uint32_t pc; /* program counter */
|
|
||||||
uint32_t cp; /* code pointer (last allocated opcode) */
|
|
||||||
uint32_t fp; /* frame pointer (current frame) */
|
|
||||||
uint32_t sp; /* stack pointer (top of stack) */
|
|
||||||
uint32_t rp; /* return stack pointer (top of stack) */
|
|
||||||
uint32_t mp; /* memory pointer (last allocated value) */
|
|
||||||
uint32_t dp; /* device pointer (last allocated device) */
|
|
||||||
uint8_t devices_size;
|
|
||||||
Device devices[DEVICES_SIZE];
|
|
||||||
uint32_t frames_size;
|
|
||||||
Frame frames[FRAMES_SIZE]; /* function call frames */
|
|
||||||
uint32_t stack_size;
|
|
||||||
Value stack[STACK_SIZE]; /* main stack */
|
|
||||||
uint32_t return_stack_size;
|
|
||||||
Value return_stack[STACK_SIZE]; /* return stack (for recursion) */
|
|
||||||
uint32_t code_size;
|
|
||||||
Value code[CODE_SIZE]; /* code block */
|
|
||||||
uint32_t memory_size;
|
|
||||||
Value memory[MEMORY_SIZE]; /* memory block */
|
|
||||||
} VM;
|
|
||||||
/**
|
|
||||||
* Embeds a string into the VM
|
|
||||||
*/
|
|
||||||
uint32_t str_alloc(VM *vm, const char *str, uint32_t length) {
|
|
||||||
if (!length) length = strlen(str);
|
|
||||||
uint32_t str_addr = vm->mp;
|
|
||||||
vm->memory[vm->mp++].u = length;
|
|
||||||
uint32_t i, j = 0;
|
|
||||||
for (i = 0; i < length; i++) {
|
|
||||||
vm->memory[vm->mp].c[i % 4] = str[i];
|
|
||||||
if (++j == 4) {
|
|
||||||
j = 0;
|
|
||||||
vm->mp++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
vm->frames[vm->fp].allocated.end += length / 4;
|
|
||||||
return str_addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Step to the next opcode in the vm.
|
|
||||||
*/
|
|
||||||
bool step_vm(VM *vm) {
|
|
||||||
/* Get current instruction & Advance to next instruction */
|
|
||||||
uint32_t instruction = vm->code[vm->pc++].u;
|
|
||||||
|
|
||||||
uint8_t opcode = (instruction >> 24) & 0xFF;
|
|
||||||
uint8_t dest = (instruction >> 16) & 0xFF;
|
|
||||||
uint8_t src1 = (instruction >> 8) & 0xFF;
|
|
||||||
uint8_t src2 = instruction & 0xFF;
|
|
||||||
|
|
||||||
switch (opcode) {
|
|
||||||
case OP_HALT:
|
|
||||||
return false;
|
|
||||||
case OP_CALL:; /* Why the semicolon? C does not allow a declaration to directly follow a case label, so this empty statement is here. */
|
|
||||||
uint32_t jmp = vm->code[vm->pc++].u; /* location of function in code */
|
|
||||||
vm->return_stack[vm->rp++].u = vm->pc; /* set return address */
|
|
||||||
vm->fp++; /* increment to the next free frame */
|
|
||||||
vm->frames[vm->fp].allocated.start = vm->mp; /* set start of new memory block */
|
|
||||||
vm->pc = jmp;
|
|
||||||
return true;
|
|
||||||
case OP_RETURN:
|
|
||||||
vm->pc = vm->return_stack[--vm->rp].u; /* set pc to return address */
|
|
||||||
vm->mp = vm->frames[vm->fp--].allocated.start; /* reset memory pointer to start of old slice, pop the frame */
|
|
||||||
return true;
|
|
||||||
case OP_LOADI:
|
|
||||||
vm->frames[vm->fp].registers[dest].i = vm->code[vm->pc++].i;
|
|
||||||
return true;
|
|
||||||
...
|
|
||||||
```
|
|
||||||
Here is my lexer:
|
|
||||||
```c
|
|
||||||
typedef enum {
|
|
||||||
TOKEN_EOF,
|
|
||||||
TOKEN_IDENTIFIER,
|
|
||||||
TOKEN_INT_LITERAL,
|
|
||||||
TOKEN_FLOAT_LITERAL,
|
|
||||||
TOKEN_STRING_LITERAL,
|
|
||||||
TOKEN_TYPE_INT,
|
|
||||||
TOKEN_TYPE_NAT,
|
|
||||||
TOKEN_TYPE_REAL,
|
|
||||||
TOKEN_TYPE_STR,
|
|
||||||
TOKEN_KEYWORD_TYPE,
|
|
||||||
TOKEN_KEYWORD_FN,
|
|
||||||
TOKEN_KEYWORD_LET,
|
|
||||||
TOKEN_KEYWORD_CONST,
|
|
||||||
TOKEN_KEYWORD_IF,
|
|
||||||
TOKEN_KEYWORD_ELSE,
|
|
||||||
TOKEN_KEYWORD_WHILE,
|
|
||||||
TOKEN_KEYWORD_FOR,
|
|
||||||
TOKEN_KEYWORD_RETURN,
|
|
||||||
TOKEN_KEYWORD_USE,
|
|
||||||
TOKEN_KEYWORD_INIT,
|
|
||||||
TOKEN_KEYWORD_THIS,
|
|
||||||
TOKEN_OPERATOR_IS,
|
|
||||||
TOKEN_BANG,
|
|
||||||
TOKEN_BANG_EQ,
|
|
||||||
TOKEN_EQ,
|
|
||||||
TOKEN_EQ_EQ,
|
|
||||||
TOKEN_GT,
|
|
||||||
TOKEN_LT,
|
|
||||||
TOKEN_GTE,
|
|
||||||
TOKEN_LTE,
|
|
||||||
TOKEN_DOT,
|
|
||||||
TOKEN_COMMA,
|
|
||||||
TOKEN_COLON,
|
|
||||||
TOKEN_SEMICOLON,
|
|
||||||
TOKEN_PLUS,
|
|
||||||
TOKEN_MINUS,
|
|
||||||
TOKEN_STAR,
|
|
||||||
TOKEN_SLASH,
|
|
||||||
TOKEN_LPAREN,
|
|
||||||
TOKEN_RPAREN,
|
|
||||||
TOKEN_LBRACE,
|
|
||||||
TOKEN_RBRACE,
|
|
||||||
TOKEN_LBRACKET,
|
|
||||||
TOKEN_RBRACKET,
|
|
||||||
TOKEN_ERROR
|
|
||||||
} TokenType;
|
|
||||||
typedef struct {
|
|
||||||
TokenType type;
|
|
||||||
const char *start;
|
|
||||||
int length;
|
|
||||||
int line;
|
|
||||||
} Token;
|
|
||||||
typedef struct {
|
|
||||||
const char *keyword;
|
|
||||||
TokenType token;
|
|
||||||
} Keyword;
|
|
||||||
typedef struct {
|
|
||||||
const char *start;
|
|
||||||
const char *current;
|
|
||||||
int line;
|
|
||||||
} Lexer;
|
|
||||||
void init_lexer(const char *source);
|
|
||||||
const char *token_type_name(TokenType type);
|
|
||||||
Token next_token();
|
|
||||||
```
|
|
||||||
This is something like my grammar:
|
|
||||||
```
|
|
||||||
type «token» {
|
|
||||||
init() {
|
|
||||||
// values
|
|
||||||
}
|
|
||||||
}
|
|
||||||
! example
|
|
||||||
type Vec3 {
|
|
||||||
init(x real, y real, z real) {
|
|
||||||
this.x = x;
|
|
||||||
this.y = y;
|
|
||||||
this.z = z;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
- real
|
|
||||||
- 32 bit floats
|
|
||||||
- int
|
|
||||||
- 32 bit integer
|
|
||||||
- nat
|
|
||||||
- 32 bit unsigned integer (for loop counting and indexing)
|
|
||||||
- str
|
|
||||||
- ""
|
|
||||||
- bool (uint32, 0 = false, anything else = true)
|
|
||||||
- true / false
|
|
||||||
- !
|
|
||||||
- comment
|
|
||||||
- ??
|
|
||||||
- unwrap or
|
|
||||||
- .?
|
|
||||||
- null check or return error
|
|
||||||
- +
|
|
||||||
- addition
|
|
||||||
- -
|
|
||||||
- subtraction
|
|
||||||
- negation
|
|
||||||
- *
|
|
||||||
- multiplication
|
|
||||||
- /
|
|
||||||
- division
|
|
||||||
- ^
|
|
||||||
- power
|
|
||||||
- ==
|
|
||||||
- equals
|
|
||||||
- <
|
|
||||||
- less than
|
|
||||||
- >
|
|
||||||
- greater than
|
|
||||||
- >=
|
|
||||||
- greater than or equals
|
|
||||||
- <=
|
|
||||||
- less than or equals
|
|
||||||
- .
|
|
||||||
- accessor
|
|
||||||
- ++
|
|
||||||
- inline add 1
|
|
||||||
- --
|
|
||||||
- inline subtract 1
|
|
||||||
- +=
|
|
||||||
- inline add n
|
|
||||||
- -=
|
|
||||||
- inline subtract n
|
|
||||||
- *=
|
|
||||||
- inline multiply n
|
|
||||||
- /=
|
|
||||||
- inline divide n
|
|
||||||
- mod
|
|
||||||
- modulo
|
|
||||||
- not
|
|
||||||
- logical not
|
|
||||||
- and
|
|
||||||
- logical and
|
|
||||||
- or
|
|
||||||
- logical or
|
|
||||||
- xor
|
|
||||||
- logical xor
|
|
||||||
- band
|
|
||||||
- bitwise and
|
|
||||||
- bor
|
|
||||||
- bitwise or
|
|
||||||
- bxor
|
|
||||||
- bitwise xor
|
|
||||||
- srl
|
|
||||||
- bit shift right
|
|
||||||
- sll
|
|
||||||
- bit shift left
|
|
||||||
«simple_type» «variable» = val; ! similar to c
|
|
||||||
«complex_type» «variable»(«fields», …); ! similar to c++
|
|
||||||
«type»[«length»] «variable» = [val1, val2, ...]; ! similar to c/c++
|
|
||||||
if («boolean expression») {
|
|
||||||
} else if («boolean expression») {
|
|
||||||
} else {
|
|
||||||
}
|
|
||||||
if («token» is real) {
|
|
||||||
print("hello yes self is a real?");
|
|
||||||
}
|
|
||||||
switch (value) {
|
|
||||||
case A:
|
|
||||||
case B:
|
|
||||||
case C:
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
for («token» in «collection») { «body» }
|
|
||||||
while («boolean expression») { «body» }
|
|
||||||
do («variable» = initial_value, end_value, increment) { «body» }
|
|
||||||
fn «token» («type» «parameter», ...) «return_type» {
|
|
||||||
«body»
|
|
||||||
}
|
|
||||||
```
|
|
||||||
Here is an example test program, followed by the bytecode I compiled for it by hand:
|
|
||||||
```zrl
|
|
||||||
fn fib(int n) int {
|
|
||||||
if (n < 2) return n;
|
|
||||||
return fib(n - 2) + fib(n - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
print fib(35);
|
|
||||||
```
|
|
||||||
```c
|
|
||||||
bool test_recursive_function_compile(VM *vm) {
|
|
||||||
/* fn main() */
|
|
||||||
vm->code[vm->cp++].u = OP(OP_LOADI, 0, 0, 0); /* 35 */
|
|
||||||
vm->code[vm->cp++].i = 35;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* ); */
|
|
||||||
vm->code[vm->cp++].u = 9;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* get return value */
|
|
||||||
vm->code[vm->cp++].u = OP(OP_INT_TO_STRING, 1, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_PRINT_STRING, 0, 1, 0); /* print(fib(35).toS()); */
|
|
||||||
vm->code[vm->cp++].u = OP(OP_HALT, 0, 0, 0);
|
|
||||||
/* fn fib() */
|
|
||||||
vm->code[vm->cp++].u = OP(OP_POPI, 0, 0, 0); /* n int */
|
|
||||||
vm->code[vm->cp++].u = OP(OP_LOADI, 1, 0, 0); /* 2 */
|
|
||||||
vm->code[vm->cp++].i = 2;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_LOADI, 2, 0, 0); /* jump target: base case (return n) at code[32] */
|
|
||||||
vm->code[vm->cp++].i = 32;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_JLT_INT, 2, 0, 1);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 2 */
|
|
||||||
vm->code[vm->cp++].i = 2;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 2) */
|
|
||||||
vm->code[vm->cp++].u = 9;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_LOADI, 3, 0, 0); /* 1 */
|
|
||||||
vm->code[vm->cp++].i = 1;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_SUB_INT, 4, 0, 3);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 4, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_CALL, 0, 0, 0); /* fib(n - 1) */
|
|
||||||
vm->code[vm->cp++].u = 9;
|
|
||||||
vm->code[vm->cp++].u = OP(OP_POPI, 4, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_POPI, 5, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_ADD_INT, 6, 5, 4);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 6, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_PUSHI, 0, 0, 0);
|
|
||||||
vm->code[vm->cp++].u = OP(OP_RETURN, 0, 0, 0);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
I now have a 32-bit VM that can do math and store strings, and a lexer, but I still need a way to compile the lexer output into a program using my opcodes.
|
|
||||||
|
|
Loading…
Reference in New Issue