remove doc

This commit is contained in:
zongor 2025-08-03 16:03:50 -04:00
parent c348ea3fdd
commit 881c6c7740
1 changed files with 0 additions and 407 deletions

View File

@ -1,407 +0,0 @@
I am creating a new programming language in C89. It targets retrocomputing and constrained platforms, so I use preallocated buffers for everything, with no malloc/free.
For reference here are my opcodes and vm defs:
```c
/*
 * Packs an opcode and three operands into one uint32 instruction word:
 *   bits 31-24 = opcode, bits 23-16 = a, bits 15-8 = b, bits 7-0 = c.
 *
 * Every argument is parenthesised so compound expressions (e.g. `x | y`,
 * `cond ? p : q`) pack as a unit, and is converted to uint32_t before the
 * shift so an opcode >= 128 never shifts into the sign bit of an int.
 * Arguments are not masked: callers must pass values that fit in 8 bits.
 */
#define OP(opcode, a, b, c)                                      \
  ((((uint32_t)(opcode)) << 24) | (((uint32_t)(a)) << 16) |      \
   (((uint32_t)(b)) << 8) | ((uint32_t)(c)))
/*
 * VM opcodes. Each comment gives the short mnemonic followed by the effect.
 * The enumerator order defines the numeric opcode values (OP_HALT is 0), so
 * entries must not be reordered.
 */
typedef enum {
  OP_HALT,           /* halt : terminate execution */
  OP_LOADI,          /* multiple byte: lodi : dest = next memory location as int */
  OP_LOADU,          /* multiple byte: lodu : dest = next memory location as uint */
  OP_LOADF,          /* multiple byte: lodf : dest = next memory location as float */
  OP_STOREI,         /* multiple byte: stri : next memory location = src1 as int */
  OP_STOREU,         /* multiple byte: stru : next memory location = src1 as uint */
  OP_STOREF,         /* multiple byte: strf : next memory location = src1 as float */
  OP_PUSHI,          /* pshi : push int from register onto the stack */
  OP_PUSHU,          /* pshu : push uint from register onto the stack */
  OP_PUSHF,          /* pshf : push float from register onto the stack */
  OP_PUSHS,          /* pshs : push str ref from register onto the stack and copy str */
  OP_POPI,           /* popi : pop int from stack onto the register */
  OP_POPU,           /* popu : pop uint from stack onto the register */
  OP_POPF,           /* popf : pop float from stack onto the register */
  OP_POPS,           /* pops : pop str ref from stack and move/copy to register */
  OP_ADD_INT,        /* addi : dest = src1 + src2 */
  OP_SUB_INT,        /* subi : dest = src1 - src2 */
  OP_MUL_INT,        /* muli : dest = src1 * src2 */
  OP_DIV_INT,        /* divi : dest = src1 / src2 */
  OP_JEQ_INT,        /* jeqi : jump to address dest if src1 as int == src2 as int */
  OP_JGT_INT,        /* jgti : jump to address dest if src1 as int > src2 as int */
  OP_JLT_INT,        /* jlti : jump to address dest if src1 as int < src2 as int */
  OP_JLE_INT,        /* jlei : jump to address dest if src1 as int <= src2 as int */
  OP_JGE_INT,        /* jgei : jump to address dest if src1 as int >= src2 as int */
  OP_INT_TO_REAL,    /* itor : dest = src1 as f32 */
  OP_ADD_UINT,       /* addu : dest = src1 + src2 */
  OP_SUB_UINT,       /* subu : dest = src1 - src2 */
  OP_MUL_UINT,       /* mulu : dest = src1 * src2 */
  OP_DIV_UINT,       /* divu : dest = src1 / src2 */
  OP_JEQ_UINT,       /* jequ : jump to address dest if src1 as uint == src2 as uint */
  OP_JGT_UINT,       /* jgtu : jump to address dest if src1 as uint > src2 as uint */
  OP_JLT_UINT,       /* jltu : jump to address dest if src1 as uint < src2 as uint */
  OP_JLE_UINT,       /* jleu : jump to address dest if src1 as uint <= src2 as uint */
  OP_JGE_UINT,       /* jgeu : jump to address dest if src1 as uint >= src2 as uint */
  OP_UINT_TO_REAL,   /* utor : dest = src1 as f32 */
  OP_ADD_REAL,       /* addr : dest = src1 + src2 */
  OP_SUB_REAL,       /* subr : dest = src1 - src2 */
  OP_MUL_REAL,       /* mulr : dest = src1 * src2 */
  OP_DIV_REAL,       /* divr : dest = src1 / src2 */
  /* NOTE: the real-number jumps are declared in a different order (JGE first)
   * than the int/uint jumps; the mnemonics below follow each enumerator. */
  OP_JEQ_REAL,       /* jeqr : jump to address dest if src1 as real == src2 as real */
  OP_JGE_REAL,       /* jger : jump to address dest if src1 as real >= src2 as real */
  OP_JGT_REAL,       /* jgtr : jump to address dest if src1 as real > src2 as real */
  OP_JLT_REAL,       /* jltr : jump to address dest if src1 as real < src2 as real */
  OP_JLE_REAL,       /* jler : jump to address dest if src1 as real <= src2 as real */
  OP_REAL_TO_INT,    /* rtoi : dest = src1 as int */
  OP_REAL_TO_UINT,   /* rtou : dest = src1 as uint */
  OP_MOV,            /* move : dest = src1 */
  OP_JMP,            /* jump : jump to address src1 unconditionally */
  OP_CALL,           /* call : creates a new frame */
  OP_RETURN,         /* retn : returns from a frame to the parent frame */
  OP_INT_TO_STRING,  /* itos : dest = src1 as str */
  OP_UINT_TO_STRING, /* utos : dest = src1 as str */
  OP_REAL_TO_STRING, /* rtos : dest = src1 as str */
  OP_READ_STRING,    /* gets : dest = gets as str */
  OP_PRINT_STRING,   /* puts : write src1 to stdout */
  OP_CMP_STRING      /* cmps : dest = (str == src2) as bool */
} Opcode;
/*
 * One VM cell. The same storage can be viewed as a signed integer, a float,
 * an unsigned integer (also used as an address), or four packed characters.
 * The integer and char views are exactly four bytes; float is presumably
 * 32-bit on the target as well — TODO confirm.
 */
typedef union value_u {
int32_t i; /* Integers */
float f; /* Float */
uint32_t u; /* Unsigned integers, also used for pointer address */
char c[4]; /* 4 Byte char array for string packing */
} Value;
/*
 * A pair of uint32 bounds describing a range. Used by the `allocated` fields
 * of Frame and Screen. Whether `end` is inclusive or one-past-the-end is not
 * shown here — TODO confirm.
 */
typedef struct slice_s {
uint32_t start;
uint32_t end;
} Slice;
/* Number of registers in each call frame. */
#define MAX_REGS 32
/*
 * One function-call frame: its own register file plus the range of VM memory
 * associated with the frame.
 */
typedef struct frame_s {
Value registers[MAX_REGS]; /* R0-R31 */
uint32_t rp; /* register pointer (last unused) */
Slice allocated; /* start and end of global allocated block */
} Frame;
/*
 * Screen device description: 8-bit width and height, a Slice, and a pointer
 * to Value cells. Presumably `allocated` is the VM memory range backing the
 * screen and `buffer` points at its cells — TODO confirm.
 */
typedef struct screen_t {
uint8_t width;
uint8_t height;
Slice allocated;
Value *buffer;
} Screen;
/*
 * Mouse device state: a 32-bit x/y pair and three 8-bit button fields.
 * Presumably each btnN is non-zero while that button is pressed — TODO
 * confirm.
 */
typedef struct mouse_t {
uint32_t x;
uint32_t y;
uint8_t btn1;
uint8_t btn2;
uint8_t btn3;
} Mouse;
/*
 * Keyboard device state: a read-only byte array and a length. Presumably
 * `length` is the number of entries in `keys` — TODO confirm.
 */
typedef struct keyboard_t {
uint32_t length;
const uint8_t *keys;
} Keyboard;
/*
 * One device slot, viewable as a screen, mouse, or keyboard.
 *
 * NOTE(review): every member of a union starts at the same address, so
 * `type` shares storage with the first byte of `s`, `m`, and `k`. Writing a
 * device payload therefore overwrites `type`. If `type` is meant to be a tag
 * that selects the active member, a struct holding `type` next to the union
 * was probably intended — confirm.
 */
typedef union device_u {
uint8_t type;
Screen s;
Mouse m;
Keyboard k;
} Device;
/* Capacities, in elements, of the fixed arrays inside VM. */
#define MEMORY_SIZE 65536
#define CODE_SIZE 8192
#define FRAMES_SIZE 128
#define STACK_SIZE 256
#define DEVICES_SIZE 8
/*
 * Complete VM state. Every buffer is a fixed-size array embedded in the
 * struct, so no dynamic allocation is needed. The `*_size` fields sit next
 * to each array; whether they hold the capacity or the used count is not
 * shown here — TODO confirm.
 */
typedef struct vm_s {
uint32_t pc; /* program counter */
uint32_t cp; /* code pointer (last allocated opcode) */
uint32_t fp; /* frame pointer (current frame) */
uint32_t sp; /* stack pointer (top of stack) */
uint32_t rp; /* return stack pointer (top of stack) */
uint32_t mp; /* memory pointer (last allocated value) */
uint32_t dp; /* device pointer (last allocated device) */
uint8_t devices_size;
Device devices[DEVICES_SIZE];
uint32_t frames_size;
Frame frames[FRAMES_SIZE]; /* function call frames */
uint32_t stack_size;
Value stack[STACK_SIZE]; /* main stack */
uint32_t return_stack_size;
/* Note: the return stack reuses STACK_SIZE as its capacity. */
Value return_stack[STACK_SIZE]; /* return stack (for recursion) */
uint32_t code_size;
Value code[CODE_SIZE]; /* code block */
uint32_t memory_size;
Value memory[MEMORY_SIZE]; /* memory block */
} VM;
/**
 * Embeds a string into the VM's memory as a length-prefixed, word-packed
 * string and returns its address.
 *
 * Layout written starting at the returned address:
 *   memory[addr].u        length in bytes
 *   memory[addr + 1 ...]  the characters, four per Value, in c[0]..c[3]
 * A partially filled final word is zero-padded and still consumed, so the
 * next allocation cannot overwrite the tail of this string.
 *
 * vm     : VM whose memory receives the string.
 * str    : source characters; must not be NULL.
 * length : number of bytes to copy; 0 means "use strlen(str)".
 *
 * Returns the index into vm->memory of the length word.
 *
 * NOTE(review): there is no check that the string fits in the remaining
 * memory; callers must ensure enough space is left.
 */
uint32_t str_alloc(VM *vm, const char *str, uint32_t length) {
  /* C89: all declarations come before the first statement. */
  uint32_t str_addr;
  uint32_t i;
  uint32_t lane;

  if (!length) {
    length = (uint32_t)strlen(str);
  }

  str_addr = vm->mp;
  vm->memory[vm->mp++].u = length;

  lane = 0;
  for (i = 0; i < length; i++) {
    vm->memory[vm->mp].c[lane] = str[i];
    if (++lane == 4) {
      lane = 0;
      vm->mp++;
    }
  }

  /* Finish a partially filled word: pad it and step past it. */
  if (lane != 0) {
    while (lane < 4) {
      vm->memory[vm->mp].c[lane++] = '\0';
    }
    vm->mp++;
  }

  /* Grow the frame's block by the words actually consumed (length word plus
   * packed characters), not by length / 4, which dropped the header and any
   * remainder. */
  vm->frames[vm->fp].allocated.end += vm->mp - str_addr;
  return str_addr;
}
/**
* Step to the next opcode in the vm.
*/
bool step_vm(VM *vm) {
/* Get current instruction & Advance to next instruction */
uint32_t instruction = vm->code[vm->pc++].u;
uint8_t opcode = (instruction >> 24) & 0xFF;
uint8_t dest = (instruction >> 16) & 0xFF;
uint8_t src1 = (instruction >> 8) & 0xFF;
uint8_t src2 = instruction & 0xFF;
switch (opcode) {
case OP_HALT:
return false;
case OP_CALL:; /* whats up with this semicolon? ANSI C does not allow you to create a variabel after a case, so this noop is here */
uint32_t jmp = vm->code[vm->pc++].u; /* location of function in code */
vm->return_stack[vm->rp++].u = vm->pc; /* set return address */
vm->fp++; /* increment to the next free frame */
vm->frames[vm->fp].allocated.start = vm->mp; /* set start of new memory block */
vm->pc = jmp;
return true;
case OP_RETURN:
vm->pc = vm->return_stack[--vm->rp].u; /* set pc to return address */
vm->mp = vm->frames[vm->fp--].allocated.start; /* reset memory pointer to start of old slice, pop the frame */
return true;
case OP_LOADI:
vm->frames[vm->fp].registers[dest].i = vm->code[vm->pc++].i;
return true;
...
```
Here is my lexer:
```c
/*
 * Kinds of token the lexer can report. TOKEN_EOF is the first enumerator and
 * therefore has the value 0; TOKEN_ERROR is the last.
 * TOKEN_TYPE_INT/NAT/REAL/STR presumably correspond to the built-in type
 * names int, nat, real and str — confirm against the keyword table.
 */
typedef enum {
TOKEN_EOF,
TOKEN_IDENTIFIER,
TOKEN_INT_LITERAL,
TOKEN_FLOAT_LITERAL,
TOKEN_STRING_LITERAL,
TOKEN_TYPE_INT,
TOKEN_TYPE_NAT,
TOKEN_TYPE_REAL,
TOKEN_TYPE_STR,
TOKEN_KEYWORD_TYPE,
TOKEN_KEYWORD_FN,
TOKEN_KEYWORD_LET,
TOKEN_KEYWORD_CONST,
TOKEN_KEYWORD_IF,
TOKEN_KEYWORD_ELSE,
TOKEN_KEYWORD_WHILE,
TOKEN_KEYWORD_FOR,
TOKEN_KEYWORD_RETURN,
TOKEN_KEYWORD_USE,
TOKEN_KEYWORD_INIT,
TOKEN_KEYWORD_THIS,
TOKEN_OPERATOR_IS,
TOKEN_BANG,
TOKEN_BANG_EQ,
TOKEN_EQ,
TOKEN_EQ_EQ,
TOKEN_GT,
TOKEN_LT,
TOKEN_GTE,
TOKEN_LTE,
TOKEN_DOT,
TOKEN_COMMA,
TOKEN_COLON,
TOKEN_SEMICOLON,
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_STAR,
TOKEN_SLASH,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_LBRACE,
TOKEN_RBRACE,
TOKEN_LBRACKET,
TOKEN_RBRACKET,
TOKEN_ERROR
} TokenType;
/*
 * One lexed token: its kind, a pointer to its first character with a length,
 * and a line number. Presumably `start` points into the source text rather
 * than owning a copy, so the text is not NUL-terminated at `length` — TODO
 * confirm.
 */
typedef struct {
TokenType type;
const char *start;
int length;
int line;
} Token;
/* Pairs a keyword spelling with the TokenType it maps to. */
typedef struct {
const char *keyword;
TokenType token;
} Keyword;
/*
 * Lexer scanning state: two pointers into the text and a line counter.
 * Presumably `start` marks the beginning of the token being scanned and
 * `current` the next character to read — TODO confirm.
 */
typedef struct {
const char *start;
const char *current;
int line;
} Lexer;
/* Gives the lexer the text to scan. The lexer state is not passed in, so it
 * is presumably held in a file-scope Lexer — implementation not shown. */
void init_lexer(const char *source);
/* Returns a string naming `type` (implementation not shown). */
const char *token_type_name(TokenType type);
/* Returns the next token. Declared with (void) so this is a real prototype:
 * an empty parameter list in C leaves the arguments unchecked. */
Token next_token(void);
```
This is something like my grammar:
```
type «token» {
init() {
// values
}
}
! example
type Vec3 {
init(x real, y real, z real) {
this.x = x;
this.y = y;
this.z = z;
}
}
- real
- 32 bit floats
- int
- 32 bit integer
- nat
- 32 bit unsigned integer (for loop counting and indexing)
- str
- ""
- bool (uint32, 0 = false, anything else = true)
- true / false
- !
- comment
- ??
- unwrap or
- .?
- null check or return error
- +
- addition
- -
- subtraction
- negation
- *
- multiplication
- /
- division
- ^
- power
- ==
- equals
- <
- less than
- >
- greater than
- >=
- greater than or equals
- <=
- less than or equals
- .
- accessor
- ++
- inline add 1
- --
- inline subtract 1
- +=
- inline add n
- -=
- inline subtract n
- *=
- inline multiply n
- /=
- inline divide n
- mod
- modulo
- not
- logical not
- and
- logical and
- or
- logical or
- xor
- logical xor
- band
- bitwise and
- bor
- bitwise or
- bxor
- bitwise xor
- srl
- bit shift right
- sll
- bit shift left
«simple_type» «variable» = val; ! similar to c
«complex_type» «variable»(«fields», …); ! similar to c++
«type»[«length»] «variable» = [val1, val2, ...]; ! similar to c/c++
if («boolean expression») {
} else if («boolean expression») {
} else {
}
if («token» is real) {
print("hello yes self is a real?");
}
switch (value) {
case A:
case B:
case C:
default:
}
for («token» in «collection») { «body» }
while («boolean expression») { «body» }
do («variable» = initial_value, end_value, increment) { «body» }
fn «token» («type» «parameter», ...) «return_type» {
«body»
}
```
Here is an example program, followed by the bytecode I compiled from it by hand for testing:
```zrl
fn fib(int n) int {
if (n < 2) return n;
return fib(n - 2) + fib(n - 1);
}
print fib(35);
```
```c
/**
 * Appends the hand-assembled bytecode for the recursive fib(35) program to
 * vm->code, starting at vm->cp and advancing vm->cp past each word written.
 *
 * The index comments and the jump targets (9 = entry of fib, 32 = base-case
 * return) are absolute code indices, so they line up only when emission
 * starts at cp == 0.
 *
 * Always returns true.
 */
bool test_recursive_function_compile(VM *vm) {
  static const uint32_t program[] = {
    /* fn main() */
    /*  0 */ OP(OP_LOADI, 0, 0, 0),         /* r0 = 35 */
    /*  1 */ 35,
    /*  2 */ OP(OP_PUSHI, 0, 0, 0),         /* push argument */
    /*  3 */ OP(OP_CALL, 0, 0, 0),          /* fib(35) */
    /*  4 */ 9,
    /*  5 */ OP(OP_POPI, 0, 0, 0),          /* get return value */
    /*  6 */ OP(OP_INT_TO_STRING, 1, 0, 0),
    /*  7 */ OP(OP_PRINT_STRING, 0, 1, 0),  /* print(fib(35).toS()); */
    /*  8 */ OP(OP_HALT, 0, 0, 0),
    /* fn fib() */
    /*  9 */ OP(OP_POPI, 0, 0, 0),          /* r0 = n */
    /* 10 */ OP(OP_LOADI, 1, 0, 0),         /* r1 = 2 */
    /* 11 */ 2,
    /* 12 */ OP(OP_LOADI, 2, 0, 0),         /* r2 = address of base-case return */
    /* 13 */ 32,
    /* 14 */ OP(OP_JLT_INT, 2, 0, 1),       /* if (n < 2) goto r2 */
    /* 15 */ OP(OP_LOADI, 3, 0, 0),         /* r3 = 2 */
    /* 16 */ 2,
    /* 17 */ OP(OP_SUB_INT, 4, 0, 3),       /* r4 = n - 2 */
    /* 18 */ OP(OP_PUSHI, 4, 0, 0),
    /* 19 */ OP(OP_CALL, 0, 0, 0),          /* fib(n - 2) */
    /* 20 */ 9,
    /* 21 */ OP(OP_LOADI, 3, 0, 0),         /* r3 = 1 */
    /* 22 */ 1,
    /* 23 */ OP(OP_SUB_INT, 4, 0, 3),       /* r4 = n - 1 */
    /* 24 */ OP(OP_PUSHI, 4, 0, 0),
    /* 25 */ OP(OP_CALL, 0, 0, 0),          /* fib(n - 1) */
    /* 26 */ 9,
    /* 27 */ OP(OP_POPI, 4, 0, 0),          /* result of fib(n - 1) */
    /* 28 */ OP(OP_POPI, 5, 0, 0),          /* result of fib(n - 2) */
    /* 29 */ OP(OP_ADD_INT, 6, 5, 4),
    /* 30 */ OP(OP_PUSHI, 6, 0, 0),         /* return the sum */
    /* 31 */ OP(OP_RETURN, 0, 0, 0),
    /* 32 */ OP(OP_PUSHI, 0, 0, 0),         /* base case: return n */
    /* 33 */ OP(OP_RETURN, 0, 0, 0)
  };
  const uint32_t word_count = sizeof(program) / sizeof(program[0]);
  uint32_t k;

  /* The literals are small non-negative values, so storing them through .u
   * produces the same bits as storing them through .i. */
  for (k = 0; k < word_count; k++) {
    vm->code[vm->cp++].u = program[k];
  }
  return true;
}
```
I now have a 32-bit VM that can do math and store strings, plus a lexer, but I still need a way to compile the lexer output into a program using my opcodes.