WIP allocator for compiler.

This commit is contained in:
zongor 2026-03-22 09:27:18 -07:00
parent 6310390cc4
commit 373caf7b5e
9 changed files with 1058 additions and 842 deletions

33
.clang-format Normal file
View File

@ -0,0 +1,33 @@
# Plan 9 coding conventions for C (http://man.9front.org/6/style)
BasedOnStyle: LLVM
IndentWidth: 2
TabWidth: 2
UseTab: Always
SpaceBeforeParens: Never
SpaceBeforeAssignmentOperators: true
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
RemoveBracesLLVM: true
BreakBeforeBraces: Custom
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
AlwaysBreakAfterReturnType: TopLevelDefinitions
DerivePointerAlignment: false
PointerAlignment: Right
AlignOperands: Align
AlignAfterOpenBracket: Align
SortIncludes: Never
IndentCaseLabels: false

1036
lexer.c

File diff suppressed because it is too large Load Diff

169
lexer.h
View File

@ -4,95 +4,96 @@
#include "libc.h"
typedef enum {
TOKEN_ERROR,
TOKEN_EOF,
TOKEN_IDENTIFIER,
TOKEN_LITERAL_INT,
TOKEN_LITERAL_NAT,
TOKEN_LITERAL_REAL,
TOKEN_LITERAL_STR,
TOKEN_TYPE_I8,
TOKEN_TYPE_I16,
TOKEN_TYPE_INT,
TOKEN_TYPE_U8,
TOKEN_TYPE_U16,
TOKEN_TYPE_NAT,
TOKEN_TYPE_REAL,
TOKEN_TYPE_STR,
TOKEN_TYPE_BOOL,
TOKEN_TYPE_VOID,
TOKEN_TYPE_PTR,
TOKEN_KEYWORD_PLEX,
TOKEN_KEYWORD_FN,
TOKEN_KEYWORD_CONST,
TOKEN_KEYWORD_IF,
TOKEN_KEYWORD_IS,
TOKEN_KEYWORD_AS,
TOKEN_KEYWORD_ELSE,
TOKEN_KEYWORD_WHILE,
TOKEN_KEYWORD_FOR,
TOKEN_KEYWORD_RETURN,
TOKEN_KEYWORD_USE,
TOKEN_KEYWORD_INIT,
TOKEN_KEYWORD_THIS,
TOKEN_KEYWORD_GLOBAL,
TOKEN_KEYWORD_OPEN,
TOKEN_KEYWORD_READ,
TOKEN_KEYWORD_WRITE,
TOKEN_KEYWORD_STAT,
TOKEN_KEYWORD_CLOSE,
TOKEN_KEYWORD_LOOP,
TOKEN_KEYWORD_DO,
TOKEN_KEYWORD_NIL,
TOKEN_KEYWORD_TRUE,
TOKEN_KEYWORD_FALSE,
TOKEN_OPERATOR_NOT,
TOKEN_OPERATOR_AND,
TOKEN_OPERATOR_OR,
TOKEN_BANG,
TOKEN_BANG_EQ,
TOKEN_EQ,
TOKEN_EQ_EQ,
TOKEN_AND,
TOKEN_AND_AND,
TOKEN_PIPE,
TOKEN_PIPE_PIPE,
TOKEN_QUESTION,
TOKEN_QUESTION_DOT,
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_STAR,
TOKEN_SLASH,
TOKEN_MESH,
TOKEN_BIG_MONEY,
TOKEN_GT,
TOKEN_LT,
TOKEN_GTE,
TOKEN_LTE,
TOKEN_DOT,
TOKEN_COMMA,
TOKEN_COLON,
TOKEN_CARET,
TOKEN_SEMICOLON,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_LBRACE,
TOKEN_RBRACE,
TOKEN_LBRACKET,
TOKEN_RBRACKET,
TOKEN_ARROW_RIGHT
TOKEN_ERROR,
TOKEN_EOF,
TOKEN_IDENTIFIER,
TOKEN_LITERAL_INT,
TOKEN_LITERAL_NAT,
TOKEN_LITERAL_REAL,
TOKEN_LITERAL_STR,
TOKEN_TYPE_I8,
TOKEN_TYPE_I16,
TOKEN_TYPE_INT,
TOKEN_TYPE_U8,
TOKEN_TYPE_U16,
TOKEN_TYPE_NAT,
TOKEN_TYPE_REAL,
TOKEN_TYPE_STR,
TOKEN_TYPE_BOOL,
TOKEN_TYPE_VOID,
TOKEN_TYPE_PTR,
TOKEN_KEYWORD_PLEX,
TOKEN_KEYWORD_FN,
TOKEN_KEYWORD_CONST,
TOKEN_KEYWORD_IF,
TOKEN_KEYWORD_IS,
TOKEN_KEYWORD_AS,
TOKEN_KEYWORD_ELSE,
TOKEN_KEYWORD_WHILE,
TOKEN_KEYWORD_FOR,
TOKEN_KEYWORD_RETURN,
TOKEN_KEYWORD_USE,
TOKEN_KEYWORD_INIT,
TOKEN_KEYWORD_THIS,
TOKEN_KEYWORD_GLOBAL,
TOKEN_KEYWORD_OPEN,
TOKEN_KEYWORD_READ,
TOKEN_KEYWORD_WRITE,
TOKEN_KEYWORD_STAT,
TOKEN_KEYWORD_CLOSE,
TOKEN_KEYWORD_LOOP,
TOKEN_KEYWORD_DO,
TOKEN_KEYWORD_NIL,
TOKEN_KEYWORD_TRUE,
TOKEN_KEYWORD_FALSE,
TOKEN_OPERATOR_NOT,
TOKEN_OPERATOR_AND,
TOKEN_OPERATOR_OR,
TOKEN_BANG,
TOKEN_BANG_EQ,
TOKEN_EQ,
TOKEN_EQ_EQ,
TOKEN_AND,
TOKEN_AND_AND,
TOKEN_PIPE,
TOKEN_PIPE_PIPE,
TOKEN_QUESTION,
TOKEN_QUESTION_DOT,
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_STAR,
TOKEN_SLASH,
TOKEN_MESH,
TOKEN_BIG_MONEY,
TOKEN_GT,
TOKEN_LT,
TOKEN_GTE,
TOKEN_LTE,
TOKEN_DOT,
TOKEN_COMMA,
TOKEN_COLON,
TOKEN_CARET,
TOKEN_SEMICOLON,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_LBRACE,
TOKEN_RBRACE,
TOKEN_LBRACKET,
TOKEN_RBRACKET,
TOKEN_ARROW_RIGHT
} TokenType;
typedef struct {
TokenType type;
const char *start;
i32 length;
i32 line;
} Token;
/* One token produced by the lexer (the value type returned by next_token()). */
typedef struct token_s Token;
struct token_s {
TokenType type; /* which TOKEN_* kind this token is */
const char *start; /* beginning of the token text; presumably points into the source buffer -- TODO confirm */
i32 length; /* presumably the number of characters of token text at start -- TODO confirm */
i32 line; /* presumably the source line the token was read from -- TODO confirm */
};
void init_lexer(const char *source);
Token next_token();
const char* token_type_to_string(TokenType type);
const char *token_type_to_string(TokenType type);
char peek();
#endif

134
libc.c
View File

@ -1,72 +1,96 @@
#include "libc.h"
void mcpy(void *to, void *from, u32 length) {
u8 *src, *dest;
if (to == nil || from == nil) return;
void
mcpy(void *to, void *from, u32 length)
{
u8 *src, *dest;
if(to == nil || from == nil) return;
src = (u8 *)from;
dest = (u8 *)to;
src = (u8 *)from;
dest = (u8 *)to;
while (length-- > 0) {
*(dest++) = *(src++);
}
return;
while(length-- > 0) *(dest++) = *(src++);
return;
}
i32 scpy(char *to, const char *from, u32 length) {
u32 i;
if (to == nil || from == nil) return -1;
if (length == 0) {return 0;}
for (i = 0; i < length - 1 && from[i] != '\0'; i++) {
to[i] = from[i];
}
to[i] = '\0';
return 0;
/*
 * Bounded string copy.
 *
 * Copies at most length - 1 characters from `from` into `to`, stopping
 * early at the source terminator, and always NUL-terminates `to` when
 * length is non-zero.
 *
 * Returns -1 if either pointer is nil, 0 otherwise. Truncation is not
 * reported, and a zero length writes nothing.
 */
i32
scpy(char *to, const char *from, u32 length)
{
	u32 n, limit;

	if(to == nil || from == nil) return -1;
	if(length == 0) return 0;

	limit = length - 1; /* keep one byte for the terminator */
	n = 0;
	while(n < limit && from[n] != '\0') {
		to[n] = from[n];
		n++;
	}
	to[n] = '\0';
	return 0;
}
bool seq(const char *s1, const char *s2) {
if (s1 == nil && s2 == nil) return true;
if (s1 == nil || s2 == nil) return false;
bool
seq(const char *s1, const char *s2)
{
if(s1 == nil && s2 == nil) return true;
if(s1 == nil || s2 == nil) return false;
while (*s1 && *s2) {
if (*s1 != *s2) return false;
s1++;
s2++;
}
while(*s1 && *s2) {
if(*s1 != *s2) return false;
s1++;
s2++;
}
return (*s1 == '\0' && *s2 == '\0');
return (*s1 == '\0' && *s2 == '\0');
}
bool sleq(const char *s1, const char *s2, u32 length) {
u32 i;
if (s1 == nil && s2 == nil) return true;
if (s1 == nil || s2 == nil) return false;
bool
sleq(const char *s1, const char *s2, u32 length)
{
u32 i;
if(s1 == nil && s2 == nil) return true;
if(s1 == nil || s2 == nil) return false;
i = 0;
while (i < length && *s1 && *s2) {
if (*s1 != *s2) return false;
s1++;
s2++;
i++;
}
if (i == length) return true;
return (*s1 == '\0' && *s2 == '\0');
i = 0;
while(i < length && *s1 && *s2) {
if(*s1 != *s2) return false;
s1++;
s2++;
i++;
}
if(i == length) return true;
return (*s1 == '\0' && *s2 == '\0');
}
u32 slen(const char *str) {
u32 i;
if (str == nil) {return 0;}
for (i = 0; str[i] != '\0'; i++) {
;
}
return i;
/*
 * Length of a NUL-terminated string, not counting the terminator.
 * A nil pointer is treated as an empty string and yields 0.
 */
u32
slen(const char *str)
{
	u32 n = 0;

	if(str == nil) return 0;
	while(str[n] != '\0') n++;
	return n;
}
u32 snlen(const char *str, u32 max_len) {
u32 i;
if (str == nil) {return 0;}
for (i = 0; i < max_len && str[i] != '\0'; i++) {
;
}
return i;
}
/*
 * Length of a string, examining at most max_len characters.
 * Returns the index of the first NUL, or max_len if none is found in
 * that range. A nil pointer yields 0.
 */
u32
snlen(const char *str, u32 max_len)
{
	u32 n = 0;

	if(str == nil) return 0;
	while(n < max_len && str[n] != '\0') n++;
	return n;
}
/*
 * Bump-allocates `size` bytes from `arena`.
 *
 * Returns a pointer to the first of `size` bytes inside arena->tape and
 * advances arena->count past them, or nil when the arena is nil, has no
 * backing buffer, or does not have `size` bytes left.
 *
 * The memory is returned as-is: it is not cleared, and no alignment
 * padding is inserted between consecutive allocations.
 *
 * The function is defined under the name `aalloc` because that is the
 * name libc.h declares and parser.c calls.
 */
void *
aalloc(Arena *arena, u32 size)
{
	u32 pos;

	if(arena == nil || arena->tape == nil) return nil;
	/* compare against the remaining space so count + size cannot wrap */
	if(arena->count > arena->capacity) return nil;
	if(size > arena->capacity - arena->count) return nil;

	pos = arena->count;
	arena->count += size;
	return &arena->tape[pos];
}

/* Earlier misspelled name, kept so any caller using it still links. */
void *
aaloc(Arena *arena, u32 size)
{
	return aalloc(arena, size);
}
/*
 * Releases everything allocated from `arena` by resetting its fill
 * level to zero. The backing buffer itself is left untouched.
 *
 * Returns the number of bytes that were in use before the reset, or 0
 * when `arena` is nil.
 */
u32
afree(Arena *arena)
{
	u32 freed;

	if(arena == nil) return 0;
	freed = arena->count;
	arena->count = 0;
	return freed;
}

45
libc.h
View File

@ -15,21 +15,21 @@
#ifdef HAVE_STDINT
#include <stdint.h>
typedef uint8_t u8;
typedef int8_t i8;
typedef uint16_t u16;
typedef int16_t i16;
typedef uint32_t u32;
typedef int32_t i32;
typedef float f32;
typedef uint8_t u8;
typedef int8_t i8;
typedef uint16_t u16;
typedef int16_t i16;
typedef uint32_t u32;
typedef int32_t i32;
typedef float f32;
#else
typedef unsigned char u8;
typedef signed char i8;
typedef unsigned short u16;
typedef signed short i16;
typedef unsigned int u32;
typedef signed int i32;
typedef float f32;
typedef unsigned char u8;
typedef signed char i8;
typedef unsigned short u16;
typedef signed short i16;
typedef unsigned int u32;
typedef signed int i32;
typedef float f32;
#endif
#ifdef HAVE_STDBOOL
@ -44,17 +44,17 @@ typedef u8 bool;
#include <stddef.h>
#define nil NULL
#else
#define nil ((void*)0)
#define nil ((void *)0)
#endif
/*
 * Value limits for the fixed-width integer typedefs.
 *
 * Negative limits are parenthesized so they expand safely inside any
 * expression. I32_MIN is spelled (-2147483647 - 1) because the literal
 * 2147483648 does not fit in a 32-bit int and would give the macro a
 * wider type; U32_MAX carries a U suffix for the same reason.
 */
#define I8_MIN (-128)
#define I8_MAX 127
#define U8_MAX 255
#define I16_MIN (-32768)
#define I16_MAX 32767
#define U16_MAX 65535
#define I32_MIN (-2147483647 - 1)
#define I32_MAX 2147483647
#define U32_MAX 4294967295U
@ -69,11 +69,20 @@ typedef u8 bool;
#define USED(x) ((void)(x))
/*
 * Linear allocation arena over a caller-provided byte buffer.
 * The allocator in libc.c hands out consecutive byte ranges of tape and
 * tracks the fill level in count; afree() resets count to zero.
 */
typedef struct arena_s Arena;
struct arena_s {
u8 *tape; /* backing byte buffer that allocations are carved from */
u32 count; /* number of bytes of tape currently handed out */
u32 capacity; /* upper bound on count; allocations that would exceed it fail */
};
void mcpy(void *dest, void *src, u32 n);
i32 scpy(char* to, const char *from, u32 length);
i32 scpy(char *to, const char *from, u32 length);
bool seq(const char *s1, const char *s2);
bool sleq(const char *s1, const char *s2, u32 length);
u32 slen(const char *str);
u32 snlen(const char *str, u32 max_len);
void *aalloc(Arena *arena, u32 size);
u32 afree(Arena *arena);
#endif

36
main.c
View File

@ -1,24 +1,26 @@
#include <stdio.h>
#include <stdlib.h>
#define EMBED_FILE(name) \
void emit_##name(const char *filename) { \
FILE *f = fopen(filename, "wb"); \
if (f) { \
fwrite(name, 1, name##_len, f); \
fclose(f); \
} \
}
/*
 * EMBED_FILE(name) defines a function emit_<name>(filename) that writes
 * the name##_len bytes of the array `name` to `filename`, opened in
 * binary write mode.
 *
 * If the file cannot be opened nothing is written and nothing is
 * reported; the results of fwrite and fclose are not checked either.
 * `name` and `name##_len` must already be declared where the macro is
 * expanded (presumably by the generated array source -- TODO confirm).
 */
#define EMBED_FILE(name) \
void emit_##name(const char *filename) \
{ \
FILE *f = fopen(filename, "wb"); \
if(f) { \
fwrite(name, 1, name##_len, f); \
fclose(f); \
} \
}
int main(int argc, char **argv) {
char *name;
int
main(int argc, char **argv)
{
char *name;
if (argc > 1) {
name = argv[1];
} else {
name = "'u'";
}
if(argc > 1)
name = argv[1];
else
name = "'u'";
printf("nuqneH %s?\n", name);
return EXIT_SUCCESS;
printf("nuqneH %s?\n", name);
return EXIT_SUCCESS;
}

194
parser.c
View File

@ -1,51 +1,177 @@
#include "parser.h"
bool push(TokenStack *ts, Token t) {
if (ts->count >= ts->capacity) return false;
ts->stack[ts->count++] = t;
return true;
Parser parser;
/*
 * Moves the global parser one token forward: the current token becomes
 * the previous one and a fresh token is fetched from the lexer.
 *
 * Returns false when the fetched token is TOKEN_ERROR, true otherwise.
 */
bool
advance()
{
	parser.previous = parser.current;
	parser.current = next_token();
	return parser.current.type != TOKEN_ERROR;
}
Token pop(TokenStack *ts) {
if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
return ts->stack[--ts->count];
/*
 * Pushes token `t` onto the stack.
 * Returns true on success, false when the stack is already at capacity
 * (in which case the stack is left unchanged).
 */
bool
push(TokenStack *ts, Token t)
{
	if(ts->count < ts->capacity) {
		ts->stack[ts->count] = t;
		ts->count++;
		return true;
	}
	return false;
}
Token top(TokenStack *ts) {
if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
return ts->stack[ts->count - 1];
/*
 * Removes and returns the most recently pushed token.
 * On an empty stack returns a TOKEN_ERROR token with a nil start and
 * length/line of -1.
 */
Token
pop(TokenStack *ts)
{
	Token none = {TOKEN_ERROR, nil, -1, -1};

	if(ts->count == 0) return none;
	ts->count--;
	return ts->stack[ts->count];
}
bool enqueue(TokenQueue *tq, Token t) {
if (tq->count >= tq->capacity) return false;
tq->queue[tq->end] = t;
tq->end = (tq->end + 1) % tq->capacity; // Wrap around
tq->count++;
return true;
/*
 * Returns the most recently pushed token without removing it.
 * On an empty stack returns a TOKEN_ERROR token with a nil start and
 * length/line of -1.
 */
Token
top(TokenStack *ts)
{
	Token none = {TOKEN_ERROR, nil, -1, -1};

	if(ts->count != 0) return ts->stack[ts->count - 1];
	return none;
}
Token dequeue(TokenQueue *tq) {
if (tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1};
Token t = tq->queue[tq->start];
tq->start = (tq->start + 1) % tq->capacity; // Wrap around
tq->count--;
return t;
/*
 * Appends token `t` at the end of the circular queue.
 * Returns true on success, false when the queue already holds
 * `capacity` tokens (the queue is then left unchanged).
 */
bool
enqueue(TokenQueue *tq, Token t)
{
	i32 slot;

	if(tq->count >= tq->capacity) return false;

	slot = tq->end;
	tq->queue[slot] = t;
	tq->end = (slot + 1) % tq->capacity; /* wrap back to index 0 past the last slot */
	tq->count++;
	return true;
}
Token peek_queue(TokenQueue *tq) {
if (tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1};
return tq->queue[tq->start];
/*
 * Removes and returns the oldest token in the circular queue.
 * On an empty queue returns a TOKEN_ERROR token with a nil start and
 * length/line of -1.
 *
 * Uses `nil` (the project's null constant from libc.h) rather than
 * NULL, which is not defined when <stddef.h> is not included.
 */
Token
dequeue(TokenQueue *tq)
{
	Token t;

	if(tq->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};

	t = tq->queue[tq->start];
	tq->start = (tq->start + 1) % tq->capacity; /* wrap around */
	tq->count--;
	return t;
}
bool expression() {
/*
 * Returns the oldest token in the queue without removing it.
 * On an empty queue returns a TOKEN_ERROR token with a nil start and
 * length/line of -1.
 *
 * Uses `nil` (the project's null constant from libc.h) rather than
 * NULL, which is not defined when <stddef.h> is not included.
 */
Token
peek_queue(TokenQueue *tq)
{
	if(tq->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
	return tq->queue[tq->start];
}
bool compile(char *source) {
TokenStack operators;
TokenQueue output;
return true;
/*
 * Converts a pointer into the arena's buffer to its byte offset from
 * the start of arena->tape. `p` must point into that buffer.
 */
u32
idx_from_arena(Arena *arena, void *p)
{
	u8 *byte = (u8 *)p;

	return (u32)(byte - arena->tape);
}
/*
 * Converts a byte offset within the arena's buffer back to a pointer.
 * No bounds check is performed on `i`.
 */
void *
ptr_from_arena(Arena *arena, u32 i)
{
	return arena->tape + i;
}
/*
 * Creates an empty list whose header and nodes live in `arena`.
 *
 * `size` is the payload size of one element; each node additionally
 * carries a trailing u32 link holding the arena offset of the next node.
 * Offset 0 doubles as the "no node" marker for head, tail and links,
 * which is safe because the header is allocated from the same arena
 * before any node, so no node can sit at offset 0.
 *
 * Returns the new list header, or nil if the arena cannot hold it.
 */
ArenaList *
al_create(Arena *arena, u32 size)
{
	ArenaList *meta = aalloc(arena, sizeof(ArenaList));

	if(!meta) return nil;

	/* arena memory is not cleared, so every field is set explicitly */
	meta->arena = arena;
	meta->size = size + sizeof(u32);
	meta->head = 0;
	meta->tail = 0;
	meta->count = 0;
	/* NOTE(review): -1 is assumed to mean "no parent"; confirm against the scope code */
	meta->parent = -1;
	return meta;
}
/*
 * Appends one uninitialized element to the end of `list`.
 *
 * A node of list->size bytes is allocated from the list's arena: the
 * payload starts at offset 0 and the last sizeof(u32) bytes hold the
 * link to the next node (0 = end of list).
 *
 * Returns the payload pointer, also stored through `out_payload` when
 * that is non-nil. Returns nil, leaving the list unchanged, when `list`
 * is nil or the arena is out of space.
 */
void *
al_append(ArenaList *list, void **out_payload)
{
	void *node;
	u8 *link;
	u32 idx;

	if(list == nil) return nil;

	node = aalloc(list->arena, list->size);
	if(!node) return nil;
	idx = idx_from_arena(list->arena, node);

	/* the new node is the last one, so its link is the end marker */
	link = (u8 *)node + (list->size - sizeof(u32));
	*(u32 *)link = 0;

	if(list->tail != 0) {
		/* hook the previous tail's link up to the new node */
		u8 *prev = (u8 *)ptr_from_arena(list->arena, list->tail);
		*(u32 *)(prev + (list->size - sizeof(u32))) = idx;
	} else {
		list->head = idx;
	}
	list->tail = idx;
	/* NOTE(review): count is assumed to be the number of elements in the list -- confirm */
	list->count++;

	if(out_payload) *out_payload = node;
	return node;
}
/*
 * Returns a pointer to the first node of `list`, or nil when the list
 * is empty (head offset 0).
 */
void *
al_head(ArenaList *list)
{
	u32 first = list->head;

	if(first != 0) return ptr_from_arena(list->arena, first);
	return nil;
}
/*
 * Returns a pointer to the last node of `list`, or nil when the list
 * is empty (tail offset 0).
 */
void *
al_tail(ArenaList *list)
{
	u32 last = list->tail;

	if(last != 0) return ptr_from_arena(list->arena, last);
	return nil;
}
/*
 * Looks up a symbol by name in a list of SymbolLink nodes.
 *
 * `name` is a NUL-terminated string. A symbol's name is a Token, i.e. a
 * start pointer plus a length, so the comparison is bounded by that
 * length instead of relying on a terminator after the token text
 * (presumably the token points into the source buffer -- confirm
 * against the lexer).
 *
 * Returns the first matching link, or nil when there is no match or
 * when `table` or `name` is nil.
 */
SymbolLink *
symbol_table_find(ArenaList *table, const char *name)
{
	SymbolLink *link;
	u32 name_len, idx;

	if(table == nil || name == nil) return nil;

	name_len = slen(name);
	/* follow the cdr offsets; 0 marks the end of the list */
	for(idx = table->head; idx != 0; idx = link->cdr) {
		link = (SymbolLink *)ptr_from_arena(table->arena, idx);
		if(link->s.name.length < 0) continue;
		if((u32)link->s.name.length != name_len) continue;
		if(sleq(link->s.name.start, name, name_len)) return link;
	}
	return nil;
}
/****************************************************
* Parser
***************************************************/
/*
 * Expression parsing entry point -- currently a stub.
 *
 * Sets up a 256-entry token stack and a 256-entry token queue in local
 * storage but does not read any tokens or use either container yet;
 * it always returns true.
 * NOTE(review): the operator-stack/output-queue pair looks like the
 * scaffolding for a shunting-yard parser -- confirm the intent.
 */
bool
expression()
{
/* fixed-size backing storage for the operator stack */
Token operator_stack[256];
TokenStack operators = {0};
operators.stack = operator_stack;
operators.capacity = 256;
/* fixed-size backing storage for the output queue */
Token output_queue[256];
TokenQueue output = {0};
output.queue = output_queue;
output.capacity = 256;
return true;
}
/*
 * Compiler entry point -- currently a stub.
 * `source` is not read yet; the function always returns true.
 */
bool
compile(char *source)
{
return true;
}

146
parser.h
View File

@ -4,100 +4,92 @@
#include "libc.h"
#include "lexer.h"
typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
typedef enum {
VOID,
BOOL,
I8,
I16,
I32,
U8,
U16,
U32,
F8,
F16,
F32,
STR,
PLEX,
ARRAY,
FUNCTION
/*
 * Type tag for a symbol. Stored in Symbol.type and also used for a
 * function's return type and an array's element type in Symbol.def.
 */
typedef enum symbol_type_e {
VOID,
BOOL,
I8,
I16,
I32,
U8,
U16,
U32,
F8,
F16,
F32,
STR,
ARRAY,
FUNCTION,
PLEX,
METHOD,
TRAIT,
} SymbolType;
typedef struct arena_list_s ArenaList;
typedef struct symbol_s Symbol;
typedef struct symbol_tab_s SymbolTable;
typedef struct value_type_s ValueType;
typedef struct plex_fields_tab_s PlexFieldsTable;
typedef struct plex_def_s PlexDef;
typedef struct plex_tab_s PlexTable;
typedef struct scope_s Scope;
typedef struct scope_tab_s ScopeTable;
typedef struct symbol_link_s SymbolLink;
typedef struct token_stack_s TokenStack;
typedef struct queue_s TokenQueue;
typedef struct parser_s Parser;
struct value_type_s {
SymbolType type;
u32 name;
u32 size;
u32 table_ref; // if it is a heap object
};
struct plex_def_s {
u32 name;
u32 size;
u32 field_ref_start;
u32 field_count;
};
struct plex_fields_tab_s {
u32 *plex_refs;
ValueType *fields;
u32 count;
u32 capacity;
};
struct plex_tab_s {
PlexDef *symbols;
u32 count;
u32 capacity;
};
#define MAX_SYMBOL_NAME_LENGTH 64
struct symbol_s {
char name[MAX_SYMBOL_NAME_LENGTH];
u8 name_length;
SymbolType type;
ScopeType scope;
u32 ref; // vm->mp if global, vm->pc local, register if var
u32 size; // size of symbol
Token name;
SymbolType type;
u32 size;
i32 scope;
union type_def {
struct trait_def {
u32 field_ref_start; /* reference to field list of symbols */
u32 methods_ref_start; /* zero if none */
} trait;
struct plex_def {
u32 field_ref_start; /* reference to field list of symbols */
u32 methods_ref_start; /* zero if none */
} plex;
struct function_def {
SymbolType return_type;
u32 arguments_ref_start; /* reference to field list of symbols */
} function;
struct array_def {
SymbolType type;
u32 length; /* zero means "unbounded" */
} array;
struct field_def {
u32 offset;
} field;
} def;
};
#define MAX_SYMBOLS 256
struct symbol_tab_s {
Symbol symbols[MAX_SYMBOLS];
u8 count;
i32 parent;
struct symbol_link_s {
Symbol s;
u32 cdr; /* zero means "end of list" */
};
struct scope_tab_s {
SymbolTable *scopes;
u32 count;
u32 capacity;
i32 scope_ref;
u32 depth;
struct arena_list_s {
Arena *arena;
u32 head;
u32 tail;
u32 size;
u32 count;
i32 parent;
};
struct token_stack_s {
Token *stack;
i32 capacity;
i32 count;
Token *stack;
i32 capacity;
i32 count;
};
struct queue_s {
Token *queue;
i32 capacity;
i32 start;
i32 end;
i32 count;
Token *queue;
i32 capacity;
i32 start;
i32 end;
i32 count;
};
/* Token window kept by the parser; updated by advance() in parser.c. */
struct parser_s {
Token current; /* token most recently fetched from next_token() */
Token previous; /* value current held before the last advance() */
};
bool push(TokenStack *ts, Token t);

View File

@ -3,64 +3,63 @@
#include <string.h>
#include <ctype.h>
int main(int argc, char *argv[]) {
FILE *in;
int c;
long count = 0;
long col = 0;
char *var_name;
char *p;
if (argc != 2) {
fprintf(stderr, "Usage: %s <input_file>\n", argv[0]);
return 1;
}
int
main(int argc, char *argv[])
{
FILE *in;
int c;
long count = 0;
long col = 0;
char *var_name;
char *p;
in = fopen(argv[1], "rb");
if (!in) {
perror("Error opening input file");
return 1;
}
if(argc != 2) {
fprintf(stderr, "Usage: %s <input_file>\n", argv[0]);
return 1;
}
var_name = (char *)malloc(strlen(argv[1]) + 1);
if (!var_name) {
perror("Memory allocation failed");
fclose(in);
return 1;
}
strcpy(var_name, argv[1]);
for (p = var_name; *p; ++p) {
if (!isalnum((unsigned char)*p)) {
*p = '_';
}
}
in = fopen(argv[1], "rb");
if(!in) {
perror("Error opening input file");
return 1;
}
printf("unsigned char %s[] = {\n", var_name);
var_name = (char *)malloc(strlen(argv[1]) + 1);
if(!var_name) {
perror("Memory allocation failed");
fclose(in);
return 1;
}
c = fgetc(in);
while (c != EOF) {
printf(" 0x%02x", c);
count++;
strcpy(var_name, argv[1]);
int next = fgetc(in);
if (next != EOF) {
printf(",");
ungetc(next, in);
if (++col >= 12) {
printf("\n");
col = 0;
}
}
c = fgetc(in);
}
printf("\n};\n");
printf("unsigned int %s_len = %lu;\n", var_name, count);
free(var_name);
fclose(in);
for(p = var_name; *p; ++p)
if(!isalnum((unsigned char)*p)) *p = '_';
return 0;
printf("unsigned char %s[] = {\n", var_name);
c = fgetc(in);
while(c != EOF) {
printf(" 0x%02x", c);
count++;
int next = fgetc(in);
if(next != EOF) {
printf(",");
ungetc(next, in);
if(++col >= 12) {
printf("\n");
col = 0;
}
}
c = fgetc(in);
}
printf("\n};\n");
printf("unsigned int %s_len = %lu;\n", var_name, count);
free(var_name);
fclose(in);
return 0;
}