WIP allocator for compiler.

This commit is contained in:
zongor 2026-03-22 09:27:18 -07:00
parent 6310390cc4
commit 373caf7b5e
9 changed files with 1058 additions and 842 deletions

33
.clang-format Normal file
View File

@ -0,0 +1,33 @@
# Plan 9 coding conventions for C (http://man.9front.org/6/style)
BasedOnStyle: LLVM
IndentWidth: 2
TabWidth: 2
UseTab: Always
SpaceBeforeParens: Never
SpaceBeforeAssignmentOperators: true
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
RemoveBracesLLVM: true
BreakBeforeBraces: Custom
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
AlwaysBreakAfterReturnType: TopLevelDefinitions
DerivePointerAlignment: false
PointerAlignment: Right
AlignOperands: Align
AlignAfterOpenBracket: Align
SortIncludes: Never
IndentCaseLabels: false

214
lexer.c
View File

@ -2,51 +2,73 @@
#include "lexer.h" #include "lexer.h"
typedef struct { typedef struct lexer_s Lexer;
struct lexer_s {
const char *start; const char *start;
const char *current; const char *current;
i32 line; i32 line;
} Lexer; };
Lexer lexer; Lexer lexer;
void init_lexer(const char *source) { void
init_lexer(const char *source)
{
lexer.start = source; lexer.start = source;
lexer.current = source; lexer.current = source;
lexer.line = 1; lexer.line = 1;
} }
static bool is_alpha(char c) { static bool
is_alpha(char c)
{
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
} }
static bool is_digit(char c) { return c >= '0' && c <= '9'; } static bool
is_digit(char c)
{
return c >= '0' && c <= '9';
}
static bool is_at_end() { return *lexer.current == '\0'; } static bool
is_at_end()
{
return *lexer.current == '\0';
}
static char advance() { static char
advance()
{
lexer.current++; lexer.current++;
return lexer.current[-1]; return lexer.current[-1];
} }
char peek() { return *lexer.current; } char
peek()
{
return *lexer.current;
}
static char peek_next() { static char
if (is_at_end()) peek_next()
return '\0'; {
if(is_at_end()) return '\0';
return lexer.current[1]; return lexer.current[1];
} }
static bool match(char expected) { static bool
if (is_at_end()) match(char expected)
return false; {
if (*lexer.current != expected) if(is_at_end()) return false;
return false; if(*lexer.current != expected) return false;
lexer.current++; lexer.current++;
return true; return true;
} }
static Token make_token(TokenType type) { static Token
make_token(TokenType type)
{
Token token; Token token;
token.type = type; token.type = type;
token.start = lexer.start; token.start = lexer.start;
@ -55,7 +77,9 @@ static Token make_token(TokenType type) {
return token; return token;
} }
static Token error_token(const char *message) { static Token
error_token(const char *message)
{
Token token; Token token;
token.type = TOKEN_ERROR; token.type = TOKEN_ERROR;
token.start = message; token.start = message;
@ -64,10 +88,12 @@ static Token error_token(const char *message) {
return token; return token;
} }
static void skip_whitespace() { static void
for (;;) { skip_whitespace()
{
for(;;) {
char c = peek(); char c = peek();
switch (c) { switch(c) {
case ' ': case ' ':
case '\r': case '\r':
case '\t': case '\t':
@ -78,19 +104,17 @@ static void skip_whitespace() {
advance(); advance();
break; break;
case '/': case '/':
if (peek_next() == '/') { if(peek_next() == '/') {
// Single-line comment: skip until newline or end of file // Single-line comment: skip until newline or end of file
advance(); advance();
while (peek() != '\n' && !is_at_end()) while(peek() != '\n' && !is_at_end()) advance();
advance(); } else if(peek_next() == '*') {
} else if (peek_next() == '*') {
// Multi-line comment: skip until '*/' or end of file // Multi-line comment: skip until '*/' or end of file
advance(); advance();
advance(); advance();
while (!is_at_end()) { while(!is_at_end()) {
if (peek() == '\n') if(peek() == '\n') lexer.line++;
lexer.line++; if(peek() == '*' && peek_next() == '/') {
if (peek() == '*' && peek_next() == '/') {
advance(); advance();
advance(); advance();
break; // Exit loop, comment ended break; // Exit loop, comment ended
@ -107,9 +131,10 @@ static void skip_whitespace() {
} }
} }
static TokenType check_keyword(i32 start, i32 length, const char *rest, static TokenType
TokenType type) { check_keyword(i32 start, i32 length, const char *rest, TokenType type)
if (lexer.current - lexer.start == start + length && {
if(lexer.current - lexer.start == start + length &&
memcmp(lexer.start + start, rest, length) == 0) { memcmp(lexer.start + start, rest, length) == 0) {
return type; return type;
} }
@ -117,11 +142,13 @@ static TokenType check_keyword(i32 start, i32 length, const char *rest,
return TOKEN_IDENTIFIER; return TOKEN_IDENTIFIER;
} }
static TokenType identifierType() { static TokenType
switch (lexer.start[0]) { identifierType()
{
switch(lexer.start[0]) {
case 'a': case 'a':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'n': case 'n':
return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND); return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND);
case 's': case 's':
@ -130,8 +157,8 @@ static TokenType identifierType() {
} }
break; break;
case 'c': case 'c':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'l': case 'l':
return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE);
case 'o': case 'o':
@ -142,8 +169,8 @@ static TokenType identifierType() {
case 'e': case 'e':
return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
case 'f': case 'f':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'a': case 'a':
return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE);
case 'o': case 'o':
@ -155,8 +182,8 @@ static TokenType identifierType() {
} }
break; break;
case 'i': case 'i':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'f': case 'f':
return check_keyword(2, 0, "", TOKEN_KEYWORD_IF); return check_keyword(2, 0, "", TOKEN_KEYWORD_IF);
case 's': case 's':
@ -168,8 +195,8 @@ static TokenType identifierType() {
case '3': case '3':
return check_keyword(2, 1, "2", TOKEN_TYPE_INT); return check_keyword(2, 1, "2", TOKEN_TYPE_INT);
case 'n': case 'n':
if (lexer.current - lexer.start > 2) { if(lexer.current - lexer.start > 2) {
switch (lexer.start[2]) { switch(lexer.start[2]) {
case 'i': case 'i':
return check_keyword(3, 2, "t", TOKEN_KEYWORD_INIT); return check_keyword(3, 2, "t", TOKEN_KEYWORD_INIT);
case 't': case 't':
@ -181,8 +208,8 @@ static TokenType identifierType() {
} }
break; break;
case 'n': case 'n':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'a': case 'a':
return check_keyword(2, 1, "t", TOKEN_TYPE_NAT); return check_keyword(2, 1, "t", TOKEN_TYPE_NAT);
case 'i': case 'i':
@ -191,8 +218,8 @@ static TokenType identifierType() {
} }
break; break;
case 'o': case 'o':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'p': case 'p':
return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN); return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN);
case 'r': case 'r':
@ -201,8 +228,9 @@ static TokenType identifierType() {
} }
break; break;
case 'p': case 'p':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { case 't': switch(lexer.start[1]) {
case 't':
return check_keyword(2, 1, "r", TOKEN_TYPE_PTR); return check_keyword(2, 1, "r", TOKEN_TYPE_PTR);
case 'l': case 'l':
@ -211,15 +239,15 @@ static TokenType identifierType() {
} }
break; break;
case 'r': case 'r':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'e': case 'e':
if (lexer.current - lexer.start > 2) { if(lexer.current - lexer.start > 2) {
switch (lexer.start[2]) { switch(lexer.start[2]) {
case 't': case 't':
return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
case 'a': case 'a':
if (lexer.current - lexer.start > 3) { if(lexer.current - lexer.start > 3) {
switch(lexer.start[3]) { switch(lexer.start[3]) {
case 'd': case 'd':
return check_keyword(4, 0, "", TOKEN_KEYWORD_READ); return check_keyword(4, 0, "", TOKEN_KEYWORD_READ);
@ -234,11 +262,11 @@ static TokenType identifierType() {
} }
break; break;
case 's': case 's':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 't': case 't':
if (lexer.current - lexer.start > 2) { if(lexer.current - lexer.start > 2) {
switch (lexer.start[2]) { switch(lexer.start[2]) {
case 'r': case 'r':
return check_keyword(3, 0, "", TOKEN_TYPE_STR); return check_keyword(3, 0, "", TOKEN_TYPE_STR);
case 'a': case 'a':
@ -249,8 +277,8 @@ static TokenType identifierType() {
} }
break; break;
case 't': case 't':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'h': case 'h':
return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS); return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS);
case 'r': case 'r':
@ -259,8 +287,8 @@ static TokenType identifierType() {
} }
break; break;
case 'u': case 'u':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 's': case 's':
return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE); return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE);
case '8': case '8':
@ -273,8 +301,8 @@ static TokenType identifierType() {
} }
break; break;
case 'w': case 'w':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'h': case 'h':
return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE);
case 'r': case 'r':
@ -283,8 +311,8 @@ static TokenType identifierType() {
} }
break; break;
case 'b': case 'b':
if (lexer.current - lexer.start > 1) { if(lexer.current - lexer.start > 1) {
switch (lexer.start[1]) { switch(lexer.start[1]) {
case 'y': case 'y':
return check_keyword(2, 2, "te", TOKEN_TYPE_U8); return check_keyword(2, 2, "te", TOKEN_TYPE_U8);
case 'o': case 'o':
@ -305,23 +333,24 @@ static TokenType identifierType() {
return TOKEN_IDENTIFIER; return TOKEN_IDENTIFIER;
} }
static Token identifier() { static Token
while (is_alpha(peek()) || is_digit(peek())) identifier()
advance(); {
while(is_alpha(peek()) || is_digit(peek())) advance();
return make_token(identifierType()); return make_token(identifierType());
} }
static Token number() { static Token
while (is_digit(peek())) number()
advance(); {
while(is_digit(peek())) advance();
/* Look for a fractional part. */ /* Look for a fractional part. */
if (peek() == '.' && is_digit(peek_next())) { if(peek() == '.' && is_digit(peek_next())) {
/* Consume the ".". */ /* Consume the ".". */
advance(); advance();
while (is_digit(peek())) while(is_digit(peek())) advance();
advance();
return make_token(TOKEN_LITERAL_REAL); return make_token(TOKEN_LITERAL_REAL);
} }
@ -329,36 +358,35 @@ static Token number() {
return make_token(TOKEN_LITERAL_INT); return make_token(TOKEN_LITERAL_INT);
} }
static Token string() { static Token
while (peek() != '"' && !is_at_end()) { string()
if (peek() == '\n') {
lexer.line++; while(peek() != '"' && !is_at_end()) {
if(peek() == '\n') lexer.line++;
advance(); advance();
} }
if (is_at_end()) if(is_at_end()) return error_token("Unterminated string.");
return error_token("Unterminated string.");
/* The closing quote. */ /* The closing quote. */
advance(); advance();
return make_token(TOKEN_LITERAL_STR); return make_token(TOKEN_LITERAL_STR);
} }
Token next_token() { Token
next_token()
{
skip_whitespace(); skip_whitespace();
lexer.start = lexer.current; lexer.start = lexer.current;
if (is_at_end()) if(is_at_end()) return make_token(TOKEN_EOF);
return make_token(TOKEN_EOF);
char c = advance(); char c = advance();
if (is_alpha(c)) if(is_alpha(c)) return identifier();
return identifier();
char next = peek(); char next = peek();
if ((c == '-' && is_digit(next)) || is_digit(c)) if((c == '-' && is_digit(next)) || is_digit(c)) return number();
return number();
switch (c) { switch(c) {
case '(': case '(':
return make_token(TOKEN_LPAREN); return make_token(TOKEN_LPAREN);
case ')': case ')':
@ -406,8 +434,10 @@ Token next_token() {
return error_token("Unexpected character."); return error_token("Unexpected character.");
} }
const char *token_type_to_string(TokenType type) { const char *
switch (type) { token_type_to_string(TokenType type)
{
switch(type) {
case TOKEN_EOF: case TOKEN_EOF:
return "EOF"; return "EOF";
case TOKEN_IDENTIFIER: case TOKEN_IDENTIFIER:

View File

@ -83,16 +83,17 @@ typedef enum {
TOKEN_ARROW_RIGHT TOKEN_ARROW_RIGHT
} TokenType; } TokenType;
typedef struct { typedef struct token_s Token;
struct token_s {
TokenType type; TokenType type;
const char *start; const char *start;
i32 length; i32 length;
i32 line; i32 line;
} Token; };
void init_lexer(const char *source); void init_lexer(const char *source);
Token next_token(); Token next_token();
const char* token_type_to_string(TokenType type); const char *token_type_to_string(TokenType type);
char peek(); char peek();
#endif #endif

88
libc.c
View File

@ -1,35 +1,37 @@
#include "libc.h" #include "libc.h"
void mcpy(void *to, void *from, u32 length) { void
mcpy(void *to, void *from, u32 length)
{
u8 *src, *dest; u8 *src, *dest;
if (to == nil || from == nil) return; if(to == nil || from == nil) return;
src = (u8 *)from; src = (u8 *)from;
dest = (u8 *)to; dest = (u8 *)to;
while (length-- > 0) { while(length-- > 0) *(dest++) = *(src++);
*(dest++) = *(src++);
}
return; return;
} }
i32 scpy(char *to, const char *from, u32 length) { i32
scpy(char *to, const char *from, u32 length)
{
u32 i; u32 i;
if (to == nil || from == nil) return -1; if(to == nil || from == nil) return -1;
if (length == 0) {return 0;} if(length == 0) return 0;
for (i = 0; i < length - 1 && from[i] != '\0'; i++) { for(i = 0; i < length - 1 && from[i] != '\0'; i++) to[i] = from[i];
to[i] = from[i];
}
to[i] = '\0'; to[i] = '\0';
return 0; return 0;
} }
bool seq(const char *s1, const char *s2) { bool
if (s1 == nil && s2 == nil) return true; seq(const char *s1, const char *s2)
if (s1 == nil || s2 == nil) return false; {
if(s1 == nil && s2 == nil) return true;
if(s1 == nil || s2 == nil) return false;
while (*s1 && *s2) { while(*s1 && *s2) {
if (*s1 != *s2) return false; if(*s1 != *s2) return false;
s1++; s1++;
s2++; s2++;
} }
@ -37,36 +39,58 @@ bool seq(const char *s1, const char *s2) {
return (*s1 == '\0' && *s2 == '\0'); return (*s1 == '\0' && *s2 == '\0');
} }
bool sleq(const char *s1, const char *s2, u32 length) { bool
sleq(const char *s1, const char *s2, u32 length)
{
u32 i; u32 i;
if (s1 == nil && s2 == nil) return true; if(s1 == nil && s2 == nil) return true;
if (s1 == nil || s2 == nil) return false; if(s1 == nil || s2 == nil) return false;
i = 0; i = 0;
while (i < length && *s1 && *s2) { while(i < length && *s1 && *s2) {
if (*s1 != *s2) return false; if(*s1 != *s2) return false;
s1++; s1++;
s2++; s2++;
i++; i++;
} }
if (i == length) return true; if(i == length) return true;
return (*s1 == '\0' && *s2 == '\0'); return (*s1 == '\0' && *s2 == '\0');
} }
u32 slen(const char *str) { u32
slen(const char *str)
{
u32 i; u32 i;
if (str == nil) {return 0;} if(str == nil) return 0;
for (i = 0; str[i] != '\0'; i++) { for(i = 0; str[i] != '\0'; i++);
;
}
return i; return i;
} }
u32 snlen(const char *str, u32 max_len) { u32
snlen(const char *str, u32 max_len)
{
u32 i; u32 i;
if (str == nil) {return 0;} if(str == nil) return 0;
for (i = 0; i < max_len && str[i] != '\0'; i++) { for(i = 0; i < max_len && str[i] != '\0'; i++);
;
}
return i; return i;
} }
/*
 * Bump-allocate size bytes from arena.
 *
 * Returns a pointer to the first of size bytes inside arena->tape and
 * advances arena->count past them. Returns nil when arena is nil or when
 * fewer than size bytes remain. The returned bytes are not cleared and no
 * alignment padding is inserted between allocations.
 *
 * The name matches the prototype in libc.h and the calls in parser.c.
 */
void *
aalloc(Arena *arena, u32 size)
{
	u32 pos;

	if(arena == nil) return nil;
	/*
	 * Test against the space that is left rather than count + size: the
	 * sum can wrap around in u32 and let an oversized request through.
	 */
	if(arena->count > arena->capacity) return nil;
	if(size > arena->capacity - arena->count) return nil;

	pos = arena->count;
	arena->count += size;
	return &arena->tape[pos];
}

/*
 * Compatibility alias for the earlier misspelled name; forwards to aalloc.
 */
void *
aaloc(Arena *arena, u32 size)
{
	return aalloc(arena, size);
}
/*
 * Release every allocation in arena at once by rewinding it to empty.
 *
 * Returns the number of bytes that were in use before the reset, or 0 when
 * arena is nil. The tape contents are left untouched; the space is simply
 * handed out again by subsequent allocations.
 */
u32
afree(Arena *arena)
{
	u32 freed;

	/* Same nil tolerance as the allocator: nothing to free, nothing freed. */
	if(arena == nil) return 0;

	freed = arena->count;
	arena->count = 0;
	return freed;
}

41
libc.h
View File

@ -15,21 +15,21 @@
#ifdef HAVE_STDINT #ifdef HAVE_STDINT
#include <stdint.h> #include <stdint.h>
typedef uint8_t u8; typedef uint8_t u8;
typedef int8_t i8; typedef int8_t i8;
typedef uint16_t u16; typedef uint16_t u16;
typedef int16_t i16; typedef int16_t i16;
typedef uint32_t u32; typedef uint32_t u32;
typedef int32_t i32; typedef int32_t i32;
typedef float f32; typedef float f32;
#else #else
typedef unsigned char u8; typedef unsigned char u8;
typedef signed char i8; typedef signed char i8;
typedef unsigned short u16; typedef unsigned short u16;
typedef signed short i16; typedef signed short i16;
typedef unsigned int u32; typedef unsigned int u32;
typedef signed int i32; typedef signed int i32;
typedef float f32; typedef float f32;
#endif #endif
#ifdef HAVE_STDBOOL #ifdef HAVE_STDBOOL
@ -44,7 +44,7 @@ typedef u8 bool;
#include <stddef.h> #include <stddef.h>
#define nil NULL #define nil NULL
#else #else
#define nil ((void*)0) #define nil ((void *)0)
#endif #endif
#define I8_MIN -128 #define I8_MIN -128
@ -69,11 +69,20 @@ typedef u8 bool;
#define USED(x) ((void)(x)) #define USED(x) ((void)(x))
/*
 * Arena: a fixed-capacity byte buffer that is allocated from front to back
 * and released all at once.
 */
typedef struct arena_s Arena;
struct arena_s {
/* backing bytes that allocations are carved out of */
u8 *tape;
/* bytes handed out so far; also the offset of the next allocation */
u32 count;
/* upper bound that count may not exceed */
u32 capacity;
};
void mcpy(void *dest, void *src, u32 n); void mcpy(void *dest, void *src, u32 n);
i32 scpy(char* to, const char *from, u32 length); i32 scpy(char *to, const char *from, u32 length);
bool seq(const char *s1, const char *s2); bool seq(const char *s1, const char *s2);
bool sleq(const char *s1, const char *s2, u32 length); bool sleq(const char *s1, const char *s2, u32 length);
u32 slen(const char *str); u32 slen(const char *str);
u32 snlen(const char *str, u32 max_len); u32 snlen(const char *str, u32 max_len);
void *aalloc(Arena *arena, u32 size);
u32 afree(Arena *arena);
#endif #endif

14
main.c
View File

@ -2,22 +2,24 @@
#include <stdlib.h> #include <stdlib.h>
#define EMBED_FILE(name) \ #define EMBED_FILE(name) \
void emit_##name(const char *filename) { \ void emit_##name(const char *filename) \
{ \
FILE *f = fopen(filename, "wb"); \ FILE *f = fopen(filename, "wb"); \
if (f) { \ if(f) { \
fwrite(name, 1, name##_len, f); \ fwrite(name, 1, name##_len, f); \
fclose(f); \ fclose(f); \
} \ } \
} }
int main(int argc, char **argv) { int
main(int argc, char **argv)
{
char *name; char *name;
if (argc > 1) { if(argc > 1)
name = argv[1]; name = argv[1];
} else { else
name = "'u'"; name = "'u'";
}
printf("nuqneH %s?\n", name); printf("nuqneH %s?\n", name);
return EXIT_SUCCESS; return EXIT_SUCCESS;

160
parser.c
View File

@ -1,23 +1,46 @@
#include "parser.h" #include "parser.h"
bool push(TokenStack *ts, Token t) { Parser parser;
if (ts->count >= ts->capacity) return false;
/*
 * Step the parser forward by one token.
 *
 * The token that was current becomes parser.previous and the next token
 * from the lexer becomes parser.current. Returns true unless the lexer
 * produced a TOKEN_ERROR token.
 */
bool
advance()
{
	parser.previous = parser.current;
	parser.current = next_token();
	return parser.current.type != TOKEN_ERROR;
}
bool
push(TokenStack *ts, Token t)
{
if(ts->count >= ts->capacity) return false;
ts->stack[ts->count++] = t; ts->stack[ts->count++] = t;
return true; return true;
} }
Token pop(TokenStack *ts) { Token
if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1}; pop(TokenStack *ts)
{
if(ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
return ts->stack[--ts->count]; return ts->stack[--ts->count];
} }
Token top(TokenStack *ts) { Token
if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1}; top(TokenStack *ts)
{
if(ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
return ts->stack[ts->count - 1]; return ts->stack[ts->count - 1];
} }
bool enqueue(TokenQueue *tq, Token t) { bool
if (tq->count >= tq->capacity) return false; enqueue(TokenQueue *tq, Token t)
{
if(tq->count >= tq->capacity) return false;
tq->queue[tq->end] = t; tq->queue[tq->end] = t;
tq->end = (tq->end + 1) % tq->capacity; // Wrap around tq->end = (tq->end + 1) % tq->capacity; // Wrap around
@ -25,8 +48,10 @@ bool enqueue(TokenQueue *tq, Token t) {
return true; return true;
} }
Token dequeue(TokenQueue *tq) { Token
if (tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1}; dequeue(TokenQueue *tq)
{
if(tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1};
Token t = tq->queue[tq->start]; Token t = tq->queue[tq->start];
tq->start = (tq->start + 1) % tq->capacity; // Wrap around tq->start = (tq->start + 1) % tq->capacity; // Wrap around
@ -34,18 +59,119 @@ Token dequeue(TokenQueue *tq) {
return t; return t;
} }
Token peek_queue(TokenQueue *tq) { Token
if (tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1}; peek_queue(TokenQueue *tq)
{
if(tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1};
return tq->queue[tq->start]; return tq->queue[tq->start];
} }
bool expression() { u32
idx_from_arena(Arena *arena, void *p)
{
return (u32)((u8 *)p - arena->tape);
} }
bool compile(char *source) { void *
TokenStack operators; ptr_from_arena(Arena *arena, u32 i)
TokenQueue output; {
return &arena->tape[i];
}
/*
 * Create an empty singly linked list whose header and nodes live in arena.
 *
 * size is the payload size in bytes. Each node additionally carries a
 * trailing u32 link holding the arena offset of the next node, with 0
 * marking the end of the list.
 *
 * Returns the new list header, or nil if the arena cannot hold it.
 */
ArenaList *
al_create(Arena *arena, u32 size)
{
	ArenaList *meta = aalloc(arena, sizeof(ArenaList));
	if(!meta) return nil;

	meta->arena = arena;
	meta->size = size + sizeof(u32);
	meta->head = 0;
	meta->tail = 0;
	/*
	 * Arena memory is not cleared on allocation, so the remaining header
	 * fields are set explicitly instead of being left with stale bytes.
	 */
	meta->count = 0;
	meta->parent = -1; /* NOTE(review): assumes -1 means "no parent" — confirm */
	return meta;
}
/*
 * Append a new node to the end of list.
 *
 * Allocates list->size bytes from the list's arena, terminates the new
 * node's trailing link with 0 and hooks the node in after the current
 * tail (or makes it the head when the list is empty).
 *
 * Returns the node's payload, which starts at offset 0 of the node, or nil
 * if the arena is exhausted. When out_payload is non-nil the same pointer
 * is also stored through it. The payload bytes are not initialised here.
 */
void *
al_append(ArenaList *list, void **out_payload)
{
	u8 *fresh = aalloc(list->arena, list->size);
	if(fresh == nil) return nil;

	u32 fresh_idx = idx_from_arena(list->arena, fresh);

	/* The link word occupies the last sizeof(u32) bytes of every node. */
	u32 *fresh_link = (u32 *)(fresh + (list->size - sizeof(u32)));
	*fresh_link = 0;

	if(list->tail == 0) {
		/* First node: it is both head and tail. */
		list->head = fresh_idx;
	} else {
		u8 *last = ptr_from_arena(list->arena, list->tail);
		u32 *last_link = (u32 *)(last + (list->size - sizeof(u32)));
		*last_link = fresh_idx;
	}
	list->tail = fresh_idx;

	if(out_payload != nil) *out_payload = fresh;
	return fresh;
}
/*
 * Return a pointer to the first node of list, or nil when the list is
 * empty (head offset 0).
 */
void *
al_head(ArenaList *list)
{
	u32 first = list->head;

	return (first != 0) ? ptr_from_arena(list->arena, first) : nil;
}
/*
 * Return a pointer to the last node of list, or nil when the list is
 * empty (tail offset 0).
 */
void *
al_tail(ArenaList *list)
{
	u32 last = list->tail;

	return (last != 0) ? ptr_from_arena(list->arena, last) : nil;
}
/*
 * Look up a symbol by name in a list of SymbolLink nodes.
 *
 * name is a NUL-terminated string. Walks the list from its head following
 * each node's cdr offset until a symbol with exactly that name is found.
 *
 * Returns the matching link, or nil when there is no match or when table
 * or name is nil.
 */
SymbolLink *
symbol_table_find(ArenaList *table, const char *name)
{
	SymbolLink *link;
	u32 name_len;

	if(table == nil || name == nil) return nil;

	name_len = slen(name);
	link = (SymbolLink *)al_head(table);
	while(link != nil) {
		/*
		 * A symbol's name is a Token: start points into the lexed source
		 * and is bounded by length, not by a NUL. Compare exactly length
		 * characters instead of running on to the end of the source.
		 */
		if((u32)link->s.name.length == name_len &&
		   sleq(link->s.name.start, name, name_len))
			return link;

		if(link->cdr == 0) break; /* zero means "end of list" */
		link = (SymbolLink *)ptr_from_arena(table->arena, link->cdr);
	}
	return nil;
}
/****************************************************
* Parser
***************************************************/
bool
expression()
{
Token operator_stack[256];
TokenStack operators = {0};
operators.stack = operator_stack;
operators.capacity = 256;
Token output_queue[256];
TokenQueue output = {0};
output.queue = output_queue;
output.capacity = 256;
return true; return true;
} }
/*
 * Compile source. Placeholder: the input is not examined yet and the
 * function always reports success.
 */
bool
compile(char *source)
{
	(void)source;
	return true;
}

100
parser.h
View File

@ -4,8 +4,7 @@
#include "libc.h" #include "libc.h"
#include "lexer.h" #include "lexer.h"
typedef enum { GLOBAL, LOCAL, VAR } ScopeType; typedef enum symbol_type_e {
typedef enum {
VOID, VOID,
BOOL, BOOL,
I8, I8,
@ -18,72 +17,60 @@ typedef enum {
F16, F16,
F32, F32,
STR, STR,
PLEX,
ARRAY, ARRAY,
FUNCTION FUNCTION,
PLEX,
METHOD,
TRAIT,
} SymbolType; } SymbolType;
typedef struct arena_list_s ArenaList;
typedef struct symbol_s Symbol; typedef struct symbol_s Symbol;
typedef struct symbol_tab_s SymbolTable; typedef struct symbol_link_s SymbolLink;
typedef struct value_type_s ValueType;
typedef struct plex_fields_tab_s PlexFieldsTable;
typedef struct plex_def_s PlexDef;
typedef struct plex_tab_s PlexTable;
typedef struct scope_s Scope;
typedef struct scope_tab_s ScopeTable;
typedef struct token_stack_s TokenStack; typedef struct token_stack_s TokenStack;
typedef struct queue_s TokenQueue; typedef struct queue_s TokenQueue;
typedef struct parser_s Parser;
struct value_type_s {
SymbolType type;
u32 name;
u32 size;
u32 table_ref; // if it is a heap object
};
struct plex_def_s {
u32 name;
u32 size;
u32 field_ref_start;
u32 field_count;
};
struct plex_fields_tab_s {
u32 *plex_refs;
ValueType *fields;
u32 count;
u32 capacity;
};
struct plex_tab_s {
PlexDef *symbols;
u32 count;
u32 capacity;
};
#define MAX_SYMBOL_NAME_LENGTH 64
struct symbol_s { struct symbol_s {
char name[MAX_SYMBOL_NAME_LENGTH]; Token name;
u8 name_length;
SymbolType type; SymbolType type;
ScopeType scope; u32 size;
u32 ref; // vm->mp if global, vm->pc local, register if var i32 scope;
u32 size; // size of symbol union type_def {
struct trait_def {
u32 field_ref_start; /* reference to field list of symbols */
u32 methods_ref_start; /* zero if none */
} trait;
struct plex_def {
u32 field_ref_start; /* reference to field list of symbols */
u32 methods_ref_start; /* zero if none */
} plex;
struct function_def {
SymbolType return_type;
u32 arguments_ref_start; /* reference to field list of symbols */
} function;
struct array_def {
SymbolType type;
u32 length; /* zero means "unbounded" */
} array;
struct field_def {
u32 offset;
} field;
} def;
}; };
#define MAX_SYMBOLS 256 struct symbol_link_s {
struct symbol_tab_s { Symbol s;
Symbol symbols[MAX_SYMBOLS]; u32 cdr; /* zero means "end of list" */
u8 count;
i32 parent;
}; };
struct scope_tab_s { struct arena_list_s {
SymbolTable *scopes; Arena *arena;
u32 head;
u32 tail;
u32 size;
u32 count; u32 count;
u32 capacity; i32 parent;
i32 scope_ref;
u32 depth;
}; };
struct token_stack_s { struct token_stack_s {
@ -100,6 +87,11 @@ struct queue_s {
i32 count; i32 count;
}; };
/*
 * Parser state: a two-token window over the token stream.
 */
struct parser_s {
/* most recent token fetched from the lexer */
Token current;
/* token that was current before the last advance */
Token previous;
};
bool push(TokenStack *ts, Token t); bool push(TokenStack *ts, Token t);
Token pop(TokenStack *ts); Token pop(TokenStack *ts);
Token top(TokenStack *ts); Token top(TokenStack *ts);

View File

@ -3,7 +3,9 @@
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
int main(int argc, char *argv[]) { int
main(int argc, char *argv[])
{
FILE *in; FILE *in;
int c; int c;
long count = 0; long count = 0;
@ -11,19 +13,19 @@ int main(int argc, char *argv[]) {
char *var_name; char *var_name;
char *p; char *p;
if (argc != 2) { if(argc != 2) {
fprintf(stderr, "Usage: %s <input_file>\n", argv[0]); fprintf(stderr, "Usage: %s <input_file>\n", argv[0]);
return 1; return 1;
} }
in = fopen(argv[1], "rb"); in = fopen(argv[1], "rb");
if (!in) { if(!in) {
perror("Error opening input file"); perror("Error opening input file");
return 1; return 1;
} }
var_name = (char *)malloc(strlen(argv[1]) + 1); var_name = (char *)malloc(strlen(argv[1]) + 1);
if (!var_name) { if(!var_name) {
perror("Memory allocation failed"); perror("Memory allocation failed");
fclose(in); fclose(in);
return 1; return 1;
@ -31,24 +33,21 @@ int main(int argc, char *argv[]) {
strcpy(var_name, argv[1]); strcpy(var_name, argv[1]);
for (p = var_name; *p; ++p) { for(p = var_name; *p; ++p)
if (!isalnum((unsigned char)*p)) { if(!isalnum((unsigned char)*p)) *p = '_';
*p = '_';
}
}
printf("unsigned char %s[] = {\n", var_name); printf("unsigned char %s[] = {\n", var_name);
c = fgetc(in); c = fgetc(in);
while (c != EOF) { while(c != EOF) {
printf(" 0x%02x", c); printf(" 0x%02x", c);
count++; count++;
int next = fgetc(in); int next = fgetc(in);
if (next != EOF) { if(next != EOF) {
printf(","); printf(",");
ungetc(next, in); ungetc(next, in);
if (++col >= 12) { if(++col >= 12) {
printf("\n"); printf("\n");
col = 0; col = 0;
} }