diff --git a/emit/emit_c.c b/emit/emit_c.c
new file mode 100644
index 0000000..e69de29
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..0d73ff3
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,606 @@
+#include <string.h>
+
+#include "lexer.h"
+
+/* Scanning state for the source buffer currently being tokenized. */
+typedef struct {
+  const char *start;   /* first character of the token being scanned */
+  const char *current; /* next character to be consumed */
+  i32 line;            /* current line number, starting at 1 */
+} Lexer;
+
+Lexer lexer;
+
+/* Reset the lexer to the beginning of `source`, a NUL-terminated string. */
+void init_lexer(const char *source) {
+  lexer.start = source;
+  lexer.current = source;
+  lexer.line = 1;
+}
+
+static bool is_alpha(char c) {
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
+}
+
+static bool is_digit(char c) { return c >= '0' && c <= '9'; }
+
+static bool is_at_end(void) { return *lexer.current == '\0'; }
+
+/* Consume one character and return it. */
+static char advance(void) {
+  lexer.current++;
+  return lexer.current[-1];
+}
+
+/* Return the next unconsumed character without consuming it. */
+char peek(void) { return *lexer.current; }
+
+/* Return the character after the next one, or '\0' at end of input. */
+static char peek_next(void) {
+  if (is_at_end())
+    return '\0';
+  return lexer.current[1];
+}
+
+/* Consume the next character only if it equals `expected`. */
+static bool match(char expected) {
+  if (is_at_end())
+    return false;
+  if (*lexer.current != expected)
+    return false;
+  lexer.current++;
+  return true;
+}
+
+/* Build a token of `type` spanning lexer.start up to lexer.current. */
+static Token make_token(TokenType type) {
+  Token token;
+  token.type = type;
+  token.start = lexer.start;
+  token.length = (i32)(lexer.current - lexer.start);
+  token.line = lexer.line;
+  return token;
+}
+
+/* Build a TOKEN_ERROR whose text is `message` rather than source text. */
+static Token error_token(const char *message) {
+  Token token;
+  token.type = TOKEN_ERROR;
+  token.start = message;
+  token.length = (i32)strlen(message);
+  token.line = lexer.line;
+  return token;
+}
+
+/* Skip blanks, newlines and both comment forms, counting lines as it goes. */
+static void skip_whitespace(void) {
+  for (;;) {
+    char c = peek();
+    switch (c) {
+    case ' ':
+    case '\r':
+    case '\t':
+      advance();
+      break;
+    case '\n':
+      lexer.line++;
+      advance();
+      break;
+    case '/':
+      if (peek_next() == '/') {
+        // Single-line comment: skip until newline or end of file
+        advance();
+        while (peek() != '\n' && !is_at_end())
+          advance();
+      } else if (peek_next() == '*') {
+        // Multi-line comment: skip until '*/' or end of file
+        advance();
+        advance();
+        while (!is_at_end()) {
+          if (peek() == '\n')
+            lexer.line++;
+          if (peek() == '*' && peek_next() == '/') {
+            advance();
+            advance();
+            break; // Exit loop, comment ended
+          }
+          advance();
+        }
+      } else {
+        return; // Not a comment, let tokenization handle it
+      }
+      break;
+    default:
+      return;
+    }
+  }
+}
+
+/*
+ * Return `type` when the current lexeme has exactly `start + length`
+ * characters and its tail, beginning at offset `start`, equals `rest`;
+ * otherwise return TOKEN_IDENTIFIER. `rest` must hold `length` characters.
+ */
+static TokenType check_keyword(i32 start, i32 length, const char *rest,
+                               TokenType type) {
+  if (lexer.current - lexer.start == start + length &&
+      memcmp(lexer.start + start, rest, (size_t)length) == 0) {
+    return type;
+  }
+
+  return TOKEN_IDENTIFIER;
+}
+
+/*
+ * Classify the lexeme in [lexer.start, lexer.current) as a keyword, type
+ * name or word operator by switching on its leading characters; anything
+ * unrecognized is a plain identifier.
+ */
+static TokenType identifierType(void) {
+  switch (lexer.start[0]) {
+  case 'a':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'n':
+        return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND);
+      case 's':
+        return check_keyword(2, 0, "", TOKEN_KEYWORD_AS);
+      }
+    }
+    break;
+  case 'c':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'l':
+        return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE);
+      case 'o':
+        return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST);
+      }
+    }
+    break;
+  case 'e':
+    return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
+  case 'f':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'a':
+        return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE);
+      case 'o':
+        return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR);
+      case '3':
+        return check_keyword(2, 1, "2", TOKEN_TYPE_REAL); /* f32 */
+      }
+      return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN);
+    }
+    break;
+  case 'i':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'f':
+        return check_keyword(2, 0, "", TOKEN_KEYWORD_IF);
+      case 's':
+        return check_keyword(2, 0, "", TOKEN_KEYWORD_IS);
+      case '8':
+        return check_keyword(2, 0, "", TOKEN_TYPE_I8);
+      case '1':
+        return check_keyword(2, 1, "6", TOKEN_TYPE_I16);
+      case '3':
+        return check_keyword(2, 1, "2", TOKEN_TYPE_INT);
+      case 'n':
+        if (lexer.current - lexer.start > 2) {
+          switch (lexer.start[2]) {
+          case 'i':
+            return check_keyword(3, 1, "t", TOKEN_KEYWORD_INIT); /* init */
+          case 't':
+            return check_keyword(3, 0, "", TOKEN_TYPE_INT);
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 'n':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'a':
+        return check_keyword(2, 1, "t", TOKEN_TYPE_NAT);
+      case 'i':
+        return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL);
+      case 'o':
+        return check_keyword(2, 1, "t", TOKEN_OPERATOR_NOT);
+      }
+    }
+    break;
+  case 'o':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'p':
+        return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN);
+      case 'r':
+        return check_keyword(2, 0, "", TOKEN_OPERATOR_OR);
+      }
+    }
+    break;
+  case 'p':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 't':
+        return check_keyword(2, 1, "r", TOKEN_TYPE_PTR);
+      case 'l':
+        return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX);
+      }
+    }
+    break;
+  case 'r':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'e':
+        if (lexer.current - lexer.start > 2) {
+          switch (lexer.start[2]) {
+          case 't':
+            return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
+          case 'a':
+            if (lexer.current - lexer.start > 3) {
+              switch (lexer.start[3]) {
+              case 'd':
+                return check_keyword(4, 0, "", TOKEN_KEYWORD_READ);
+              case 'l':
+                return check_keyword(4, 0, "", TOKEN_TYPE_REAL);
+              }
+            }
+          }
+        }
+        break;
+      }
+    }
+    break;
+  case 's':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 't':
+        if (lexer.current - lexer.start > 2) {
+          switch (lexer.start[2]) {
+          case 'r':
+            return check_keyword(3, 0, "", TOKEN_TYPE_STR);
+          case 'a':
+            return check_keyword(3, 1, "t", TOKEN_KEYWORD_STAT);
+          }
+        }
+      }
+    }
+    break;
+  case 't':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'h':
+        return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS);
+      case 'r':
+        return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE);
+      }
+    }
+    break;
+  case 'u':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 's':
+        return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE);
+      case '8':
+        return check_keyword(2, 0, "", TOKEN_TYPE_U8);
+      case '1':
+        return check_keyword(2, 1, "6", TOKEN_TYPE_U16);
+      case '3':
+        return check_keyword(2, 1, "2", TOKEN_TYPE_NAT);
+      }
+    }
+    break;
+  case 'w':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'h':
+        return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE);
+      case 'r':
+        return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE);
+      }
+    }
+    break;
+  case 'b':
+    if (lexer.current - lexer.start > 1) {
+      switch (lexer.start[1]) {
+      case 'y':
+        return check_keyword(2, 2, "te", TOKEN_TYPE_U8);
+      case 'o':
+        return check_keyword(2, 2, "ol", TOKEN_TYPE_BOOL);
+      }
+    }
+    break;
+  case 'g':
+    return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL);
+  case 'l':
+    return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP);
+  case 'd':
+    return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO);
+  case 'v':
+    return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID);
+  }
+
+  return TOKEN_IDENTIFIER;
+}
+
+/* Scan the rest of a word and classify it as keyword or identifier. */
+static Token identifier(void) {
+  while (is_alpha(peek()) || is_digit(peek()))
+    advance();
+  return make_token(identifierType());
+}
+
+/*
+ * Scan an integer or real literal; a '.' only counts as a decimal point
+ * when a digit follows it. The first character was consumed by the caller.
+ */
+static Token number(void) {
+  while (is_digit(peek()))
+    advance();
+
+  /* Look for a fractional part. */
+  if (peek() == '.' && is_digit(peek_next())) {
+    /* Consume the ".". */
+    advance();
+
+    while (is_digit(peek()))
+      advance();
+
+    return make_token(TOKEN_LITERAL_REAL);
+  }
+
+  return make_token(TOKEN_LITERAL_INT);
+}
+
+/*
+ * Scan a string literal after its opening quote. Newlines are allowed and
+ * counted; reaching end of input first yields an error token.
+ */
+static Token string(void) {
+  while (peek() != '"' && !is_at_end()) {
+    if (peek() == '\n')
+      lexer.line++;
+    advance();
+  }
+
+  if (is_at_end())
+    return error_token("Unterminated string.");
+
+  /* The closing quote. */
+  advance();
+  return make_token(TOKEN_LITERAL_STR);
+}
+
+/*
+ * Skip whitespace and comments, then scan and return the next token.
+ * Returns TOKEN_EOF at end of input and TOKEN_ERROR on an unknown character
+ * or an unterminated string.
+ */
+Token next_token(void) {
+  skip_whitespace();
+  lexer.start = lexer.current;
+
+  if (is_at_end())
+    return make_token(TOKEN_EOF);
+
+  char c = advance();
+  if (is_alpha(c))
+    return identifier();
+  char next = peek();
+  if ((c == '-' && is_digit(next)) || is_digit(c))
+    return number();
+
+  switch (c) {
+  case '(':
+    return make_token(TOKEN_LPAREN);
+  case ')':
+    return make_token(TOKEN_RPAREN);
+  case '{':
+    return make_token(TOKEN_LBRACE);
+  case '}':
+    return make_token(TOKEN_RBRACE);
+  case '[':
+    return make_token(TOKEN_LBRACKET);
+  case ']':
+    return make_token(TOKEN_RBRACKET);
+  case ';':
+    return make_token(TOKEN_SEMICOLON);
+  case ',':
+    return make_token(TOKEN_COMMA);
+  case '.':
+    return make_token(TOKEN_DOT);
+  case ':':
+    return make_token(TOKEN_COLON);
+  case '-':
+    return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS);
+  case '+':
+    return make_token(TOKEN_PLUS);
+  case '/':
+    return make_token(TOKEN_SLASH);
+  case '&':
+    return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND);
+  case '#':
+    return make_token(TOKEN_MESH);
+  case '$':
+    return make_token(TOKEN_BIG_MONEY);
+  case '*':
+    return make_token(TOKEN_STAR);
+  case '!':
+    return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG);
+  case '=':
+    return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ);
+  case '<':
+    return make_token(match('=') ? TOKEN_LTE : TOKEN_LT);
+  case '>':
+    return make_token(match('=') ? TOKEN_GTE : TOKEN_GT);
+  case '"':
+    return string();
+  }
+
+  return error_token("Unexpected character.");
+}
+
+/* Map `type` to a static name string; unknown values give "UNKNOWN_TOKEN". */
+const char *token_type_to_string(TokenType type) {
+  switch (type) {
+  case TOKEN_ERROR:
+    return "ERROR";
+  case TOKEN_EOF:
+    return "EOF";
+  case TOKEN_IDENTIFIER:
+    return "IDENTIFIER";
+  case TOKEN_LITERAL_INT:
+    return "LITERAL_INT";
+  case TOKEN_LITERAL_NAT:
+    return "LITERAL_NAT";
+  case TOKEN_LITERAL_REAL:
+    return "LITERAL_REAL";
+  case TOKEN_LITERAL_STR:
+    return "LITERAL_STR";
+  case TOKEN_TYPE_I8:
+    return "TYPE_I8";
+  case TOKEN_TYPE_I16:
+    return "TYPE_I16";
+  case TOKEN_TYPE_INT:
+    return "TYPE_INT";
+  case TOKEN_TYPE_U8:
+    return "TYPE_U8";
+  case TOKEN_TYPE_U16:
+    return "TYPE_U16";
+  case TOKEN_TYPE_NAT:
+    return "TYPE_NAT";
+  case TOKEN_TYPE_REAL:
+    return "TYPE_REAL";
+  case TOKEN_TYPE_STR:
+    return "TYPE_STR";
+  case TOKEN_TYPE_BOOL:
+    return "TYPE_BOOL";
+  case TOKEN_TYPE_VOID:
+    return "TYPE_VOID";
+  case TOKEN_TYPE_PTR:
+    return "TYPE_PTR";
+  case TOKEN_KEYWORD_PLEX:
+    return "KEYWORD_PLEX";
+  case TOKEN_KEYWORD_FN:
+    return "KEYWORD_FN";
+  case TOKEN_KEYWORD_CONST:
+    return "KEYWORD_CONST";
+  case TOKEN_KEYWORD_IF:
+    return "KEYWORD_IF";
+  case TOKEN_KEYWORD_IS:
+    return "IS";
+  case TOKEN_KEYWORD_AS:
+    return "AS";
+  case TOKEN_KEYWORD_ELSE:
+    return "KEYWORD_ELSE";
+  case TOKEN_KEYWORD_WHILE:
+    return "KEYWORD_WHILE";
+  case TOKEN_KEYWORD_FOR:
+    return "KEYWORD_FOR";
+  case TOKEN_KEYWORD_RETURN:
+    return "KEYWORD_RETURN";
+  case TOKEN_KEYWORD_USE:
+    return "KEYWORD_USE";
+  case TOKEN_KEYWORD_INIT:
+    return "KEYWORD_INIT";
+  case TOKEN_KEYWORD_THIS:
+    return "KEYWORD_THIS";
+  case TOKEN_KEYWORD_GLOBAL:
+    return "KEYWORD_GLOBAL";
+  case TOKEN_KEYWORD_OPEN:
+    return "TOKEN_KEYWORD_OPEN";
+  case TOKEN_KEYWORD_READ:
+    return "TOKEN_KEYWORD_READ";
+  case TOKEN_KEYWORD_WRITE:
+    return "TOKEN_KEYWORD_WRITE";
+  case TOKEN_KEYWORD_STAT:
+    return "TOKEN_KEYWORD_STAT";
+  case TOKEN_KEYWORD_CLOSE:
+    return "TOKEN_KEYWORD_CLOSE";
+  case TOKEN_KEYWORD_LOOP:
+    return "KEYWORD_LOOP";
+  case TOKEN_KEYWORD_DO:
+    return "KEYWORD_DO";
+  case TOKEN_KEYWORD_NIL:
+    return "KEYWORD_NIL";
+  case TOKEN_KEYWORD_TRUE:
+    return "KEYWORD_TRUE";
+  case TOKEN_KEYWORD_FALSE:
+    return "KEYWORD_FALSE";
+  case TOKEN_OPERATOR_NOT:
+    return "OPERATOR_NOT";
+  case TOKEN_OPERATOR_AND:
+    return "OPERATOR_AND";
+  case TOKEN_OPERATOR_OR:
+    return "OPERATOR_OR";
+  case TOKEN_BANG:
+    return "BANG";
+  case TOKEN_BANG_EQ:
+    return "BANG_EQ";
+  case TOKEN_EQ:
+    return "EQ";
+  case TOKEN_EQ_EQ:
+    return "EQ_EQ";
+  case TOKEN_AND:
+    return "AND";
+  case TOKEN_AND_AND:
+    return "AND_AND";
+  case TOKEN_PIPE:
+    return "PIPE";
+  case TOKEN_PIPE_PIPE:
+    return "PIPE_PIPE";
+  case TOKEN_QUESTION:
+    return "QUESTION";
+  case TOKEN_QUESTION_DOT:
+    return "QUESTION_DOT";
+  case TOKEN_PLUS:
+    return "PLUS";
+  case TOKEN_MINUS:
+    return "MINUS";
+  case TOKEN_STAR:
+    return "STAR";
+  case TOKEN_SLASH:
+    return "SLASH";
+  case TOKEN_MESH:
+    return "MESH";
+  case TOKEN_BIG_MONEY:
+    return "BIG_MONEY";
+  case TOKEN_GT:
+    return "GT";
+  case TOKEN_LT:
+    return "LT";
+  case TOKEN_GTE:
+    return "GTE";
+  case TOKEN_LTE:
+    return "LTE";
+  case TOKEN_DOT:
+    return "DOT";
+  case TOKEN_COMMA:
+    return "COMMA";
+  case TOKEN_COLON:
+    return "COLON";
+  case TOKEN_CARET:
+    return "CARET";
+  case TOKEN_SEMICOLON:
+    return "SEMICOLON";
+  case TOKEN_LPAREN:
+    return "LPAREN";
+  case TOKEN_RPAREN:
+    return "RPAREN";
+  case TOKEN_LBRACE:
+    return "LBRACE";
+  case TOKEN_RBRACE:
+    return "RBRACE";
+  case TOKEN_LBRACKET:
+    return "LBRACKET";
+  case TOKEN_RBRACKET:
+    return "RBRACKET";
+  case TOKEN_ARROW_RIGHT:
+    return "ARROW_RIGHT";
+  default:
+    return "UNKNOWN_TOKEN";
+  }
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..0543945
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,100 @@
+#ifndef UNDAR_LEXER_H
+#define UNDAR_LEXER_H
+
+#include "libc.h"
+
+/* Token kinds; TOKEN_ERROR carries a message instead of source text. */
+typedef enum {
+  TOKEN_ERROR,
+  TOKEN_EOF,
+  TOKEN_IDENTIFIER,
+  TOKEN_LITERAL_INT,
+  TOKEN_LITERAL_NAT,
+  TOKEN_LITERAL_REAL,
+  TOKEN_LITERAL_STR,
+  TOKEN_TYPE_I8,
+  TOKEN_TYPE_I16,
+  TOKEN_TYPE_INT,
+  TOKEN_TYPE_U8,
+  TOKEN_TYPE_U16,
+  TOKEN_TYPE_NAT,
+  TOKEN_TYPE_REAL,
+  TOKEN_TYPE_STR,
+  TOKEN_TYPE_BOOL,
+  TOKEN_TYPE_VOID,
+  TOKEN_TYPE_PTR,
+  TOKEN_KEYWORD_PLEX,
+  TOKEN_KEYWORD_FN,
+  TOKEN_KEYWORD_CONST,
+  TOKEN_KEYWORD_IF,
+  TOKEN_KEYWORD_IS,
+  TOKEN_KEYWORD_AS,
+  TOKEN_KEYWORD_ELSE,
+  TOKEN_KEYWORD_WHILE,
+  TOKEN_KEYWORD_FOR,
+  TOKEN_KEYWORD_RETURN,
+  TOKEN_KEYWORD_USE,
+  TOKEN_KEYWORD_INIT,
+  TOKEN_KEYWORD_THIS,
+  TOKEN_KEYWORD_GLOBAL,
+  TOKEN_KEYWORD_OPEN,
+  TOKEN_KEYWORD_READ,
+  TOKEN_KEYWORD_WRITE,
+  TOKEN_KEYWORD_STAT,
+  TOKEN_KEYWORD_CLOSE,
+  TOKEN_KEYWORD_LOOP,
+  TOKEN_KEYWORD_DO,
+  TOKEN_KEYWORD_NIL,
+  TOKEN_KEYWORD_TRUE,
+  TOKEN_KEYWORD_FALSE,
+  TOKEN_OPERATOR_NOT,
+  TOKEN_OPERATOR_AND,
+  TOKEN_OPERATOR_OR,
+  TOKEN_BANG,
+  TOKEN_BANG_EQ,
+  TOKEN_EQ,
+  TOKEN_EQ_EQ,
+  TOKEN_AND,
+  TOKEN_AND_AND,
+  TOKEN_PIPE,
+  TOKEN_PIPE_PIPE,
+  TOKEN_QUESTION,
+  TOKEN_QUESTION_DOT,
+  TOKEN_PLUS,
+  TOKEN_MINUS,
+  TOKEN_STAR,
+  TOKEN_SLASH,
+  TOKEN_MESH,
+  TOKEN_BIG_MONEY,
+  TOKEN_GT,
+  TOKEN_LT,
+  TOKEN_GTE,
+  TOKEN_LTE,
+  TOKEN_DOT,
+  TOKEN_COMMA,
+  TOKEN_COLON,
+  TOKEN_CARET,
+  TOKEN_SEMICOLON,
+  TOKEN_LPAREN,
+  TOKEN_RPAREN,
+  TOKEN_LBRACE,
+  TOKEN_RBRACE,
+  TOKEN_LBRACKET,
+  TOKEN_RBRACKET,
+  TOKEN_ARROW_RIGHT
+} TokenType;
+
+/* A lexeme: `length` characters starting at `start`, found on `line`. */
+typedef struct {
+  TokenType type;
+  const char *start;
+  i32 length;
+  i32 line;
+} Token;
+
+void init_lexer(const char *source);
+Token next_token(void);
+const char *token_type_to_string(TokenType type);
+char peek(void);
+
+#endif
diff --git a/libc.c b/libc.c
new file mode 100644
index 0000000..dd14c9f
--- /dev/null
+++ b/libc.c
@@ -0,0 +1,86 @@
+#include "libc.h"
+
+/* Copy `length` bytes from `from` to `to`; does nothing if either is nil. */
+void mcpy(void *to, const void *from, u32 length) {
+  const u8 *src;
+  u8 *dest;
+  if (to == nil || from == nil) return;
+
+  src = (const u8 *)from;
+  dest = (u8 *)to;
+
+  while (length-- > 0) {
+    *(dest++) = *(src++);
+  }
+  return;
+}
+
+/*
+ * Copy at most `length - 1` characters of `from` into `to` and NUL-terminate
+ * the result. Returns -1 if either pointer is nil, otherwise 0; a `length`
+ * of 0 writes nothing.
+ */
+i32 scpy(char *to, const char *from, u32 length) {
+  u32 i;
+  if (to == nil || from == nil) return -1;
+  if (length == 0) {return 0;}
+  for (i = 0; i < length - 1 && from[i] != '\0'; i++) {
+    to[i] = from[i];
+  }
+  to[i] = '\0';
+  return 0;
+}
+
+/* True when both strings are equal; two nil pointers compare equal. */
+bool seq(const char *s1, const char *s2) {
+  if (s1 == nil && s2 == nil) return true;
+  if (s1 == nil || s2 == nil) return false;
+
+  while (*s1 && *s2) {
+    if (*s1 != *s2) return false;
+    s1++;
+    s2++;
+  }
+
+  return (*s1 == '\0' && *s2 == '\0');
+}
+
+/*
+ * True when the first `length` characters of both strings match, or when
+ * both strings end together before that. Two nil pointers compare equal.
+ */
+bool sleq(const char *s1, const char *s2, u32 length) {
+  u32 i;
+  if (s1 == nil && s2 == nil) return true;
+  if (s1 == nil || s2 == nil) return false;
+
+  i = 0;
+  while (i < length && *s1 && *s2) {
+    if (*s1 != *s2) return false;
+    s1++;
+    s2++;
+    i++;
+  }
+  if (i == length) return true;
+  return (*s1 == '\0' && *s2 == '\0');
+}
+
+/* Length of `str` excluding the terminator; 0 for nil. */
+u32 slen(const char *str) {
+  u32 i;
+  if (str == nil) {return 0;}
+  for (i = 0; str[i] != '\0'; i++) {
+    ;
+  }
+  return i;
+}
+
+/* Like slen(), but never examines more than `max_len` characters. */
+u32 snlen(const char *str, u32 max_len) {
+  u32 i;
+  if (str == nil) {return 0;}
+  for (i = 0; i < max_len && str[i] != '\0'; i++) {
+    ;
+  }
+  return i;
+}
\ No newline at end of file
diff --git a/libc.h b/libc.h
new file mode 100644
index 0000000..ced5cb4
--- /dev/null
+++ b/libc.h
@@ -0,0 +1,84 @@
+#ifndef UNDAR_LIBC_H
+#define UNDAR_LIBC_H
+
+/* Use the standard headers when the compiler can report that they exist. */
+#if defined(__has_include)
+#if __has_include(<stdint.h>)
+#define HAVE_STDINT 1
+#endif
+#if __has_include(<stdbool.h>)
+#define HAVE_STDBOOL 1
+#endif
+#if __has_include(<stddef.h>)
+#define HAVE_STDDEF 1
+#endif
+#endif
+
+#ifdef HAVE_STDINT
+#include <stdint.h>
+  typedef uint8_t u8;
+  typedef int8_t i8;
+  typedef uint16_t u16;
+  typedef int16_t i16;
+  typedef uint32_t u32;
+  typedef int32_t i32;
+  typedef float f32;
+#else
+  typedef unsigned char u8;
+  typedef signed char i8;
+  typedef unsigned short u16;
+  typedef signed short i16;
+  typedef unsigned int u32;
+  typedef signed int i32;
+  typedef float f32;
+#endif
+
+#ifdef HAVE_STDBOOL
+#include <stdbool.h>
+#else
+#define true 1
+#define false 0
+typedef u8 bool;
+#endif
+
+#ifdef HAVE_STDDEF
+#include <stddef.h>
+#define nil NULL
+#else
+#define nil ((void*)0)
+#endif
+
+/* Negative limits are parenthesized so they expand safely in expressions. */
+#define I8_MIN (-128)
+#define I8_MAX 127
+#define U8_MAX 255
+
+#define I16_MIN (-32768)
+#define I16_MAX 32767
+#define U16_MAX 65535
+
+/* Written as (-2147483647 - 1) because 2147483648 does not fit in an i32. */
+#define I32_MIN (-2147483647 - 1)
+#define I32_MAX 2147483647
+#define U32_MAX 4294967295U
+
+/* Scale between f32 and the i32 "real" representation (65536 = 2^16). */
+#define FIXED_CONST 65536.0f
+
+#define AS_INT(v) ((i32)(v))
+#define AS_NAT(v) ((u32)(v))
+#define AS_REAL(v) ((i32)(v))
+/* Scale first, then truncate, so the fractional part survives. */
+#define FLOAT_TO_REAL(v) ((i32)((v) * FIXED_CONST))
+#define REAL_TO_FLOAT(v) (((f32)(v)) / FIXED_CONST)
+
+#define USED(x) ((void)(x))
+
+void mcpy(void *to, const void *from, u32 length);
+i32 scpy(char *to, const char *from, u32 length);
+bool seq(const char *s1, const char *s2);
+bool sleq(const char *s1, const char *s2, u32 length);
+u32 slen(const char *str);
+u32 snlen(const char *str, u32 max_len);
+
+#endif
diff --git a/main.c b/main.c
index 74a813a..099815b 100644
--- a/main.c
+++ b/main.c
@@ -1,13 +1,13 @@
 #include
 #include
 
-#define EMBED_FILE(name) \
+#define EMBED_FILE(name)                   \
   void emit_##name(const char *filename) { \
-      FILE *f = fopen(filename, "wb"); \
-      if (f) { \
-          fwrite(name, 1, name##_len, f); \
-          fclose(f); \
-      } \
+    FILE *f = fopen(filename, "wb");       \
+    if (f) {                               \
+      fwrite(name, 1, name##_len, f);      \
+      fclose(f);                           \
+    }                                      \
   }
 
 int main(int argc, char **argv) {
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..a160a03
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,62 @@
+#include "parser.h"
+
+/* Push `t`; returns false when the stack is already at capacity. */
+bool push(TokenStack *ts, Token t) {
+  if (ts->count >= ts->capacity) return false;
+  ts->stack[ts->count++] = t;
+  return true;
+}
+
+/* Remove and return the top token, or a TOKEN_ERROR token when empty. */
+Token pop(TokenStack *ts) {
+  if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
+  return ts->stack[--ts->count];
+}
+
+/* Return the top token without removing it, or TOKEN_ERROR when empty. */
+Token top(TokenStack *ts) {
+  if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
+  return ts->stack[ts->count - 1];
+}
+
+/* Append `t` to the ring buffer; returns false when the queue is full. */
+bool enqueue(TokenQueue *tq, Token t) {
+  if (tq->count >= tq->capacity) return false;
+
+  tq->queue[tq->end] = t;
+  tq->end = (tq->end + 1) % tq->capacity; // Wrap around
+  tq->count++;
+  return true;
+}
+
+/* Remove and return the oldest token, or TOKEN_ERROR when empty. */
+Token dequeue(TokenQueue *tq) {
+  if (tq->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
+
+  Token t = tq->queue[tq->start];
+  tq->start = (tq->start + 1) % tq->capacity; // Wrap around
+  tq->count--;
+  return t;
+}
+
+/* Return the oldest token without removing it, or TOKEN_ERROR when empty. */
+Token peek_queue(TokenQueue *tq) {
+  if (tq->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
+  return tq->queue[tq->start];
+}
+
+/* Not implemented yet; reports failure rather than an indeterminate value. */
+bool expression(void) {
+  return false;
+}
+
+/* Not implemented yet: the operator stack and output queue are unused. */
+bool compile(char *source) {
+  TokenStack operators = {nil, 0, 0};
+  TokenQueue output = {nil, 0, 0, 0, 0};
+
+  USED(source);
+  USED(operators);
+  USED(output);
+  return true;
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..b9174bb
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,114 @@
+#ifndef UNDAR_PARSER_H
+#define UNDAR_PARSER_H
+
+#include "libc.h"
+#include "lexer.h"
+
+typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
+typedef enum {
+  VOID,
+  BOOL,
+  I8,
+  I16,
+  I32,
+  U8,
+  U16,
+  U32,
+  F8,
+  F16,
+  F32,
+  STR,
+  PLEX,
+  ARRAY,
+  FUNCTION
+} SymbolType;
+
+typedef struct symbol_s Symbol;
+typedef struct symbol_tab_s SymbolTable;
+typedef struct value_type_s ValueType;
+typedef struct plex_fields_tab_s PlexFieldsTable;
+typedef struct plex_def_s PlexDef;
+typedef struct plex_tab_s PlexTable;
+typedef struct scope_s Scope;
+typedef struct scope_tab_s ScopeTable;
+typedef struct token_stack_s TokenStack;
+typedef struct queue_s TokenQueue;
+
+struct value_type_s {
+  SymbolType type;
+  u32 name;
+  u32 size;
+  u32 table_ref; // if it is a heap object
+};
+
+struct plex_def_s {
+  u32 name;
+  u32 size;
+  u32 field_ref_start;
+  u32 field_count;
+};
+
+struct plex_fields_tab_s {
+  u32 *plex_refs;
+  ValueType *fields;
+  u32 count;
+  u32 capacity;
+};
+
+struct plex_tab_s {
+  PlexDef *symbols;
+  u32 count;
+  u32 capacity;
+};
+
+#define MAX_SYMBOL_NAME_LENGTH 64
+struct symbol_s {
+  char name[MAX_SYMBOL_NAME_LENGTH];
+  u8 name_length;
+  SymbolType type;
+  ScopeType scope;
+  u32 ref; // vm->mp if global, vm->pc local, register if var
+  u32 size; // size of symbol
+};
+
+#define MAX_SYMBOLS 256
+struct symbol_tab_s {
+  Symbol symbols[MAX_SYMBOLS];
+  /* u16, not u8: the count must be able to reach MAX_SYMBOLS (256). */
+  u16 count;
+  i32 parent;
+};
+
+struct scope_tab_s {
+  SymbolTable *scopes;
+  u32 count;
+  u32 capacity;
+  i32 scope_ref;
+  u32 depth;
+};
+
+/* Bounded stack of tokens: `count` used slots out of `capacity`. */
+struct token_stack_s {
+  Token *stack;
+  i32 capacity;
+  i32 count;
+};
+
+/* Bounded token queue: `start` and `end` are indices into `queue`. */
+struct queue_s {
+  Token *queue;
+  i32 capacity;
+  i32 start;
+  i32 end;
+  i32 count;
+};
+
+bool push(TokenStack *ts, Token t);
+Token pop(TokenStack *ts);
+Token top(TokenStack *ts);
+bool enqueue(TokenQueue *tq, Token t);
+Token dequeue(TokenQueue *tq);
+Token peek_queue(TokenQueue *tq);
+bool compile(char *source);
+
+#endif