Add initial compiler

This commit is contained in:
zongor 2026-03-02 21:35:45 -08:00
parent 7dd2fd521e
commit 6310390cc4
8 changed files with 959 additions and 6 deletions

0
emit/emit_c.c Normal file
View File

542
lexer.c Normal file
View File

@@ -0,0 +1,542 @@
#include <string.h>
#include "lexer.h"
/* Global lexer state: a cursor pair over a NUL-terminated source buffer. */
typedef struct {
  const char *start;   /* first character of the token currently being scanned */
  const char *current; /* next character to be consumed */
  i32 line;            /* 1-based line number for diagnostics */
} Lexer;

/* Single global instance: this lexer API is not reentrant or thread-safe. */
Lexer lexer;

/* Point the lexer at `source` and reset the line counter.
 * `source` must remain valid for as long as tokens are consumed, since
 * Token.start aliases into it. */
void init_lexer(const char *source) {
  lexer.start = source;
  lexer.current = source;
  lexer.line = 1;
}
/* Character classification and cursor primitives for the global lexer. */

/* Letters and underscore may start or continue an identifier. */
static bool is_alpha(char c) {
  if (c == '_')
    return true;
  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}

/* ASCII decimal digit. */
static bool is_digit(char c) { return '0' <= c && c <= '9'; }

/* True once the cursor sits on the terminating NUL. */
static bool is_at_end() { return lexer.current[0] == '\0'; }

/* Consume and return the character under the cursor. */
static char advance() {
  char consumed = *lexer.current;
  lexer.current++;
  return consumed;
}

/* Look at the current character without consuming it. */
char peek() { return lexer.current[0]; }

/* Look one character past the cursor; '\0' at end of input. */
static char peek_next() {
  return is_at_end() ? '\0' : lexer.current[1];
}

/* Consume the current character only if it equals `expected`. */
static bool match(char expected) {
  if (is_at_end() || *lexer.current != expected)
    return false;
  lexer.current++;
  return true;
}
/* Build a token of `type` spanning [lexer.start, lexer.current).
 * The token aliases the source buffer; it is not NUL-terminated. */
static Token make_token(TokenType type) {
  Token tok;
  tok.line = lexer.line;
  tok.start = lexer.start;
  tok.length = (i32)(lexer.current - lexer.start);
  tok.type = type;
  return tok;
}

/* Build a TOKEN_ERROR whose text is the (static) `message` string rather
 * than a slice of the source. */
static Token error_token(const char *message) {
  Token tok;
  tok.line = lexer.line;
  tok.start = message;
  tok.length = (i32)strlen(message);
  tok.type = TOKEN_ERROR;
  return tok;
}
static void skip_whitespace() {
for (;;) {
char c = peek();
switch (c) {
case ' ':
case '\r':
case '\t':
advance();
break;
case '\n':
lexer.line++;
advance();
break;
case '/':
if (peek_next() == '/') {
// Single-line comment: skip until newline or end of file
advance();
while (peek() != '\n' && !is_at_end())
advance();
} else if (peek_next() == '*') {
// Multi-line comment: skip until '*/' or end of file
advance();
advance();
while (!is_at_end()) {
if (peek() == '\n')
lexer.line++;
if (peek() == '*' && peek_next() == '/') {
advance();
advance();
break; // Exit loop, comment ended
}
advance();
}
} else {
return; // Not a comment, let tokenization handle it
}
break;
default:
return;
}
}
}
/* Keyword-trie leaf test: the current lexeme is keyword `type` iff its total
 * length is start+length and the bytes from offset `start` equal `rest`. */
static TokenType check_keyword(i32 start, i32 length, const char *rest,
                               TokenType type) {
  i32 lexeme_len = (i32)(lexer.current - lexer.start);
  bool same_len = lexeme_len == start + length;
  if (same_len && memcmp(lexer.start + start, rest, length) == 0)
    return type;
  return TOKEN_IDENTIFIER;
}
/* Classify the identifier in [lexer.start, lexer.current) as a keyword,
 * a builtin type name, or TOKEN_IDENTIFIER, via a hand-rolled trie switch.
 * Fixes: "f32" previously used check_keyword(1, 1, "2"), which requires a
 * 2-character lexeme and so never matched; "init" previously used
 * check_keyword(3, 2, "t"), which requires a 5-character lexeme and so
 * never matched (compare the working "i32" and "int" cases). */
static TokenType identifierType() {
  switch (lexer.start[0]) {
  case 'a':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'n':
        return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND);
      case 's':
        return check_keyword(2, 0, "", TOKEN_KEYWORD_AS);
      }
    }
    break;
  case 'b':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'y':
        return check_keyword(2, 2, "te", TOKEN_TYPE_U8);
      case 'o':
        /* NOTE(review): "bool" maps to TOKEN_TYPE_U8 even though
         * TOKEN_TYPE_BOOL exists — confirm this is intentional. */
        return check_keyword(2, 2, "ol", TOKEN_TYPE_U8);
      }
    }
    break;
  case 'c':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'l':
        return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE);
      case 'o':
        return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST);
      }
    }
    break;
  case 'd':
    return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO);
  case 'e':
    return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
  case 'f':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'a':
        return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE);
      case 'o':
        return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR);
      case '3':
        /* was check_keyword(1, 1, "2") — could never match "f32" */
        return check_keyword(2, 1, "2", TOKEN_TYPE_REAL);
      }
      /* Anything else starting with 'f' may still be "function". */
      return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN);
    }
    break;
  case 'g':
    return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL);
  case 'i':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'f':
        return check_keyword(2, 0, "", TOKEN_KEYWORD_IF);
      case 's':
        return check_keyword(2, 0, "", TOKEN_KEYWORD_IS);
      case '8':
        return check_keyword(2, 0, "", TOKEN_TYPE_I8);
      case '1':
        return check_keyword(2, 1, "6", TOKEN_TYPE_I16);
      case '3':
        return check_keyword(2, 1, "2", TOKEN_TYPE_INT);
      case 'n':
        if (lexer.current - lexer.start > 2) {
          switch (lexer.start[2]) {
          case 'i':
            /* was check_keyword(3, 2, "t") — could never match "init" */
            return check_keyword(3, 1, "t", TOKEN_KEYWORD_INIT);
          case 't':
            return check_keyword(3, 0, "", TOKEN_TYPE_INT);
          }
        }
        break;
      }
    }
    break;
  case 'l':
    return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP);
  case 'n':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'a':
        return check_keyword(2, 1, "t", TOKEN_TYPE_NAT);
      case 'i':
        return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL);
      }
    }
    break;
  case 'o':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'p':
        return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN);
      case 'r':
        return check_keyword(2, 0, "", TOKEN_OPERATOR_OR);
      }
    }
    break;
  case 'p':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 't':
        return check_keyword(2, 1, "r", TOKEN_TYPE_PTR);
      case 'l':
        return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX);
      }
    }
    break;
  case 'r':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'e':
        if (lexer.current - lexer.start > 2) {
          switch (lexer.start[2]) {
          case 't':
            return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
          case 'a':
            if (lexer.current - lexer.start > 3) {
              switch (lexer.start[3]) {
              case 'd':
                return check_keyword(4, 0, "", TOKEN_KEYWORD_READ);
              case 'l':
                return check_keyword(4, 0, "", TOKEN_TYPE_REAL);
              }
            }
          }
        }
        break;
      }
    }
    break;
  case 's':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 't':
        if (lexer.current - lexer.start > 2) {
          switch (lexer.start[2]) {
          case 'r':
            return check_keyword(3, 0, "", TOKEN_TYPE_STR);
          case 'a':
            return check_keyword(3, 1, "t", TOKEN_KEYWORD_STAT);
          }
        }
      }
    }
    break;
  case 't':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'h':
        return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS);
      case 'r':
        return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE);
      }
    }
    break;
  case 'u':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 's':
        return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE);
      case '8':
        return check_keyword(2, 0, "", TOKEN_TYPE_U8);
      case '1':
        return check_keyword(2, 1, "6", TOKEN_TYPE_U16);
      case '3':
        return check_keyword(2, 1, "2", TOKEN_TYPE_NAT);
      }
    }
    break;
  case 'v':
    return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID);
  case 'w':
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 'h':
        return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE);
      case 'r':
        return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE);
      }
    }
    break;
  }
  return TOKEN_IDENTIFIER;
}
/* Consume the remainder of an identifier (first char already consumed) and
 * classify it as a keyword or plain identifier. */
static Token identifier() {
  for (;;) {
    char c = peek();
    if (!is_alpha(c) && !is_digit(c))
      break;
    advance();
  }
  return make_token(identifierType());
}
static Token number() {
while (is_digit(peek()))
advance();
/* Look for a fractional part. */
if (peek() == '.' && is_digit(peek_next())) {
/* Consume the ".". */
advance();
while (is_digit(peek()))
advance();
return make_token(TOKEN_LITERAL_REAL);
}
return make_token(TOKEN_LITERAL_INT);
}
/* Consume a double-quoted string literal (opening quote already consumed).
 * Newlines are allowed inside and counted.  NOTE(review): no escape-sequence
 * handling — a literal cannot contain '"' — confirm that is intended. */
static Token string() {
  while (!is_at_end()) {
    char c = peek();
    if (c == '"')
      break;
    if (c == '\n')
      lexer.line++;
    advance();
  }
  if (is_at_end())
    return error_token("Unterminated string.");
  advance(); /* consume the closing quote */
  return make_token(TOKEN_LITERAL_STR);
}
/* Scan and return the next token from the global lexer.  Returns TOKEN_EOF
 * at end of input and TOKEN_ERROR (with a static message as its text) on an
 * unrecognized character or unterminated string. */
Token next_token() {
  skip_whitespace();
  lexer.start = lexer.current;
  if (is_at_end())
    return make_token(TOKEN_EOF);
  char c = advance();
  if (is_alpha(c))
    return identifier();
  char next = peek();
  /* NOTE(review): a '-' immediately followed by a digit is folded into the
   * numeric literal, so "a-1" lexes as IDENTIFIER then LITERAL "-1" with no
   * MINUS token between them — confirm the grammar wants this. */
  if ((c == '-' && is_digit(next)) || is_digit(c))
    return number();
  switch (c) {
  case '(':
    return make_token(TOKEN_LPAREN);
  case ')':
    return make_token(TOKEN_RPAREN);
  case '{':
    return make_token(TOKEN_LBRACE);
  case '}':
    return make_token(TOKEN_RBRACE);
  case '[':
    return make_token(TOKEN_LBRACKET);
  case ']':
    return make_token(TOKEN_RBRACKET);
  case ';':
    return make_token(TOKEN_SEMICOLON);
  case ',':
    return make_token(TOKEN_COMMA);
  case '.':
    return make_token(TOKEN_DOT);
  case '-':
    /* reached only when '-' is not followed by a digit */
    return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS);
  case '+':
    return make_token(TOKEN_PLUS);
  case '/':
    return make_token(TOKEN_SLASH);
  case '&':
    return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND);
  case '#':
    return make_token(TOKEN_MESH);
  case '$':
    return make_token(TOKEN_BIG_MONEY);
  case '*':
    return make_token(TOKEN_STAR);
  case '!':
    return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG);
  case '=':
    return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ);
  case '<':
    return make_token(match('=') ? TOKEN_LTE : TOKEN_LT);
  case '>':
    return make_token(match('=') ? TOKEN_GTE : TOKEN_GT);
  case '"':
    return string();
  }
  /* NOTE(review): TOKEN_COLON, TOKEN_PIPE, TOKEN_QUESTION and TOKEN_CARET
   * exist in the enum but ':', '|', '?', '^' are not handled here, so they
   * currently produce this error — confirm whether they should be added. */
  return error_token("Unexpected character.");
}
/* Map a TokenType to a stable debug name.  Names are the enum constant
 * minus the "TOKEN_" prefix.  Fixes the previously inconsistent strings
 * ("IS"/"AS", "TOKEN_KEYWORD_OPEN", ...) and adds the enum members that
 * fell through to "UNKNOWN_TOKEN" (I8/I16/U8/U16/BOOL/VOID types,
 * LOOP/DO keywords, PIPE, QUESTION, CARET, COLON variants). */
const char *token_type_to_string(TokenType type) {
  switch (type) {
  case TOKEN_ERROR:
    return "ERROR";
  case TOKEN_EOF:
    return "EOF";
  case TOKEN_IDENTIFIER:
    return "IDENTIFIER";
  case TOKEN_LITERAL_INT:
    return "LITERAL_INT";
  case TOKEN_LITERAL_NAT:
    return "LITERAL_NAT";
  case TOKEN_LITERAL_REAL:
    return "LITERAL_REAL";
  case TOKEN_LITERAL_STR:
    return "LITERAL_STR";
  case TOKEN_TYPE_I8:
    return "TYPE_I8";
  case TOKEN_TYPE_I16:
    return "TYPE_I16";
  case TOKEN_TYPE_INT:
    return "TYPE_INT";
  case TOKEN_TYPE_U8:
    return "TYPE_U8";
  case TOKEN_TYPE_U16:
    return "TYPE_U16";
  case TOKEN_TYPE_NAT:
    return "TYPE_NAT";
  case TOKEN_TYPE_REAL:
    return "TYPE_REAL";
  case TOKEN_TYPE_STR:
    return "TYPE_STR";
  case TOKEN_TYPE_BOOL:
    return "TYPE_BOOL";
  case TOKEN_TYPE_VOID:
    return "TYPE_VOID";
  case TOKEN_TYPE_PTR:
    return "TYPE_PTR";
  case TOKEN_KEYWORD_PLEX:
    return "KEYWORD_PLEX";
  case TOKEN_KEYWORD_FN:
    return "KEYWORD_FN";
  case TOKEN_KEYWORD_CONST:
    return "KEYWORD_CONST";
  case TOKEN_KEYWORD_IF:
    return "KEYWORD_IF";
  case TOKEN_KEYWORD_IS:
    return "KEYWORD_IS";
  case TOKEN_KEYWORD_AS:
    return "KEYWORD_AS";
  case TOKEN_KEYWORD_ELSE:
    return "KEYWORD_ELSE";
  case TOKEN_KEYWORD_WHILE:
    return "KEYWORD_WHILE";
  case TOKEN_KEYWORD_FOR:
    return "KEYWORD_FOR";
  case TOKEN_KEYWORD_RETURN:
    return "KEYWORD_RETURN";
  case TOKEN_KEYWORD_USE:
    return "KEYWORD_USE";
  case TOKEN_KEYWORD_INIT:
    return "KEYWORD_INIT";
  case TOKEN_KEYWORD_THIS:
    return "KEYWORD_THIS";
  case TOKEN_KEYWORD_GLOBAL:
    return "KEYWORD_GLOBAL";
  case TOKEN_KEYWORD_OPEN:
    return "KEYWORD_OPEN";
  case TOKEN_KEYWORD_READ:
    return "KEYWORD_READ";
  case TOKEN_KEYWORD_WRITE:
    return "KEYWORD_WRITE";
  case TOKEN_KEYWORD_STAT:
    return "KEYWORD_STAT";
  case TOKEN_KEYWORD_CLOSE:
    return "KEYWORD_CLOSE";
  case TOKEN_KEYWORD_LOOP:
    return "KEYWORD_LOOP";
  case TOKEN_KEYWORD_DO:
    return "KEYWORD_DO";
  case TOKEN_KEYWORD_NIL:
    return "KEYWORD_NIL";
  case TOKEN_KEYWORD_TRUE:
    return "KEYWORD_TRUE";
  case TOKEN_KEYWORD_FALSE:
    return "KEYWORD_FALSE";
  case TOKEN_OPERATOR_NOT:
    return "OPERATOR_NOT";
  case TOKEN_OPERATOR_AND:
    return "OPERATOR_AND";
  case TOKEN_OPERATOR_OR:
    return "OPERATOR_OR";
  case TOKEN_BANG:
    return "BANG";
  case TOKEN_BANG_EQ:
    return "BANG_EQ";
  case TOKEN_EQ:
    return "EQ";
  case TOKEN_EQ_EQ:
    return "EQ_EQ";
  case TOKEN_AND:
    return "AND";
  case TOKEN_AND_AND:
    return "AND_AND";
  case TOKEN_PIPE:
    return "PIPE";
  case TOKEN_PIPE_PIPE:
    return "PIPE_PIPE";
  case TOKEN_QUESTION:
    return "QUESTION";
  case TOKEN_QUESTION_DOT:
    return "QUESTION_DOT";
  case TOKEN_PLUS:
    return "PLUS";
  case TOKEN_MINUS:
    return "MINUS";
  case TOKEN_STAR:
    return "STAR";
  case TOKEN_SLASH:
    return "SLASH";
  case TOKEN_MESH:
    return "MESH";
  case TOKEN_BIG_MONEY:
    return "BIG_MONEY";
  case TOKEN_GT:
    return "GT";
  case TOKEN_LT:
    return "LT";
  case TOKEN_GTE:
    return "GTE";
  case TOKEN_LTE:
    return "LTE";
  case TOKEN_DOT:
    return "DOT";
  case TOKEN_COMMA:
    return "COMMA";
  case TOKEN_COLON:
    return "COLON";
  case TOKEN_CARET:
    return "CARET";
  case TOKEN_SEMICOLON:
    return "SEMICOLON";
  case TOKEN_LPAREN:
    return "LPAREN";
  case TOKEN_RPAREN:
    return "RPAREN";
  case TOKEN_LBRACE:
    return "LBRACE";
  case TOKEN_RBRACE:
    return "RBRACE";
  case TOKEN_LBRACKET:
    return "LBRACKET";
  case TOKEN_RBRACKET:
    return "RBRACKET";
  case TOKEN_ARROW_RIGHT:
    return "ARROW_RIGHT";
  default:
    return "UNKNOWN_TOKEN";
  }
}

98
lexer.h Normal file
View File

@@ -0,0 +1,98 @@
#ifndef UNDAR_LEXER_H
#define UNDAR_LEXER_H
#include "libc.h"

/* Every token kind the lexer can produce.  Grouped by category; do not
 * reorder without checking token_type_to_string(). */
typedef enum {
  /* sentinels */
  TOKEN_ERROR,
  TOKEN_EOF,
  TOKEN_IDENTIFIER,
  /* literals */
  TOKEN_LITERAL_INT,
  TOKEN_LITERAL_NAT,
  TOKEN_LITERAL_REAL,
  TOKEN_LITERAL_STR,
  /* builtin type names */
  TOKEN_TYPE_I8,
  TOKEN_TYPE_I16,
  TOKEN_TYPE_INT,
  TOKEN_TYPE_U8,
  TOKEN_TYPE_U16,
  TOKEN_TYPE_NAT,
  TOKEN_TYPE_REAL,
  TOKEN_TYPE_STR,
  TOKEN_TYPE_BOOL, /* NOTE(review): lexer maps "bool" to TOKEN_TYPE_U8 — confirm */
  TOKEN_TYPE_VOID,
  TOKEN_TYPE_PTR,
  /* keywords */
  TOKEN_KEYWORD_PLEX,
  TOKEN_KEYWORD_FN,
  TOKEN_KEYWORD_CONST,
  TOKEN_KEYWORD_IF,
  TOKEN_KEYWORD_IS,
  TOKEN_KEYWORD_AS,
  TOKEN_KEYWORD_ELSE,
  TOKEN_KEYWORD_WHILE,
  TOKEN_KEYWORD_FOR,
  TOKEN_KEYWORD_RETURN,
  TOKEN_KEYWORD_USE,
  TOKEN_KEYWORD_INIT,
  TOKEN_KEYWORD_THIS,
  TOKEN_KEYWORD_GLOBAL,
  TOKEN_KEYWORD_OPEN,
  TOKEN_KEYWORD_READ,
  TOKEN_KEYWORD_WRITE,
  TOKEN_KEYWORD_STAT,
  TOKEN_KEYWORD_CLOSE,
  TOKEN_KEYWORD_LOOP,
  TOKEN_KEYWORD_DO,
  TOKEN_KEYWORD_NIL,
  TOKEN_KEYWORD_TRUE,
  TOKEN_KEYWORD_FALSE,
  /* word operators */
  TOKEN_OPERATOR_NOT, /* NOTE(review): never produced by the lexer yet */
  TOKEN_OPERATOR_AND,
  TOKEN_OPERATOR_OR,
  /* punctuation and symbolic operators */
  TOKEN_BANG,
  TOKEN_BANG_EQ,
  TOKEN_EQ,
  TOKEN_EQ_EQ,
  TOKEN_AND,
  TOKEN_AND_AND,
  TOKEN_PIPE,     /* NOTE(review): '|' not yet handled in next_token() */
  TOKEN_PIPE_PIPE,
  TOKEN_QUESTION, /* NOTE(review): '?' not yet handled in next_token() */
  TOKEN_QUESTION_DOT,
  TOKEN_PLUS,
  TOKEN_MINUS,
  TOKEN_STAR,
  TOKEN_SLASH,
  TOKEN_MESH,      /* '#' */
  TOKEN_BIG_MONEY, /* '$' */
  TOKEN_GT,
  TOKEN_LT,
  TOKEN_GTE,
  TOKEN_LTE,
  TOKEN_DOT,
  TOKEN_COMMA,
  TOKEN_COLON, /* NOTE(review): ':' not yet handled in next_token() */
  TOKEN_CARET, /* NOTE(review): '^' not yet handled in next_token() */
  TOKEN_SEMICOLON,
  TOKEN_LPAREN,
  TOKEN_RPAREN,
  TOKEN_LBRACE,
  TOKEN_RBRACE,
  TOKEN_LBRACKET,
  TOKEN_RBRACKET,
  TOKEN_ARROW_RIGHT
} TokenType;

/* A token is a non-owning view into the source buffer passed to
 * init_lexer(); `start` is NOT NUL-terminated, use `length`. */
typedef struct {
  TokenType type;
  const char *start;
  i32 length;
  i32 line; /* 1-based source line, for diagnostics */
} Token;

/* Reset the (global, non-reentrant) lexer to scan `source`. */
void init_lexer(const char *source);
/* Produce the next token; TOKEN_EOF at end, TOKEN_ERROR on bad input. */
Token next_token();
/* Stable debug name for a token kind. */
const char* token_type_to_string(TokenType type);
/* Current character under the lexer cursor (not consumed). */
char peek();
#endif

72
libc.c Normal file
View File

@@ -0,0 +1,72 @@
#include "libc.h"
/* Copy `length` bytes from `from` to `to`.  Nil pointers are a no-op.
 * Improvement: the original always copied forward, which corrupts data when
 * the regions overlap and `to` is inside `from`; copy backward in that case
 * so mcpy has memmove semantics for overlapping buffers. */
void mcpy(void *to, void *from, u32 length) {
  u8 *dest, *src;
  if (to == nil || from == nil)
    return;
  dest = (u8 *)to;
  src = (u8 *)from;
  if (dest == src || length == 0)
    return;
  if (dest < src) {
    /* Forward copy: safe when dest precedes (or does not overlap) src. */
    while (length-- > 0)
      *(dest++) = *(src++);
  } else {
    /* Backward copy: safe when dest overlaps the tail of src. */
    while (length > 0) {
      length--;
      dest[length] = src[length];
    }
  }
}
/* Bounded string copy into a buffer of `length` bytes; the result is always
 * NUL-terminated when length > 0.  Returns -1 on nil arguments, 0 otherwise
 * (a zero-length buffer is left untouched). */
i32 scpy(char *to, const char *from, u32 length) {
  u32 n = 0;
  if (to == nil || from == nil)
    return -1;
  if (length == 0)
    return 0;
  while (n + 1 < length && from[n] != '\0') {
    to[n] = from[n];
    n++;
  }
  to[n] = '\0';
  return 0;
}
/* Full string equality.  Two nil pointers compare equal; nil never equals a
 * real string. */
bool seq(const char *s1, const char *s2) {
  if (s1 == nil || s2 == nil)
    return s1 == s2;
  while (*s1 != '\0' && *s2 != '\0') {
    if (*s1 != *s2)
      return false;
    s1++;
    s2++;
  }
  /* Equal only if both terminated at the same point. */
  return *s1 == *s2;
}
/* Length-bounded string equality: true if the first `length` characters
 * match, or if both strings end (equal) before `length` characters.  Two nil
 * pointers compare equal; nil never equals a real string. */
bool sleq(const char *s1, const char *s2, u32 length) {
  u32 seen = 0;
  if (s1 == nil || s2 == nil)
    return s1 == s2;
  for (; seen < length && *s1 != '\0' && *s2 != '\0'; s1++, s2++, seen++) {
    if (*s1 != *s2)
      return false;
  }
  if (seen == length)
    return true;
  /* Ran off the end of one string: equal only if both ended. */
  return *s1 == '\0' && *s2 == '\0';
}
u32 slen(const char *str) {
u32 i;
if (str == nil) {return 0;}
for (i = 0; str[i] != '\0'; i++) {
;
}
return i;
}
u32 snlen(const char *str, u32 max_len) {
u32 i;
if (str == nil) {return 0;}
for (i = 0; i < max_len && str[i] != '\0'; i++) {
;
}
return i;
}

79
libc.h Normal file
View File

@@ -0,0 +1,79 @@
#ifndef UNDAR_LIBC_H
#define UNDAR_LIBC_H
/* Minimal libc replacement: fixed-width typedefs, bool/nil shims, numeric
 * limits, and Q16.16 fixed-point ("real") conversion macros. */
#if defined(__has_include)
#if __has_include(<stdint.h>)
#define HAVE_STDINT 1
#endif
#if __has_include(<stdbool.h>)
#define HAVE_STDBOOL 1
#endif
#if __has_include(<stddef.h>)
#define HAVE_STDDEF 1
#endif
#endif
#ifdef HAVE_STDINT
#include <stdint.h>
typedef uint8_t u8;
typedef int8_t i8;
typedef uint16_t u16;
typedef int16_t i16;
typedef uint32_t u32;
typedef int32_t i32;
typedef float f32;
#else
/* Fallback widths assume the common ILP32/LP64 ABIs. */
typedef unsigned char u8;
typedef signed char i8;
typedef unsigned short u16;
typedef signed short i16;
typedef unsigned int u32;
typedef signed int i32;
typedef float f32;
#endif
#ifdef HAVE_STDBOOL
#include <stdbool.h>
#else
#define true 1
#define false 0
typedef u8 bool;
#endif
#ifdef HAVE_STDDEF
#include <stddef.h>
#define nil NULL
#else
#define nil ((void*)0)
#endif
/* Parenthesize negative limits so e.g. `x - I8_MIN` expands correctly. */
#define I8_MIN (-128)
#define I8_MAX 127
#define U8_MAX 255
#define I16_MIN (-32768)
#define I16_MAX 32767
#define U16_MAX 65535
/* -2147483648 written directly is unary minus applied to 2147483648, which
 * does not fit in a 32-bit int; build the value arithmetically instead. */
#define I32_MIN (-2147483647 - 1)
#define I32_MAX 2147483647
/* U suffix keeps the constant unsigned instead of promoting to long. */
#define U32_MAX 4294967295U
/* Q16.16 fixed point: 16 integer bits, 16 fractional bits. */
#define FIXED_CONST 65536.0f
#define AS_INT(v) ((i32)(v))
#define AS_NAT(v) ((u32)(v))
#define AS_REAL(v) ((i32)(v))
/* Scale FIRST, then truncate.  The previous definition truncated first
 * ((i32)(v) * FIXED_CONST), discarding the fractional part and yielding a
 * float, which broke the round trip with REAL_TO_FLOAT. */
#define FLOAT_TO_REAL(v) ((i32)((v) * FIXED_CONST))
#define REAL_TO_FLOAT(v) (((f32)(v)) / FIXED_CONST)
/* Silence unused-variable/parameter warnings. */
#define USED(x) ((void)(x))
/* Copy n bytes; handles overlapping regions (memmove semantics). */
void mcpy(void *dest, void *src, u32 n);
/* Bounded, always-NUL-terminating string copy; -1 on nil arguments. */
i32 scpy(char* to, const char *from, u32 length);
/* Full string equality; nil == nil. */
bool seq(const char *s1, const char *s2);
/* String equality over at most `length` characters; nil == nil. */
bool sleq(const char *s1, const char *s2, u32 length);
/* String length; nil counts as 0. */
u32 slen(const char *str);
/* String length scanning at most max_len characters; nil counts as 0. */
u32 snlen(const char *str, u32 max_len);
#endif

12
main.c
View File

@@ -1,13 +1,13 @@
#include <stdio.h>
#include <stdlib.h>
#define EMBED_FILE(name) \
#define EMBED_FILE(name) \
void emit_##name(const char *filename) { \
FILE *f = fopen(filename, "wb"); \
if (f) { \
fwrite(name, 1, name##_len, f); \
fclose(f); \
} \
FILE *f = fopen(filename, "wb"); \
if (f) { \
fwrite(name, 1, name##_len, f); \
fclose(f); \
} \
}
int main(int argc, char **argv) {

51
parser.c Normal file
View File

@@ -0,0 +1,51 @@
#include "parser.h"
/* Push t onto the stack; false when the stack is full. */
bool push(TokenStack *ts, Token t) {
  if (ts->count >= ts->capacity)
    return false;
  ts->stack[ts->count] = t;
  ts->count += 1;
  return true;
}

/* Pop and return the most recent token; TOKEN_ERROR sentinel when empty. */
Token pop(TokenStack *ts) {
  if (ts->count == 0)
    return (Token){TOKEN_ERROR, nil, -1, -1};
  ts->count -= 1;
  return ts->stack[ts->count];
}

/* Return the most recent token without removing it; TOKEN_ERROR when empty. */
Token top(TokenStack *ts) {
  if (ts->count == 0)
    return (Token){TOKEN_ERROR, nil, -1, -1};
  return ts->stack[ts->count - 1];
}
/* Append t at the tail of the ring buffer; false when full. */
bool enqueue(TokenQueue *tq, Token t) {
  if (tq->count >= tq->capacity) return false;
  tq->queue[tq->end] = t;
  tq->end = (tq->end + 1) % tq->capacity; // Wrap around
  tq->count++;
  return true;
}
/* Remove and return the oldest token; TOKEN_ERROR sentinel when empty.
 * (Uses `nil` for the sentinel's start pointer, matching the stack helpers;
 * previously these two used NULL while push/pop/top used nil.) */
Token dequeue(TokenQueue *tq) {
  if (tq->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
  Token t = tq->queue[tq->start];
  tq->start = (tq->start + 1) % tq->capacity; // Wrap around
  tq->count--;
  return t;
}
/* Return the oldest token without removing it; TOKEN_ERROR when empty. */
Token peek_queue(TokenQueue *tq) {
  if (tq->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1};
  return tq->queue[tq->start];
}
bool expression() {
}
bool compile(char *source) {
TokenStack operators;
TokenQueue output;
return true;
}

111
parser.h Normal file
View File

@@ -0,0 +1,111 @@
#ifndef UNDAR_PARSER_H
#define UNDAR_PARSER_H
#include "libc.h"
#include "lexer.h"

/* Where a symbol lives: module-global, function-local, or a register var. */
typedef enum { GLOBAL, LOCAL, VAR } ScopeType;

/* Semantic type of a symbol or value.
 * NOTE(review): F8/F16 have no matching lexer tokens — confirm intended. */
typedef enum {
  VOID,
  BOOL,
  I8,
  I16,
  I32,
  U8,
  U16,
  U32,
  F8,
  F16,
  F32,
  STR,
  PLEX,
  ARRAY,
  FUNCTION
} SymbolType;

/* Forward declarations for the parser's table structures. */
typedef struct symbol_s Symbol;
typedef struct symbol_tab_s SymbolTable;
typedef struct value_type_s ValueType;
typedef struct plex_fields_tab_s PlexFieldsTable;
typedef struct plex_def_s PlexDef;
typedef struct plex_tab_s PlexTable;
typedef struct scope_s Scope;
typedef struct scope_tab_s ScopeTable;
typedef struct token_stack_s TokenStack;
typedef struct queue_s TokenQueue;

/* A resolved value type: kind, interned name, size, and table reference. */
struct value_type_s {
  SymbolType type;
  u32 name;      /* presumably an interned-name index — TODO confirm */
  u32 size;      /* size in bytes */
  u32 table_ref; // if it is a heap object
};

/* Definition of a "plex" (record/struct) type. */
struct plex_def_s {
  u32 name;            /* presumably an interned-name index — TODO confirm */
  u32 size;            /* total size in bytes */
  u32 field_ref_start; /* index of first field in PlexFieldsTable */
  u32 field_count;     /* number of fields */
};

/* Flat side table holding the fields of every plex definition. */
struct plex_fields_tab_s {
  u32 *plex_refs;    /* owning plex index, parallel to `fields` */
  ValueType *fields;
  u32 count;
  u32 capacity;
};

/* Growable table of plex definitions. */
struct plex_tab_s {
  PlexDef *symbols;
  u32 count;
  u32 capacity;
};

#define MAX_SYMBOL_NAME_LENGTH 64
/* One named entity: variable, function, etc. */
struct symbol_s {
  char name[MAX_SYMBOL_NAME_LENGTH];
  u8 name_length;
  SymbolType type;
  ScopeType scope;
  u32 ref; // vm->mp if global, vm->pc local, register if var
  u32 size; // size of symbol
};

#define MAX_SYMBOLS 256
/* Fixed-capacity symbol table; `parent` links to the enclosing scope
 * (-1 presumably means root — TODO confirm). */
struct symbol_tab_s {
  Symbol symbols[MAX_SYMBOLS];
  u8 count;
  i32 parent;
};

/* Stack of symbol tables tracking the current lexical scope. */
struct scope_tab_s {
  SymbolTable *scopes;
  u32 count;
  u32 capacity;
  i32 scope_ref; /* index of the current scope */
  u32 depth;     /* current nesting depth */
};

/* LIFO of tokens (operator stack for shunting-yard style parsing). */
struct token_stack_s {
  Token *stack;
  i32 capacity;
  i32 count;
};

/* Ring-buffer FIFO of tokens (output queue). */
struct queue_s {
  Token *queue;
  i32 capacity;
  i32 start; /* index of oldest element */
  i32 end;   /* index one past newest element (wraps) */
  i32 count;
};

/* Stack ops: sentinel TOKEN_ERROR token returned on empty pop/top. */
bool push(TokenStack *ts, Token t);
Token pop(TokenStack *ts);
Token top(TokenStack *ts);
/* Queue ops: sentinel TOKEN_ERROR token returned on empty dequeue/peek. */
bool enqueue(TokenQueue *tq, Token t);
Token dequeue(TokenQueue *tq);
Token peek_queue(TokenQueue *tq);
/* Compile `source`; currently a stub that always reports success. */
bool compile(char *source);
#endif