Move the common lexer up one directory so that both the assembler and the compiler can use it. Start work on the compiler.

2026-02-21 11:09:22 -08:00 · 2026-02-21 11:09:22 -08:00 · fbff4dd188
parent 65ef3c5426
commit fbff4dd188
12 changed files with 856 additions and 1481 deletions
--- a/10
+++ b/10
@ -90,13 +90,15 @@ VM_SOURCES := \
 ifeq ($(BUILD_MODE), release)
 	PLATFORM_SOURCE := $(ARCH_DIR)/main.c \
 		$(ARCH_DIR)/devices.c\
-		$(SRC_DIR)/tools/assembler/lexer.c \
-		$(SRC_DIR)/tools/assembler/assembler.c 
+		$(SRC_DIR)/tools/lexer.c \
+		$(SRC_DIR)/tools/assembler/assembler.c  \
+		$(SRC_DIR)/tools/compiler/compiler.c
 else
 	PLATFORM_SOURCE := $(ARCH_DIR)/main.c \
 		$(ARCH_DIR)/devices.c \
-		$(SRC_DIR)/tools/assembler/lexer.c \
-		$(SRC_DIR)/tools/assembler/assembler.c 
+		$(SRC_DIR)/tools/lexer.c \
+		$(SRC_DIR)/tools/assembler/assembler.c \
+		$(SRC_DIR)/tools/compiler/compiler.c 
 endif

 # --- OBJECT FILES ---
--- a/src/arch/linux/main.c
+++ b/src/arch/linux/main.c
@ -1,3 +1,4 @@
+#include "../../tools/compiler/compiler.h"
 #include "../../tools/assembler/assembler.h"
 #include "../../vm/vm.h"
 #include "devices.h"
@ -120,15 +121,6 @@ bool loadVM(const char *filename, VM *vm) {
  return true;
 }

-// Function to compile and optionally save
-bool compileAndSave(const char *source_file, const char *output_file, VM *vm) {
-  USED(vm);
-  USED(output_file);
-  USED(source_file);
-
-  return true;
-}
-
 #ifdef STATIC
  #define SCOPES_COUNT 2048
  SymbolTable scopes[SCOPES_COUNT];
@ -210,6 +202,46 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) {
  return true;
 }

+// Compile a source file into the VM and optionally save the resulting image.
+//
+// Reads all of `source_file` into a static buffer, runs compile() on it with a
+// freshly initialised scope table and, when `output_file` is non-NULL, writes
+// the VM out with saveVM().
+//
+// Returns true on success. Returns false if the file cannot be opened,
+// measured or read, if it does not fit in the buffer, if compile() reports
+// failure, or if the VM cannot be saved.
+bool compileAndSave(const char *source_file, const char *output_file, VM *vm) {
+  FILE *f = fopen(source_file, "rb");
+  if (!f) {
+    perror("fopen");
+    return false;
+  }
+
+  // static: the buffer is MAX_SRC_SIZE + 1 bytes and lives outside the stack
+  // frame; as a consequence this function is not reentrant.
+  static char source[MAX_SRC_SIZE + 1];
+
+  // Measure the file. ftell() returns -1 on failure; that value must never
+  // reach fread() as a length, so it is rejected here.
+  long len = -1;
+  if (fseek(f, 0, SEEK_END) == 0) {
+    len = ftell(f);
+  }
+  if (len < 0 || fseek(f, 0, SEEK_SET) != 0) {
+    perror("fseek/ftell");
+    fclose(f);
+    return false;
+  }
+  if (len >= MAX_SRC_SIZE) {
+    fprintf(stderr, "Source is larger than buffer\n");
+    fclose(f);
+    return false;
+  }
+
+  size_t bytes_read = fread(source, 1, (size_t)len, f);
+  bool read_failed = ferror(f) != 0;
+  fclose(f);
+  if (read_failed) {
+    fprintf(stderr, "Failed to read %s\n", source_file);
+    return false;
+  }
+  source[bytes_read] = '\0'; // bytes_read <= len < MAX_SRC_SIZE, so in bounds
+
+  ScopeTable table = {0};
+  symbol_table_init(&table);
+  // compile() is declared to return bool; false is treated as failure here.
+  bool compiled = compile(vm, &table, source);
+#ifndef STATIC
+  free(table.scopes);
+#endif
+  if (!compiled) {
+    fprintf(stderr, "Failed to compile %s\n", source_file);
+    return false;
+  }
+
+  if (output_file) {
+    if (!saveVM(output_file, vm)) {
+      printf("Failed to save VM to %s\n", output_file);
+      return false;
+    }
+    printf("VM saved to %s\n", output_file);
+  }
+  return true;
+}
+
 void scale_mouse_pos(u32 mouse_x, u32 mouse_y, u32 *vm_x, u32 *vm_y) {
  int win_w, win_h;
  SDL_GetWindowSize(screen_data.window, &win_w, &win_h);
--- a/src/tools/assembler/assembler.c
+++ b/src/tools/assembler/assembler.c
--- a/src/tools/assembler/assembler.h
+++ b/src/tools/assembler/assembler.h
@ -1,55 +1,10 @@
 #ifndef UNDAR_IR_ASSEMBLER_H
 #define UNDAR_IR_ASSEMBLER_H

+#include "../lexer.h"
+#include "../codegen.h"
 #include "../../vm/common.h"
 #include "../../vm/opcodes.h"
-#include "lexer.h"
-
-typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
-typedef enum {
-  VOID,
-  BOOL,
-  I8,
-  I16,
-  I32,
-  U8,
-  U16,
-  U32,
-  F8,
-  F16,
-  F32,
-  STR,
-  PLEX,
-  ARRAY,
-  FUNCTION
-} SymbolType;
-
-typedef struct symbol_s Symbol;
-typedef struct symbol_tab_s SymbolTable;
-typedef struct scope_tab_s ScopeTable;
-
-#define MAX_SYMBOL_NAME_LENGTH 64
-struct symbol_s {
-  char name[MAX_SYMBOL_NAME_LENGTH];
-  u8 name_length;
-  SymbolType type;
-  ScopeType scope;
-  u32 ref;  // vm->mp if global, vm->pc local, register if var 
-  u32 size; // size of symbol
-};
-
-struct symbol_tab_s {
-  Symbol symbols[256];
-  u8 count;
-  i32 parent;
-};
-
-struct scope_tab_s {
-  SymbolTable *scopes;
-  u32 count;
-  u32 capacity;
-  i32 scope_ref;
-};

 void assemble(VM *vm, ScopeTable *st, char *source);
 extern bool table_realloc(ScopeTable *table);/* implement this in arch/ not here */
--- a/src/tools/codegen.h
+++ b/src/tools/codegen.h
@ -0,0 +1,91 @@
+#ifndef UNDAR_CODEGEN_H
+#define UNDAR_CODEGEN_H
+
+#include "../vm/common.h"
+
+/* Scope class of a symbol. Symbol.ref is interpreted according to this value
+ * (see the comment on that field). */
+typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
+/* Type tag stored in Symbol.type and ValueType.type.
+ * NOTE(review): the I/U/F names look like signed/unsigned/float widths in
+ * bits -- confirm against the code that consumes them. */
+typedef enum {
+  VOID,
+  BOOL,
+  I8,
+  I16,
+  I32,
+  U8,
+  U16,
+  U32,
+  F8,
+  F16,
+  F32,
+  STR,
+  PLEX,
+  ARRAY,
+  FUNCTION
+} SymbolType;
+
+typedef struct symbol_s Symbol;
+typedef struct symbol_tab_s SymbolTable;
+typedef struct value_type_s ValueType;
+typedef struct plex_fields_tab_s PlexFieldsTable;
+typedef struct plex_def_s PlexDef;
+typedef struct plex_tab_s PlexTable;
+typedef struct scope_s Scope;
+typedef struct scope_tab_s ScopeTable;
+
+#define MAX_SYMBOL_NAME_LENGTH 64
+
+/* Describes the type of a value: a SymbolType tag, a name, and a size.
+ * Used as the element type of PlexFieldsTable.fields. */
+struct value_type_s {
+  SymbolType type;
+  char name[MAX_SYMBOL_NAME_LENGTH];
+  u8 name_length; /* presumably the number of bytes of `name` in use -- confirm */
+  u32 size;
+  u32 table_ref; // if it is a heap object
+};
+
+/* Table of plex fields, held as two pointer members (`plex_refs`, `fields`).
+ * NOTE(review): `count`/`capacity` presumably track used vs. allocated
+ * entries, and `plex_refs` presumably parallels `fields` -- confirm against
+ * the code that fills this table. */
+struct plex_fields_tab_s {
+  u32 *plex_refs;
+  ValueType *fields;
+  u32 count;
+  u32 capacity;
+};
+
+/* Definition of one plex: its name, its size, and a run of fields given by
+ * `field_ref_start` and `field_count`.
+ * NOTE(review): the field run presumably indexes into a PlexFieldsTable --
+ * confirm. */
+struct plex_def_s {
+  char name[MAX_SYMBOL_NAME_LENGTH];
+  u8 name_length;
+  u32 size;
+  u32 field_ref_start;
+  u32 field_count;
+};
+
+/* Table of PlexDef entries reached through the `symbols` pointer.
+ * NOTE(review): `count`/`capacity` presumably track used vs. allocated
+ * entries -- confirm. */
+struct plex_tab_s {
+  PlexDef *symbols;
+  u32 count;
+  u32 capacity;
+};
+
+/* A named symbol: its type tag, its scope class, where it lives (`ref`), and
+ * its size. The meaning of `ref` depends on `scope`. */
+struct symbol_s {
+  char name[MAX_SYMBOL_NAME_LENGTH];
+  u8 name_length;
+  SymbolType type;
+  ScopeType scope;
+  u32 ref;  // vm->mp if global, vm->pc local, register if var 
+  u32 size; // size of symbol
+};
+
+/* Fixed-capacity table of up to 256 symbols.
+ * NOTE(review): assuming u8 is an 8-bit unsigned type, `count` tops out at
+ * 255 and cannot represent a completely full table -- confirm how callers
+ * bound it.
+ * NOTE(review): `parent` is signed; presumably an index of the enclosing
+ * table with a negative value meaning "none" -- confirm. */
+struct symbol_tab_s {
+  Symbol symbols[256];
+  u8 count;
+  i32 parent;
+};
+
+/* Collection of symbol tables (`scopes`) plus register bookkeeping.
+ * NOTE(review): `count`/`capacity` presumably track used vs. allocated
+ * SymbolTable entries, and `scope_ref` presumably selects the current scope
+ * -- confirm.
+ * NOTE(review): `depth`, `last_used_registers`, `reg_count` and
+ * `current_reg` are not used in any code visible here; their exact meaning
+ * should be confirmed against the compiler. */
+struct scope_tab_s {
+  SymbolTable *scopes;
+  u32 count;
+  u32 capacity;
+  i32 scope_ref;
+  u32 depth;
+  u8 last_used_registers[1024];
+  u32 reg_count;
+  u8 current_reg;
+};
+
+#endif
--- a/src/tools/compiler/compiler.c
+++ b/src/tools/compiler/compiler.c
@ -3,7 +3,7 @@
 #include "../../vm/libc.h"
 #include "../../vm/opcodes.h"

-#include "parser.h"
+#include "../lexer.h"
 #include "compiler.h"

 /* FIXME: remove these and replace with libc.h instead */
@ -150,6 +150,8 @@ Token next_token_is(TokenType type) {
 * Const .
 */
 bool parse_const(VM *vm, ScopeTable *st) {
+  /* Stub: nothing is parsed yet; the arguments are only passed to USED()
+   * (presumably to silence unused-parameter warnings) and true is returned. */
+  USED(vm);
+  USED(st);
  return true; 
 }

@ -411,7 +413,8 @@ void define_var(ScopeTable *st, Token regType) {
 * Plex .
 */
 void define_plex(VM *vm, ScopeTable *st) {
-
+  /* Stub: plex definitions are not handled yet; the arguments are only
+   * passed to USED() (presumably to silence unused-parameter warnings). */
+  USED(vm);
+  USED(st);
 }


@ -602,7 +605,7 @@ int get_instruction_byte_size(const char *opname) {
 */
 void build_symbol_table(VM *vm, char *source, ScopeTable *st) {
  Token token;
-  init_parser(source);
+  init_lexer(source);
  do {
    token = next_token();
    if (token.type == TOKEN_ERROR) {
@ -839,7 +842,7 @@ void build_symbol_table(VM *vm, char *source, ScopeTable *st) {
 */
 void emit_bytecode(VM *vm, char *source, ScopeTable *st) {
  Token token;
-  init_parser(source);
+  init_lexer(source);
  do {
    token = next_token();
    if (token.type == TOKEN_ERROR) {
--- a/src/tools/compiler/compiler.h
+++ b/src/tools/compiler/compiler.h
@ -1,94 +1,10 @@
 #ifndef UNDAR_COMPILER_H
 #define UNDAR_COMPILER_H

-#import "../../vm/common.h"
+#include "../codegen.h"
+#include "../../vm/common.h"
 #include "../../vm/opcodes.h"

-typedef enum { GLOBAL, LOCAL, VAR } ScopeType;
-typedef enum {
-  VOID,
-  BOOL,
-  I8,
-  I16,
-  I32,
-  U8,
-  U16,
-  U32,
-  F8,
-  F16,
-  F32,
-  STR,
-  PLEX,
-  ARRAY,
-  FUNCTION
-} SymbolType;
-
-typedef struct symbol_s Symbol;
-typedef struct symbol_tab_s SymbolTable;
-typedef struct value_type_s ValueType;
-typedef struct plex_fields_tab_s PlexFieldsTable;
-typedef struct plex_def_s PlexDef;
-typedef struct plex_tab_s PlexTable;
-typedef struct scope_s Scope;
-typedef struct scope_tab_s ScopeTable;
-
-#define MAX_SYMBOL_NAME_LENGTH 64
-
-struct value_type_s {
-  SymbolType type;
-  char name[MAX_SYMBOL_NAME_LENGTH];
-  u8 name_length;
-  u32 size;
-  u32 table_ref; // if it is a heap object
-};
-
-struct plex_fields_tab_s {
-  u32 *plex_refs;
-  ValueType *fields;
-  u32 count;
-  u32 capacity;
-};
-
-struct plex_def_s {
-  char name[MAX_SYMBOL_NAME_LENGTH];
-  u8 name_length;
-  u32 size;
-  u32 field_ref_start;
-  u32 field_count;
-};
-
-struct plex_tab_s {
-  PlexDef *symbols;
-  u32 count;
-  u32 capacity;
-};
-
-struct symbol_s {
-  char name[MAX_SYMBOL_NAME_LENGTH];
-  u8 name_length;
-  SymbolType type;
-  ScopeType scope;
-  u32 ref;  // vm->mp if global, vm->pc local, register if var 
-  u32 size; // size of symbol
-};
-
-struct symbol_tab_s {
-  Symbol symbols[256];
-  u8 count;
-  i32 parent;
-};
-
-struct scope_tab_s {
-  SymbolTable *scopes;
-  u32 count;
-  u32 capacity;
-  i32 scope_ref;
-  u32 depth;
-  u8 last_used_registers[1024];
-  u32 reg_count;
-  u8 current_reg;
-};
-
 bool compile(VM *vm, ScopeTable *st, char *source);
 extern bool table_realloc(ScopeTable *table);/* implement this in arch/ not here */

--- a/src/tools/compiler/parser.c
+++ b/src/tools/compiler/parser.c
@ -1,515 +0,0 @@
-#include <string.h>
-
-#include "../../vm/common.h"
-#include "parser.h"
-
-typedef struct {
-  const char *start;
-  const char *current;
-  int line;
-} Parser;
-
-Parser parser;
-
-void init_parser(const char *source) {
-  parser.start = source;
-  parser.current = source;
-  parser.line = 1;
-}
-
-static bool isAlpha(char c) {
-  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
-}
-
-static bool isDigit(char c) { return c >= '0' && c <= '9'; }
-
-static bool isAtEnd() { return *parser.current == '\0'; }
-
-static char advance() {
-  parser.current++;
-  return parser.current[-1];
-}
-
-static char peek() { return *parser.current; }
-
-static char peekNext() {
-  if (isAtEnd())
-    return '\0';
-  return parser.current[1];
-}
-
-static bool match(char expected) {
-  if (isAtEnd())
-    return false;
-  if (*parser.current != expected)
-    return false;
-  parser.current++;
-  return true;
-}
-
-static Token makeToken(TokenType type) {
-  Token token;
-  token.type = type;
-  token.start = parser.start;
-  token.length = (int)(parser.current - parser.start);
-  token.line = parser.line;
-  return token;
-}
-
-static Token errorToken(const char *message) {
-  Token token;
-  token.type = TOKEN_ERROR;
-  token.start = message;
-  token.length = (int)strlen(message);
-  token.line = parser.line;
-  return token;
-}
-
-static void skipWhitespace() {
-  for (;;) {
-    char c = peek();
-    switch (c) {
-    case ' ':
-    case '\r':
-    case '\t':
-      advance();
-      break;
-    case '\n':
-      parser.line++;
-      advance();
-      break;
-    case '/':
-      if (peekNext() == '/') {
-        // Single-line comment: skip until newline or end of file
-        advance();
-        while (peek() != '\n' && !isAtEnd())
-          advance();
-      } else if (peekNext() == '*') {
-        // Multi-line comment: skip until '*/' or end of file
-        advance();
-        advance();
-        while (!isAtEnd()) {
-          if (peek() == '\n')
-            parser.line++;
-          if (peek() == '*' && peekNext() == '/') {
-            advance();
-            advance();
-            break; // Exit loop, comment ended
-          }
-          advance();
-        }
-      } else {
-        return; // Not a comment, let tokenization handle it
-      }
-      break;
-    default:
-      return;
-    }
-  }
-}
-
-static TokenType checkKeyword(int start, int length, const char *rest,
-                              TokenType type) {
-  if (parser.current - parser.start == start + length &&
-      memcmp(parser.start + start, rest, length) == 0) {
-    return type;
-  }
-
-  return TOKEN_IDENTIFIER;
-}
-
-static TokenType identifierType() {
-  switch (parser.start[0]) {
-  case 'a':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'n':
-        return checkKeyword(2, 1, "d", TOKEN_OPERATOR_AND);
-      case 's':
-        return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS);
-      }
-    }
-    break;
-  case 'c':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'l':
-        return checkKeyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE);
-      case 'o':
-        return checkKeyword(2, 3, "nst", TOKEN_KEYWORD_CONST);
-      }
-    }
-    break;
-  case 'e':
-    return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE);
-  case 'f':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'a':
-        return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE);
-      case 'o':
-        return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR);
-      case '3':
-        return checkKeyword(2, 1, "2", TOKEN_TYPE_REAL);
-      }
-      return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN);
-    }
-    break;
-  case 'i':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'f':
-        return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF);
-      case 's':
-        return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS);
-      case '8':
-        return checkKeyword(2, 0, "", TOKEN_TYPE_I8);
-      case '1':
-        return checkKeyword(2, 1, "6", TOKEN_TYPE_I16);
-      case '3':
-        return checkKeyword(2, 1, "2", TOKEN_TYPE_INT);
-      case 'n':
-        if (parser.current - parser.start > 2) {
-          switch (parser.start[2]) {
-          case 'i':
-            return checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT);
-          case 't':
-            return checkKeyword(3, 0, "", TOKEN_TYPE_INT);
-          }
-        }
-        break;
-      }
-    }
-    break;
-  case 'n':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'a':
-        return checkKeyword(2, 1, "t", TOKEN_TYPE_NAT);
-      case 'i':
-        return checkKeyword(2, 1, "l", TOKEN_KEYWORD_NIL);
-      }
-    }
-    break;
-  case 'o':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'p':
-        return checkKeyword(2, 2, "en", TOKEN_KEYWORD_OPEN);
-      case 'r':
-        return checkKeyword(2, 0, "", TOKEN_OPERATOR_OR);
-      }
-    }
-    break;
-  case 'p':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 't':
-        return checkKeyword(2, 1, "r", TOKEN_TYPE_PTR);     
-      case 'l':
-        return checkKeyword(2, 2, "ex", TOKEN_KEYWORD_PLEX);
-      }
-    }
-    break;
-  case 'r':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'e':
-        if (parser.current - parser.start > 2) {
-          switch (parser.start[2]) {
-          case 'a':
-            return checkKeyword(3, 1, "d", TOKEN_KEYWORD_READ);
-          case 't':
-            return checkKeyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
-          }
-        }
-        break;
-      }
-    }
-    break;
-  case 's':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 't':
-        if (parser.current - parser.start > 2) {
-          switch (parser.start[2]) {
-          case 'r':
-            return checkKeyword(2, 0, "", TOKEN_TYPE_STR);
-          case 'a':
-            return checkKeyword(2, 1, "t", TOKEN_KEYWORD_STAT);
-          }
-        }
-      }
-    }
-    break;
-  case 't':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'h':
-        return checkKeyword(2, 2, "is", TOKEN_KEYWORD_THIS);
-      case 'r':
-        return checkKeyword(2, 2, "ue", TOKEN_KEYWORD_TRUE);
-      }
-    }
-    break;
-  case 'u':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 's':
-        return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE);
-      case '8':
-        return checkKeyword(2, 0, "", TOKEN_TYPE_U8);
-      case '1':
-        return checkKeyword(2, 1, "6", TOKEN_TYPE_U16);
-      case '3':
-        return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT);
-      }
-    }
-    break;
-  case 'w':
-    if (parser.current - parser.start > 1) {
-      switch (parser.start[1]) {
-      case 'h':
-        return checkKeyword(2, 3, "ile", TOKEN_KEYWORD_WHILE);
-      case 'r':
-        return checkKeyword(2, 3, "ite", TOKEN_KEYWORD_WRITE);
-      }
-    }
-    break;
-  }
-
-  return TOKEN_IDENTIFIER;
-}
-
-static Token identifier() {
-  while (isAlpha(peek()) || isDigit(peek()))
-    advance();
-  return makeToken(identifierType());
-}
-
-static Token number() {
-  while (isDigit(peek()))
-    advance();
-
-  /*  Look for a fractional part. */
-  if (peek() == '.' && isDigit(peekNext())) {
-    /*  Consume the ".". */
-    advance();
-
-    while (isDigit(peek()))
-      advance();
-
-    return makeToken(TOKEN_LITERAL_REAL);
-  }
-
-  return makeToken(TOKEN_LITERAL_INT);
-}
-
-static Token string() {
-  while (peek() != '"' && !isAtEnd()) {
-    if (peek() == '\n')
-      parser.line++;
-    advance();
-  }
-
-  if (isAtEnd())
-    return errorToken("Unterminated string.");
-
-  /*  The closing quote. */
-  advance();
-  return makeToken(TOKEN_LITERAL_STR);
-}
-
-Token next_token() {
-  skipWhitespace();
-  parser.start = parser.current;
-
-  if (isAtEnd())
-    return makeToken(TOKEN_EOF);
-
-  char c = advance();
-  if (isAlpha(c))
-    return identifier();
-  if (isDigit(c))
-    return number();
-
-  switch (c) {
-  case '(':
-    return makeToken(TOKEN_LPAREN);
-  case ')':
-    return makeToken(TOKEN_RPAREN);
-  case '{':
-    return makeToken(TOKEN_LBRACE);
-  case '}':
-    return makeToken(TOKEN_RBRACE);
-  case '[':
-    return makeToken(TOKEN_LBRACKET);
-  case ']':
-    return makeToken(TOKEN_RBRACKET);
-  case ';':
-    return makeToken(TOKEN_SEMICOLON);
-  case ',':
-    return makeToken(TOKEN_COMMA);
-  case '.':
-    return makeToken(TOKEN_DOT);
-  case '-':
-    return makeToken(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS);
-  case '+':
-    return makeToken(TOKEN_PLUS);
-  case '/':
-    return makeToken(TOKEN_SLASH);
-  case '&':
-    return makeToken(match('&') ? TOKEN_AND_AND : TOKEN_AND);
-  case '#':
-    return makeToken(TOKEN_MESH);
-  case '$':
-    return makeToken(TOKEN_BIG_MONEY);
-  case '*':
-    return makeToken(TOKEN_STAR);
-  case '!':
-    return makeToken(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG);
-  case '=':
-    return makeToken(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ);
-  case '<':
-    return makeToken(match('=') ? TOKEN_LTE : TOKEN_LT);
-  case '>':
-    return makeToken(match('=') ? TOKEN_GTE : TOKEN_GT);
-  case '"':
-    return string();
-  }
-
-  return errorToken("Unexpected character.");
-}
-
-const char *token_type_to_string(TokenType type) {
-  switch (type) {
-  case TOKEN_EOF:
-    return "EOF";
-  case TOKEN_IDENTIFIER:
-    return "IDENTIFIER";
-  case TOKEN_LITERAL_INT:
-    return "LITERAL_INT";
-  case TOKEN_LITERAL_NAT:
-    return "LITERAL_NAT";
-  case TOKEN_LITERAL_REAL:
-    return "LITERAL_REAL";
-  case TOKEN_LITERAL_STR:
-    return "LITERAL_STR";
-  case TOKEN_TYPE_INT:
-    return "TYPE_INT";
-  case TOKEN_TYPE_NAT:
-    return "TYPE_NAT";
-  case TOKEN_TYPE_REAL:
-    return "TYPE_REAL";
-  case TOKEN_TYPE_STR:
-    return "TYPE_STR";
-  case TOKEN_TYPE_PTR:
-    return "TYPE_PTR";
-  case TOKEN_KEYWORD_PLEX:
-    return "KEYWORD_PLEX";
-  case TOKEN_KEYWORD_FN:
-    return "KEYWORD_FN";
-  case TOKEN_KEYWORD_CONST:
-    return "KEYWORD_CONST";
-  case TOKEN_KEYWORD_IF:
-    return "KEYWORD_IF";
-  case TOKEN_KEYWORD_IS:
-    return "IS";
-  case TOKEN_KEYWORD_AS:
-    return "AS";
-  case TOKEN_KEYWORD_ELSE:
-    return "KEYWORD_ELSE";
-  case TOKEN_KEYWORD_WHILE:
-    return "KEYWORD_WHILE";
-  case TOKEN_KEYWORD_FOR:
-    return "KEYWORD_FOR";
-  case TOKEN_KEYWORD_RETURN:
-    return "KEYWORD_RETURN";
-  case TOKEN_KEYWORD_USE:
-    return "KEYWORD_USE";
-  case TOKEN_KEYWORD_INIT:
-    return "KEYWORD_INIT";
-  case TOKEN_KEYWORD_THIS:
-    return "KEYWORD_THIS";
-  case TOKEN_KEYWORD_OPEN:
-    return "TOKEN_KEYWORD_OPEN";
-  case TOKEN_KEYWORD_READ:
-    return "TOKEN_KEYWORD_READ";
-  case TOKEN_KEYWORD_WRITE:
-    return "TOKEN_KEYWORD_WRITE";
-  case TOKEN_KEYWORD_STAT:
-    return "TOKEN_KEYWORD_STAT";
-  case TOKEN_KEYWORD_CLOSE:
-    return "TOKEN_KEYWORD_CLOSE";
-  case TOKEN_KEYWORD_NIL:
-    return "KEYWORD_NIL";
-  case TOKEN_KEYWORD_TRUE:
-    return "KEYWORD_TRUE";
-  case TOKEN_KEYWORD_FALSE:
-    return "KEYWORD_FALSE";
-  case TOKEN_OPERATOR_NOT:
-    return "OPERATOR_NOT";
-  case TOKEN_OPERATOR_AND:
-    return "OPERATOR_AND";
-  case TOKEN_OPERATOR_OR:
-    return "OPERATOR_OR";
-  case TOKEN_BANG:
-    return "BANG";
-  case TOKEN_BANG_EQ:
-    return "BANG_EQ";
-  case TOKEN_EQ:
-    return "EQ";
-  case TOKEN_EQ_EQ:
-    return "EQ_EQ";
-  case TOKEN_GT:
-    return "GT";
-  case TOKEN_LT:
-    return "LT";
-  case TOKEN_GTE:
-    return "GTE";
-  case TOKEN_LTE:
-    return "LTE";
-  case TOKEN_DOT:
-    return "DOT";
-  case TOKEN_COMMA:
-    return "COMMA";
-  case TOKEN_COLON:
-    return "COLON";
-  case TOKEN_SEMICOLON:
-    return "SEMICOLON";
-  case TOKEN_PLUS:
-    return "PLUS";
-  case TOKEN_MINUS:
-    return "MINUS";
-  case TOKEN_STAR:
-    return "STAR";
-  case TOKEN_SLASH:
-    return "SLASH";
-  case TOKEN_LPAREN:
-    return "LPAREN";
-  case TOKEN_RPAREN:
-    return "RPAREN";
-  case TOKEN_LBRACE:
-    return "LBRACE";
-  case TOKEN_RBRACE:
-    return "RBRACE";
-  case TOKEN_LBRACKET:
-    return "LBRACKET";
-  case TOKEN_RBRACKET:
-    return "RBRACKET";
-  case TOKEN_ARROW_RIGHT:
-    return "ARROW_RIGHT";
-  case TOKEN_MESH:
-    return "MESH";
-  case TOKEN_BIG_MONEY:
-    return "BIG_MONEY";
-  case TOKEN_AND:
-    return "AND";
-  case TOKEN_AND_AND:
-    return "AND_AND";
-  case TOKEN_ERROR:
-    return "ERROR";
-  default:
-    return "UNKNOWN_TOKEN";
-  }
-}
--- a/src/tools/compiler/parser.h
+++ b/src/tools/compiler/parser.h
@ -1,89 +0,0 @@
-#ifndef UNDAR_LEXER_H
-#define UNDAR_LEXER_H
-
-typedef enum {
-  TOKEN_EOF,
-  TOKEN_IDENTIFIER,
-  TOKEN_LITERAL_INT,
-  TOKEN_LITERAL_NAT,
-  TOKEN_LITERAL_REAL,
-  TOKEN_LITERAL_STR,
-  TOKEN_TYPE_I8,
-  TOKEN_TYPE_I16,
-  TOKEN_TYPE_INT,
-  TOKEN_TYPE_U8,
-  TOKEN_TYPE_U16,
-  TOKEN_TYPE_NAT,
-  TOKEN_TYPE_REAL,
-  TOKEN_TYPE_STR,
-  TOKEN_TYPE_BOOL,
-  TOKEN_TYPE_VOID,
-  TOKEN_TYPE_PTR,
-  TOKEN_KEYWORD_PLEX,
-  TOKEN_KEYWORD_FN,
-  TOKEN_KEYWORD_CONST,
-  TOKEN_KEYWORD_IF,
-  TOKEN_KEYWORD_IS,
-  TOKEN_KEYWORD_AS,
-  TOKEN_KEYWORD_ELSE,
-  TOKEN_KEYWORD_WHILE,
-  TOKEN_KEYWORD_FOR,
-  TOKEN_KEYWORD_RETURN,
-  TOKEN_KEYWORD_USE,
-  TOKEN_KEYWORD_INIT,
-  TOKEN_KEYWORD_THIS,
-  TOKEN_KEYWORD_OPEN,
-  TOKEN_KEYWORD_READ,
-  TOKEN_KEYWORD_WRITE,
-  TOKEN_KEYWORD_STAT,
-  TOKEN_KEYWORD_CLOSE,
-  TOKEN_KEYWORD_LOOP,
-  TOKEN_KEYWORD_DO,
-  TOKEN_KEYWORD_NIL,
-  TOKEN_KEYWORD_TRUE,
-  TOKEN_KEYWORD_FALSE,
-  TOKEN_OPERATOR_NOT,
-  TOKEN_OPERATOR_AND,
-  TOKEN_OPERATOR_OR,
-  TOKEN_BANG,
-  TOKEN_BANG_EQ,
-  TOKEN_EQ,
-  TOKEN_EQ_EQ,
-  TOKEN_AND,
-  TOKEN_AND_AND,  
-  TOKEN_GT,
-  TOKEN_LT,
-  TOKEN_GTE,
-  TOKEN_LTE,
-  TOKEN_DOT,
-  TOKEN_COMMA,
-  TOKEN_COLON,
-  TOKEN_SEMICOLON,
-  TOKEN_PLUS,
-  TOKEN_MINUS,
-  TOKEN_STAR,
-  TOKEN_SLASH,
-  TOKEN_MESH,
-  TOKEN_BIG_MONEY,  
-  TOKEN_LPAREN,
-  TOKEN_RPAREN,
-  TOKEN_LBRACE,
-  TOKEN_RBRACE,
-  TOKEN_LBRACKET,
-  TOKEN_RBRACKET,
-  TOKEN_ARROW_RIGHT,
-  TOKEN_ERROR
-} TokenType;
-
-typedef struct {
-  TokenType type;
-  const char *start;
-  int length;
-  int line;
-} Token;
-
-void init_parser(const char *source);
-Token next_token();
-const char* token_type_to_string(TokenType type);
-
-#endif
--- a/src/tools/assembler/lexer.c
+++ b/src/tools/assembler/lexer.c
@ -1,6 +1,6 @@
 #include <string.h>

-#include "../../vm/common.h"
+#include "../vm/common.h"
 #include "lexer.h"

 typedef struct {
@ -203,7 +203,9 @@ static TokenType identifierType() {
    break;
  case 'p':
    if (lexer.current - lexer.start > 1) {
-      switch (lexer.start[1]) {
+      switch (lexer.start[1]) {      case 't':
+        return check_keyword(2, 1, "r", TOKEN_TYPE_PTR);     
+
      case 'l':
        return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX);
      }
@ -215,8 +217,6 @@ static TokenType identifierType() {
      case 'e':
        if (lexer.current - lexer.start > 2) {
          switch (lexer.start[2]) {
-          case 'f':
-            return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH);
          case 't':
            return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
          case 'a':  
@ -238,7 +238,14 @@ static TokenType identifierType() {
    if (lexer.current - lexer.start > 1) {
      switch (lexer.start[1]) {
      case 't':
-        return check_keyword(2, 1, "r", TOKEN_TYPE_STR);
+        if (lexer.current - lexer.start > 2) {
+          switch (lexer.start[2]) {
+          case 'r':
+            return check_keyword(3, 0, "", TOKEN_TYPE_STR);
+          case 'a':
+            return check_keyword(3, 1, "t", TOKEN_KEYWORD_STAT);
+          }
+        }
      }
    }
    break;
@ -422,6 +429,8 @@ const char *token_type_to_string(TokenType type) {
    return "TYPE_REAL";
  case TOKEN_TYPE_STR:
    return "TYPE_STR";
+  case TOKEN_TYPE_PTR:
+    return "TYPE_PTR";
  case TOKEN_KEYWORD_PLEX:
    return "KEYWORD_PLEX";
  case TOKEN_KEYWORD_FN:
@ -454,8 +463,8 @@ const char *token_type_to_string(TokenType type) {
    return "TOKEN_KEYWORD_READ";
  case TOKEN_KEYWORD_WRITE:
    return "TOKEN_KEYWORD_WRITE";
-  case TOKEN_KEYWORD_REFRESH:
-    return "TOKEN_KEYWORD_REFRESH";
+  case TOKEN_KEYWORD_STAT:
+    return "TOKEN_KEYWORD_STAT";
  case TOKEN_KEYWORD_CLOSE:
    return "TOKEN_KEYWORD_CLOSE";
  case TOKEN_KEYWORD_NIL:
--- a/src/tools/assembler/lexer.h
+++ b/src/tools/assembler/lexer.h
@ -18,6 +18,7 @@ typedef enum {
  TOKEN_TYPE_STR,
  TOKEN_TYPE_BOOL,
  TOKEN_TYPE_VOID,
+  TOKEN_TYPE_PTR,
  TOKEN_KEYWORD_PLEX,
  TOKEN_KEYWORD_FN,
  TOKEN_KEYWORD_CONST,
@ -35,7 +36,7 @@ typedef enum {
  TOKEN_KEYWORD_OPEN,
  TOKEN_KEYWORD_READ,
  TOKEN_KEYWORD_WRITE,
-  TOKEN_KEYWORD_REFRESH,
+  TOKEN_KEYWORD_STAT,
  TOKEN_KEYWORD_CLOSE,
  TOKEN_KEYWORD_LOOP,
  TOKEN_KEYWORD_DO,
--- a/test/add.uir.ul
+++ b/test/add.uir.ul
@ -7,7 +7,7 @@ function main () {
 	int local_x;
 	int local_y;
 	int result;
-	int result_str;
+	str result_str;

 	load_absolute_32 x -> local_x;
 	load_absolute_32 y -> local_y;