wip assembler v2

This commit is contained in:
zongor 2025-11-10 23:45:09 -08:00
parent 0113411f89
commit 32365a8895
4 changed files with 384 additions and 161 deletions

View File

@ -1,89 +1,22 @@
#include "../../vm/libc.h"
#include "assembler.h"
#include <stdio.h>
/* Scope tag recorded for a symbol (see the `scope` member of Symbol). */
typedef enum { GLOBAL, LOCAL } ScopeType;
/*
 * Type tag used by symbols, fields, array elements and function
 * argument/return descriptions.
 * NOTE(review): I8..I32 / U8..U32 / F8..F32 look like signed, unsigned and
 * real types of the given bit width -- confirm against the VM definition.
 */
typedef enum {
VOID,
BOOL,
I8,
I16,
I32,
U8,
U16,
U32,
F8,
F16,
F32,
STR,
PLEX,
ARRAY,
FUNCTION
} SymbolType;
/*
 * One named member of a plex or trait.
 * NOTE(review): offset/size are presumably in bytes -- confirm.
 */
typedef struct field_s {
  char *name;
  SymbolType type;
  u32 offset;
  u32 size;
} Field;

/* Signature of a function or method. */
typedef struct function_def_s {
  char *name;
  SymbolType args[8]; /* fixed capacity: room for 8 argument types */
  u8 arg_count;
  SymbolType return_type;
} FunctionDef;

/*
 * Checks the tokens that follow a `global` keyword.
 *
 * Pulls the next token from the lexer, steps over one optional `const`, and
 * reports whether the token reached is one of the built-in type keywords
 * (int, nat, real, str).
 *
 * Returns true when a type keyword was found, false otherwise. The tokens
 * read here are consumed in both cases.
 *
 * Field and FunctionDef are declared at file scope (above) rather than inside
 * this function so that the struct definitions further down the file, which
 * use them, can see them.
 */
bool global(void) {
  Token token = nextToken();
  if (token.type == TOKEN_KEYWORD_CONST) {
    token = nextToken();
  }

  if (token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_NAT ||
      token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) {
    return true;
  }

  return false;
}
/*
 * A trait: a name plus a set of fields and method signatures.
 * NOTE(review): fields/methods are presumably arrays holding field_count and
 * method_count entries respectively; ownership is not visible here -- confirm.
 */
typedef struct trait_def_s {
char *name;
Field *fields;
u32 field_count;
FunctionDef *methods;
u32 method_count;
} TraitDef;
/*
 * Definition of a plex type: its name, two size figures, and its fields,
 * traits and methods (each a pointer paired with a count).
 * NOTE(review): the logical/physical size distinction is not defined in this
 * struct; other structs in this file annotate physical_size as
 * "logical_size * type_size + 4" -- confirm whether that applies here too.
 */
typedef struct plex_def_s {
char *name;
u32 logical_size;
u32 physical_size;
Field *fields;
u32 field_count;
TraitDef *traits;
u32 trait_count;
FunctionDef *methods;
u32 method_count;
} PlexDef;
/*
 * Description of an array: element type tag, length and size figures.
 * NOTE(review): `ref` presumably points at the element's PlexDef when `type`
 * is PLEX, or at a nested ArrayDef when `type` is ARRAY -- confirm.
 */
typedef struct array_def_s {
SymbolType type;
u32 length;
u32 logical_size;
u32 physical_size; // logical_size * type_size + 4
union {
PlexDef *plex;
struct array_def_s *array;
} ref;
} ArrayDef;
/*
 * One symbol-table entry: name, address, scope, type tag and size figures.
 * NOTE(review): which `ref` member is valid is presumably selected by `type`
 * (PLEX / ARRAY / FUNCTION) -- confirm.
 */
typedef struct symbol_s {
char *name;
u32 address;
ScopeType scope;
SymbolType type;
u32 logical_size;
u32 physical_size; // logical_size * type_size + 4
union {
PlexDef *plex;
ArrayDef *array;
FunctionDef *function;
} ref;
} Symbol;
/*
 * Container for Symbol entries.
 * NOTE(review): presumably `count` is the number of entries in use and
 * `capacity` the number allocated behind `symbols` -- confirm.
 */
typedef struct symbol_tab_s {
Symbol *symbols;
int count;
int capacity;
} SymbolTable;
/*
 * Placeholder invoked when the assembler sees an `fn` keyword.
 * Intentionally empty for now: it consumes no tokens and emits nothing.
 */
void function(void) {}
void assemble(VM *vm, char *source) {
USED(vm);
@ -98,6 +31,109 @@ void assemble(VM *vm, char *source) {
if (token.type != TOKEN_EOF) {
printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type),
token.length, token.start);
if (token.type == TOKEN_KEYWORD_GLOBAL) {
if (!global()) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
}
if (token.type == TOKEN_KEYWORD_FN) {
function();
}
/*
 * Identifier tokens are compared against the instruction mnemonics below.
 * Every branch is still empty, so a match currently has no effect.
 * NOTE(review): token.start is printed with an explicit length (%.*s) earlier
 * in this loop, which suggests it points into the source text and is not
 * NUL-terminated at token.length. Confirm that streq() accounts for that;
 * otherwise comparing it against a C string literal will not match as
 * intended (and a prefix-style compare would confuse e.g. "malloc" with
 * "malloc_immediate").
 */
if (token.type == TOKEN_IDENTIFIER) {
if (streq(token.start, "exit")) {
} else if (streq(token.start, "call")) {
} else if (streq(token.start, "syscall")) {
} else if (streq(token.start, "load_immediate")) {
} else if (streq(token.start, "load_indirect_8")) {
} else if (streq(token.start, "load_indirect_16")) {
} else if (streq(token.start, "load_indirect_32")) {
} else if (streq(token.start, "load_absolute_8")) {
} else if (streq(token.start, "load_absolute_16")) {
} else if (streq(token.start, "load_absolute_32")) {
} else if (streq(token.start, "load_offset_8")) {
} else if (streq(token.start, "load_offset_16")) {
} else if (streq(token.start, "load_offset_32")) {
} else if (streq(token.start, "store_absolute_8")) {
} else if (streq(token.start, "store_absolute_16")) {
} else if (streq(token.start, "store_absolute_32")) {
} else if (streq(token.start, "store_indirect_8")) {
} else if (streq(token.start, "store_indirect_16")) {
} else if (streq(token.start, "store_indirect_32")) {
} else if (streq(token.start, "store_offset_8")) {
} else if (streq(token.start, "store_offset_16")) {
} else if (streq(token.start, "store_offset_32")) {
} else if (streq(token.start, "malloc")) {
} else if (streq(token.start, "malloc_immediate")) {
} else if (streq(token.start, "memset_8")) {
} else if (streq(token.start, "memset_16")) {
} else if (streq(token.start, "memset_32")) {
} else if (streq(token.start, "register_move")) {
} else if (streq(token.start, "add_int")) {
} else if (streq(token.start, "sub_int")) {
} else if (streq(token.start, "mul_int")) {
} else if (streq(token.start, "div_int")) {
} else if (streq(token.start, "abs_int")) {
} else if (streq(token.start, "neg_int")) {
} else if (streq(token.start, "add_nat")) {
} else if (streq(token.start, "sub_nat")) {
} else if (streq(token.start, "mul_nat")) {
} else if (streq(token.start, "div_nat")) {
} else if (streq(token.start, "abs_nat")) {
} else if (streq(token.start, "neg_nat")) {
} else if (streq(token.start, "add_real")) {
} else if (streq(token.start, "sub_real")) {
} else if (streq(token.start, "mul_real")) {
} else if (streq(token.start, "div_real")) {
} else if (streq(token.start, "abs_real")) {
} else if (streq(token.start, "neg_real")) {
} else if (streq(token.start, "int_to_real")) {
} else if (streq(token.start, "nat_to_real")) {
} else if (streq(token.start, "real_to_int")) {
} else if (streq(token.start, "real_to_nat")) {
} else if (streq(token.start, "bit_shift_left")) {
} else if (streq(token.start, "bit_shift_right")) {
} else if (streq(token.start, "bit_shift_r_ext")) {
} else if (streq(token.start, "bit_and")) {
} else if (streq(token.start, "bit_or")) {
} else if (streq(token.start, "bit_xor")) {
} else if (streq(token.start, "jump")) {
} else if (streq(token.start, "jump_if_flag")) {
} else if (streq(token.start, "jump_eq_int")) {
} else if (streq(token.start, "jump_neq_int")) {
} else if (streq(token.start, "jump_gt_int")) {
} else if (streq(token.start, "jump_lt_int")) {
} else if (streq(token.start, "jump_le_int")) {
} else if (streq(token.start, "jump_ge_int")) {
} else if (streq(token.start, "jump_eq_nat")) {
} else if (streq(token.start, "jump_neq_nat")) {
} else if (streq(token.start, "jump_gt_nat")) {
} else if (streq(token.start, "jump_lt_nat")) {
} else if (streq(token.start, "jump_le_nat")) {
} else if (streq(token.start, "jump_ge_nat")) {
} else if (streq(token.start, "jump_eq_real")) {
} else if (streq(token.start, "jump_neq_real")) {
} else if (streq(token.start, "jump_ge_real")) {
} else if (streq(token.start, "jump_gt_real")) {
} else if (streq(token.start, "jump_lt_real")) {
} else if (streq(token.start, "jump_le_real")) {
} else if (streq(token.start, "string_length")) {
} else if (streq(token.start, "string_eq")) {
} else if (streq(token.start, "string_concat")) {
} else if (streq(token.start, "string_get_char")) {
} else if (streq(token.start, "string_find_char")) {
} else if (streq(token.start, "string_slice")) {
} else if (streq(token.start, "int_to_string")) {
} else if (streq(token.start, "nat_to_string")) {
} else if (streq(token.start, "real_to_string")) {
} else if (streq(token.start, "string_to_int")) {
} else if (streq(token.start, "string_to_nat")) {
} else if (streq(token.start, "string_to_real")) {
}
}
}
} while (token.type != TOKEN_EOF);
}

View File

@ -89,7 +89,8 @@ static void skipWhitespace() {
advance();
advance();
while (!isAtEnd()) {
if (peek() == '\n') lexer.line++;
if (peek() == '\n')
lexer.line++;
if (peek() == '*' && peekNext() == '/') {
advance();
advance();
@ -120,7 +121,15 @@ static TokenType checkKeyword(int start, int length, const char *rest,
static TokenType identifierType() {
switch (lexer.start[0]) {
case 'a':
return checkKeyword(1, 2, "nd", TOKEN_OPERATOR_AND);
if (lexer.current - lexer.start > 1) {
switch (lexer.start[1]) {
case 'n':
return checkKeyword(2, 1, "d", TOKEN_OPERATOR_AND);
case 's':
return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS);
}
}
break;
case 'c':
if (lexer.current - lexer.start > 1) {
switch (lexer.start[1]) {
@ -149,13 +158,15 @@ static TokenType identifierType() {
switch (lexer.start[1]) {
case 'f':
return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF);
case 's':
return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS);
case 'n':
if (lexer.current - lexer.start > 2) {
switch (lexer.start[2]) {
case 'i':
return checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT);
case 't':
return checkKeyword(3, 1, "", TOKEN_TYPE_INT);
return checkKeyword(3, 0, "", TOKEN_TYPE_INT);
}
}
break;
@ -244,6 +255,8 @@ static TokenType identifierType() {
}
}
break;
case 'g':
return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL);
}
return TOKEN_IDENTIFIER;
@ -267,10 +280,10 @@ static Token number() {
while (isDigit(peek()))
advance();
return makeToken(TOKEN_FLOAT_LITERAL);
return makeToken(TOKEN_LITERAL_REAL);
}
return makeToken(TOKEN_INT_LITERAL);
return makeToken(TOKEN_LITERAL_INT);
}
static Token string() {
@ -285,7 +298,7 @@ static Token string() {
/* The closing quote. */
advance();
return makeToken(TOKEN_STRING_LITERAL);
return makeToken(TOKEN_LITERAL_STR);
}
Token nextToken() {
@ -321,11 +334,17 @@ Token nextToken() {
case '.':
return makeToken(TOKEN_DOT);
case '-':
return makeToken(TOKEN_MINUS);
return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS);
case '+':
return makeToken(TOKEN_PLUS);
case '/':
return makeToken(TOKEN_SLASH);
case '&':
return makeToken(match('&') ? TOKEN_AND_AND : TOKEN_AND);
case '#':
return makeToken(TOKEN_MESH);
case '$':
return makeToken(TOKEN_BIG_MONEY);
case '*':
return makeToken(TOKEN_STAR);
case '!':
@ -345,62 +364,133 @@ Token nextToken() {
/*
 * Maps a token type to a short, static, human-readable name for diagnostics.
 *
 * type: the token type to describe.
 * Returns a pointer to a string literal (never NULL); values without an entry
 * below yield "UNKNOWN_TOKEN".
 *
 * Each token type has exactly one case label here; a switch must not contain
 * the same case constant (or `default`) twice.
 */
const char *tokenTypeToString(TokenType type) {
  switch (type) {
  case TOKEN_EOF:
    return "EOF";
  case TOKEN_IDENTIFIER:
    return "IDENTIFIER";
  case TOKEN_LITERAL_INT:
    return "LITERAL_INT";
  case TOKEN_LITERAL_NAT:
    return "LITERAL_NAT";
  case TOKEN_LITERAL_REAL:
    return "LITERAL_REAL";
  case TOKEN_LITERAL_STR:
    return "LITERAL_STR";
  case TOKEN_TYPE_INT:
    return "TYPE_INT";
  case TOKEN_TYPE_NAT:
    return "TYPE_NAT";
  case TOKEN_TYPE_REAL:
    return "TYPE_REAL";
  case TOKEN_TYPE_STR:
    return "TYPE_STR";
  case TOKEN_KEYWORD_PLEX:
    return "KEYWORD_PLEX";
  case TOKEN_KEYWORD_FN:
    return "KEYWORD_FN";
  case TOKEN_KEYWORD_CONST:
    return "KEYWORD_CONST";
  case TOKEN_KEYWORD_IF:
    return "KEYWORD_IF";
  case TOKEN_KEYWORD_IS:
    return "IS";
  case TOKEN_KEYWORD_AS:
    return "AS";
  case TOKEN_KEYWORD_ELSE:
    return "KEYWORD_ELSE";
  case TOKEN_KEYWORD_WHILE:
    return "KEYWORD_WHILE";
  case TOKEN_KEYWORD_FOR:
    return "KEYWORD_FOR";
  case TOKEN_KEYWORD_RETURN:
    return "KEYWORD_RETURN";
  case TOKEN_KEYWORD_USE:
    return "KEYWORD_USE";
  case TOKEN_KEYWORD_INIT:
    return "KEYWORD_INIT";
  case TOKEN_KEYWORD_THIS:
    return "KEYWORD_THIS";
  case TOKEN_KEYWORD_OPEN:
    return "TOKEN_KEYWORD_OPEN";
  case TOKEN_KEYWORD_READ:
    return "TOKEN_KEYWORD_READ";
  case TOKEN_KEYWORD_WRITE:
    return "TOKEN_KEYWORD_WRITE";
  case TOKEN_KEYWORD_REFRESH:
    return "TOKEN_KEYWORD_REFRESH";
  case TOKEN_KEYWORD_CLOSE:
    return "TOKEN_KEYWORD_CLOSE";
  case TOKEN_KEYWORD_NIL:
    return "KEYWORD_NIL";
  case TOKEN_KEYWORD_TRUE:
    return "KEYWORD_TRUE";
  case TOKEN_KEYWORD_FALSE:
    return "KEYWORD_FALSE";
  case TOKEN_KEYWORD_GLOBAL:
    return "KEYWORD_GLOBAL";
  case TOKEN_OPERATOR_NOT:
    return "OPERATOR_NOT";
  case TOKEN_OPERATOR_AND:
    return "OPERATOR_AND";
  case TOKEN_OPERATOR_OR:
    return "OPERATOR_OR";
  case TOKEN_BANG:
    return "BANG";
  case TOKEN_BANG_EQ:
    return "BANG_EQ";
  case TOKEN_EQ:
    return "EQ";
  case TOKEN_EQ_EQ:
    return "EQ_EQ";
  case TOKEN_GT:
    return "GT";
  case TOKEN_LT:
    return "LT";
  case TOKEN_GTE:
    return "GTE";
  case TOKEN_LTE:
    return "LTE";
  case TOKEN_DOT:
    return "DOT";
  case TOKEN_COMMA:
    return "COMMA";
  case TOKEN_COLON:
    return "COLON";
  case TOKEN_SEMICOLON:
    return "SEMICOLON";
  case TOKEN_PLUS:
    return "PLUS";
  case TOKEN_MINUS:
    return "MINUS";
  case TOKEN_STAR:
    return "STAR";
  case TOKEN_SLASH:
    return "SLASH";
  case TOKEN_LPAREN:
    return "LPAREN";
  case TOKEN_RPAREN:
    return "RPAREN";
  case TOKEN_LBRACE:
    return "LBRACE";
  case TOKEN_RBRACE:
    return "RBRACE";
  case TOKEN_LBRACKET:
    return "LBRACKET";
  case TOKEN_RBRACKET:
    return "RBRACKET";
  case TOKEN_ARROW_LEFT:
    return "ARROW_LEFT";
  case TOKEN_MESH:
    return "MESH";
  case TOKEN_BIG_MONEY:
    return "BIG_MONEY";
  case TOKEN_AND:
    return "AND";
  case TOKEN_AND_AND:
    return "AND_AND";
  case TOKEN_ERROR:
    return "ERROR";
  default:
    return "UNKNOWN_TOKEN";
  }
}

View File

@ -4,10 +4,10 @@
typedef enum {
TOKEN_EOF,
TOKEN_IDENTIFIER,
TOKEN_INT_LITERAL,
TOKEN_UINT_LITERAL,
TOKEN_FLOAT_LITERAL,
TOKEN_STRING_LITERAL,
TOKEN_LITERAL_INT,
TOKEN_LITERAL_NAT,
TOKEN_LITERAL_REAL,
TOKEN_LITERAL_STR,
TOKEN_TYPE_INT,
TOKEN_TYPE_NAT,
TOKEN_TYPE_REAL,
@ -16,6 +16,8 @@ typedef enum {
TOKEN_KEYWORD_FN,
TOKEN_KEYWORD_CONST,
TOKEN_KEYWORD_IF,
TOKEN_KEYWORD_IS,
TOKEN_KEYWORD_AS,
TOKEN_KEYWORD_ELSE,
TOKEN_KEYWORD_WHILE,
TOKEN_KEYWORD_FOR,
@ -23,6 +25,7 @@ typedef enum {
TOKEN_KEYWORD_USE,
TOKEN_KEYWORD_INIT,
TOKEN_KEYWORD_THIS,
TOKEN_KEYWORD_GLOBAL,
TOKEN_KEYWORD_OPEN,
TOKEN_KEYWORD_READ,
TOKEN_KEYWORD_WRITE,
@ -31,7 +34,6 @@ typedef enum {
TOKEN_KEYWORD_NIL,
TOKEN_KEYWORD_TRUE,
TOKEN_KEYWORD_FALSE,
TOKEN_OPERATOR_IS,
TOKEN_OPERATOR_NOT,
TOKEN_OPERATOR_AND,
TOKEN_OPERATOR_OR,
@ -39,6 +41,8 @@ typedef enum {
TOKEN_BANG_EQ,
TOKEN_EQ,
TOKEN_EQ_EQ,
TOKEN_AND,
TOKEN_AND_AND,
TOKEN_GT,
TOKEN_LT,
TOKEN_GTE,
@ -51,12 +55,15 @@ typedef enum {
TOKEN_MINUS,
TOKEN_STAR,
TOKEN_SLASH,
TOKEN_MESH,
TOKEN_BIG_MONEY,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_LBRACE,
TOKEN_RBRACE,
TOKEN_LBRACKET,
TOKEN_RBRACKET,
TOKEN_ARROW_LEFT,
TOKEN_ERROR
} TokenType;

View File

@ -0,0 +1,90 @@
#ifndef UNDAR_COMPILER_H
#define UNDAR_COMPILER_H
#include "../../vm/common.h"
/* Scope tag recorded for a symbol (see the `scope` member of Symbol). */
typedef enum { GLOBAL, LOCAL } ScopeType;
/*
 * Type tag used by symbols, fields, array elements and function
 * argument/return descriptions.
 * NOTE(review): I8..I32 / U8..U32 / F8..F32 look like signed, unsigned and
 * real types of the given bit width -- confirm against the VM definition.
 */
typedef enum {
VOID,
BOOL,
I8,
I16,
I32,
U8,
U16,
U32,
F8,
F16,
F32,
STR,
PLEX,
ARRAY,
FUNCTION
} SymbolType;
/*
 * One named member of a plex or trait: its name, type tag, offset and size.
 * NOTE(review): offset/size are presumably in bytes -- confirm.
 */
typedef struct field_s {
char *name;
SymbolType type;
u32 offset;
u32 size;
} Field;
/*
 * Signature of a function or method: name, argument type tags and return
 * type tag. The args array has a fixed capacity of 8 entries.
 * NOTE(review): arg_count presumably says how many leading entries of args
 * are in use -- confirm.
 */
typedef struct function_def_s {
char *name;
SymbolType args[8];
u8 arg_count;
SymbolType return_type;
} FunctionDef;
/*
 * A trait: a name plus a set of fields and method signatures.
 * NOTE(review): fields/methods are presumably arrays holding field_count and
 * method_count entries respectively; ownership is not visible here -- confirm.
 */
typedef struct trait_def_s {
char *name;
Field *fields;
u32 field_count;
FunctionDef *methods;
u32 method_count;
} TraitDef;
/*
 * Definition of a plex type: its name, two size figures, and its fields,
 * traits and methods (each a pointer paired with a count).
 * NOTE(review): the logical/physical size distinction is not defined in this
 * struct; other structs in this header annotate physical_size as
 * "logical_size * type_size + 4" -- confirm whether that applies here too.
 */
typedef struct plex_def_s {
char *name;
u32 logical_size;
u32 physical_size;
Field *fields;
u32 field_count;
TraitDef *traits;
u32 trait_count;
FunctionDef *methods;
u32 method_count;
} PlexDef;
/*
 * Description of an array: element type tag, length and size figures.
 * NOTE(review): `ref` presumably points at the element's PlexDef when `type`
 * is PLEX, or at a nested ArrayDef when `type` is ARRAY -- confirm.
 */
typedef struct array_def_s {
SymbolType type;
u32 length;
u32 logical_size;
u32 physical_size; // logical_size * type_size + 4
union {
PlexDef *plex;
struct array_def_s *array;
} ref;
} ArrayDef;
/*
 * One symbol-table entry: name, address, scope, type tag and size figures.
 * NOTE(review): which `ref` member is valid is presumably selected by `type`
 * (PLEX / ARRAY / FUNCTION) -- confirm.
 */
typedef struct symbol_s {
char *name;
u32 address;
ScopeType scope;
SymbolType type;
u32 logical_size;
u32 physical_size; // logical_size * type_size + 4
union {
PlexDef *plex;
ArrayDef *array;
FunctionDef *function;
} ref;
} Symbol;
/*
 * Container for Symbol entries.
 * NOTE(review): presumably `count` is the number of entries in use and
 * `capacity` the number allocated behind `symbols` -- confirm.
 */
typedef struct symbol_tab_s {
Symbol *symbols;
int count;
int capacity;
} SymbolTable;
#endif