remove old assembler, finish symbol table pass, start working on code gen

This commit is contained in:
zongor 2025-11-29 19:44:54 -08:00
parent 733dfc0364
commit 2e5eb03227
25 changed files with 262 additions and 1799 deletions

View File

@ -86,15 +86,11 @@ VM_SOURCES := \
ifeq ($(BUILD_MODE), release) ifeq ($(BUILD_MODE), release)
PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ PLATFORM_SOURCE := $(ARCH_DIR)/main.c \
$(ARCH_DIR)/devices.c\ $(ARCH_DIR)/devices.c\
$(SRC_DIR)/tools/old_assembler/parser.c \
$(SRC_DIR)/tools/old_assembler/assembler.c \
$(SRC_DIR)/tools/assembler/lexer.c \ $(SRC_DIR)/tools/assembler/lexer.c \
$(SRC_DIR)/tools/assembler/assembler.c $(SRC_DIR)/tools/assembler/assembler.c
else else
PLATFORM_SOURCE := $(ARCH_DIR)/main.c \ PLATFORM_SOURCE := $(ARCH_DIR)/main.c \
$(ARCH_DIR)/devices.c \ $(ARCH_DIR)/devices.c \
$(SRC_DIR)/tools/old_assembler/parser.c \
$(SRC_DIR)/tools/old_assembler/assembler.c\
$(SRC_DIR)/tools/assembler/lexer.c \ $(SRC_DIR)/tools/assembler/lexer.c \
$(SRC_DIR)/tools/assembler/assembler.c $(SRC_DIR)/tools/assembler/assembler.c
endif endif

View File

@ -1,5 +1,3 @@
#include "../../tools/old_assembler/assembler.h"
#include "../../tools/old_assembler/parser.h"
#include "../../tools/assembler/assembler.h" #include "../../tools/assembler/assembler.h"
#include "../../vm/vm.h" #include "../../vm/vm.h"
#include "devices.h" #include "devices.h"
@ -126,27 +124,7 @@ bool loadVM(const char *filename, VM *vm) {
bool compileAndSave(const char *source_file, const char *output_file, VM *vm) { bool compileAndSave(const char *source_file, const char *output_file, VM *vm) {
USED(vm); USED(vm);
USED(output_file); USED(output_file);
FILE *f = fopen(source_file, "rb"); USED(source_file);
if (!f) {
perror("fopen");
return false;
}
static char source[MAX_SRC_SIZE + 1];
fseek(f, 0, SEEK_END);
long len = ftell(f);
fseek(f, 0, SEEK_SET);
if (len >= MAX_SRC_SIZE) {
fprintf(stderr, "Source is larger than buffer\n");
fclose(f);
return false;
}
size_t read = fread(source, 1, len, f);
source[read] = '\0';
fclose(f);
assemble(vm, source);
return true; return true;
} }
@ -173,15 +151,8 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) {
source[read] = '\0'; source[read] = '\0';
fclose(f); fclose(f);
ExprNode *ast = expr_parse(source, strlen(source)); assemble(vm, source);
if (!ast) {
printf("Parse failed.\n");
return false;
} else {
old_assemble(vm, ast);
expr_free(ast);
// If output file specified, save the VM
if (output_file) { if (output_file) {
if (!saveVM(output_file, vm)) { if (!saveVM(output_file, vm)) {
printf("Failed to save VM to %s\n", output_file); printf("Failed to save VM to %s\n", output_file);
@ -190,24 +161,23 @@ bool assembleAndSave(const char *source_file, const char *output_file, VM *vm) {
printf("VM saved to %s\n", output_file); printf("VM saved to %s\n", output_file);
} }
return true; return true;
}
} }
bool init_vm(VM *vm) { bool init_vm(VM *vm) {
vm->memory = (u8*)malloc(MEMORY_SIZE * sizeof(u8)); vm->memory = (u8 *)malloc(MEMORY_SIZE * sizeof(u8));
vm->memory_size = MEMORY_SIZE; vm->memory_size = MEMORY_SIZE;
vm->code = (u8*)malloc(CODE_SIZE * sizeof(u8)); vm->code = (u8 *)malloc(CODE_SIZE * sizeof(u8));
vm->code_size = CODE_SIZE; vm->code_size = CODE_SIZE;
vm->frames = (Frame*)malloc(FRAMES_SIZE * sizeof(Frame)); vm->frames = (Frame *)malloc(FRAMES_SIZE * sizeof(Frame));
vm->frames_size = FRAMES_SIZE; vm->frames_size = FRAMES_SIZE;
vm->stack = (u32*)malloc(STACK_SIZE * sizeof(u32)) vm->stack = (u32 *)malloc(STACK_SIZE * sizeof(u32));
vm->stack_size = STACK_SIZE; vm->stack_size = STACK_SIZE;
vm->devices = (Device*)malloc(DEVICES_SIZE * sizeof(Device)); vm->devices = (Device *)malloc(DEVICES_SIZE * sizeof(Device));
vm->devices_size = DEVICES_SIZE; vm->device_size = DEVICES_SIZE;
return true; return true;
} }
@ -217,7 +187,6 @@ i32 main(i32 argc, char *argv[]) {
char *input_file = nil; char *input_file = nil;
char *output_file = nil; char *output_file = nil;
bool is_rom = false; bool is_rom = false;
bool is_assembly = false;
bool is_ir = false; bool is_ir = false;
// Parse command line arguments // Parse command line arguments
@ -233,9 +202,6 @@ i32 main(i32 argc, char *argv[]) {
if (ext && (strcmp(ext, ".rom") == 0)) { if (ext && (strcmp(ext, ".rom") == 0)) {
is_rom = true; is_rom = true;
} }
if (ext && (strcmp(ext, ".lisp") == 0)) {
is_assembly = true;
}
if (ext && (strcmp(ext, ".ir") == 0)) { if (ext && (strcmp(ext, ".ir") == 0)) {
is_ir = true; is_ir = true;
} }
@ -256,7 +222,7 @@ i32 main(i32 argc, char *argv[]) {
if (is_rom) { if (is_rom) {
// Load ROM file directly // Load ROM file directly
compilation_success = loadVM(input_file, &vm); compilation_success = loadVM(input_file, &vm);
} else if (is_assembly) { } else if (is_ir) {
// Compile Lisp file // Compile Lisp file
if (dump_rom && output_file) { if (dump_rom && output_file) {
compilation_success = assembleAndSave(input_file, output_file, &vm); compilation_success = assembleAndSave(input_file, output_file, &vm);

File diff suppressed because it is too large Load Diff

View File

@ -215,12 +215,19 @@ static TokenType identifierType() {
case 'e': case 'e':
if (lexer.current - lexer.start > 2) { if (lexer.current - lexer.start > 2) {
switch (lexer.start[2]) { switch (lexer.start[2]) {
case 'a':
return check_keyword(3, 1, "d", TOKEN_KEYWORD_READ);
case 'f': case 'f':
return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); return check_keyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH);
case 't': case 't':
return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN);
case 'a':
if (lexer.current - lexer.start > 3) {
switch(lexer.start[3]) {
case 'd':
return check_keyword(4, 0, "", TOKEN_KEYWORD_READ);
case 'l':
return check_keyword(4, 0, "", TOKEN_TYPE_REAL);
}
}
} }
} }
break; break;
@ -272,7 +279,7 @@ static TokenType identifierType() {
case 'g': case 'g':
return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL);
case 'l': case 'l':
return check_keyword(1, 4, "oop", TOKEN_KEYWORD_LOOP); return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP);
case 'd': case 'd':
return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO);
case 'v': case 'v':
@ -331,7 +338,8 @@ Token next_token() {
char c = advance(); char c = advance();
if (is_alpha(c)) if (is_alpha(c))
return identifier(); return identifier();
if (is_digit(c)) char next = peek();
if ((c == '-' && is_digit(next)) || is_digit(c))
return number(); return number();
switch (c) { switch (c) {
@ -354,7 +362,7 @@ Token next_token() {
case '.': case '.':
return make_token(TOKEN_DOT); return make_token(TOKEN_DOT);
case '-': case '-':
return make_token(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS);
case '+': case '+':
return make_token(TOKEN_PLUS); return make_token(TOKEN_PLUS);
case '/': case '/':
@ -389,7 +397,7 @@ const char *token_type_to_string(TokenType type) {
case TOKEN_IDENTIFIER: case TOKEN_IDENTIFIER:
return "IDENTIFIER"; return "IDENTIFIER";
case TOKEN_LITERAL_INT: case TOKEN_LITERAL_INT:
return "LITERAL_i32"; return "LITERAL_INT";
case TOKEN_LITERAL_NAT: case TOKEN_LITERAL_NAT:
return "LITERAL_NAT"; return "LITERAL_NAT";
case TOKEN_LITERAL_REAL: case TOKEN_LITERAL_REAL:
@ -397,7 +405,7 @@ const char *token_type_to_string(TokenType type) {
case TOKEN_LITERAL_STR: case TOKEN_LITERAL_STR:
return "LITERAL_STR"; return "LITERAL_STR";
case TOKEN_TYPE_INT: case TOKEN_TYPE_INT:
return "TYPE_i32"; return "TYPE_INT";
case TOKEN_TYPE_NAT: case TOKEN_TYPE_NAT:
return "TYPE_NAT"; return "TYPE_NAT";
case TOKEN_TYPE_REAL: case TOKEN_TYPE_REAL:
@ -498,8 +506,8 @@ const char *token_type_to_string(TokenType type) {
return "LBRACKET"; return "LBRACKET";
case TOKEN_RBRACKET: case TOKEN_RBRACKET:
return "RBRACKET"; return "RBRACKET";
case TOKEN_ARROW_LEFT: case TOKEN_ARROW_RIGHT:
return "ARROW_LEFT"; return "ARROW_RIGHT";
case TOKEN_MESH: case TOKEN_MESH:
return "MESH"; return "MESH";
case TOKEN_BIG_MONEY: case TOKEN_BIG_MONEY:

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +0,0 @@
#ifndef ASSEMBLER_H
#define ASSEMBLER_H
#include "../../vm/common.h"
#include "../../vm/vm.h"
#include "parser.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
// Fixed-point <-> float conversion using a scale of 65536 (2^16), i.e. the
// low 16 bits of the i32 hold the fractional part.
//
// AS_FIXED(v): reinterpret v as a signed 32-bit fixed-point value and return
// it as a float.
// TO_FIXED(f): scale f by 65536 and add/subtract 0.5 before the truncating
// cast, so the result is rounded to the nearest step (halves away from zero).
// NOTE: TO_FIXED expands (f) more than once, so do not pass an argument with
// side effects.
#define AS_FIXED(v) ((float)(i32)(v) / 65536.0f)
#define TO_FIXED(f) ((i32)( \
((f) >= 0.0f) ? ((f) * 65536.0f + 0.5f) : ((f) * 65536.0f - 0.5f) \
))
void old_assemble(VM *vm, ExprNode *program);
#endif

View File

@ -1,244 +0,0 @@
#include "parser.h"
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
// Allocate a childless node that owns a private copy of `token`.
//
// token: text to copy into the node; NULL is treated as the empty string.
// line:  source line number recorded on the node.
//
// Returns the new node (release it with expr_free), or NULL if either
// allocation fails. Nothing is leaked on failure.
static ExprNode *expr_node_create(const char *token, int line) {
  ExprNode *node = malloc(sizeof *node);
  if (!node) {
    return NULL;
  }

  node->token = strdup(token ? token : "");
  if (!node->token) {
    // Do not hand back a half-built node whose token would be dereferenced.
    free(node);
    return NULL;
  }

  node->children = NULL;
  node->child_count = 0;
  node->line = line;
  return node;
}
// Forward declaration
static ExprNode *parse_expression(const char **ptr, int line);
// Skip whitespace characters and ';' line comments.
//
// Returns a pointer to the first character of `ptr` that is neither
// whitespace nor part of a comment, or to the terminating NUL if the rest of
// the string is blank.
static const char *skip_whitespace(const char *ptr) {
  while (*ptr) {
    // <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF); a plain char holding a non-ASCII byte may be
    // negative, so convert before classifying.
    if (isspace((unsigned char)*ptr)) {
      ptr++;
      continue;
    }
    if (*ptr == ';') {
      // A comment runs to the end of the line. The newline itself is left in
      // place and consumed as ordinary whitespace on the next iteration.
      while (*ptr && *ptr != '\n') {
        ptr++;
      }
      continue;
    }
    break;
  }
  return ptr;
}
// Parse a single token (atom) starting at *ptr.
//
// Leading whitespace and comments are skipped. A token is one of:
//   - a double-quoted string, kept with its quotes; a backslash escapes the
//     following character, and an unterminated string runs to end of input;
//   - a single '(' or ')';
//   - a run of characters up to whitespace, a parenthesis, or ';'.
//
// ptr:  in/out cursor into the source text; advanced past the token.
// line: line number used in error messages.
//
// Returns a newly allocated NUL-terminated copy of the token (the caller
// frees it), or NULL on error. When no token is found the cursor is left
// untouched; when the copy cannot be allocated the cursor is still advanced
// past the token so that callers looping on the cursor keep making progress.
static char *parse_token(const char **ptr, int line) {
  // Skip leading whitespace and comments
  const char *start = skip_whitespace(*ptr);
  if (!*start) {
    printf("Error at line:%d\n", line);
    return NULL;
  }

  const char *end = start;
  if (*start == '"') {
    end++; // Skip opening quote
    // Read until closing quote or end of string
    while (*end && *end != '"') {
      if (*end == '\\' && *(end + 1)) {
        end += 2; // Skip escaped character
      } else {
        end++;
      }
    }
    if (*end == '"') {
      end++; // Include closing quote
    }
  } else if (*end == '(' || *end == ')') {
    // Parentheses are always tokens on their own
    end++;
  } else {
    // Cast before classifying: passing a negative char to isspace is
    // undefined behavior.
    while (*end && !isspace((unsigned char)*end) && *end != '(' &&
           *end != ')' && *end != ';') {
      end++;
    }
  }

  if (end == start) {
    printf("Error at line:%d\n", line);
    return NULL;
  }

  size_t len = (size_t)(end - start);
  *ptr = end;

  char *token = malloc(len + 1);
  if (!token) {
    fprintf(stderr, "Error: out of memory at line %d\n", line);
    return NULL;
  }
  memcpy(token, start, len);
  token[len] = '\0';
  return token;
}
// Parse a list (expression starting with '(')
static ExprNode *parse_list(const char **ptr, int line) {
// Skip the opening parenthesis
(*ptr)++;
*ptr = skip_whitespace(*ptr);
if (**ptr == ')') {
// Empty list
(*ptr)++;
return expr_node_create("\0", line);
}
// Parse all children first
ExprNode **temp_children = NULL;
size_t temp_count = 0;
while (**ptr && **ptr != ')') {
ExprNode *child = parse_expression(ptr, line);
if (child) {
// Resize temp children array
ExprNode **new_temp =
(ExprNode **)malloc(sizeof(ExprNode *) * (temp_count + 1));
// Copy existing children
for (size_t i = 0; i < temp_count; i++) {
new_temp[i] = temp_children[i];
}
// Add new child
new_temp[temp_count] = child;
temp_count++;
// Free old array and update
free(temp_children);
temp_children = new_temp;
}
*ptr = skip_whitespace(*ptr);
}
if (**ptr == ')') {
(*ptr)++; // Skip closing parenthesis
} else {
fprintf(stderr, "Error: Missing closing parenthesis at line %d\n", line);
}
// Create the actual node
ExprNode *node;
if (temp_count > 0 && temp_children[0]->child_count == 0) {
// First child is an atom, use it as the operator
node = expr_node_create(temp_children[0]->token, line);
// Move remaining children
node->child_count = temp_count - 1;
if (node->child_count > 0) {
node->children =
(ExprNode **)malloc(sizeof(ExprNode *) * node->child_count);
for (size_t i = 0; i < node->child_count; i++) {
node->children[i] = temp_children[i + 1];
}
}
// Free the first child since we used its token
expr_free(temp_children[0]);
} else {
// No operator or first child is a list
node = expr_node_create("list", line);
node->children = temp_children;
node->child_count = temp_count;
}
if (temp_count == 0) {
free(temp_children);
}
return node;
}
// Parse one expression at *ptr: a parenthesized list or a single atom.
//
// Leading whitespace and comments are consumed first. Returns NULL when the
// input is exhausted or no token could be read; otherwise returns a node the
// caller releases with expr_free.
static ExprNode *parse_expression(const char **ptr, int line) {
  *ptr = skip_whitespace(*ptr);

  if (**ptr == '\0') {
    return NULL;
  }
  if (**ptr == '(') {
    return parse_list(ptr, line);
  }

  // Anything else is an atom.
  char *text = parse_token(ptr, line);
  if (text == NULL) {
    return NULL;
  }

  // The node keeps its own copy, so the temporary token can go right away.
  ExprNode *atom = expr_node_create(text, line);
  free(text);
  return atom;
}
// Entry point: parse the first expression found in `source`.
//
// source:     NUL-terminated source text.
// source_len: only checked against zero here; scanning itself stops at the
//             NUL terminator.
//
// Returns the parsed tree (release it with expr_free), or NULL when the
// input is missing, empty, or contains only whitespace and comments. Every
// node is created with line number 1.
ExprNode *expr_parse(const char *source, size_t source_len) {
  if (source == NULL || source_len == 0) {
    return NULL;
  }

  const char *cursor = skip_whitespace(source);
  if (*cursor == '\0') {
    return NULL;
  }

  return parse_expression(&cursor, 1);
}
// Recursively release a node: its token, every child subtree, the child
// array, and finally the node itself. A NULL node is ignored.
void expr_free(ExprNode *node) {
  if (node == NULL) {
    return;
  }

  free(node->token);

  ExprNode **child = node->children;
  for (size_t left = node->child_count; left > 0; left--) {
    expr_free(*child++);
  }

  free(node->children);
  free(node);
}
// Debug helper: dump the tree to stdout, one node per line, with one space
// of indentation per nesting level. A NULL node prints nothing.
void expr_print(ExprNode *node, int indent) {
  if (node == NULL) {
    return;
  }

  int col = 0;
  while (col < indent) {
    printf(" ");
    col++;
  }

  if (node->child_count > 0) {
    // List: header line followed by each child one level deeper
    printf("List: '%s' (line %d) [%zu children]\n", node->token, node->line,
           node->child_count);
    for (size_t k = 0; k < node->child_count; k++) {
      expr_print(node->children[k], indent + 1);
    }
    return;
  }

  // Atom
  printf("Atom: '%s' (line %d)\n", node->token, node->line);
}

View File

@ -1,25 +0,0 @@
#ifndef PARSER_H
#define PARSER_H
#include <stddef.h> // for size_t
// Forward declare
typedef struct ExprNode ExprNode;
// Node type: atom or list.
// A node is an atom when child_count is 0 and a list otherwise. Nodes are
// released as a whole tree with expr_free().
struct ExprNode {
char *token; // For atoms: the value ("123", "$0", "add")
// For lists: the operator (first token)
// Freed together with the node by expr_free().
ExprNode **children; // Array of child nodes (NULL if atom)
// The array itself is also freed by expr_free().
size_t child_count; // 0 if atom
int line; // Source line number (for errors)
};
ExprNode *expr_parse(const char *source, size_t source_len);
ExprNode* expand_macros(ExprNode* node);
ExprNode* expand_lambda(ExprNode* lambda_node);
void expr_free(ExprNode *node);
void expr_print(ExprNode *node, int indent);
void *safe_malloc(size_t size);
#endif

View File

@ -95,6 +95,21 @@ bool streq(const char *s1, const char *s2) {
return (*s1 == '\0' && *s2 == '\0'); return (*s1 == '\0' && *s2 == '\0');
} }
/*
 * Compare at most `length` leading characters of s1 and s2.
 *
 * Returns true when both pointers are nil, when the first `length`
 * characters are identical, or when both strings end at the same position
 * before `length` with identical contents. Returns false when exactly one
 * pointer is nil, when a character differs, or when only one of the strings
 * ends before `length` characters were compared.
 */
bool strleq(const char *s1, const char *s2, u32 length) {
  u32 pos;

  if (s1 == nil || s2 == nil) {
    /* Equal only if both are nil. */
    return s1 == s2;
  }

  for (pos = 0; pos < length; pos++) {
    if (s1[pos] == '\0' || s2[pos] == '\0') {
      /* One string ended: they match only if the other ended here too. */
      return s1[pos] == s2[pos];
    }
    if (s1[pos] != s2[pos]) {
      return false;
    }
  }

  return true;
}
u32 strlength(const char *str) { u32 strlength(const char *str) {
u32 i; u32 i;

View File

@ -4,6 +4,7 @@
#include "common.h" #include "common.h"
bool streq(const char *s1, const char *s2); bool streq(const char *s1, const char *s2);
bool strleq(const char *s1, const char *s2, u32 length);
i32 strcopy(char* to, const char *from, u32 length); i32 strcopy(char* to, const char *from, u32 length);
u32 strlength(const char *str); u32 strlength(const char *str);
u32 strnlength(const char *str, u32 max_len); u32 strnlength(const char *str, u32 max_len);

Binary file not shown.

View File

@ -28,8 +28,8 @@ function pln (str message $0)
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN terminal_namespace mode -> term; syscall OPEN terminal_namespace mode -> term;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE term message msg_length; syscall WRITE term message msg_length;
strlen new_line -> nl_length; string_length new_line -> nl_length;
syscall WRITE term nl nl_length; syscall WRITE term nl nl_length;
return; return;

View File

@ -40,10 +40,10 @@ function pln (str message $0)
load_immediate terminal_namespace -> ts; load_immediate terminal_namespace -> ts;
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN ts mode -> ts; syscall OPEN ts mode -> ts;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE ts message msg_length; syscall WRITE ts message msg_length;
load_immediate new_line -> nl; load_immediate new_line -> nl;
strlen nl -> nl_length; string_length nl -> nl_length;
syscall WRITE ts nl nl_length; syscall WRITE ts nl nl_length;
return; return;

Binary file not shown.

View File

@ -19,9 +19,9 @@ function pln (str message $0)
load_immediate terminal_namespace -> ts; load_immediate terminal_namespace -> ts;
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN ts mode -> ts; syscall OPEN ts mode -> ts;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE ts message msg_length; syscall WRITE ts message msg_length;
load_immediate new_line -> nl; load_immediate new_line -> nl;
strlen nl -> nl_length; string_length nl -> nl_length;
syscall WRITE ts nl nl_length; syscall WRITE ts nl nl_length;
return; return;

Binary file not shown.

View File

@ -2,7 +2,7 @@ global str terminal_namespace = "/dev/term/0";
global str prompt = "Enter a string:"; global str prompt = "Enter a string:";
global str new_line = "\n"; global str new_line = "\n";
function main (); function main ()
real a $0; real a $0;
int i $1; int i $1;
int mode $11; int mode $11;
@ -13,7 +13,7 @@ function main ();
load_immediate 0 -> $2; load_immediate 0 -> $2;
load_immediate -1 -> $3; load_immediate -1 -> $3;
load_immediate 5.0 -> $5; load_immediate 5.0 -> $5;
loop loop_body; loop loop_body
add_real a $5 -> a; add_real a $5 -> a;
add_int i $3 -> i; add_int i $3 -> i;
jump_ge_int loop_body i $2; jump_ge_int loop_body i $2;
@ -50,9 +50,9 @@ function pln (str message $0);
load_immediate terminal_namespace -> ts; load_immediate terminal_namespace -> ts;
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN ts mode -> ts; syscall OPEN ts mode -> ts;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE ts message msg_length ; syscall WRITE ts message msg_length ;
load_immediate new_line -> nl; load_immediate new_line -> nl;
strlen nl -> nl_length; string_length nl -> nl_length;
syscall WRITE ts nl nl_length; syscall WRITE ts nl nl_length;
return; return;

Binary file not shown.

View File

@ -32,9 +32,9 @@ function pln (str message $0)
load_immediate terminal_namespace -> ts; load_immediate terminal_namespace -> ts;
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN ts mode -> ts; syscall OPEN ts mode -> ts;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE ts message msg_length; syscall WRITE ts message msg_length;
load_immediate new_line -> nl; load_immediate new_line -> nl;
strlen nl -> nl_length; string_length nl -> nl_length;
syscall WRITE ts nl nl_length; syscall WRITE ts nl nl_length;
return; return;

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -3,12 +3,12 @@ global real x = 1.0;
global real y = 1.0; global real y = 1.0;
function main () function main ()
real x $0; real a $0;
load_absolute_32 x -> x; load_absolute_32 x -> a;
real y $1; real b $1;
load_absolute_32 y -> y; load_absolute_32 y -> b;
real result $2; real result $2;
add_real x y -> result; add_real a b -> result;
str result_str $3; str result_str $3;
real_to_string result -> result_str; real_to_string result -> result_str;
call pln result_str -> void; call pln result_str -> void;
@ -23,9 +23,9 @@ function pln (str message $0)
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN terminal_namespace mode -> term; syscall OPEN terminal_namespace mode -> term;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE term message msg_length; syscall WRITE term message msg_length;
load_address new_line -> nl; load_address new_line -> nl;
strlen nl -> nl_length; string_length nl -> nl_length;
syscall WRITE term nl nl_length; syscall WRITE term nl nl_length;
return; return;

Binary file not shown.

View File

@ -81,9 +81,9 @@ function pln (str message $0)
load_immediate terminal_namespace -> ts; load_immediate terminal_namespace -> ts;
load_immediate 0 -> mode; load_immediate 0 -> mode;
syscall OPEN ts mode -> ts; syscall OPEN ts mode -> ts;
strlen message -> msg_length; string_length message -> msg_length;
syscall WRITE ts message msg_length ; syscall WRITE ts message msg_length ;
load_immediate new_line -> nl; load_immediate new_line -> nl;
strlen nl -> nl_length; string_length nl -> nl_length;
syscall WRITE ts nl nl_length; syscall WRITE ts nl nl_length;
return; return;