refactor strings, add loop to lexer

This commit is contained in:
zongor 2025-11-24 21:40:57 -08:00
parent 6f47ee7ea1
commit 7b8059e6c7
5 changed files with 144 additions and 182 deletions

View File

@ -3,6 +3,7 @@
#include "../../vm/fixed.h" #include "../../vm/fixed.h"
#include "../../vm/libc.h" #include "../../vm/libc.h"
#include "../../vm/opcodes.h" #include "../../vm/opcodes.h"
#include "lexer.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -42,13 +43,13 @@ u32 names_table_add(NamesTable *table, const char *name) {
return index; return index;
} }
u32 symbol_table_add(SymbolTable *table, Symbol s) { u32 symbol_table_add(SymbolTable *table, Symbol *s) {
if (table->count >= table->capacity) { if (table->count >= table->capacity) {
table->capacity *= 2; table->capacity *= 2;
table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol));
} }
table->symbols[table->count] = s; table->symbols[table->count] = *s;
u32 index = table->count; u32 index = table->count;
table->count++; table->count++;
return index; return index;
@ -58,7 +59,7 @@ Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table,
const char *name) { const char *name) {
for (u32 i = 0; i < nt->count; i++) { for (u32 i = 0; i < nt->count; i++) {
if (strcmp(nt->names[i], name) == 0) { if (strcmp(nt->names[i], name) == 0) {
for (int j = 0; j < table->count; j++) { for (u32 j = 0; j < table->count; j++) {
if (table->symbols[j].name == i) { if (table->symbols[j].name == i) {
return &table->symbols[j]; return &table->symbols[j];
} }
@ -93,17 +94,17 @@ int parse_register(const char *reg_str) {
} }
u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) {
// Handle symbol references (e.g., &label) // symbol references (e.g., &label)
if (ref[0] == '&') { if (ref[0] == '&') {
return get_ref(nt, table, ref + 1); return get_ref(nt, table, ref + 1);
} }
// fixed-point numbers (e.g., 0.5) // fixed-point numbers
if (strchr(ref, '.')) { if (strchr(ref, '.')) {
return float_to_fixed(atof(ref)); return float_to_fixed(atof(ref));
} }
// decimal literals (e.g., 7) // decimal literals
char *endptr; char *endptr;
u32 value = (u32)strtoul(ref, &endptr, 10); u32 value = (u32)strtoul(ref, &endptr, 10);
@ -114,73 +115,7 @@ u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) {
return value; return value;
} }
static char *unwrap_string(const char *quoted_str) { bool global(VM *vm, NamesTable *nt, SymbolTable *st) {
if (!quoted_str)
return nil;
size_t len = strlen(quoted_str);
if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') {
// Remove quotes and process escape sequences
const char *src = quoted_str + 1;
size_t src_len = len - 2;
// First pass: calculate the actual length needed after escape processing
size_t actual_len = 0;
for (size_t i = 0; i < src_len; ++i) {
if (src[i] == '\\' && i + 1 < src_len) {
// Escape sequence
actual_len++;
i++; // Skip the next character
} else {
actual_len++;
}
}
char *unwrapped = (char *)malloc(actual_len + 1);
size_t dst_idx = 0;
// Second pass: process escape sequences
for (size_t i = 0; i < src_len; ++i) {
if (src[i] == '\\' && i + 1 < src_len) {
// Handle escape sequences
switch (src[i + 1]) {
case 'n':
unwrapped[dst_idx++] = '\n';
break;
case 't':
unwrapped[dst_idx++] = '\t';
break;
case 'r':
unwrapped[dst_idx++] = '\r';
break;
case '\\':
unwrapped[dst_idx++] = '\\';
break;
case '"':
unwrapped[dst_idx++] = '"';
break;
case '\'':
unwrapped[dst_idx++] = '\'';
break;
default:
// Unknown escape, keep both characters
unwrapped[dst_idx++] = src[i];
unwrapped[dst_idx++] = src[i + 1];
break;
}
i++; // Skip the next character
} else {
unwrapped[dst_idx++] = src[i];
}
}
unwrapped[dst_idx] = '\0';
return unwrapped;
}
// Not quoted, return copy
return strdup(quoted_str);
}
Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) {
Symbol *s = (Symbol *)malloc(sizeof(Symbol)); Symbol *s = (Symbol *)malloc(sizeof(Symbol));
ValueType t; ValueType t;
@ -190,14 +125,14 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) {
t.type = I8; t.type = I8;
t.size = 1; t.size = 1;
break; break;
case TOKEN_TYPE_I16:
t.type = I16;
t.size = 2;
break;
case TOKEN_TYPE_U8: case TOKEN_TYPE_U8:
t.type = U8; t.type = U8;
t.size = 1; t.size = 1;
break; break;
case TOKEN_TYPE_I16:
t.type = I16;
t.size = 2;
break;
case TOKEN_TYPE_U16: case TOKEN_TYPE_U16:
t.type = U16; t.type = U16;
t.size = 2; t.size = 2;
@ -220,16 +155,16 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) {
case TOKEN_IDENTIFIER: case TOKEN_IDENTIFIER:
break; break;
default: default:
return nil; return false;
} }
Token eq = nextToken(); Token eq = nextToken();
if (eq.type != TOKEN_EQ) if (eq.type != TOKEN_EQ)
return nil; return false;
Token name = nextToken(); Token name = nextToken();
if (name.type != TOKEN_IDENTIFIER) if (name.type != TOKEN_IDENTIFIER)
return nil; return false;
s->name = names_table_add(nt, name.start); s->name = names_table_add(nt, name.start);
@ -249,37 +184,59 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) {
vm->frames[vm->fp].end += t.size; vm->frames[vm->fp].end += t.size;
break; break;
case TOKEN_LITERAL_STR: { case TOKEN_LITERAL_STR: {
char *unwrapped = unwrap_string(value.start); const char* src = value.start;
int len = strlen(unwrapped); u32 len = 0;
u32 i = 0;
u32 addr = vm->mp; while (i < value.length) {
u32 size = len + 1 + 4; char c = src[i++];
t.size = size; if (c == '\\' && i < value.length) {
switch (src[i++]) {
case 'n': c = '\n'; break;
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case '\\': case '"': case '\'': break; // Keep as-is
default: i--; // Rewind for unknown escapes
}
}
write_u8(vm, memory, addr + 4 + len++, c);
}
vm->mp += size; u32 size = len + 5; // 4 (len) + dst_len + 1 (null)
vm->frames[vm->fp].end += size; vm->mp += size;
vm->frames[vm->fp].end += size;
write_u32(vm, memory, addr, len); write_u32(vm, memory, addr, len);
for (int i = 0; i < len; i++) { write_u8(vm, memory, addr + 4 + len, '\0');
write_u8(vm, memory, addr + 4 + i, unwrapped[i]); break;
}
write_u8(vm, memory, addr + 4 + len, '\0');
free(unwrapped);
break;
} }
default: default:
return nil; return false;
} }
s->type = t; s->type = t;
return s; symbol_table_add(st, s);
return true;
} }
Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) { bool function(VM *vm, NamesTable *nt, SymbolTable *st) {
USED(vm); USED(vm);
USED(nt); USED(nt);
USED(st); USED(st);
return nil; return true;
}
/*
 * Placeholder handler for typed variable declarations.
 *
 * Nothing is parsed or emitted yet: the three arguments are only passed
 * to USED() (presumably to silence unused-parameter warnings -- confirm
 * against the macro definition) and success is reported unconditionally,
 * so the caller's "ERROR at line" branch is never taken for variables.
 */
bool variable(VM *vm, NamesTable *nt, SymbolTable *st) {
  USED(st);
  USED(nt);
  USED(vm);

  return true;
}
/*
 * Placeholder handler for label-introducing keywords; assemble() calls it
 * for TOKEN_KEYWORD_LOOP and TOKEN_KEYWORD_ELSE.
 *
 * No label is recorded yet: the arguments are only passed to USED() and
 * the function always returns true, so the caller never reports an error.
 */
bool label(VM *vm, NamesTable *nt, SymbolTable *st) {
USED(vm);
USED(nt);
USED(st);
return true;
} }
void assemble(VM *vm, char *source) { void assemble(VM *vm, char *source) {
@ -306,7 +263,10 @@ void assemble(VM *vm, char *source) {
} }
if (token.type == TOKEN_KEYWORD_FN) { if (token.type == TOKEN_KEYWORD_FN) {
function(vm, nt, st); if (!function(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
} }
if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 ||
@ -314,7 +274,18 @@ void assemble(VM *vm, char *source) {
token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 ||
token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL ||
token.type == TOKEN_TYPE_STR) { token.type == TOKEN_TYPE_STR) {
if (!variable(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
}
if (token.type == TOKEN_KEYWORD_LOOP ||
token.type == TOKEN_KEYWORD_ELSE) {
if (!label(vm, nt, st)) {
printf("ERROR at line %d: %.*s\n", token.line, token.length,
token.start);
}
} }
if (token.type == TOKEN_IDENTIFIER) { if (token.type == TOKEN_IDENTIFIER) {

View File

@ -149,6 +149,8 @@ static TokenType identifierType() {
return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE);
case 'o': case 'o':
return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR);
case '3':
return checkKeyword(1, 1, "2", TOKEN_TYPE_REAL);
} }
return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN); return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN);
} }
@ -160,6 +162,12 @@ static TokenType identifierType() {
return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF);
case 's': case 's':
return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS);
case '8':
return checkKeyword(2, 0, "", TOKEN_TYPE_I8);
case '1':
return checkKeyword(2, 1, "6", TOKEN_TYPE_I16);
case '3':
return checkKeyword(2, 1, "2", TOKEN_TYPE_INT);
case 'n': case 'n':
if (lexer.current - lexer.start > 2) { if (lexer.current - lexer.start > 2) {
switch (lexer.start[2]) { switch (lexer.start[2]) {
@ -242,6 +250,12 @@ static TokenType identifierType() {
switch (lexer.start[1]) { switch (lexer.start[1]) {
case 's': case 's':
return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE); return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE);
case '8':
return checkKeyword(2, 0, "", TOKEN_TYPE_U8);
case '1':
return checkKeyword(2, 1, "6", TOKEN_TYPE_U16);
case '3':
return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT);
} }
} }
break; break;
@ -257,32 +271,8 @@ static TokenType identifierType() {
break; break;
case 'g': case 'g':
return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL);
case 'I': case 'l':
if (lexer.current - lexer.start > 1) { return checkKeyword(1, 4, "oop", TOKEN_KEYWORD_LOOP);
switch (lexer.start[1]) {
case '8':
return checkKeyword(2, 0, "", TOKEN_TYPE_I8);
case '1':
return checkKeyword(2, 1, "6", TOKEN_TYPE_I16);
case '3':
return checkKeyword(2, 1, "2", TOKEN_TYPE_INT);
}
}
break;
case 'U':
if (lexer.current - lexer.start > 1) {
switch (lexer.start[1]) {
case '8':
return checkKeyword(2, 0, "", TOKEN_TYPE_U8);
case '1':
return checkKeyword(2, 1, "6", TOKEN_TYPE_U16);
case '3':
return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT);
}
}
break;
case 'F':
return checkKeyword(1, 2, "32", TOKEN_TYPE_REAL);
} }
return TOKEN_IDENTIFIER; return TOKEN_IDENTIFIER;

View File

@ -35,6 +35,7 @@ typedef enum {
TOKEN_KEYWORD_WRITE, TOKEN_KEYWORD_WRITE,
TOKEN_KEYWORD_REFRESH, TOKEN_KEYWORD_REFRESH,
TOKEN_KEYWORD_CLOSE, TOKEN_KEYWORD_CLOSE,
TOKEN_KEYWORD_LOOP,
TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_NIL,
TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_TRUE,
TOKEN_KEYWORD_FALSE, TOKEN_KEYWORD_FALSE,

View File

@ -9,32 +9,32 @@ global byte SELECTED_COLOR = 255
function main () function main ()
# Open screen # Open screen
plex screen is $0 plex screen $0
str screen_name is $18 str screen_name $18
int mode is $11 int mode $11
nat screen_buffer is $21 nat screen_buffer $21
# use load immediate because it is a pointer to a string, not a value # use load immediate because it is a pointer to a string, not a value
load_address &screen_namespace -> screen_name load_address &screen_namespace -> screen_name
load_immediate 0 -> mode load_immediate 0 -> mode
syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0);
nat width is $20 nat width $20
nat size is $22 nat size $22
load_offset_32 screen 8 -> width # load width load_offset_32 screen 8 -> width # load width
load_offset_32 screen 12 -> size # load size load_offset_32 screen 12 -> size # load size
load_immediate 16 -> $1 # offset for screen buffer load_immediate 16 -> $1 # offset for screen buffer
add_nat screen $1 -> screen_buffer add_nat screen $1 -> screen_buffer
# open mouse # open mouse
plex mouse is $15 plex mouse $15
str mouse_name is $16 str mouse_name $16
load_address &mouse_namespace -> mouse_name load_address &mouse_namespace -> mouse_name
syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0);
byte color is $1 byte color $1
nat x_pos is $12 nat x_pos $12
nat y_pos is $13 nat y_pos $13
load_absolute_32 &BLACK -> color load_absolute_32 &BLACK -> color
load_immediate 1 -> x_pos load_immediate 1 -> x_pos
@ -49,23 +49,23 @@ function main ()
# screen.draw# # screen.draw#
syscall WRITE screen screen_buffer size syscall WRITE screen screen_buffer size
nat zero is $11 nat zero $11
draw_loop: loop draw_loop
# load mouse click data # load mouse click data
syscall REFRESH mouse syscall REFRESH mouse
byte left_down is $9 byte left_down $9
load_offset_8 mouse 16 -> left_down # load btn1 pressed load_offset_8 mouse 16 -> left_down # load btn1 pressed
jump_eq_nat &draw_loop left_down zero jump_eq_nat &draw_loop left_down zero
nat mouse_x is $7 nat mouse_x $7
nat mouse_y is $8 nat mouse_y $8
load_offset_32 mouse 8 -> mouse_x # load x load_offset_32 mouse 8 -> mouse_x # load x
load_offset_32 mouse 12 -> mouse_y # load y load_offset_32 mouse 12 -> mouse_y # load y
nat box_size is $14 nat box_size $14
load_immediate 20 -> box_size load_immediate 20 -> box_size
# first row # first row
@ -84,10 +84,10 @@ function main ()
syscall WRITE screen screen_buffer size syscall WRITE screen screen_buffer size
byte selected_color is $25 byte selected_color $25
load_absolute_32 &SELECTED_COLOR -> selected_color load_absolute_32 &SELECTED_COLOR -> selected_color
nat brush_size is $19 nat brush_size $19
load_immediate 5 -> brush_size load_immediate 5 -> brush_size
call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size
@ -97,15 +97,15 @@ function main ()
# Flush and exit # Flush and exit
exit 0 exit 0
function set_color_if_clicked (int click_x is $0, int click_y is $1, function set_color_if_clicked (int click_x $0, int click_y $1,
int box_x is $2, int box_y is $3, byte color is $4, int box_size is $5) int box_x $2, int box_y $3, byte color $4, int box_size $5)
# Compute right # Compute right
int right_edge is $6 int right_edge $6
add_int box_x box_size -> right_edge add_int box_x box_size -> right_edge
# Compute bottom = box_y + box_size # Compute bottom = box_y + box_size
int bottom_edge is $7 int bottom_edge $7
add_int box_y box_size -> bottom_edge add_int box_y box_size -> bottom_edge
# Bounds check: x in [box_x, right] and y in [box_y, bottom] # Bounds check: x in [box_x, right] and y in [box_y, bottom]
@ -116,17 +116,17 @@ function set_color_if_clicked (int click_x is $0, int click_y is $1,
store_absolute_8 &SELECTED_COLOR color store_absolute_8 &SELECTED_COLOR color
fail: else fail
return return
function draw_outlined_swatch(nat base is $0, function draw_outlined_swatch(nat base $0,
byte color is $1, int x is $2, int y is $3, int width is $4) byte color $1, int x $2, int y $3, int width $4)
# Constants # Constants
nat background_color is $5 nat background_color $5
load_absolute_32 &GRAY -> background_color load_absolute_32 &GRAY -> background_color
byte selected_color is $10 byte selected_color $10
load_absolute_32 &SELECTED_COLOR -> selected_color load_absolute_32 &SELECTED_COLOR -> selected_color
jump_eq_int &set_selected selected_color color jump_eq_int &set_selected selected_color color
@ -135,13 +135,13 @@ function draw_outlined_swatch(nat base is $0,
load_absolute_32 &DARK_GRAY -> background_color load_absolute_32 &DARK_GRAY -> background_color
end_set_selected: end_set_selected:
nat outline_size is $6 nat outline_size $6
load_immediate 20 -> outline_size load_immediate 20 -> outline_size
nat fill_size is $7 nat fill_size $7
load_immediate 17 -> fill_size load_immediate 17 -> fill_size
nat offset is $8 nat offset $8
load_immediate 2 -> offset load_immediate 2 -> offset
call &draw_box base width background_color x y outline_size outline_size call &draw_box base width background_color x y outline_size outline_size
@ -153,28 +153,28 @@ function draw_outlined_swatch(nat base is $0,
return return
function draw_box (nat base is $0, nat screen_width is $1, function draw_box (nat base $0, nat screen_width $1,
byte color is $2, nat x_start is $3, nat y_start is $4, nat width is $5, nat height is $6) byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6)
# Compute start address: base + y*640 + x # Compute start address: base + y*640 + x
nat offset is $15 nat offset $15
mul_int y_start screen_width -> offset mul_int y_start screen_width -> offset
add_int offset x_start -> offset add_int offset x_start -> offset
add_nat offset base -> offset add_nat offset base -> offset
nat fat_ptr_size is $25 nat fat_ptr_size $25
load_immediate 4 -> fat_ptr_size load_immediate 4 -> fat_ptr_size
add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size
int i is $30 int i $30
load_immediate 1 -> i load_immediate 1 -> i
int zero is $26 int zero $26
load_immediate 0 -> zero load_immediate 0 -> zero
int row_end is $27 int row_end $27
nat pixel_ptr is $29 nat pixel_ptr $29
draw_box_outer: loop draw_box_outer
add_int offset width -> row_end # current + width add_int offset width -> row_end # current + width
register_move offset -> pixel_ptr # set pixel point register_move offset -> pixel_ptr # set pixel point
memset_8 pixel_ptr color width # draw row memset_8 pixel_ptr color width # draw row

View File

@ -1,31 +1,31 @@
global const real x = 1.0 global str terminal_namespace = "/dev/term/0"
global const real y = 1.0 global real x = 1.0
global real y = 1.0
function main () function main ()
real x is $0 real x $0
load_absolute_32 &x -> x load_absolute_32 &x -> x
real y is $1 real y $1
load_absolute_32 &y -> y load_absolute_32 &y -> y
real result is $2 real result $2
add_real x y -> result add_real x y -> result
str result_str is $3 str result_str $3
real_to_string result -> result_str real_to_string result -> result_str
call &pln result_str call &pln result_str
exit 0 exit 0
function pln (str message is $0) function pln (str message $0)
str term is $1 str term $1
int msg_length is $2 int msg_length $2
str nl is $3 str nl $3
int nl_length is $4 int nl_length $4
int mode is $5 int mode $5
malloc_immediate "/dev/term/0" -> term
load_immediate 0 -> mode load_immediate 0 -> mode
syscall OPEN term mode -> term syscall OPEN &terminal_namespace mode -> term
strlen message -> msg_length strlen message -> msg_length
syscall WRITE term message msg_length syscall WRITE term message msg_length
malloc_immediate "\n" -> nl load_address new_line -> nl
strlen nl -> nl_length strlen nl -> nl_length
syscall WRITE term nl nl_length syscall WRITE term nl nl_length
return return