diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index 8a21f2d..39bf0da 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -3,6 +3,7 @@ #include "../../vm/fixed.h" #include "../../vm/libc.h" #include "../../vm/opcodes.h" +#include "lexer.h" #include #include #include @@ -42,13 +43,13 @@ u32 names_table_add(NamesTable *table, const char *name) { return index; } -u32 symbol_table_add(SymbolTable *table, Symbol s) { +u32 symbol_table_add(SymbolTable *table, Symbol *s) { if (table->count >= table->capacity) { table->capacity *= 2; table->symbols = realloc(table->symbols, table->capacity * sizeof(Symbol)); } - table->symbols[table->count] = s; + table->symbols[table->count] = *s; u32 index = table->count; table->count++; return index; @@ -58,7 +59,7 @@ Symbol *symbol_table_lookup(NamesTable *nt, SymbolTable *table, const char *name) { for (u32 i = 0; i < nt->count; i++) { if (strcmp(nt->names[i], name) == 0) { - for (int j = 0; j < table->count; j++) { + for (u32 j = 0; j < table->count; j++) { if (table->symbols[j].name == i) { return &table->symbols[j]; } @@ -93,17 +94,17 @@ int parse_register(const char *reg_str) { } u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { - // Handle symbol references (e.g., &label) + // symbol references (e.g., &label) if (ref[0] == '&') { return get_ref(nt, table, ref + 1); } - // fixed-point numbers (e.g., 0.5) + // fixed-point numbers if (strchr(ref, '.')) { return float_to_fixed(atof(ref)); } - // decimal literals (e.g., 7) + // decimal literals char *endptr; u32 value = (u32)strtoul(ref, &endptr, 10); @@ -114,73 +115,7 @@ u32 resolve_symbol(NamesTable *nt, SymbolTable *table, const char *ref) { return value; } -static char *unwrap_string(const char *quoted_str) { - if (!quoted_str) - return nil; - - size_t len = strlen(quoted_str); - if (len >= 2 && quoted_str[0] == '"' && quoted_str[len - 1] == '"') { - // Remove quotes and process escape 
sequences - const char *src = quoted_str + 1; - size_t src_len = len - 2; - - // First pass: calculate the actual length needed after escape processing - size_t actual_len = 0; - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Escape sequence - actual_len++; - i++; // Skip the next character - } else { - actual_len++; - } - } - - char *unwrapped = (char *)malloc(actual_len + 1); - size_t dst_idx = 0; - - // Second pass: process escape sequences - for (size_t i = 0; i < src_len; ++i) { - if (src[i] == '\\' && i + 1 < src_len) { - // Handle escape sequences - switch (src[i + 1]) { - case 'n': - unwrapped[dst_idx++] = '\n'; - break; - case 't': - unwrapped[dst_idx++] = '\t'; - break; - case 'r': - unwrapped[dst_idx++] = '\r'; - break; - case '\\': - unwrapped[dst_idx++] = '\\'; - break; - case '"': - unwrapped[dst_idx++] = '"'; - break; - case '\'': - unwrapped[dst_idx++] = '\''; - break; - default: - // Unknown escape, keep both characters - unwrapped[dst_idx++] = src[i]; - unwrapped[dst_idx++] = src[i + 1]; - break; - } - i++; // Skip the next character - } else { - unwrapped[dst_idx++] = src[i]; - } - } - unwrapped[dst_idx] = '\0'; - return unwrapped; - } - // Not quoted, return copy - return strdup(quoted_str); -} - -Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { +bool global(VM *vm, NamesTable *nt, SymbolTable *st) { Symbol *s = (Symbol *)malloc(sizeof(Symbol)); ValueType t; @@ -190,14 +125,14 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { t.type = I8; t.size = 1; break; - case TOKEN_TYPE_I16: - t.type = I16; - t.size = 2; - break; case TOKEN_TYPE_U8: t.type = U8; t.size = 1; break; + case TOKEN_TYPE_I16: + t.type = I16; + t.size = 2; + break; case TOKEN_TYPE_U16: t.type = U16; t.size = 2; @@ -220,16 +155,16 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { case TOKEN_IDENTIFIER: break; default: - return nil; + return false; } Token eq = nextToken(); if (eq.type != TOKEN_EQ) - return nil; 
+ return false; Token name = nextToken(); if (name.type != TOKEN_IDENTIFIER) - return nil; + return false; s->name = names_table_add(nt, name.start); @@ -249,37 +184,59 @@ Symbol *global(VM *vm, NamesTable *nt, SymbolTable *st) { vm->frames[vm->fp].end += t.size; break; case TOKEN_LITERAL_STR: { - char *unwrapped = unwrap_string(value.start); - int len = strlen(unwrapped); + const char* src = value.start; + u32 addr = vm->mp, len = 0; // addr: where this string's length prefix is written + u32 i = 0; - u32 addr = vm->mp; - u32 size = len + 1 + 4; - t.size = size; + while (i < value.length) { + char c = src[i++]; + if (c == '\\' && i < value.length) { + switch (src[i++]) { + case 'n': c = '\n'; break; + case 't': c = '\t'; break; + case 'r': c = '\r'; break; + case '\\': case '"': case '\'': c = src[i - 1]; break; // Emit the escaped character itself, not the backslash + default: i--; // Rewind for unknown escapes + } + } + write_u8(vm, memory, addr + 4 + len++, c); + } - vm->mp += size; - vm->frames[vm->fp].end += size; - - write_u32(vm, memory, addr, len); - for (int i = 0; i < len; i++) { - write_u8(vm, memory, addr + 4 + i, unwrapped[i]); - } - write_u8(vm, memory, addr + 4 + len, '\0'); - free(unwrapped); - break; + u32 size = len + 5; // 4 (len) + dst_len + 1 (null) + vm->mp += size; + vm->frames[vm->fp].end += size; + write_u32(vm, memory, addr, len); + write_u8(vm, memory, addr + 4 + len, '\0'); + break; } default: - return nil; + return false; } s->type = t; - return s; + symbol_table_add(st, s); + return true; } -Symbol *function(VM *vm, NamesTable *nt, SymbolTable *st) { +bool function(VM *vm, NamesTable *nt, SymbolTable *st) { USED(vm); USED(nt); USED(st); - return nil; + return true; +} + +bool variable(VM *vm, NamesTable *nt, SymbolTable *st) { + USED(vm); + USED(nt); + USED(st); + return true; +} + +bool label(VM *vm, NamesTable *nt, SymbolTable *st) { + USED(vm); + USED(nt); + USED(st); + return true; } void assemble(VM *vm, char *source) { @@ -306,7 +263,10 @@ void assemble(VM *vm, char *source) { } if (token.type == TOKEN_KEYWORD_FN) { - function(vm, nt, st); + if 
(!function(vm, nt, st)) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || @@ -314,7 +274,18 @@ void assemble(VM *vm, char *source) { token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { + if (!variable(vm, nt, st)) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } + } + if (token.type == TOKEN_KEYWORD_LOOP || + token.type == TOKEN_KEYWORD_ELSE) { + if (!label(vm, nt, st)) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } } if (token.type == TOKEN_IDENTIFIER) { diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index ee0bdab..655aa94 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -149,6 +149,8 @@ static TokenType identifierType() { return checkKeyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); case 'o': return checkKeyword(2, 1, "r", TOKEN_KEYWORD_FOR); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_REAL); // "f3" is already matched; only the trailing "2" remains, as in the i32/u32 entries } return checkKeyword(1, 7, "unction", TOKEN_KEYWORD_FN); } @@ -160,6 +162,12 @@ static TokenType identifierType() { return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); case 's': return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); case 'n': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { @@ -242,6 +250,12 @@ static TokenType identifierType() { switch (lexer.start[1]) { case 's': return checkKeyword(2, 1, "e", TOKEN_KEYWORD_USE); + case '8': + return checkKeyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); } } break; @@ -257,32 +271,8 @@ 
static TokenType identifierType() { break; case 'g': return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); - case 'I': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case '8': - return checkKeyword(2, 0, "", TOKEN_TYPE_I8); - case '1': - return checkKeyword(2, 1, "6", TOKEN_TYPE_I16); - case '3': - return checkKeyword(2, 1, "2", TOKEN_TYPE_INT); - } - } - break; - case 'U': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case '8': - return checkKeyword(2, 0, "", TOKEN_TYPE_U8); - case '1': - return checkKeyword(2, 1, "6", TOKEN_TYPE_U16); - case '3': - return checkKeyword(2, 1, "2", TOKEN_TYPE_NAT); - } - } - break; - case 'F': - return checkKeyword(1, 2, "32", TOKEN_TYPE_REAL); + case 'l': + return checkKeyword(1, 3, "oop", TOKEN_KEYWORD_LOOP); // length is the size of the remainder "oop", as in the sibling entries } return TOKEN_IDENTIFIER; diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index eaa137c..5ad14ef 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -35,6 +35,7 @@ typedef enum { TOKEN_KEYWORD_WRITE, TOKEN_KEYWORD_REFRESH, TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_LOOP, TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_FALSE, diff --git a/test/paint.ul.ir b/test/paint.ul.ir index ab40219..e7ebc4c 100644 --- a/test/paint.ul.ir +++ b/test/paint.ul.ir @@ -9,32 +9,32 @@ global byte SELECTED_COLOR = 255 function main () # Open screen - plex screen is $0 - str screen_name is $18 - int mode is $11 - nat screen_buffer is $21 + plex screen $0 + str screen_name $18 + int mode $11 + nat screen_buffer $21 - # use load immediate because it a pointer to a string, not a value load_address &screen_namespace -> screen_name load_immediate 0 -> mode syscall OPEN screen_name mode -> screen # Screen screen = open("/dev/screen/0", 0); - nat width is $20 - nat size is $22 + nat width $20 + nat size $22 load_offset_32 screen 8 -> width # load width load_offset_32 screen 12 -> 
size # load size load_immediate 16 -> $1 # offset for screen buffer add_nat screen $1 -> screen_buffer # open mouse - plex mouse is $15 - str mouse_name is $16 + plex mouse $15 + str mouse_name $16 load_address &mouse_namespace -> mouse_name syscall OPEN mouse_name mode -> mouse # Mouse mouse = open("/dev/mouse/0", 0); - byte color is $1 - nat x_pos is $12 - nat y_pos is $13 + byte color $1 + nat x_pos $12 + nat y_pos $13 load_absolute_32 &BLACK -> color load_immediate 1 -> x_pos @@ -49,23 +49,23 @@ function main () # screen.draw# syscall WRITE screen screen_buffer size - nat zero is $11 + nat zero $11 - draw_loop: + loop draw_loop # load mouse click data syscall REFRESH mouse - byte left_down is $9 + byte left_down $9 load_offset_8 mouse 16 -> left_down # load btn1 pressed jump_eq_nat &draw_loop left_down zero - nat mouse_x is $7 - nat mouse_y is $8 + nat mouse_x $7 + nat mouse_y $8 load_offset_32 mouse 8 -> mouse_x # load x load_offset_32 mouse 12 -> mouse_y # load y - nat box_size is $14 + nat box_size $14 load_immediate 20 -> box_size # first row @@ -84,10 +84,10 @@ function main () syscall WRITE screen screen_buffer size - byte selected_color is $25 + byte selected_color $25 load_absolute_32 &SELECTED_COLOR -> selected_color - nat brush_size is $19 + nat brush_size $19 load_immediate 5 -> brush_size call &draw_box screen_buffer width selected_color mouse_x mouse_y brush_size brush_size @@ -97,15 +97,15 @@ function main () # Flush and exit exit 0 -function set_color_if_clicked (int click_x is $0, int click_y is $1, - int box_x is $2, int box_y is $3, byte color is $4, int box_size is $5) +function set_color_if_clicked (int click_x $0, int click_y $1, + int box_x $2, int box_y $3, byte color $4, int box_size $5) # Compute right - int right_edge is $6 + int right_edge $6 add_int box_x box_size -> right_edge # Compute bottom = box_y + box_size - int bottom_edge is $7 + int bottom_edge $7 add_int box_y box_size -> bottom_edge # Bounds check: x in [box_x, right] and 
y in [box_y, bottom] @@ -116,17 +116,17 @@ function set_color_if_clicked (int click_x is $0, int click_y is $1, store_absolute_8 &SELECTED_COLOR color - fail: + else fail return -function draw_outlined_swatch(nat base is $0, - byte color is $1, int x is $2, int y is $3, int width is $4) +function draw_outlined_swatch(nat base $0, + byte color $1, int x $2, int y $3, int width $4) # Constants - nat background_color is $5 + nat background_color $5 load_absolute_32 &GRAY -> background_color - byte selected_color is $10 + byte selected_color $10 load_absolute_32 &SELECTED_COLOR -> selected_color jump_eq_int &set_selected selected_color color @@ -135,13 +135,13 @@ function draw_outlined_swatch(nat base is $0, load_absolute_32 &DARK_GRAY -> background_color end_set_selected: - nat outline_size is $6 + nat outline_size $6 load_immediate 20 -> outline_size - nat fill_size is $7 + nat fill_size $7 load_immediate 17 -> fill_size - nat offset is $8 + nat offset $8 load_immediate 2 -> offset call &draw_box base width background_color x y outline_size outline_size @@ -153,28 +153,28 @@ function draw_outlined_swatch(nat base is $0, return -function draw_box (nat base is $0, nat screen_width is $1, - byte color is $2, nat x_start is $3, nat y_start is $4, nat width is $5, nat height is $6) +function draw_box (nat base $0, nat screen_width $1, + byte color $2, nat x_start $3, nat y_start $4, nat width $5, nat height $6) # Compute start address: base + y*640 + x - nat offset is $15 + nat offset $15 mul_int y_start screen_width -> offset add_int offset x_start -> offset add_nat offset base -> offset - nat fat_ptr_size is $25 + nat fat_ptr_size $25 load_immediate 4 -> fat_ptr_size add_nat offset fat_ptr_size -> offset # need to add offset for fat pointer size - int i is $30 + int i $30 load_immediate 1 -> i - int zero is $26 + int zero $26 load_immediate 0 -> zero - int row_end is $27 - nat pixel_ptr is $29 + int row_end $27 + nat pixel_ptr $29 - draw_box_outer: + loop draw_box_outer 
add_int offset width -> row_end # current + width register_move offset -> pixel_ptr # set pixel point memset_8 pixel_ptr color width # draw row diff --git a/test/simple.ul.ir b/test/simple.ul.ir index 8f549d9..e4a1407 100644 --- a/test/simple.ul.ir +++ b/test/simple.ul.ir @@ -1,31 +1,32 @@ -global const real x = 1.0 -global const real y = 1.0 +global str terminal_namespace = "/dev/term/0" +global str new_line = "\n" +global real x = 1.0 +global real y = 1.0 function main () - real x is $0 + real x $0 load_absolute_32 &x -> x - real y is $1 + real y $1 load_absolute_32 &y -> y - real result is $2 + real result $2 add_real x y -> result - str result_str is $3 + str result_str $3 real_to_string result -> result_str call &pln result_str exit 0 -function pln (str message is $0) - str term is $1 - int msg_length is $2 - str nl is $3 - int nl_length is $4 - int mode is $5 +function pln (str message $0) + str term $1 + int msg_length $2 + str nl $3 + int nl_length $4 + int mode $5 - malloc_immediate "/dev/term/0" -> term load_immediate 0 -> mode - syscall OPEN term mode -> term + syscall OPEN &terminal_namespace mode -> term strlen message -> msg_length syscall WRITE term message msg_length - malloc_immediate "\n" -> nl + load_address &new_line -> nl strlen nl -> nl_length syscall WRITE term nl nl_length return