diff --git a/test/fib.ul b/test/fib.ul index 3067c10..b6bdb6f 100644 --- a/test/fib.ul +++ b/test/fib.ul @@ -16,7 +16,7 @@ function main() { * Recursively calculate fibonacci */ function fib(int n) int { - if (n < 2) return n; + if (n < 2) { return n; } return fib(n - 2) + fib(n - 1); } diff --git a/tools/compiler.c b/tools/compiler.c index 06f87a1..51019aa 100644 --- a/tools/compiler.c +++ b/tools/compiler.c @@ -1,9 +1,26 @@ -#include "parser.h" #include "compiler.h" +#include "parser.h" #include #include +#define DEBUG_COMPILER + +void emit_byte(u8 byte) { +#ifdef DEBUG_COMPILER + printf("code[%d] = %d\n", cp, byte); +#endif + code[cp] = byte; +} + +void emit_u32(u32 value) { +#ifdef DEBUG_COMPILER + printf("code[%d..%d] = %d\n", cp, cp + 3, value); +#endif + u32 *c = (u32 *)(&code[cp / 4]); + c[0] = value; +} + Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length, i32 scope_ref) { SymbolTable st = table->scopes[scope_ref]; @@ -30,7 +47,9 @@ u8 symbol_table_add(ScopeTable *table, Symbol s) { exit(1); } - if (table->scopes[table->scope_ref].count + 1 > 255) { + u8 current_index = table->scopes[table->scope_ref].count; + + if (current_index + 1 > 255) { fprintf(stderr, "Error: Only 255 symbols are allowed per scope" " first off: impressive; secondly:" " just create a new scope and keep going.\n"); @@ -48,10 +67,22 @@ u8 symbol_table_add(ScopeTable *table, Symbol s) { exit(1); } - table->scopes[table->scope_ref].symbols[table->scopes[table->scope_ref].count] = s; - u8 index = table->scopes[table->scope_ref].count; + /* set ref to current count for local */ + s.ref = current_index; + +#ifdef DEBUG_COMPILER + if (s.scope == VAR) { + printf("$%d = %s\n", s.ref, s.name); + } else if (s.scope == GLOBAL) { + printf("memory[%d] = %s\n", s.ref, s.name); + } else { + printf("code[%d] = %s\n", s.ref, s.name); + } +#endif + + table->scopes[table->scope_ref].symbols[current_index] = s; table->scopes[table->scope_ref].count++; - return index; + return current_index; } u32 get_ref(ScopeTable *st, const char *name, u32 length) { @@ -144,9 +175,554 @@ Token next_token_is(TokenType type) { return token; } +/** + * Global . + */ +bool define_global(ScopeTable *st, Token regType) { + u32 *globals = (u32 *)(mem); + Symbol s; + + switch (regType.type) { + case TOKEN_TYPE_BOOL: + s.type = BOOL; + s.size = 1; + break; + case TOKEN_TYPE_I8: + s.type = I8; + s.size = 1; + break; + case TOKEN_TYPE_U8: + s.type = U8; + s.size = 1; + break; + case TOKEN_TYPE_I16: + s.type = I16; + s.size = 2; + break; + case TOKEN_TYPE_U16: + s.type = U16; + s.size = 2; + break; + case TOKEN_TYPE_INT: + s.type = I32; + s.size = 4; + break; + case TOKEN_TYPE_NAT: + s.type = U32; + s.size = 4; + break; + case TOKEN_TYPE_REAL: + s.type = F32; + s.size = 4; + break; + case TOKEN_TYPE_STR: + s.type = STR; + break; + default: + return false; + } + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + return false; + } + + mcpy(s.name, (char *)name.start, name.length); + s.name_length = name.length; + s.name[name.length] = '\0'; + + u32 addr = mp; + s.ref = addr; + s.scope = GLOBAL; + + next_token_is(TOKEN_EQ); + + Token value = next_token(); + switch (value.type) { + case TOKEN_KEYWORD_TRUE: { + u32 addr = mp; + WRITE_U8(addr, 1); + + mp += s.size; + break; + } + case TOKEN_KEYWORD_FALSE: { + u32 addr = mp; + WRITE_U8(addr, 0); + + mp += s.size; + break; + } + case TOKEN_LITERAL_INT: { + i32 out = atoi(value.start); + + u32 addr = mp; + WRITE_U32(addr, out); + + mp += s.size; + break; + } + case TOKEN_LITERAL_NAT: { + char *endptr; + u32 out = (u32)strtoul(value.start, &endptr, 10); + if (endptr == value.start || *endptr != '\0') { + fprintf(stderr, "Invalid decimal literal: %s\n", value.start); + exit(1); + } + + u32 addr = mp; + WRITE_U32(addr, out); + + mp += s.size; + break; + } + case TOKEN_LITERAL_REAL: { + i32 out = FLOAT_TO_REAL(atof(value.start)); + + u32 addr = mp; + WRITE_U32(addr, out); + + mp += s.size; + break; + } + case TOKEN_LITERAL_STR: { + const char *src = value.start; + i32 len = 0; + i32 i = 0; + + while (i < value.length) { + char c = src[i++]; + if (c == '"') { + continue; + } + if (c == '\\' && i < value.length) { + switch (src[i++]) { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + case '\\': + case '"': + case '\'': + break; + default: + i--; /* Rewind for unknown escapes */ + } + } + WRITE_U8(addr + 4 + len, c); + len++; + } + + u32 size = len + 5; /* 4 (len) + dst_len + 1 (null) */ + s.size = size; + + mp += size; + + WRITE_U32(addr, len); + WRITE_U8(addr + 4 + len, '\0'); + break; + } + default: + return false; + } + next_token_is(TOKEN_SEMICOLON); + + symbol_table_add(st, s); + return true; +} + +/** + * Var . + */ +void define_var(ScopeTable *st, Token regType) { + Symbol s; + s.scope = VAR; + switch (regType.type) { + case TOKEN_TYPE_I8: { + s.type = I8; + s.size = 1; + break; + } + case TOKEN_TYPE_I16: { + s.type = I16; + s.size = 2; + break; + } + case TOKEN_TYPE_INT: { + s.type = I32; + s.size = 4; + break; + } + case TOKEN_TYPE_U8: { + s.type = U8; + s.size = 1; + break; + } + case TOKEN_TYPE_U16: { + s.type = U16; + s.size = 2; + break; + } + case TOKEN_TYPE_NAT: { + s.type = U32; + s.size = 4; + break; + } + case TOKEN_TYPE_REAL: { + s.type = F32; + s.size = 4; + break; + } + case TOKEN_TYPE_BOOL: { + s.type = BOOL; + s.size = 1; + break; + } + case TOKEN_TYPE_STR: { + s.type = STR; + s.size = 4; /* not really this type, pointer alias which is 4 */ + break; + } + default: + printf("ERROR at line %d: %.*s\n", regType.line, regType.length, + regType.start); + exit(1); + } + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", regType.line, + regType.length, regType.start); + exit(1); + } + + mcpy(s.name, (void *)name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; + + symbol_table_add(st, s); +} + +/** + * Function. + */ +void define_function(ScopeTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = FUNCTION; + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); + exit(1); + } + mcpy(s.name, (void *)name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; + + next_token_is(TOKEN_LPAREN); + + i32 temp = st->scope_ref; + + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + define_var(st, next); + next = next_token(); + if (next.type == TOKEN_COMMA) { + next = next_token(); + continue; + } else if (next.type == TOKEN_RPAREN) { + break; + } else { + printf("ERROR at line %d: %.*s\n", next.line, next.length, next.start); + exit(1); + } + } + s.ref = cp; + next = next_token_is(TOKEN_LBRACE); + + st->scope_ref = temp; // need to add to the parents scope + symbol_table_add(st, s); + st->scope_ref = (i32)st->count; +} + +/** + * Plex. + */ +void define_plex(ScopeTable *st) { + Symbol s; + s.scope = GLOBAL; + s.type = PLEX; + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("PLEX NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); + exit(1); + } + mcpy(s.name, (void *)name.start, name.length); + s.name[name.length] = '\0'; + s.name_length = name.length; + + next_token_is(TOKEN_LPAREN); +} + +/** + * Branch. + */ +void define_branch(ScopeTable *st) { + Symbol s; + s.scope = LOCAL; + s.type = VOID; + + Token name = next_token_is(TOKEN_IDENTIFIER); + if (name.length > MAX_SYMBOL_NAME_LENGTH) { + printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, name.length, + name.start); + exit(1); + } + mcpy(s.name, (void *)name.start, name.length); + s.name_length = name.length; + s.name[name.length] = '\0'; + + s.ref = cp; + symbol_table_add(st, s); +} + +/** + * Define a loop + */ +void define_loop(ScopeTable *st) {} + +/** + * Build the symbol table and calculate the types/size/offsets of all values. + */ +void build_symbol_table(char *source, ScopeTable *st) { + Token token; + init_lexer(source); + do { + token = next_token(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + exit(1); + } + + if (token.type != TOKEN_EOF) { + if (token.type == TOKEN_LBRACE) { + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + st->depth++; + continue; + } + + if (token.type == TOKEN_RBRACE) { + i32 current_scope = st->scope_ref; + i32 parent = st->scopes[current_scope].parent; + if (parent < 0) + parent = 0; + st->scope_ref = parent; + st->depth--; + continue; + } + + if (token.type == TOKEN_KEYWORD_PLEX) { + if (st->depth != 0) { + printf("I'm letting it slide, but generally plexes are declared " + "outside of a scope %d: %.*s\n", + token.line, token.length, token.start); + } + define_plex(st); + continue; + } + + if (token.type == TOKEN_KEYWORD_FN) { + if (st->depth != 0) { + printf("Functions can only be declared outside of a scope %d: %.*s\n", + token.line, token.length, token.start); + exit(1); + } + define_function(st); + continue; + } + + if (token.type == TOKEN_KEYWORD_CONST) { + // FIXME: add consts, for now just make everything + next_token(); + continue; + } + + if (token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 || + token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 || + token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || + token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR || + token.type == TOKEN_TYPE_BOOL) { + if (st->depth == 0) { + define_global(st, token); + continue; + } + + define_var(st, token); + next_token_is(TOKEN_SEMICOLON); + continue; + } + + if (token.type == TOKEN_KEYWORD_IF) { + define_loop(st); + continue; + } + + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + define_branch(st); + continue; + } + + if (token.type == TOKEN_KEYWORD_RETURN) { + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + cp++; + continue; + } + + get_reg(next, st); + cp++; + next_token_is(TOKEN_SEMICOLON); + continue; + } + +#ifdef DEBUG_COMPILER + printf("-- %.*s --\n", token.length, token.start); +#endif + } + } while (token.type != TOKEN_EOF); +} + +/** + * 2nd pass, emit the bytecode + */ +void emit_bytecode(char *source, ScopeTable *st) { + Token token; + init_lexer(source); + do { + token = next_token(); + if (token.type == TOKEN_ERROR) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); + break; + } + if (token.type != TOKEN_EOF) { + + if (token.type == TOKEN_LBRACE) { + st->count++; + st->scopes[st->count].parent = st->scope_ref; + st->scope_ref = (i32)st->count; + st->depth++; + continue; + } + + if (token.type == TOKEN_RBRACE) { + i32 current_scope = st->scope_ref; + i32 parent = st->scopes[current_scope].parent; + if (parent < 0) + parent = 0; + st->scope_ref = parent; + st->depth--; + continue; + } + + if (token.type == TOKEN_KEYWORD_FN) { + /* ignore, already processed */ + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + next = next_token(); + } + continue; + } + + if (token.type == TOKEN_KEYWORD_PLEX) { + /* ignore, already processed */ + Token next = next_token(); + while (next.type != TOKEN_RPAREN) { + next = next_token(); + } + continue; + } + + if (token.type == TOKEN_KEYWORD_CONST) { + /* ignore, already processed */ + next_token(); /* type */ + next_token(); /* var */ + next_token(); /* reg */ + next_token(); /* ; */ + continue; + } + + if (token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 || + token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 || + token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || + token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { + /* ignore, already processed */ + next_token(); /* var */ + next_token(); /* reg */ + next_token(); /* ; */ + continue; + } + + if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || + token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || + token.type == TOKEN_KEYWORD_FOR) { + /* ignore, already processed */ + next_token(); /* id */ + } + + if (token.type == TOKEN_KEYWORD_RETURN) { + Token next = next_token(); + if (next.type == TOKEN_SEMICOLON) { + /* put 0xFF as return register */ + code[cp++] = ENCODE_B(OP_RETURN, 255, 0); + continue; + } + + u32 reg = get_reg(next, st); + code[cp++] = ENCODE_B(OP_RETURN, reg, 0); + next_token_is(TOKEN_SEMICOLON); + continue; + } + +#ifdef DEBUG_COMPILER + printf("-- %.*s --\n", token.length, token.start); +#endif + if (token.type == TOKEN_IDENTIFIER) { + /*} else { + some other identifier + printf("Unknown id at line %d: %.*s\n", token.line, token.length, + token.start); + exit(1); + } + */ + } + } + } while (token.type != TOKEN_EOF); +} + /** * Compile. */ bool compile(ScopeTable *st, char *source) { - return false; + build_symbol_table(source, st); + cp = 0; /* actually start emitting code */ + st->count = 0; + emit_bytecode(source, st); + return true; } diff --git a/tools/compiler.h b/tools/compiler.h index a94a318..3fa7d04 100644 --- a/tools/compiler.h +++ b/tools/compiler.h @@ -80,6 +80,7 @@ struct scope_tab_s { u32 count; u32 capacity; i32 scope_ref; + u32 depth; }; #define EMIT_U8(value) \ diff --git a/tools/parser.c b/tools/parser.c index 01d8245..81fc9f6 100644 --- a/tools/parser.c +++ b/tools/parser.c @@ -214,8 +214,6 @@ static TokenType identifierType() { switch (lexer.start[2]) { case 'a': return checkKeyword(3, 1, "d", TOKEN_KEYWORD_READ); - case 'f': - return checkKeyword(3, 4, "resh", TOKEN_KEYWORD_REFRESH); case 't': return checkKeyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); } @@ -228,7 +226,14 @@ static TokenType identifierType() { if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { case 't': - return checkKeyword(2, 1, "r", TOKEN_TYPE_STR); + if (lexer.current - lexer.start > 2) { + switch (lexer.start[2]) { + case 'r': + return checkKeyword(2, 0, "", TOKEN_TYPE_STR); + case 'a': + return checkKeyword(2, 1, "t", TOKEN_KEYWORD_STAT); + } + } } } break; @@ -427,8 +432,8 @@ const char *token_type_to_string(TokenType type) { return "TOKEN_KEYWORD_READ"; case TOKEN_KEYWORD_WRITE: return "TOKEN_KEYWORD_WRITE"; - case TOKEN_KEYWORD_REFRESH: - return "TOKEN_KEYWORD_REFRESH"; + case TOKEN_KEYWORD_STAT: + return "TOKEN_KEYWORD_STAT"; case TOKEN_KEYWORD_CLOSE: return "TOKEN_KEYWORD_CLOSE"; case TOKEN_KEYWORD_NIL: diff --git a/tools/parser.h b/tools/parser.h index 06eef8b..07e4a3d 100644 --- a/tools/parser.h +++ b/tools/parser.h @@ -37,7 +37,7 @@ typedef enum { TOKEN_KEYWORD_OPEN, TOKEN_KEYWORD_READ, TOKEN_KEYWORD_WRITE, - TOKEN_KEYWORD_REFRESH, + TOKEN_KEYWORD_STAT, TOKEN_KEYWORD_CLOSE, TOKEN_KEYWORD_LOOP, TOKEN_KEYWORD_DO, @@ -84,7 +84,7 @@ typedef struct { int line; } Token; -void initLexer(const char *source); +void init_lexer(const char *source); Token next_token(); const char* token_type_to_string(TokenType type); diff --git a/vm/vm.c b/vm/vm.c index f013680..56bee24 100644 --- a/vm/vm.c +++ b/vm/vm.c @@ -81,17 +81,19 @@ bool step_vm() { /* reset the frame pointer */ fp = parent_fp; - if (is_ptr) { - /* copy value to end of mp if it is a pointer */ - globals[parent_local_return_address/4] = mp; - size = globals[return_value/4]; - globals[mp/4] = size; - mp += 4; - mcpy(&mem[mp], &mem[return_value], size); - mp += size; - } else { - /* otherwise just write the return value to its location */ - globals[(parent_local_return_address / 4)] = return_value; + if (parent_local_return_address != 255) { + if (is_ptr) { + /* copy value to end of mp if it is a pointer */ + globals[parent_local_return_address/4] = mp; + size = globals[return_value/4]; + globals[mp/4] = size; + mp += 4; + mcpy(&mem[mp], &mem[return_value], size); + mp += size; + } else { + /* otherwise just write the return value to its location */ + globals[(parent_local_return_address / 4)] = return_value; + } } /* jump to parent frame */ diff --git a/vm/vm.h b/vm/vm.h index 1a607aa..da0275b 100644 --- a/vm/vm.h +++ b/vm/vm.h @@ -3,27 +3,6 @@ #include "libc.h" -/* - * Locals - * one 32bit value - * [lllll|tt|p] - * 5 bits -> local position (up to 32 per scope) - * 2 bits -> type - * 1 bit -> is pointer? - * - * 1 -> ptr, - * 00 -> 8 - * 10 -> 16 - * 01 -> 32 - * 11 -> string - * - * 0 -> value - * 00 -> bool? - * 10 -> nat - * 01 -> int - * 11 -> real - */ - /** * Instruction Types * @@ -50,7 +29,7 @@ u16 imm = ((u32)(instruction)) & 0xFFFF; typedef enum { - OP_HALT, /* halt : B : all zeros : halt execution */ + OP_HALT, /* halt : A : all zeros : halt execution */ OP_CALL, /* call : A : dest args return : creates a new frame */ OP_RETURN, /* return : B : dest return_flags : returns from a frame to the parent frame */ OP_SYSCALL, /* syscall : A : id args mem_ptr : does a system call based on id with args */ @@ -76,7 +55,7 @@ typedef enum { OP_MEM_SET_16, /* memset_16 : A : memory[dest..dest+src2] = local[src1] as u16 */ OP_MEM_SET_32, /* memset_32 : A : memory[dest..dest+src2] = local[src1] as u32 */ OP_MOV, /* mov : A : locals[dest] = locals[src1] */ - OP_PARG, /* push_arg : B : push u32 value onto the childs locals */ + OP_PARG, /* push_arg : A : dest : push u32 value onto the childs locals */ OP_ADD_INT, /* add_int : A : locals[dest] = locals[src1] + locals[src2] */ OP_SUB_INT, /* sub_int : A : locals[dest] = locals[src1] - locals[src2] */ OP_MUL_INT, /* mul_int : A : locals[dest] = locals[src1] * locals[src2] */