#include "../../vm/common.h"
#include "../../vm/fixed.h"
#include "../../vm/libc.h"
#include "../../vm/opcodes.h"
#include "parser.h"
#include "compiler.h"

/* FIXME: remove these and replace with libc.h instead */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Store an opcode byte at the current code position.
 * vm->cp is NOT advanced here; the callers in this file increment it.
 */
void emit_op(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
}

/**
 * Store an operand byte at the current code position.
 * vm->cp is NOT advanced here; the callers in this file increment it.
 */
void emit_byte(VM *vm, u8 byte) {
  vm->code[vm->cp] = byte;
}

/**
 * Hand a 32-bit value to write_u32 for the current code position.
 * vm->cp is not advanced by this function itself.
 */
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
}

/**
 * Find a symbol by name, starting in scope `scope_ref` and walking up the
 * parent chain until a scope with a negative parent has been searched.
 *
 * @param table      scope table to search
 * @param name       symbol name (only the first `length` bytes are compared)
 * @param length     length of `name`
 * @param scope_ref  index of the scope to start from
 * @return pointer to the symbol stored in the table, or nil when not found
 */
Symbol *symbol_table_lookup(ScopeTable *table, const char *name, u32 length,
                            i32 scope_ref) {
  for (;;) {
    /* Work through a pointer: no need to copy the whole scope per level. */
    SymbolTable *scope = &table->scopes[scope_ref];

    for (u32 i = 0; i < scope->count; i++) {
      Symbol *candidate = &scope->symbols[i];
      if (candidate->name_length == length &&
          strleq(candidate->name, name, length)) {
        return candidate;
      }
    }

    if (scope->parent < 0) {
      return nil;
    }
    scope_ref = scope->parent;
  }
}

/**
 * Append symbol `s` to the current scope (table->scope_ref).
 *
 * The duplicate check uses symbol_table_lookup, which also walks the
 * enclosing scopes, so a name that is visible from a parent scope is
 * rejected as well. Any failure prints a message to stderr and calls exit(1).
 *
 * @return index of the new symbol inside the current scope
 */
u8 symbol_table_add(ScopeTable *table, Symbol s) {
  Symbol *sym =
      symbol_table_lookup(table, s.name, s.name_length, table->scope_ref);
  if (sym != nil) {
    /* The '*' precision of "%.*s" must be an int argument. */
    fprintf(stderr,
            "Error: Symbol '%.*s' already defined, in this scope"
            " please pick a different variable name or create a new scope.\n",
            (int)s.name_length, s.name);
    exit(1);
  }

  u8 current_index = table->scopes[table->scope_ref].count;
  if (current_index + 1 > 255) {
    fprintf(stderr, "Error: Only 255 symbols are allowed per scope"
                    " first off: impressive; secondly:"
                    " just create a new scope and keep going.\n");
    exit(1);
  }

  if (!table_realloc(table)) {
    fprintf(stderr,
            "Error: Symbol table is out of memory! This is likely because you "
            " built the assembler in static mode, increase the static size."
            " if you built using malloc, that means your computer is out of"
            " memory. Close a few tabs in your web browser and try again."
            " Count was %d, while capacity was %d\n",
            table->count, table->capacity);
    exit(1);
  }

  /* set ref to current count for local */
  /* NOTE(review): this overwrites s.ref for every symbol, including the
   * `s.ref = vm->cp` stored by define_function/define_branch just before
   * they call this function. Confirm whether LOCAL symbols should keep the
   * caller-supplied code position instead. */
  s.ref = current_index;

#ifdef DEBUG_COMPILER
  if (s.scope == VAR) {
    printf("$%d = %s\n", s.ref, s.name);
  } else if (s.scope == GLOBAL) {
    printf("memory[%d] = %s\n", s.ref, s.name);
  } else {
    printf("code[%d] = %s\n", s.ref, s.name);
  }
#endif

  table->scopes[table->scope_ref].symbols[current_index] = s;
  table->scopes[table->scope_ref].count++;
  return current_index;
}

/**
 * Resolve `name` from the current scope outwards and return its `ref`.
 * Prints an error to stderr and calls exit(1) when the symbol is unknown.
 */
u32 get_ref(ScopeTable *st, const char *name, u32 length) {
  Symbol *sym = symbol_table_lookup(st, name, length, st->scope_ref);
  if (!sym) {
    /* The '*' precision of "%.*s" must be an int argument, not a u32. */
    fprintf(stderr, "Error: Assembler has no idea what Symbol '%.*s' means.\n",
            (int)length, name);
    exit(1);
    return 0; /* not reached; kept in case exit is not seen as noreturn */
  }
  return sym->ref;
}

/**
 * Read the next token and require it to be of `type`.
 * On a mismatch the offending token is printed and the process exits.
 */
Token next_token_is(TokenType type) {
  Token token = next_token();
  if (token.type != type) {
    printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
    exit(1);
  }
  return token;
}

/**
 * Var <type> <name>.
 *
 * Declare a variable whose type token (`regType`) has already been read;
 * the identifier is read here. The symbol is VAR when st->depth is non-zero
 * and GLOBAL otherwise. An unknown type token or a name that is too long
 * prints an error and exits.
 */
void define_var(ScopeTable *st, Token regType) {
  Symbol s = {0}; /* start from known values; only some fields are set below */
  s.scope = (st->depth) ? VAR : GLOBAL;

  switch (regType.type) {
  case TOKEN_KEYWORD_PLEX: {
    s.type = PLEX;
    s.size = 4; /* not really this type, pointer alias which is 4 */
    break;
  }
  case TOKEN_TYPE_I8: {
    s.type = I8;
    s.size = 1;
    break;
  }
  case TOKEN_TYPE_I16: {
    s.type = I16;
    s.size = 2;
    break;
  }
  case TOKEN_TYPE_INT: {
    s.type = I32;
    s.size = 4;
    break;
  }
  case TOKEN_TYPE_U8: {
    s.type = U8;
    s.size = 1;
    break;
  }
  case TOKEN_TYPE_U16: {
    s.type = U16;
    s.size = 2;
    break;
  }
  case TOKEN_TYPE_NAT: {
    s.type = U32;
    s.size = 4;
    break;
  }
  case TOKEN_TYPE_REAL: {
    s.type = F32;
    s.size = 4;
    break;
  }
  case TOKEN_TYPE_BOOL: {
    s.type = BOOL;
    s.size = 1;
    break;
  }
  case TOKEN_TYPE_STR: {
    s.type = STR;
    s.size = 4; /* not really this type, pointer alias which is 4 */
    break;
  }
  default:
    printf("ERROR at line %d: %.*s\n", regType.line, regType.length,
           regType.start);
    exit(1);
  }

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (name.length > MAX_SYMBOL_NAME_LENGTH) {
    /* Report the identifier that is too long, not the type token. */
    printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }

  /* assumes Symbol.name has room for MAX_SYMBOL_NAME_LENGTH + 1 bytes
   * (name plus terminator) — TODO confirm against the Symbol definition */
  memcpy(s.name, name.start, name.length);
  s.name[name.length] = '\0';
  s.name_length = name.length;

  symbol_table_add(st, s);
}

/**
 * Plex <name>.
 *
 * Not implemented yet: this is currently a no-op and consumes no tokens.
 */
void define_plex(VM *vm, ScopeTable *st) {
}

/**
 * Function <name> ( <type> <name> [, <type> <name>]* ).
 *
 * Reads the function name and parameter list. A new scope is opened for the
 * parameters (st->depth is not changed here), each parameter is declared via
 * define_var, and the function symbol itself is added to the scope that was
 * current on entry. On return the parameter scope is the current scope.
 */
void define_function(VM *vm, ScopeTable *st) {
  Symbol s = {0}; /* avoid copying indeterminate fields into the table */
  s.scope = LOCAL;
  s.type = FUNCTION;

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (name.length > MAX_SYMBOL_NAME_LENGTH) {
    printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }
  memcpy(s.name, name.start, name.length);
  s.name[name.length] = '\0';
  s.name_length = name.length;

  next_token_is(TOKEN_LPAREN);

  /* Open the parameter scope as a child of the current scope. */
  i32 temp = st->scope_ref;
  st->count++;
  st->scopes[st->count].parent = st->scope_ref;
  st->scope_ref = (i32)st->count;

  Token next = next_token();
  while (next.type != TOKEN_RPAREN) {
    define_var(st, next);
    next = next_token();
    if (next.type == TOKEN_COMMA) {
      next = next_token();
      continue;
    } else if (next.type == TOKEN_RPAREN) {
      break;
    } else {
      printf("ERROR at line %d: %.*s\n", next.line, next.length, next.start);
      exit(1);
    }
  }

  /* symbol_table_add assigns s.ref itself, so this value is replaced there */
  s.ref = vm->cp;

  /* need to add to the parents scope */
  st->scope_ref = temp;
  symbol_table_add(st, s);
  st->scope_ref = (i32)st->count;
}

/**
 * Branch <name>.
 *
 * Reads the identifier that follows a branch keyword and adds it to the
 * current scope as a LOCAL symbol of type VOID.
 */
void define_branch(VM *vm, ScopeTable *st) {
  Symbol s = {0}; /* avoid copying indeterminate fields into the table */
  s.scope = LOCAL;
  s.type = VOID;

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (name.length > MAX_SYMBOL_NAME_LENGTH) {
    printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }
  memcpy(s.name, name.start, name.length);
  s.name_length = name.length;
  s.name[name.length] = '\0';

  /* symbol_table_add assigns s.ref itself, so this value is replaced there */
  s.ref = vm->cp;

  symbol_table_add(st, s);
}

/**
 * Look up the byte size associated with an opcode mnemonic.
 *
 * @param opname NUL-terminated mnemonic, compared with strcmp
 * @return the size from the table below; for an unknown mnemonic an error is
 *         printed to stderr and the process exits with status -1
 */
int get_instruction_byte_size(const char *opname) {
  static const struct {
    const char *name;
    int size;
  } sizes[] = {
      /* 2 bytes */
      {"return", 2},
      /* 3 bytes */
      {"neg_int", 3},           {"abs_int", 3},
      {"neg_nat", 3},           {"abs_nat", 3},
      {"neg_real", 3},          {"abs_real", 3},
      {"int_to_string", 3},     {"load_indirect_8", 3},
      {"nat_to_string", 3},     {"load_indirect_16", 3},
      {"real_to_string", 3},    {"load_indirect_32", 3},
      {"int_to_real", 3},       {"store_indirect_8", 3},
      {"nat_to_real", 3},       {"store_indirect_16", 3},
      {"real_to_int", 3},       {"store_indirect_32", 3},
      {"real_to_nat", 3},       {"nat_to_int", 3},
      {"int_to_nat", 3},        {"string_length", 3},
      {"memset", 3},            {"memset_8", 3},
      {"memset_16", 3},         {"register_move", 3},
      {"malloc", 3},
      /* 4 bytes */
      {"add_int", 4},           {"sub_int", 4},
      {"mul_int", 4},           {"div_int", 4},
      {"add_nat", 4},           {"sub_nat", 4},
      {"mul_nat", 4},           {"div_nat", 4},
      {"add_real", 4},          {"sub_real", 4},
      {"mul_real", 4},          {"div_real", 4},
      {"bit_shift_left", 4},    {"bit_shift_right", 4},
      {"bit_shift_r_ext", 4},   {"bit_and", 4},
      {"bit_or", 4},            {"bit_xor", 4},
      /* 5 bytes */
      {"halt", 5},              {"jump_if_flag", 5},
      {"jump", 5},
      /* 6 bytes */
      {"load_absolute_32", 6},  {"load_immediate", 6},
      {"load_address", 6},      {"load_absolute_16", 6},
      {"load_absolute_8", 6},   {"store_absolute_32", 6},
      {"store_absolute_8", 6},  {"store_absolute_16", 6},
      /* 7 bytes */
      {"jump_eq_int", 7},       {"jump_neq_int", 7},
      {"jump_gt_int", 7},       {"jump_lt_int", 7},
      {"jump_le_int", 7},       {"jump_ge_int", 7},
      {"jump_eq_nat", 7},       {"jump_neq_nat", 7},
      {"jump_gt_nat", 7},       {"jump_lt_nat", 7},
      {"jump_le_nat", 7},       {"jump_ge_nat", 7},
      {"jump_eq_real", 7},      {"jump_neq_real", 7},
      {"jump_gt_real", 7},      {"jump_lt_real", 7},
      {"jump_le_real", 7},      {"jump_ge_real", 7},
      {"store_offset_8", 7},    {"store_offset_16", 7},
      {"store_offset_32", 7},   {"load_offset_8", 7},
      {"load_offset_16", 7},    {"load_offset_32", 7},
  };

  for (size_t i = 0; i < sizeof sizes / sizeof sizes[0]; i++) {
    if (strcmp(opname, sizes[i].name) == 0) {
      return sizes[i].size;
    }
  }

  fprintf(stderr, "Unknown opcode for sizing: %s\n", opname);
  exit(-1);
}

/* Open a new block scope as a child of the current one ('{'). */
static void enter_scope(ScopeTable *st) {
  st->count++;
  st->scopes[st->count].parent = st->scope_ref;
  st->scope_ref = (i32)st->count;
  st->depth++;
}

/* Return to the parent scope ('}'); a negative parent is clamped to 0. */
static void leave_scope(ScopeTable *st) {
  i32 parent = st->scopes[st->scope_ref].parent;
  if (parent < 0) {
    parent = 0;
  }
  st->scope_ref = parent;
  st->depth--;
}

/* True for the type tokens that start a variable declaration statement. */
static bool is_type_token(TokenType type) {
  switch (type) {
  case TOKEN_TYPE_I8:
  case TOKEN_TYPE_I16:
  case TOKEN_TYPE_INT:
  case TOKEN_TYPE_U8:
  case TOKEN_TYPE_U16:
  case TOKEN_TYPE_NAT:
  case TOKEN_TYPE_REAL:
  case TOKEN_TYPE_STR:
  case TOKEN_TYPE_BOOL:
    return true;
  default:
    return false;
  }
}

/* True for the keywords that are followed by a branch identifier. */
static bool is_branch_keyword(TokenType type) {
  switch (type) {
  case TOKEN_KEYWORD_LOOP:
  case TOKEN_KEYWORD_IF:
  case TOKEN_KEYWORD_ELSE:
  case TOKEN_KEYWORD_DO:
  case TOKEN_KEYWORD_FOR:
    return true;
  default:
    return false;
  }
}

/**
 * Build the symbol table and calculate the types/size/offsets of all values.
 *
 * First pass over `source`: declarations are recorded in `st`, and vm->cp is
 * advanced by the bytes each `return` statement will occupy. Any error
 * prints a message and calls exit(1).
 */
void build_symbol_table(VM *vm, char *source, ScopeTable *st) {
  init_parser(source);

  for (;;) {
    Token token = next_token();

    if (token.type == TOKEN_ERROR) {
      printf("ERROR at line %d: %.*s\n", token.line, token.length,
             token.start);
      exit(1);
    }
    if (token.type == TOKEN_EOF) {
      break;
    }

    if (token.type == TOKEN_LBRACE) {
      enter_scope(st);
      continue;
    }
    if (token.type == TOKEN_RBRACE) {
      leave_scope(st);
      continue;
    }
    if (token.type == TOKEN_KEYWORD_FN) {
      define_function(vm, st);
      continue;
    }
    if (token.type == TOKEN_KEYWORD_PLEX) {
      define_plex(vm, st);
      continue;
    }
    if (is_type_token(token.type)) {
      /* declaration statement: <type> <name> ; */
      define_var(st, token);
      next_token_is(TOKEN_SEMICOLON);
      continue;
    }
    if (is_branch_keyword(token.type)) {
      define_branch(vm, st);
      continue;
    }
    if (token.type == TOKEN_KEYWORD_RETURN) {
      vm->cp++; /* opcode byte */
      Token next = next_token();
      if (next.type == TOKEN_SEMICOLON) {
        /* put 0xFF as return register */
        vm->cp++;
        continue;
      }
      /* result unused: called so an unknown symbol is reported in pass 1 */
      get_ref(st, next.start, next.length);
      vm->cp++; /* register byte */
      next_token_is(TOKEN_SEMICOLON);
      continue;
    }

#ifdef DEBUG_PRINT
    printf("-- %.*s --\n", token.length, token.start);
#endif

    if (token.type == TOKEN_IDENTIFIER) {
      /* TODO: check to see if it is an opcode first */
      /* some other identifier */
      printf("Unknown id at line %d: %.*s\n", token.line, token.length,
             token.start);
      exit(1);
    }
  }
}

/**
 * 2nd pass, emit the bytecode
 *
 * Re-tokenizes `source`. Declarations were handled by build_symbol_table and
 * are skipped here; `return` statements are emitted as OP_RETURN followed by
 * one register byte (0xFF when no value is returned). A TOKEN_ERROR prints a
 * message and stops the pass.
 */
void emit_bytecode(VM *vm, char *source, ScopeTable *st) {
  init_parser(source);

  for (;;) {
    Token token = next_token();

    if (token.type == TOKEN_ERROR) {
      printf("ERROR at line %d: %.*s\n", token.line, token.length,
             token.start);
      break;
    }
    if (token.type == TOKEN_EOF) {
      break;
    }

    if (token.type == TOKEN_LBRACE) {
      enter_scope(st);
      continue;
    }
    if (token.type == TOKEN_RBRACE) {
      leave_scope(st);
      continue;
    }
    if (token.type == TOKEN_KEYWORD_FN) {
      /* ignore, already processed: skip the header up to ')' */
      /* NOTE(review): define_function opens a parameter scope in pass 1 but
       * no scope is opened here, so scope indices may differ between the
       * passes — confirm intended behaviour. */
      Token next = next_token();
      while (next.type != TOKEN_RPAREN && next.type != TOKEN_EOF) {
        next = next_token();
      }
      continue;
    }
    if (token.type == TOKEN_KEYWORD_PLEX) {
      /* ignore, already processed */
      /* FIXME: consume all tokens for this plex */
      continue;
    }
    if (is_type_token(token.type)) {
      /* ignore, already processed: pass 1 parses `<type> <name> ;`, so
       * exactly two tokens follow the type token */
      next_token(); /* name */
      next_token(); /* ; */
      continue;
    }
    if (is_branch_keyword(token.type)) {
      /* ignore, already processed */
      next_token(); /* id */
      continue;
    }
    if (token.type == TOKEN_KEYWORD_RETURN) {
      emit_op(vm, OP_RETURN);
      vm->cp++;
      Token next = next_token();
      if (next.type == TOKEN_SEMICOLON) {
        /* put 0xFF as return register */
        emit_byte(vm, 0xFF);
        vm->cp++;
        continue;
      }
      u32 reg = get_ref(st, next.start, next.length);
      emit_byte(vm, (u8)reg); /* only one byte is emitted for the register */
      vm->cp++;
      next_token_is(TOKEN_SEMICOLON);
      continue;
    }

#ifdef DEBUG_PRINT
    printf("-- %.*s --\n", token.length, token.start);
#endif

    if (token.type == TOKEN_IDENTIFIER) {
      /* TODO: opcode handling goes here */
      /* some other identifier */
      printf("Unknown id at line %d: %.*s\n", token.line, token.length,
             token.start);
      exit(1);
    }
  }
}

/**
 * Compile.
 *
 * Runs the symbol-table pass, resets vm->cp and st->count, then runs the
 * emit pass over the same source.
 *
 * NOTE(review): st->scope_ref and st->depth are not reset between the passes
 * and keep whatever values pass 1 left — confirm that this is intended.
 *
 * @return always true; errors found in pass 1 terminate the process instead
 */
bool compile(VM *vm, ScopeTable *st, char *source) {
  build_symbol_table(vm, source, st);

  vm->cp = 0; /* actually start emitting code */
  st->count = 0;
  emit_bytecode(vm, source, st);

  return true;
}