#include "assembler.h"
#include "../../vm/common.h"
#include "../../vm/fixed.h"
#include "../../vm/libc.h"
#include "../../vm/opcodes.h"
#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of Symbol slots allocated when a table is first created. */
enum { SYMBOL_TABLE_INITIAL_CAPACITY = 16 };

/**
 * Append one byte to the VM code segment and advance the code pointer.
 */
void emit_byte(VM *vm, u8 byte) { vm->code[vm->cp++] = byte; }

/**
 * Write a 32-bit value into the VM code segment at the code pointer (via
 * write_u32) and advance the code pointer by four bytes.
 */
void emit_u32(VM *vm, u32 value) {
  write_u32(vm, code, vm->cp, value);
  vm->cp += 4;
}

/**
 * Allocate an empty symbol table.
 *
 * The caller owns the result and releases it by freeing `symbols` and then
 * the table itself. Allocation failure is fatal: a message is printed to
 * stderr and the process exits, so the return value is never NULL.
 */
SymbolTable *symbol_table_init() {
  SymbolTable *table = malloc(sizeof(*table));
  if (!table) {
    fprintf(stderr, "Error: out of memory allocating symbol table\n");
    exit(1);
  }

  table->symbols = malloc(SYMBOL_TABLE_INITIAL_CAPACITY * sizeof(Symbol));
  if (!table->symbols) {
    fprintf(stderr, "Error: out of memory allocating symbol table\n");
    exit(1);
  }

  table->count = 0;
  table->capacity = SYMBOL_TABLE_INITIAL_CAPACITY;
  return table;
}

/**
 * Append a copy of `s` to the table, doubling the storage when it is full.
 *
 * @return index of the newly stored symbol.
 *
 * Allocation failure is fatal (message on stderr, exit status 1).
 */
u32 symbol_table_add(SymbolTable *table, Symbol s) {
  if (table->count >= table->capacity) {
    u32 grown = table->capacity ? table->capacity * 2
                                : SYMBOL_TABLE_INITIAL_CAPACITY;
    /* Keep the old pointer until realloc is known to have succeeded. */
    Symbol *resized = realloc(table->symbols, grown * sizeof(Symbol));
    if (!resized) {
      fprintf(stderr, "Error: out of memory growing symbol table\n");
      exit(1);
    }
    table->symbols = resized;
    table->capacity = grown;
  }

  u32 index = table->count;
  table->symbols[index] = s;
  table->count++;
  return index;
}

/**
 * Linear search for the first symbol whose name equals `name` (per streq).
 *
 * @return pointer into the table's storage, or nil when nothing matches.
 *         The pointer is only valid until the next symbol_table_add, which
 *         may reallocate the storage.
 */
Symbol *symbol_table_lookup(SymbolTable *table, const char *name) {
  for (u32 i = 0; i < table->count; i++) {
    if (streq(table->symbols[i].name, name)) {
      return &table->symbols[i];
    }
  }
  return nil;
}

/**
 * Return the `ref` of the symbol called `name`.
 *
 * An unknown name is fatal: the error goes to stderr and the process exits.
 */
u32 get_ref(SymbolTable *st, const char *name) {
  Symbol *sym = symbol_table_lookup(st, name);
  if (!sym) {
    fprintf(stderr, "Error: Undefined Symbol '%s'\n", name);
    exit(1);
  }
  return sym->ref;
}

/**
 * Copy the text of token `name` into `s->name` as a NUL-terminated string.
 *
 * @return false (leaving `s` untouched) when the name exceeds
 *         MAX_SYMBOL_NAME_LENGTH or would not fit together with its
 *         terminator; true otherwise.
 */
static bool symbol_set_name(Symbol *s, Token name) {
  if (name.length > MAX_SYMBOL_NAME_LENGTH ||
      (size_t)name.length >= sizeof(s->name)) {
    return false;
  }
  memcpy(s->name, name.start, name.length);
  /* Lookups compare names with streq, so the terminator is required. */
  s->name[name.length] = '\0';
  return true;
}

/**
 * Read a register operand: either an identifier, or a `$` followed by one
 * more token. In the `$` form the token after the `$` is returned as-is,
 * without checking its type. Anything else is a fatal error.
 */
Token next_id_or_reg() {
  Token token = next_token();
  if (token.type == TOKEN_IDENTIFIER) {
    return token;
  }
  if (token.type == TOKEN_BIG_MONEY) {
    return next_token();
  }

  printf("Not an ID or register at line %d: %.*s\n", token.line, token.length,
         token.start);
  exit(1);
  return token; /* not reached */
}

/**
 * Read a pointer/immediate operand: an identifier or a nat, int or real
 * literal. Anything else is a fatal error.
 */
Token next_id_or_ptr() {
  Token token = next_token();
  switch (token.type) {
  case TOKEN_IDENTIFIER:
  case TOKEN_LITERAL_NAT:
  case TOKEN_LITERAL_INT:
  case TOKEN_LITERAL_REAL:
    return token;
  default:
    break;
  }

  printf("Not an ID or pointer at line %d: %.*s\n", token.line, token.length,
         token.start);
  exit(1);
  return token; /* not reached */
}

/**
 * Read the next token and require it to be of `type`; otherwise print the
 * offending token and exit.
 */
Token next_token_is(TokenType type) {
  Token token = next_token();
  if (token.type != type) {
    printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
    exit(1);
  }
  return token;
}

/**
 * Read the next token and require it to be of `type` or `type2`; otherwise
 * print the offending token and exit.
 */
Token next_token_is_either(TokenType type, TokenType type2) {
  Token token = next_token();
  if (token.type != type && token.type != type2) {
    printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start);
    exit(1);
  }
  return token;
}

/**
 * Parse a global definition and reserve its storage in VM memory.
 *
 * Token sequence consumed here (the caller has already consumed the keyword
 * that introduces a global): <type> <name> = <value> ;
 *
 * The value is written at the current memory pointer (vm->mp); the memory
 * pointer and the end of the current frame are then advanced by the symbol's
 * size, and the symbol is added to `st` with scope GLOBAL and `ref` set to
 * the address of the value.
 *
 * String values are stored as a 4-byte length, the unescaped bytes, and a
 * trailing NUL.
 *
 * @return false when the type token, the name length or the value token is
 *         not acceptable (tokens read so far stay consumed); true otherwise.
 *         A missing name, '=' or ';' is fatal via next_token_is.
 */
bool define_global(VM *vm, SymbolTable *st) {
  Symbol s;
  memset(&s, 0, sizeof(s)); /* no field is ever read uninitialised */

  Token token_type = next_token();
  switch (token_type.type) {
  case TOKEN_TYPE_BOOL:
    s.type = BOOL;
    s.size = 1;
    break;
  case TOKEN_TYPE_I8:
    s.type = I8;
    s.size = 1;
    break;
  case TOKEN_TYPE_U8:
    s.type = U8;
    s.size = 1;
    break;
  case TOKEN_TYPE_I16:
    s.type = I16;
    s.size = 2;
    break;
  case TOKEN_TYPE_U16:
    s.type = U16;
    s.size = 2;
    break;
  case TOKEN_TYPE_INT:
    s.type = I32;
    s.size = 4;
    break;
  case TOKEN_TYPE_NAT:
    s.type = U32;
    s.size = 4;
    break;
  case TOKEN_TYPE_REAL:
    s.type = F32;
    s.size = 4;
    break;
  case TOKEN_TYPE_STR:
    s.type = STR; /* size is computed from the string literal below */
    break;
  case TOKEN_IDENTIFIER:
    /* Identifier used as a type name: type and size stay zeroed. */
    break;
  default:
    return false;
  }

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (!symbol_set_name(&s, name)) {
    return false;
  }

  u32 addr = vm->mp;
  s.ref = addr;
  s.scope = GLOBAL;

  next_token_is(TOKEN_EQ);

  Token value = next_token();
  switch (value.type) {
  case TOKEN_KEYWORD_TRUE: {
    write_u8(vm, memory, addr, 1);
    break;
  }
  case TOKEN_KEYWORD_FALSE: {
    write_u8(vm, memory, addr, 0);
    break;
  }
  case TOKEN_LITERAL_INT: {
    i32 out = atoi(value.start);
    write_u32(vm, memory, addr, out);
    break;
  }
  case TOKEN_LITERAL_NAT: {
    char *endptr;
    u32 out = (u32)strtoul(value.start, &endptr, 10);
    /* The token is a slice of the source buffer, not a NUL-terminated
     * string, so validate against the token bounds rather than '\0'. */
    if (endptr == value.start || endptr > value.start + value.length) {
      fprintf(stderr, "Invalid decimal literal: %.*s\n", value.length,
              value.start);
      exit(1);
    }
    write_u32(vm, memory, addr, out);
    break;
  }
  case TOKEN_LITERAL_REAL: {
    fixed_t out = float_to_fixed(atof(value.start));
    write_u32(vm, memory, addr, out);
    break;
  }
  case TOKEN_LITERAL_STR: {
    const char *src = value.start;
    i32 len = 0;
    i32 i = 0;
    while (i < value.length) {
      char c = src[i++];
      if (c == '\\' && i < value.length) {
        char escaped = src[i++];
        switch (escaped) {
        case 'n':
          c = '\n';
          break;
        case 't':
          c = '\t';
          break;
        case 'r':
          c = '\r';
          break;
        case '\\':
        case '"':
        case '\'':
          c = escaped; /* the escaped character itself is stored */
          break;
        default:
          i--; /* unknown escape: keep the backslash, reread next char */
          break;
        }
      }
      /* Increment separately so the macro argument has no side effect. */
      write_u8(vm, memory, addr + 4 + len, c);
      len++;
    }
    s.size = len + 5; /* 4 (length prefix) + bytes + 1 (NUL) */
    write_u32(vm, memory, addr, len);
    write_u8(vm, memory, addr + 4 + len, '\0');
    break;
  }
  default:
    return false;
  }

  /* Reserve the storage that was just initialised. */
  vm->mp += s.size;
  vm->frames[vm->fp].end += s.size;

  next_token_is(TOKEN_SEMICOLON);

  symbol_table_add(st, s);
  return true;
}

/**
 * Parse a register-backed variable declaration and add it to `st`.
 *
 * `regType` is the already-consumed type token; the tokens consumed here are
 * <name> $ <int>. The integer becomes the symbol's `ref` (a register number)
 * and the scope is VAR. The trailing ';' (if any) is left for the caller.
 *
 * Any malformed input is fatal.
 */
void define_var(SymbolTable *st, Token regType) {
  Symbol s;
  memset(&s, 0, sizeof(s));
  s.scope = VAR;

  switch (regType.type) {
  case TOKEN_KEYWORD_PLEX:
    s.type = PLEX;
    s.size = 4; /* not really this type, pointer alias which is 4 */
    break;
  case TOKEN_TYPE_I8:
    s.type = I8;
    s.size = 1;
    break;
  case TOKEN_TYPE_I16:
    s.type = I16;
    s.size = 2;
    break;
  case TOKEN_TYPE_INT:
    s.type = I32;
    s.size = 4;
    break;
  case TOKEN_TYPE_U8:
    s.type = U8;
    s.size = 1;
    break;
  case TOKEN_TYPE_U16:
    s.type = U16;
    s.size = 2;
    break;
  case TOKEN_TYPE_NAT:
    s.type = U32;
    s.size = 4;
    break;
  case TOKEN_TYPE_REAL:
    s.type = F32;
    s.size = 4;
    break;
  case TOKEN_TYPE_BOOL:
    s.type = BOOL;
    s.size = 1;
    break;
  case TOKEN_TYPE_STR:
    s.type = STR;
    s.size = 4; /* not really this type, pointer alias which is 4 */
    break;
  default:
    printf("ERROR at line %d: %.*s\n", regType.line, regType.length,
           regType.start);
    exit(1);
  }

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (!symbol_set_name(&s, name)) {
    /* Report the offending name, not the type token. */
    printf("VARIABLE NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }

  next_token_is(TOKEN_BIG_MONEY);
  Token reg_num = next_token_is(TOKEN_LITERAL_INT);
  s.ref = atoi(reg_num.start);

  symbol_table_add(st, s);
}

/**
 * Parse a function header and add the function to `st`.
 *
 * Tokens consumed here (the introducing keyword was consumed by the caller):
 * <name> ( [<type> <name> $ <int> {, <type> <name> $ <int>}] )
 *
 * Each parameter is registered through define_var. The function symbol has
 * scope LOCAL, type FUNCTION and `ref` equal to the current vm->pc.
 *
 * Any malformed input is fatal.
 */
void define_function(VM *vm, SymbolTable *st) {
  Symbol s;
  memset(&s, 0, sizeof(s));
  s.scope = LOCAL;
  s.type = FUNCTION;

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (!symbol_set_name(&s, name)) {
    printf("FUNCITON NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }

  next_token_is(TOKEN_LPAREN);

  Token next = next_token();
  while (next.type != TOKEN_RPAREN) {
    define_var(st, next);

    next = next_token();
    if (next.type == TOKEN_COMMA) {
      next = next_token(); /* type token of the following parameter */
      continue;
    }
    if (next.type == TOKEN_RPAREN) {
      break;
    }
    printf("ERROR at line %d: %.*s\n", next.line, next.length, next.start);
    exit(1);
  }

  s.ref = vm->pc;
  symbol_table_add(st, s);
}

/**
 * Register a branch label: consumes one identifier and adds it to `st` with
 * scope LOCAL, type VOID and `ref` equal to the current vm->pc.
 *
 * A missing or over-long name is fatal.
 */
void define_branch(VM *vm, SymbolTable *st) {
  Symbol s;
  memset(&s, 0, sizeof(s));
  s.scope = LOCAL;
  s.type = VOID;

  Token name = next_token_is(TOKEN_IDENTIFIER);
  if (!symbol_set_name(&s, name)) {
    printf("BRANCH NAME TOO LONG at line %d: %.*s\n", name.line, name.length,
           name.start);
    exit(1);
  }

  s.ref = vm->pc;
  symbol_table_add(st, s);
}

/**
 * Build the symbol table and calculate the types/size/offsets of all values.
*/ void build_symbol_table(VM *vm, char *source, SymbolTable *st) { Token token; init_lexer(source); do { token = next_token(); if (token.type == TOKEN_ERROR) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); exit(1); } printf("Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { define_global(vm, st); continue; } if (token.type == TOKEN_KEYWORD_FN) { define_function(vm, st); continue; } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR || token.type == TOKEN_TYPE_BOOL) { define_var(st, token); next_token_is(TOKEN_SEMICOLON); continue; } if (token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || token.type == TOKEN_KEYWORD_FOR) { define_branch(vm, st); continue; } if (token.type == TOKEN_KEYWORD_RETURN) { vm->pc++; Token next = next_token(); if (next.type == TOKEN_SEMICOLON) { /* put 0xFF as return register */ vm->pc++; continue; } vm->pc++; next_token_is(TOKEN_SEMICOLON); continue; } if (token.type == TOKEN_IDENTIFIER) { // check to see if it is an opcode first if (strleq(token.start, "exit", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "call", token.length)) { vm->pc++; next_token_is(TOKEN_IDENTIFIER); vm->pc += 4; vm->pc++; /* number of args (implied) */ Token next = next_token(); while (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; next = next_token(); } /* return type */ next = next_token(); vm->pc++; /* we emit a value regardless, a void is register 255 */ if (next.type == TOKEN_SEMICOLON) { /* exit early because no return type */ continue; } /* if it is 
not void, then it was the value */ next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "syscall", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; Token next = next_token(); while (next.type != TOKEN_SEMICOLON) { if (next.type != TOKEN_ARROW_RIGHT) { vm->pc++; } next = next_token(); } } else if (strleq(token.start, "load_immediate", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_address", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "malloc", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_8", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_16", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "memset_32", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_8", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_16", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_offset_32", token.length)) { vm->pc++; 
next_id_or_reg(); vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_8", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_16", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_indirect_32", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_8", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_16", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "load_absolute_32", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_8", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_16", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_absolute_32", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if 
(strleq(token.start, "store_indirect_8", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_16", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_indirect_32", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_8", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ vm->pc++; next_token_is(TOKEN_LITERAL_NAT); /* offset */ vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); /* dest */ vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_16", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ vm->pc++; next_token_is(TOKEN_LITERAL_NAT); /* offset */ vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); /* dest */ vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "store_offset_32", token.length)) { vm->pc++; next_id_or_reg(); /* src1 */ vm->pc++; next_token_is(TOKEN_LITERAL_NAT); /* offset */ vm->pc += 4; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); /* dest */ vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "register_move", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; 
next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if 
(strleq(token.start, "neg_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "add_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "sub_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "mul_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "div_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "abs_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "neg_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_real", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_int", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; 
next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_nat", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_left", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_right", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_shift_r_ext", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_and", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_or", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "bit_xor", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_if_flag", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_int", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; 
next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_int", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_int", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_int", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_int", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_int", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_nat", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_nat", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_nat", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_nat", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_nat", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_nat", 
token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_eq_real", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_neq_real", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_ge_real", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_gt_real", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_lt_real", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "jump_le_real", token.length)) { vm->pc++; next_id_or_ptr(); vm->pc += 4; next_id_or_reg(); vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_length", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "int_to_string", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "nat_to_string", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "real_to_string", token.length)) { vm->pc++; next_id_or_reg(); vm->pc++; 
next_token_is(TOKEN_ARROW_RIGHT); next_id_or_reg(); vm->pc++; next_token_is(TOKEN_SEMICOLON); } else if (strleq(token.start, "string_eq", token.length)) { } else if (strleq(token.start, "string_concat", token.length)) { } else if (strleq(token.start, "string_get_char", token.length)) { } else if (strleq(token.start, "string_find_char", token.length)) { } else if (strleq(token.start, "string_slice", token.length)) { } else if (strleq(token.start, "string_to_int", token.length)) { } else if (strleq(token.start, "string_to_nat", token.length)) { } else if (strleq(token.start, "string_to_real", token.length)) { } else { // some other identifier printf("Unknown id at line %d: %.*s\n", token.line, token.length, token.start); exit(1); } } } while (token.type != TOKEN_EOF); } /** * 2nd pass, emit the bytecode */ void emit_bytecode(VM *vm, char *source, SymbolTable *st) { USED(st); Token token; init_lexer(source); do { token = next_token(); if (token.type == TOKEN_ERROR) { printf("ERROR at line %d: %.*s\n", token.line, token.length, token.start); break; } if (token.type != TOKEN_EOF) { printf("[Generate Bytecode] Line %d [%s]: %.*s\n", token.line, token_type_to_string(token.type), token.length, token.start); if (token.type == TOKEN_KEYWORD_GLOBAL) { // ignore, already processed next_token(); // type next_token(); // var next_token(); // eq next_token(); // value next_token(); // ; continue; } if (token.type == TOKEN_KEYWORD_FN) { // ignore, already processed Token next = next_token(); while (next.type != TOKEN_RPAREN) { next = next_token(); } continue; } if (token.type == TOKEN_KEYWORD_PLEX || token.type == TOKEN_TYPE_I8 || token.type == TOKEN_TYPE_I16 || token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_U8 || token.type == TOKEN_TYPE_U16 || token.type == TOKEN_TYPE_NAT || token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { // ignore, already processed next_token(); // type next_token(); // var next_token(); // reg next_token(); // ; continue; } if 
(token.type == TOKEN_KEYWORD_LOOP || token.type == TOKEN_KEYWORD_IF || token.type == TOKEN_KEYWORD_ELSE || token.type == TOKEN_KEYWORD_DO || token.type == TOKEN_KEYWORD_FOR) { // ignore, already processed next_token(); // id } if (token.type == TOKEN_KEYWORD_RETURN) { vm->pc++; Token next = next_token(); if (next.type == TOKEN_SEMICOLON) { /* put 0xFF as return register */ emit_u8(vm, 0xFF); vm->pc++; continue; } vm->pc++; next_token_is(TOKEN_SEMICOLON); continue; } } } while (token.type != TOKEN_EOF); } /** * Emit bytecode to the VM from the source string. */ void assemble(VM *vm, char *source) { SymbolTable *st = symbol_table_init(); build_symbol_table(vm, source, st); vm->pc = 0; /* actuall start emitting code */ emit_bytecode(vm, source, st); free(st->symbols); free(st); }