#include "compiler.h"
#include "common.h"
#include "emit.h"
#include "lexer.h"

/* Compiler-wide state shared by the symbol-table pass and the emit pass. */
Emitter emitter = {0};
Parser parser = {0};
Arena *arena;

/****************************************************
 * Scope
 ***************************************************/

/*
 * Open a new innermost scope whose parent is the current scope, make it
 * current and bump parser.depth.
 */
void
scope_push()
{
  Scope *child = aalloc(arena, sizeof(Scope));

  child->symbols = List_init(arena);
  child->parent = parser.current_scope;
  child->locals_offset = 0;

  parser.current_scope = child;
  parser.depth++;
}

/*
 * Leave the current scope and return to its parent.
 * Does nothing when there is no current scope, so unbalanced input cannot
 * make this dereference nil.
 */
void
scope_pop()
{
  if(parser.current_scope == nil) return;

  parser.current_scope = parser.current_scope->parent;
  parser.depth--;
}

/*
 * Look up `name` (exactly `name_length` bytes) starting in `scope` and
 * walking outward through the parent chain.
 * Returns the first matching symbol, or nil when no scope defines it.
 */
Symbol *
scope_get_symbol(Scope *scope, const char *name, u32 name_length)
{
  for(; scope != nil; scope = scope->parent) {
    u32 count = scope->symbols->count;
    u32 i;

    for(i = 0; i < count; i++) {
      Symbol *sym = List_get(scope->symbols, i);
      if(sym->name_length == name_length &&
         sleq(sym->name, name, name_length))
        return sym;
    }
  }
  return nil;
}

/*
 * Create a symbol in the current scope and return it.
 *
 * If the name already resolves (in this scope or any enclosing one):
 *   - on a later pass (parser.pass != 0) the existing symbol is returned;
 *   - on the first pass an error is reported and a new symbol is still
 *     created, as before.
 *
 * The new symbol's `ref` is the scope's running locals offset, which is then
 * advanced by `size`.
 */
Symbol *
scope_add_symbol(const char *name, u32 name_length, SymbolType type, u32 size)
{
  Symbol *sym = scope_get_symbol(parser.current_scope, name, name_length);

  if(sym != nil) {
    if(parser.pass) return sym;
    /* "duplicate found" is 15 characters; the previous length of 14 cut off
     * the final character. */
    emitter.error("duplicate found", 15, parser.previous.line);
  }

  sym = aalloc(arena, sizeof(Symbol));
  scpy(sym->name, name, name_length);
  sym->name_length = name_length;
  sym->type = type;
  sym->secondary_type = SYMBOL_VOID;
  sym->size = size;
  sym->ref = parser.current_scope->locals_offset;
  parser.current_scope->locals_offset += size;

  /* callable / aggregate kinds carry an argument list ... */
  if(type == SYMBOL_PLEX || type == SYMBOL_METHOD || type == SYMBOL_TRAIT ||
     type == SYMBOL_FUNCTION) {
    sym->args = List_init(arena);
  }

  /* ... and every one of those except plain functions also has fields */
  if(type == SYMBOL_PLEX || type == SYMBOL_METHOD || type == SYMBOL_TRAIT)
    sym->fields = List_init(arena);

  List_push(arena, parser.current_scope->symbols, sym, sizeof(Symbol));

  return sym;
}

/* True when the current scope exists and has no parent. */
bool
is_global()
{
  return parser.current_scope != nil && parser.current_scope->parent == nil;
}

/* True when the previously consumed token is one of the TOKEN_TYPE_* tokens. */
bool
is_type()
{
  return parser.previous.type >= TOKEN_TYPE_I8 &&
         parser.previous.type <= TOKEN_TYPE_PTR;
}

/*
 * Map a type token to its symbol type.
 * Tokens without a mapping here (including TOKEN_TYPE_VOID and
 * TOKEN_TYPE_PTR) yield SYMBOL_UNDEFINED.
 */
SymbolType
token_type_to_sym_type(TokenType t)
{
  switch(t) {
  case TOKEN_TYPE_BOOL: return SYMBOL_BOOL;
  case TOKEN_TYPE_BYTE: return SYMBOL_BYTE;
  case TOKEN_TYPE_INT:  return SYMBOL_INT;
  case TOKEN_TYPE_NAT:  return SYMBOL_NAT;
  case TOKEN_TYPE_REAL: return SYMBOL_REAL;
  case TOKEN_TYPE_STR:  return SYMBOL_STR;
  case TOKEN_TYPE_U8:   return SYMBOL_U8;
  case TOKEN_TYPE_I8:   return SYMBOL_I8;
  case TOKEN_TYPE_I16:  return SYMBOL_I16;
  case TOKEN_TYPE_U16:  return SYMBOL_U16;
  default: break;
  }
  return SYMBOL_UNDEFINED;
}

/****************************************************
 * Parser
 ***************************************************/

/*
 * Shift the token window: current becomes previous and a fresh token is read
 * from the lexer. Returns false when the new token is TOKEN_ERROR.
 */
bool
advance()
{
  parser.previous = parser.current;
  parser.current = next_token();
  return parser.current.type != TOKEN_ERROR;
}

/* True when the current (not yet consumed) token has the given type. */
bool
check(TokenType type)
{
  return parser.current.type == type;
}

/* Consume the current token if it has the given type; report whether it did. */
bool
consume(TokenType type)
{
  if(check(type)) {
    advance();
    return true;
  }

  return false;
}

/* Same contract as consume(): advance past `type` if present. */
bool
match(TokenType type)
{
  if(!check(type)) return false;
  advance();
  return true;
}

/* Consume `type` or report an error located at the previous token. */
void
expect(TokenType type)
{
  if(!consume(type)) {
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);
  }
}

u32 variable_declaration(Symbol *def);

/* Placeholder: plex definitions are not implemented yet. */
void
define_plex()
{
}

/* Placeholder: trait definitions are not implemented yet. */
void
define_trait()
{
}

/*
 * First-pass handling of `fn name(type ident, ...) [return_type] { body }`.
 * Called with the `fn` keyword already consumed.
 *
 * Registers the function symbol, records each parameter through
 * variable_declaration(fn), stores the return type in secondary_type, then
 * scans the body for `type identifier` declarations so that fn->size ends up
 * as the summed size of parameters and locals.
 *
 * Fixes relative to the earlier version:
 *   - `()` and a missing return type no longer feed ')' / '{' into
 *     variable_declaration as if they were type tokens;
 *   - the body scan tracks brace depth, so a nested block's '}' no longer
 *     ends the function early and unbalances the scope stack;
 *   - the token following a local declaration is no longer skipped
 *     unchecked (it could be the closing '}');
 *   - every loop stops at EOF instead of spinning on truncated input.
 *
 * NOTE(review): body locals still go through variable_declaration(fn), which
 * pushes them onto fn->args and reports an error when the declaration is not
 * followed by ',' or ')'. That is pre-existing behaviour - confirm intent.
 */
void
define_function()
{
  Symbol *fn;
  u32 size = 0;
  u32 brace_depth = 1;

  advance(); /* function name becomes parser.previous */
  fn = scope_add_symbol(parser.previous.start, parser.previous.length,
                        SYMBOL_FUNCTION, 0);

  /* need to push scope early because the variables need to be a part of the
   * fn scope */
  scope_push();

  expect(TOKEN_LPAREN);

  if(check(TOKEN_RPAREN)) {
    /* empty parameter list: step over ')' and, when something other than
     * '{' follows, move it (the return type) into parser.previous */
    advance();
    if(!check(TOKEN_LBRACE) && !check(TOKEN_EOF)) advance();
  } else {
    advance(); /* first parameter type becomes parser.previous */

    /* parse the args */
    while(!check(TOKEN_LBRACE) && !check(TOKEN_EOF)) {
      size += variable_declaration(fn);
      advance();                     /* step over ',' or ')' */
      if(check(TOKEN_LBRACE)) break; /* no return type given */
      advance(); /* next parameter type, or the return type, -> previous */
    }
  }

  if(is_type()) {
    /* get return slot */
    fn->secondary_type = token_type_to_sym_type(parser.previous.type);
  }

  /* now parse the fn body */
  if(!consume(TOKEN_LBRACE)) {
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);
    fn->size = size;
    scope_pop();
    return;
  }

  /* get the size of all the locals for the function in the body */
  while(brace_depth > 0 && !check(TOKEN_EOF)) {
    if(match(TOKEN_LBRACE)) {
      brace_depth++;
    } else if(match(TOKEN_RBRACE)) {
      brace_depth--;
    } else if(is_type() && check(TOKEN_IDENTIFIER)) {
      /* previous = type, current = name */
      size += variable_declaration(fn);
    } else {
      advance();
    }
  }

  fn->size = size;
  scope_pop();
}

/*
 * Pass one: scan `source` and record top-level definitions.
 * Opens the global scope, tracks bare `{` / `}` as nested scopes and hands
 * `fn`, `plex` and `trait` to their define_* routines. Everything else is
 * skipped one token at a time.
 */
void
build_symbol_table(char *source)
{
  init_lexer(source);
  advance();
  scope_push();

  while(!match(TOKEN_EOF)) {
    if(match(TOKEN_LBRACE)) {
      scope_push();
    } else if(match(TOKEN_RBRACE)) {
      if(is_global()) {
        /* unmatched '}': keep the global scope instead of popping to nil */
        emitter.error(parser.previous.start, parser.previous.length,
                      parser.previous.line);
      } else {
        scope_pop();
      }
    } else if(match(TOKEN_KEYWORD_FN)) {
      define_function();
    } else if(match(TOKEN_KEYWORD_PLEX)) {
      define_plex();
    } else if(match(TOKEN_KEYWORD_TRAIT)) {
      define_trait();
    } else {
      /* in binary bytecode output mode we need to count the bytes here */
      /* otherwise ignore everything */
      /* The token must still be consumed: without this the loop never
       * progresses past any token that is not handled above. */
      advance();
    }
  }
}

void expression();
void statement();
void declaration();
ParseRule *get_rule(TokenType type);
void parse_precedence(Precedence precedence);

/*
 * Infix handler: the operator is in parser.previous and the left operand has
 * already been parsed. Parses the right operand at one precedence level above
 * the operator's own, then calls the matching emitter hook.
 */
void
binary()
{
  TokenType operatorType = parser.previous.type;
  ParseRule *rule = get_rule(operatorType);

  parse_precedence((Precedence)(rule->precedence + 1));

  switch(operatorType) {
  case TOKEN_BANG_EQ:      emitter.emit_ne();  break;
  case TOKEN_EQ_EQ:        emitter.emit_eq();  break;
  case TOKEN_GT:           emitter.emit_gt();  break;
  case TOKEN_GTE:          emitter.emit_ge();  break;
  case TOKEN_LT:           emitter.emit_lt();  break;
  case TOKEN_LTE:          emitter.emit_le();  break;
  case TOKEN_PLUS:         emitter.emit_add(); break;
  case TOKEN_MINUS:        emitter.emit_sub(); break;
  case TOKEN_STAR:         emitter.emit_mul(); break;
  case TOKEN_SLASH:        emitter.emit_div(); break;
  case TOKEN_OPERATOR_AND: emitter.emit_and(); break;
  case TOKEN_OPERATOR_OR:  emitter.emit_or();  break;
  case TOKEN_OPERATOR_XOR: emitter.emit_xor(); break;
  case TOKEN_OPERATOR_MOD: emitter.emit_mod(); break;
  case TOKEN_SLL:          emitter.emit_sll(); break;
  case TOKEN_SRL:          emitter.emit_srl(); break;
  default: return;
  }
}

/* Prefix handler for the keyword literals `false`, `true` and `nil`. */
void
literal()
{
  switch(parser.previous.type) {
  case TOKEN_KEYWORD_FALSE: emitter.emit_false(); break;
  case TOKEN_KEYWORD_TRUE:  emitter.emit_true();  break;
  case TOKEN_KEYWORD_NIL:   emitter.emit_nil();   break;
  default: return;
  }
}

/* Parse one full expression, starting at assignment precedence. */
void
expression()
{
  parse_precedence(PREC_ASSIGNMENT);
}

/*
 * Prefix handler for identifiers (the identifier is parser.previous).
 *   - unknown name: report an error and stop;
 *   - function name: remember it in parser.call_fn for a following call();
 *   - otherwise: record its type, then emit either a store (when `=`
 *     follows) or a load.
 */
void
variable()
{
  Symbol *sym = scope_get_symbol(parser.current_scope, parser.previous.start,
                                 parser.previous.length);

  if(sym == nil) {
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);
    return; /* nothing to emit; continuing would dereference nil */
  }

  if(sym->type == SYMBOL_FUNCTION) {
    parser.call_fn = sym;
    return;
  }

  parser.current_type = sym->type;

  if(match(TOKEN_EQ)) {
    expression();
    emitter.emit_set_variable(sym, parser.depth);
  } else {
    emitter.emit_variable(sym, parser.depth);
  }
}

/*
 * Infix handler for `as`: the source type is parser.current_type and the
 * target type is the current token, which is consumed here. Conversions
 * among int, nat, real and str are emitted; any other pairing is reported as
 * an error.
 */
void
cast_type()
{
  SymbolType from = parser.current_type;
  TokenType target = parser.current.type;

  advance();

  switch(from) {
  case SYMBOL_INT:
    switch(target) {
    case TOKEN_TYPE_NAT:  emitter.emit_cast_int_to_nat();  break;
    case TOKEN_TYPE_REAL: emitter.emit_cast_int_to_real(); break;
    case TOKEN_TYPE_STR:  emitter.emit_cast_int_to_str();  break;
    default:
      emitter.error("Not castable to this type", 26, parser.previous.line);
    }
    break;

  case SYMBOL_NAT:
    switch(target) {
    case TOKEN_TYPE_INT:  emitter.emit_cast_nat_to_int();  break;
    case TOKEN_TYPE_REAL: emitter.emit_cast_nat_to_real(); break;
    case TOKEN_TYPE_STR:  emitter.emit_cast_nat_to_str();  break;
    default:
      emitter.error("Not castable to this type", 26, parser.previous.line);
    }
    break;

  case SYMBOL_REAL:
    switch(target) {
    case TOKEN_TYPE_NAT: emitter.emit_cast_real_to_nat(); break;
    case TOKEN_TYPE_INT: emitter.emit_cast_real_to_int(); break;
    case TOKEN_TYPE_STR: emitter.emit_cast_real_to_str(); break;
    default:
      emitter.error("Not castable to this type", 26, parser.previous.line);
    }
    break;

  case SYMBOL_STR:
    switch(target) {
    case TOKEN_TYPE_NAT:  emitter.emit_cast_str_to_nat();  break;
    case TOKEN_TYPE_REAL: emitter.emit_cast_str_to_real(); break;
    case TOKEN_TYPE_INT:  emitter.emit_cast_str_to_int();  break;
    default:
      emitter.error("Not castable to this type", 26, parser.previous.line);
    }
    break;

  default:
    emitter.error("Not castable to this type", 26, parser.previous.line);
  }
}

/*
 * Handle `type name [= expr] ;` where the type token is parser.previous and
 * the name is parser.current.
 *
 * def != nil: the declaration belongs to `def`; the new symbol is appended
 *             to def->args and the function returns as soon as the name is
 *             followed by ',' or ')'. Any other follower is reported as an
 *             error before the remainder is parsed.
 * def == nil: an ordinary declaration; the type is emitted and an optional
 *             initializer is compiled.
 *
 * Returns the size reported by emitter.get_size() for the type, or 0 when
 * the type token has no symbol-type mapping.
 */
u32
variable_declaration(Symbol *def)
{
  i32 size = 0;
  TokenType tt = parser.previous.type;
  Token var = parser.current;
  SymbolType st = token_type_to_sym_type(tt);
  Symbol *sym;

  if(st != SYMBOL_UNDEFINED) {
    size = emitter.get_size(st);
    sym = scope_add_symbol(parser.current.start, parser.current.length, st,
                           size);
    if(def) {
      List_push(arena, def->args, sym, sizeof(Symbol));
    } else {
      emitter.emit_type(sym, parser.depth);
      parser.current_type = st;
    }
  } else {
    /* we need to look up the type */
    emitter.error("Not implemented", 16, parser.previous.line);
  }

  advance(); /* the name becomes parser.previous */

  if(def && (check(TOKEN_COMMA) || check(TOKEN_RPAREN))) return size;
  if(def)
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);

  if(match(TOKEN_EQ)) {
    emitter.emit_set_value();
    expression();
    emitter.emit_constant(var.start, var.length, parser.depth);
  }

  consume(TOKEN_SEMICOLON);
  emitter.emit_end_statement();
  parser.current_type = SYMBOL_UNDEFINED;
  return size;
}

/*
 * A declaration is a variable declaration when the previous token is a type
 * token, and a statement otherwise.
 */
void
declaration()
{
  if(is_type()) {
    variable_declaration(nil);
  } else {
    statement();
  }
}

/* `print expr ;` (keyword already consumed). */
void
print_statement()
{
  expression();
  consume(TOKEN_SEMICOLON);
  emitter.emit_print();
  emitter.emit_end_statement();
}

/* `putchar expr ;` (keyword already consumed). */
void
putchar_statement()
{
  expression();
  consume(TOKEN_SEMICOLON);
  emitter.emit_end_statement();
  emitter.emit_putchar();
}

/*
 * `getchar expr ;` (keyword already consumed).
 * NOTE(review): this calls emit_putchar, exactly like putchar_statement;
 * it looks like a copy/paste slip - confirm which emitter hook is intended.
 */
void
getchar_statement()
{
  expression();
  consume(TOKEN_SEMICOLON);
  emitter.emit_end_statement();
  emitter.emit_putchar();
}

/*
 * `if ( cond ) stmt [else stmt]` (keyword already consumed).
 * else_if_depth is raised while the else branch is parsed, so the final
 * patch runs (and emitter.ifs is incremented) only when the depth is back
 * at zero.
 */
void
if_statement()
{
  consume(TOKEN_LPAREN);
  expression();
  consume(TOKEN_RPAREN);

  emitter.emit_if();
  statement();
  emitter.emit_patch_if(emitter.ifs);

  if(match(TOKEN_KEYWORD_ELSE)) {
    emitter.else_if_depth++;
    statement();
    emitter.else_if_depth--;
  }

  if(emitter.else_if_depth == 0) emitter.emit_patch_if_done(emitter.ifs++);
}

/*
 * `while ( cond ) stmt` (keyword already consumed). emitter.loops is used
 * for the header and the patch, and is incremented afterwards.
 */
void
while_statement()
{
  emitter.emit_while(emitter.loops);

  consume(TOKEN_LPAREN);
  expression();
  consume(TOKEN_RPAREN);

  emitter.emit_while_postfix();
  statement();
  emitter.emit_patch_while(emitter.loops++);
}

/* Parse declarations until '}' or EOF, then consume the '}' if present. */
void
block()
{
  while(!check(TOKEN_RBRACE) && !check(TOKEN_EOF)) declaration();
  consume(TOKEN_RBRACE);
}

/*
 * Second-pass handling of a function definition; the function name is
 * parser.previous on entry. Emits the function header, skips ahead to the
 * opening '{' (or EOF), parses the body and emits the return.
 *
 * NOTE(review): block() is entered with '{' still unconsumed, so how the
 * brace is consumed depends on declaration()/statement(); left unchanged.
 */
void
function()
{
  Symbol *sym = scope_get_symbol(parser.current_scope, parser.previous.start,
                                 parser.previous.length);

  emitter.emit_function(sym);

  /* the EOF check keeps a truncated definition from looping forever */
  while(!check(TOKEN_LBRACE) && !check(TOKEN_EOF)) {
    advance();
  }

  block();
  emitter.emit_arena_fn_return();
}

/*
 * Infix handler for '(' following a callee expression.
 * The callee is taken from parser.call_fn on entry and the slot is cleared,
 * so a nested call inside the argument list cannot replace it and a stale
 * value cannot be reused. A '(' with no function before it is reported as
 * an error.
 *
 * NOTE(review): the trailing advance() after ')' is kept from the earlier
 * version; it skips one more token unconditionally - confirm intent.
 */
void
call()
{
  Symbol *callee = parser.call_fn;

  parser.call_fn = nil;

  if(!check(TOKEN_RPAREN)) {
    do {
      expression();
    } while(match(TOKEN_COMMA));
  }
  consume(TOKEN_RPAREN);

  if(callee == nil) {
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);
    return;
  }

  emitter.emit_arena_fn_call(callee);
  parser.current_type = callee->secondary_type;
  advance();
}

/*
 * Dispatch on the current token: braced block, function definition, one of
 * the keyword statements, or a bare expression.
 */
void
statement()
{
  if(match(TOKEN_LBRACE)) {
    scope_push();
    block();
    scope_pop();
  } else if(match(TOKEN_KEYWORD_FN)) {
    advance(); /* function name becomes parser.previous */
    function();
  } else if(match(TOKEN_KEYWORD_IF)) {
    if_statement();
  } else if(match(TOKEN_KEYWORD_WHILE)) {
    while_statement();
  } else if(match(TOKEN_KEYWORD_PRINT)) {
    print_statement();
  } else if(match(TOKEN_KEYWORD_PUTCHAR)) {
    putchar_statement();
  } else if(match(TOKEN_KEYWORD_GETCHAR)) {
    getchar_statement();
  } else {
    expression();
  }
}

/* Prefix handler for '(' used as a parenthesised sub-expression. */
void
grouping()
{
  emitter.emit_open_paren();
  expression();
  emitter.emit_close_paren();
  consume(TOKEN_RPAREN);
}

/* Prefix handler for numeric literals; passes the raw lexeme to emit_int. */
void
number()
{
  emitter.emit_int(parser.previous.start, parser.previous.length);
}

/* Prefix handler for string literals; passes the raw lexeme to emit_str. */
void
string()
{
  emitter.emit_str(parser.previous.start, parser.previous.length);
}

/* Prefix handler for unary '-' and '!'. */
void
unary()
{
  TokenType operatorType = parser.previous.type;

  parse_precedence(PREC_UNARY);

  switch(operatorType) {
  case TOKEN_MINUS: emitter.emit_neg(); break;
  case TOKEN_BANG:  emitter.emit_not(); break;
  default: return;
  }
}

/*
 * Parse table indexed by TokenType: {prefix handler, infix handler,
 * infix precedence}. The entry order must match the TokenType enum.
 *
 * NOTE(review): TOKEN_OPERATOR_AND/OR/XOR/MOD, TOKEN_SLL, TOKEN_SRL and
 * TOKEN_KEYWORD_PRINT register `binary` with PREC_NONE. If PREC_NONE is the
 * lowest precedence, parse_precedence will not dispatch them as infix
 * operators - confirm the intended precedences.
 */
ParseRule rules[] = {
    /* TOKEN_ERROR */ {nil, nil, PREC_NONE},
    /* TOKEN_EOF */ {nil, nil, PREC_NONE},
    /* TOKEN_LPAREN */ {grouping, call, PREC_CALL},
    /* TOKEN_RPAREN */ {nil, nil, PREC_NONE},
    /* TOKEN_LBRACE */ {nil, nil, PREC_NONE},
    /* TOKEN_RBRACE */ {nil, nil, PREC_NONE},
    /* TOKEN_LBRACKET */ {nil, nil, PREC_NONE},
    /* TOKEN_RBRACKET */ {nil, nil, PREC_NONE},
    /* TOKEN_IDENTIFIER */ {variable, nil, PREC_NONE},
    /* TOKEN_LITERAL_INT */ {number, nil, PREC_NONE},
    /* TOKEN_LITERAL_NAT */ {number, nil, PREC_NONE},
    /* TOKEN_LITERAL_REAL */ {number, nil, PREC_NONE},
    /* TOKEN_LITERAL_STR */ {string, nil, PREC_NONE},
    /* TOKEN_TYPE_I8 */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_I16 */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_INT */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_U8 */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_U16 */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_NAT */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_REAL */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_STR */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_BOOL */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_BYTE */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_VOID */ {nil, nil, PREC_NONE},
    /* TOKEN_TYPE_PTR */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_PLEX */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_FN */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_CONST */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_IF */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_IS */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_AS */ {nil, cast_type, PREC_CAST},
    /* TOKEN_KEYWORD_ELSE */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_WHILE */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_FOR */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_RETURN */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_USE */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_INIT */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_THIS */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_OPEN */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_READ */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_WRITE */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_STAT */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_CLOSE */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_LOOP */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_DO */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_NIL */ {literal, nil, PREC_NONE},
    /* TOKEN_KEYWORD_TRUE */ {literal, nil, PREC_NONE},
    /* TOKEN_KEYWORD_FALSE */ {literal, nil, PREC_NONE},
    /* TOKEN_KEYWORD_PUTCHAR */ {nil, nil, PREC_NONE},
    /* TOKEN_KEYWORD_GETCHAR */ {nil, nil, PREC_NONE},
    /* TOKEN_OPERATOR_AND */ {nil, binary, PREC_NONE},
    /* TOKEN_OPERATOR_OR */ {nil, binary, PREC_NONE},
    /* TOKEN_OPERATOR_XOR */ {nil, binary, PREC_NONE},
    /* TOKEN_OPERATOR_MOD */ {nil, binary, PREC_NONE},
    /* TOKEN_BANG */ {unary, nil, PREC_NONE},
    /* TOKEN_BANG_EQ */ {nil, binary, PREC_EQUALITY},
    /* TOKEN_EQ */ {nil, nil, PREC_NONE},
    /* TOKEN_EQ_EQ */ {nil, binary, PREC_EQUALITY},
    /* TOKEN_AND */ {nil, nil, PREC_NONE},
    /* TOKEN_AND_AND */ {nil, nil, PREC_NONE},
    /* TOKEN_PIPE */ {nil, nil, PREC_NONE},
    /* TOKEN_PIPE_PIPE */ {nil, nil, PREC_NONE},
    /* TOKEN_QUESTION */ {nil, nil, PREC_NONE},
    /* TOKEN_QUESTION_DOT */ {nil, nil, PREC_NONE},
    /* TOKEN_PLUS */ {nil, binary, PREC_TERM},
    /* TOKEN_MINUS */ {unary, binary, PREC_TERM},
    /* TOKEN_STAR */ {nil, binary, PREC_FACTOR},
    /* TOKEN_SLASH */ {nil, binary, PREC_FACTOR},
    /* TOKEN_MESH */ {nil, nil, PREC_NONE},
    /* TOKEN_BIG_MONEY */ {nil, nil, PREC_NONE},
    /* TOKEN_GT */ {nil, binary, PREC_COMPARISON},
    /* TOKEN_LT */ {nil, binary, PREC_COMPARISON},
    /* TOKEN_GTE */ {nil, binary, PREC_COMPARISON},
    /* TOKEN_LTE */ {nil, binary, PREC_COMPARISON},
    /* TOKEN_DOT */ {nil, nil, PREC_NONE},
    /* TOKEN_COMMA */ {nil, nil, PREC_NONE},
    /* TOKEN_COLON */ {nil, nil, PREC_NONE},
    /* TOKEN_CARET */ {nil, nil, PREC_NONE},
    /* TOKEN_SEMICOLON */ {nil, nil, PREC_NONE},
    /* TOKEN_ARROW_RIGHT */ {nil, nil, PREC_NONE},
    /* TOKEN_SLL */ {nil, binary, PREC_NONE},
    /* TOKEN_SRL */ {nil, binary, PREC_NONE},
    /*TOKEN_KEYWORD_PRINT*/ {nil, binary, PREC_NONE}};

/*
 * Core of the precedence-climbing parser: consume one token and run its
 * prefix handler, then keep consuming infix operators for as long as the
 * upcoming token's precedence is at least `precedence`.
 * Returns quietly when the first token has no prefix handler.
 */
void
parse_precedence(Precedence precedence)
{
  ParseFn prefixRule;
  ParseFn infixRule;

  advance();
  prefixRule = get_rule(parser.previous.type)->prefix;
  if(prefixRule == nil) return;

  prefixRule();

  while(precedence <= get_rule(parser.current.type)->precedence) {
    advance();
    infixRule = get_rule(parser.previous.type)->infix;
    /* a table entry with a precedence but no handler must not be called */
    if(infixRule == nil) return;
    infixRule();
  }
}

/* Return the parse-table entry for a token type (no bounds check). */
ParseRule *
get_rule(TokenType type)
{
  return &rules[type];
}

/*
 * Pass two: re-lex `source` from the start and emit the program, bracketed
 * by the emitter's prolog and epilogue hooks.
 */
void
emit_program(char *source)
{
  init_lexer(source);
  advance();

  emitter.prolog();

  while(!match(TOKEN_EOF)) declaration();

  emitter.epilogue();
}

/*
 * Compile `source` with emitter `e`, allocating from arena `a`.
 * Runs the symbol-table pass, increments parser.pass, then runs the emit
 * pass. Always returns true.
 */
bool
compile(Arena *a, Emitter e, char *source)
{
  arena = a;
  emitter = e;

  build_symbol_table(source);
  parser.pass++;
  emit_program(source);

  return true;
}