undar-lang/compiler.c

844 lines
18 KiB
C

#include "compiler.h"
#include "common.h"
#include "emit.h"
#include "lexer.h"
Emitter emitter = {0};
Parser parser = {0};
Arena *arena;
/****************************************************
* Scope
***************************************************/
/*
 * Create the outermost scope, append it to parser.scopes and make it the
 * current scope. The scope is zero-initialised, so its parent stays nil.
 */
void
scopes_init()
{
  Scope root = {0};

  root.locals_offset = 0;
  root.symbols = List_init(arena);
  parser.current_scope =
    (Scope *)List_push(arena, parser.scopes, &root, sizeof(Scope));
}
/*
 * Open a new scope nested inside the current one.
 *
 * The scope is appended to parser.scopes, becomes parser.current_scope, and
 * both parser.scope_idx and parser.depth are bumped by one.
 *
 * Returns the stored scope (the element handed back by List_push).
 */
Scope*
scope_new()
{
  Scope fresh = {0};
  Scope *stored;

  fresh.parent = parser.current_scope;
  fresh.locals_offset = 0;
  fresh.symbols = List_init(arena);
  stored = (Scope *)List_push(arena, parser.scopes, &fresh, sizeof(Scope));

  parser.depth++;
  parser.scope_idx++;
  parser.current_scope = stored;
  return stored;
}
void
scope_pop()
{
if (parser.current_scope) {
parser.current_scope = parser.current_scope->parent;
parser.depth--;
}
}
/*
 * Re-enter the next scope that was recorded by scope_new() during the
 * symbol-table pass.
 *
 * scopes_init() stores the root scope at list index 0 without touching
 * parser.scope_idx, and scope_new() increments parser.scope_idx right after
 * appending, so the k-th scope created lives at index k. The index therefore
 * has to be advanced *before* the lookup; fetching first would hand back the
 * root scope on the first call (and scope k-1 on the k-th), and the matching
 * scope_pop() would then walk off the root to a nil current scope.
 */
void
scope_push()
{
  parser.scope_idx++;
  parser.depth++;
  parser.current_scope = List_get(parser.scopes, parser.scope_idx);
}
/*
 * Look a name up starting in `scope` and continuing through each parent.
 *
 * scope        first scope to search; nil yields nil
 * name         identifier text (compared with sleq over name_length bytes)
 * name_length  number of bytes in name
 *
 * Returns the first symbol whose length and bytes match, or nil when no
 * scope on the parent chain contains it.
 */
Symbol *
scope_get_symbol(Scope *scope, const char *name, u32 name_length)
{
  Scope *cursor;

  for(cursor = scope; cursor != nil; cursor = cursor->parent) {
    u32 total = cursor->symbols->count;
    u32 idx;

    for(idx = 0; idx < total; idx++) {
      Symbol *candidate = List_get(cursor->symbols, idx);
      if(candidate->name_length != name_length) continue;
      if(sleq(candidate->name, name, name_length)) return candidate;
    }
  }
  return nil;
}
/*
 * Declare a symbol in the current scope.
 *
 * name / name_length  identifier text and its length in bytes
 * type                symbol kind; PLEX, METHOD, TRAIT and FUNCTION get an
 *                     args list, PLEX, METHOD and TRAIT also a fields list
 * size                bytes reserved for the symbol; the scope's
 *                     locals_offset is advanced by this amount
 *
 * If the name is already visible from the current scope (parents included):
 *   - when parser.pass is non-zero the existing symbol is returned as is;
 *   - otherwise a "duplicate found" error is reported through emitter.error
 *     and, if that call returns, the symbol is still appended.
 *
 * Returns the symbol stored in the scope's list (or the existing one, see
 * above).
 */
Symbol *
scope_add_symbol(const char *name, u32 name_length, SymbolType type, u32 size)
{
  Symbol new_sym = {0};
  Symbol *sym = scope_get_symbol(parser.current_scope, name, name_length);

  if(sym != nil) {
    if(parser.pass)
      return sym;
    else
      /* the message is 15 characters long; 14 dropped the final 'd' */
      emitter.error("duplicate found", 15, parser.previous.line);
  }

  scpy(new_sym.name, name, name_length);
  new_sym.name_length = name_length;
  new_sym.type = type;
  new_sym.secondary_type = SYMBOL_VOID;
  new_sym.size = size;
  /* the symbol lives at the scope's current offset; reserve its bytes */
  new_sym.ref = parser.current_scope->locals_offset;
  new_sym.scope = parser.scope_idx;
  parser.current_scope->locals_offset += size;

  if(type == SYMBOL_PLEX || type == SYMBOL_METHOD || type == SYMBOL_TRAIT ||
     type == SYMBOL_FUNCTION) {
    new_sym.args = List_init(arena);
  }
  if(type == SYMBOL_PLEX || type == SYMBOL_METHOD || type == SYMBOL_TRAIT)
    new_sym.fields = List_init(arena);

  return List_push(arena, parser.current_scope->symbols, &new_sym,
                   sizeof(Symbol));
}
bool
is_global()
{
return parser.current_scope != nil && parser.current_scope->parent == nil;
}
bool
is_type()
{
return parser.previous.type >= TOKEN_TYPE_I8 &&
parser.previous.type <= TOKEN_TYPE_PTR;
}
/*
 * Map a type-keyword token to the matching SymbolType.
 *
 * Returns SYMBOL_UNDEFINED for every token that has no entry below.
 */
SymbolType
token_type_to_sym_type(TokenType t)
{
  SymbolType mapped = SYMBOL_UNDEFINED;

  switch(t) {
    case TOKEN_TYPE_I8:   mapped = SYMBOL_I8;   break;
    case TOKEN_TYPE_I16:  mapped = SYMBOL_I16;  break;
    case TOKEN_TYPE_INT:  mapped = SYMBOL_INT;  break;
    case TOKEN_TYPE_U8:   mapped = SYMBOL_U8;   break;
    case TOKEN_TYPE_U16:  mapped = SYMBOL_U16;  break;
    case TOKEN_TYPE_NAT:  mapped = SYMBOL_NAT;  break;
    case TOKEN_TYPE_REAL: mapped = SYMBOL_REAL; break;
    case TOKEN_TYPE_STR:  mapped = SYMBOL_STR;  break;
    case TOKEN_TYPE_BOOL: mapped = SYMBOL_BOOL; break;
    case TOKEN_TYPE_BYTE: mapped = SYMBOL_BYTE; break;
    default:
      break;
  }
  return mapped;
}
/****************************************************
* Parser
***************************************************/
/*
 * Shift the token window by one: the current token becomes the previous one
 * and a fresh token is pulled from the lexer.
 *
 * Returns false when the new token is TOKEN_ERROR, true otherwise.
 */
bool
advance()
{
  parser.previous = parser.current;
  parser.current = next_token();
  return parser.current.type != TOKEN_ERROR;
}
/* Peek: is the current (not yet consumed) token of the given type? */
bool
check(TokenType type)
{
  return type == parser.current.type;
}
/*
 * Consume the current token when it has the given type.
 *
 * Returns true if a token was consumed, false if the parser did not move.
 */
bool
consume(TokenType type)
{
  if(!check(type)) return false;

  advance();
  return true;
}
/*
 * Step past the current token if it is of the given type.
 *
 * Returns true when the token matched (and was consumed), false otherwise.
 */
bool
match(TokenType type)
{
  if(check(type)) {
    advance();
    return true;
  }
  return false;
}
/*
 * Require the current token to be of the given type and consume it.
 * On mismatch the previous token's text and line are handed to
 * emitter.error.
 */
void
expect(TokenType type)
{
  if(consume(type)) return;

  emitter.error(parser.previous.start, parser.previous.length,
                parser.previous.line);
}
u32 variable_declaration(Symbol *def);
/*
 * Symbol-table hook called by build_symbol_table() after a
 * TOKEN_KEYWORD_PLEX token has been consumed. Currently an empty stub: no
 * symbol is recorded and no further tokens are consumed.
 */
void
define_plex()
{
}
/*
 * Symbol-table hook called by build_symbol_table() after a
 * TOKEN_KEYWORD_TRAIT token has been consumed. Currently an empty stub: no
 * symbol is recorded and no further tokens are consumed.
 */
void
define_trait()
{
}
void
define_function()
{
Symbol *fn;
u32 size = 0;
advance();
fn = scope_add_symbol(parser.previous.start, parser.previous.length,
SYMBOL_FUNCTION, 0);
/* need to push scope early because the variables need to be a part of the fn
* scope */
scope_new();
expect(TOKEN_LPAREN);
advance();
/* parse the args */
while(!check(TOKEN_LBRACE)) {
size += variable_declaration(fn);
advance();
if (check(TOKEN_LBRACE)) break;
advance();
}
if(is_type()) {
/* get return slot */
fn->secondary_type = token_type_to_sym_type(parser.previous.type);
/* now parse the fn body */
}
expect(TOKEN_LBRACE);
/* get the size of all the locals for the function in the body */
while(parser.depth > 0) {
if(match(TOKEN_LBRACE)) {
scope_new();
continue;
} else if(match(TOKEN_RBRACE)) {
scope_pop();
continue;
} else if(is_type() && parser.current.type == TOKEN_IDENTIFIER)
size += variable_declaration(nil);
advance();
}
fn->size = size;
}
void
build_symbol_table(char *source)
{
init_lexer(source);
advance();
scopes_init();
while(!match(TOKEN_EOF)) {
if(match(TOKEN_LBRACE)) {
scope_new();
} else if(match(TOKEN_RBRACE)) {
scope_pop();
} else if(match(TOKEN_KEYWORD_FN)) {
define_function();
} else if(match(TOKEN_KEYWORD_PLEX)) {
define_plex();
} else if(match(TOKEN_KEYWORD_TRAIT)) {
define_trait();
} else {
/* in binary bytecode output mode we need to count the bytes here */
/* otherwise ignore everything */
advance();
}
}
}
void expression();
void statement();
void declaration();
ParseRule *get_rule(TokenType type);
void parse_precedence(Precedence precedence);
/*
 * Handle `<type> <name>` with the type keyword in parser.previous and the
 * name in parser.current.
 *
 * def  the function symbol whose parameter is being declared, or nil for an
 *      ordinary variable declaration.
 *
 * Behaviour by mode:
 *   - def != nil: the variable is appended to def->args; the name is consumed
 *     and the next token must be ',' or ')', otherwise an error is reported.
 *   - def == nil, parser.pass == 0: only the symbol is recorded.
 *   - def == nil, later pass: the type is emitted, an optional `= expr`
 *     initialiser is compiled, and the trailing ';' is consumed if present.
 *
 * Type keywords without a SymbolType mapping are reported as
 * "Not implemented"; the name token is skipped and nothing else is emitted,
 * because there is no symbol to emit code for.
 *
 * Returns the size reported by emitter.get_size for the type, or 0 when the
 * type is not supported.
 */
u32
variable_declaration(Symbol *def)
{
  i32 size = 0;
  TokenType tt = parser.previous.type;
  SymbolType st = token_type_to_sym_type(tt);
  Symbol *variable = nil;

  if(st == SYMBOL_UNDEFINED) {
    /* we need to look up the type */
    emitter.error("Not implemented", 16, parser.previous.line);
    /* step over the name so callers keep making progress, then stop: the
     * code below would otherwise use a symbol that was never created */
    advance();
    return size;
  }

  size = emitter.get_size(st);
  variable =
    scope_add_symbol(parser.current.start, parser.current.length, st, size);
  if(!def && (parser.pass == 0)) {
    return size;
  }
  if(def) {
    List_push(arena, def->args, variable, sizeof(Symbol));
  } else {
    emitter.emit_type(variable, parser.depth);
    parser.current_type = st;
  }

  advance();
  if(def && (check(TOKEN_COMMA) || check(TOKEN_RPAREN))) return size;
  if(def)
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);
  if(parser.pass == 0) return size;

  if(match(TOKEN_EQ)) {
    emitter.emit_set_value();
    expression();
    emitter.emit_constant(variable, parser.depth);
  }
  consume(TOKEN_SEMICOLON);
  emitter.emit_end_statement();
  parser.current_type = SYMBOL_UNDEFINED;
  return size;
}
/*
 * Infix handler for binary operators. The operator is in parser.previous;
 * the right-hand operand is parsed at one precedence level above the
 * operator's own, after which the matching emitter hook is invoked.
 * Operators without a case below emit nothing.
 */
void
binary()
{
  TokenType op = parser.previous.type;
  ParseRule *op_rule = get_rule(op);

  parse_precedence((Precedence)(op_rule->precedence + 1));

  switch(op) {
    case TOKEN_BANG_EQ:      emitter.emit_ne();  break;
    case TOKEN_EQ_EQ:        emitter.emit_eq();  break;
    case TOKEN_GT:           emitter.emit_gt();  break;
    case TOKEN_GTE:          emitter.emit_ge();  break;
    case TOKEN_LT:           emitter.emit_lt();  break;
    case TOKEN_LTE:          emitter.emit_le();  break;
    case TOKEN_PLUS:         emitter.emit_add(); break;
    case TOKEN_MINUS:        emitter.emit_sub(); break;
    case TOKEN_STAR:         emitter.emit_mul(); break;
    case TOKEN_SLASH:        emitter.emit_div(); break;
    case TOKEN_OPERATOR_AND: emitter.emit_and(); break;
    case TOKEN_OPERATOR_OR:  emitter.emit_or();  break;
    case TOKEN_OPERATOR_XOR: emitter.emit_xor(); break;
    case TOKEN_OPERATOR_MOD: emitter.emit_mod(); break;
    case TOKEN_SLL:          emitter.emit_sll(); break;
    case TOKEN_SRL:          emitter.emit_srl(); break;
    default:
      break;
  }
}
void
literal()
{
switch(parser.previous.type) {
case TOKEN_KEYWORD_FALSE:
emitter.emit_false();
break;
case TOKEN_KEYWORD_TRUE:
emitter.emit_true();
break;
case TOKEN_KEYWORD_NIL:
emitter.emit_nil();
break;
default:
return;
}
}
/* Parse one full expression, i.e. anything at PREC_ASSIGNMENT or tighter. */
void
expression()
{
  parse_precedence(PREC_ASSIGNMENT);
}
/*
 * Prefix handler for identifiers (the name is in parser.previous).
 *
 *   - Unknown name: the identifier is reported through emitter.error and
 *     nothing is emitted.
 *   - Function name: remembered in parser.call_fn for a following call();
 *     nothing is emitted here.
 *   - Otherwise: parser.current_type becomes the symbol's type and either an
 *     assignment (`name = expr`) or a plain read is emitted.
 */
void
variable()
{
  Symbol *sym = scope_get_symbol(parser.current_scope, parser.previous.start,
                                 parser.previous.length);

  if(sym == nil) {
    emitter.error(parser.previous.start, parser.previous.length,
                  parser.previous.line);
    /* there is no symbol to inspect; continuing would dereference nil */
    return;
  }
  if(sym->type == SYMBOL_FUNCTION) {
    parser.call_fn = sym;
    return;
  }

  parser.current_type = sym->type;
  if(match(TOKEN_EQ)) {
    expression();
    emitter.emit_set_variable(sym, sym->scope);
  } else {
    emitter.emit_variable(sym, sym->scope);
  }
}
/*
 * Infix handler for `<expr> as <type>`.
 *
 * The source type is parser.current_type (set by whatever produced the
 * left-hand value); the target is the type keyword in parser.current, which
 * is consumed. Conversions are emitted between INT, NAT, REAL and STR; any
 * other combination reports "Not castable to this type".
 *
 * If the token after `as` is not in the TOKEN_TYPE_I8..TOKEN_TYPE_PTR range
 * an error is reported and no token is consumed.
 */
void
cast_type()
{
  SymbolType st = parser.current_type;
  TokenType target;

  if(!(parser.current.type >= TOKEN_TYPE_I8 &&
       parser.current.type <= TOKEN_TYPE_PTR)) {
    /* the message is 28 characters long; 26 cut off the trailing "'." */
    emitter.error("Expect type name after 'as'.", 28, parser.current.line);
    return;
  }
  target = parser.current.type;
  advance();

  switch(st) {
    case SYMBOL_INT: {
      switch(target) {
        case TOKEN_TYPE_NAT: {
          emitter.emit_cast_int_to_nat();
          break;
        }
        case TOKEN_TYPE_REAL: {
          emitter.emit_cast_int_to_real();
          break;
        }
        case TOKEN_TYPE_STR: {
          emitter.emit_cast_int_to_str();
          break;
        }
        default:
          emitter.error("Not castable to this type", 26, parser.previous.line);
      }
      break;
    }
    case SYMBOL_NAT: {
      switch(target) {
        case TOKEN_TYPE_INT: {
          emitter.emit_cast_nat_to_int();
          break;
        }
        case TOKEN_TYPE_REAL: {
          emitter.emit_cast_nat_to_real();
          break;
        }
        case TOKEN_TYPE_STR: {
          emitter.emit_cast_nat_to_str();
          break;
        }
        default:
          emitter.error("Not castable to this type", 26, parser.previous.line);
      }
      break;
    }
    case SYMBOL_REAL: {
      switch(target) {
        case TOKEN_TYPE_NAT: {
          emitter.emit_cast_real_to_nat();
          break;
        }
        case TOKEN_TYPE_INT: {
          emitter.emit_cast_real_to_int();
          break;
        }
        case TOKEN_TYPE_STR: {
          emitter.emit_cast_real_to_str();
          break;
        }
        default:
          emitter.error("Not castable to this type", 26, parser.previous.line);
      }
      break;
    }
    case SYMBOL_STR: {
      switch(target) {
        case TOKEN_TYPE_NAT: {
          emitter.emit_cast_str_to_nat();
          break;
        }
        case TOKEN_TYPE_REAL: {
          emitter.emit_cast_str_to_real();
          break;
        }
        case TOKEN_TYPE_INT: {
          emitter.emit_cast_str_to_int();
          break;
        }
        default:
          emitter.error("Not castable to this type", 26, parser.previous.line);
      }
      break;
    }
    default:
      emitter.error("Not castable to this type", 26, parser.previous.line);
  }
}
/*
 * Parse one declaration-or-statement. A type keyword that was just consumed
 * and is followed by an identifier starts a variable declaration; anything
 * else is treated as a statement.
 */
void
declaration()
{
  if(!is_type() || parser.current.type != TOKEN_IDENTIFIER) {
    statement();
    return;
  }
  variable_declaration(nil);
}
/*
 * `print <expr>;` - compile the operand, swallow an optional ';', then emit
 * the print operation followed by the end-of-statement marker.
 */
void
print_statement()
{
  expression();
  consume(TOKEN_SEMICOLON);

  emitter.emit_print();
  emitter.emit_end_statement();
}
/*
 * Compile an `if` statement; the `if` keyword has already been consumed.
 *
 * The parenthesised condition is compiled first (both parentheses are
 * optional as far as this routine is concerned: consume() ignores a
 * mismatch), then emit_if, the then-branch, and emit_patch_if for the
 * current emitter.ifs counter. An `else` branch is compiled with
 * emitter.else_if_depth raised by one, so a nested `else if` does not close
 * the chain itself: emit_patch_if_done runs, and emitter.ifs is advanced,
 * only when else_if_depth is back at zero.
 */
void
if_statement()
{
consume(TOKEN_LPAREN);
expression();
consume(TOKEN_RPAREN);
emitter.emit_if();
statement();
emitter.emit_patch_if(emitter.ifs);
if(match(TOKEN_KEYWORD_ELSE)) {
/* track that we are inside an else branch while compiling it */
emitter.else_if_depth++;
statement();
emitter.else_if_depth--;
}
/* the argument is the pre-increment value; emitter.ifs is bumped as part of
 * evaluating the call's argument */
if(emitter.else_if_depth == 0) emitter.emit_patch_if_done(emitter.ifs++);
}
/*
 * Compile a `while` loop; the `while` keyword has already been consumed.
 *
 * Order of emission: emit_while (with the current emitter.loops counter),
 * the condition expression, emit_while_postfix, the loop body, and finally
 * emit_patch_while, which also advances emitter.loops.
 */
void
while_statement()
{
emitter.emit_while(emitter.loops);
consume(TOKEN_LPAREN);
expression();
consume(TOKEN_RPAREN);
emitter.emit_while_postfix();
statement();
/* the argument is the pre-increment value; emitter.loops is bumped as part
 * of evaluating the call's argument */
emitter.emit_patch_while(emitter.loops++);
}
/*
 * Compile declarations until a closing brace or the end of input is
 * reached, then consume the closing brace if it is there. The opening brace
 * must already have been consumed by the caller.
 */
void
block()
{
  for(;;) {
    if(check(TOKEN_RBRACE) || check(TOKEN_EOF)) break;
    declaration();
  }
  consume(TOKEN_RBRACE);
}
/*
 * Emit a function definition; the function name is in parser.previous.
 *
 * The symbol is looked up from the current scope and handed to
 * emit_function, the function's scope is entered with scope_push(), every
 * token up to the closing ')' is skipped, and the braced body is compiled
 * before emit_arena_fn_return.
 *
 * NOTE(review): the scope entered here is not left again with scope_pop(),
 * unlike the braced-block case in statement() - confirm this is intended.
 */
void
function()
{
  Symbol *fn_sym = scope_get_symbol(parser.current_scope,
                                    parser.previous.start,
                                    parser.previous.length);

  emitter.emit_function(fn_sym);
  scope_push();

  /* skip the parameter list without emitting anything for it */
  for(;;) {
    if(check(TOKEN_RPAREN)) break;
    advance();
  }
  consume(TOKEN_RPAREN);
  consume(TOKEN_LBRACE);

  block();
  emitter.emit_arena_fn_return();
}
void call() {
if (!check(TOKEN_RPAREN)) {
do {
expression();
} while (match(TOKEN_COMMA));
}
consume(TOKEN_RPAREN);
emitter.emit_arena_fn_call(parser.call_fn);
parser.current_type = parser.call_fn->secondary_type;
return;
}
/*
 * `return;` or `return <expr>;` - compile the optional value, swallow the
 * ';' and emit an early return.
 */
void
return_statement()
{
  if(!match(TOKEN_SEMICOLON)) {
    expression();
    consume(TOKEN_SEMICOLON);
  }
  emitter.emit_early_return();
}
/*
 * Compile a single statement, dispatching on the current token:
 * a braced block (with its own scope), `fn`, `return`, `if`, `while`,
 * `print`, `halt`, or - when none of those match - a bare expression.
 */
void
statement()
{
  if(match(TOKEN_LBRACE)) {
    scope_push();
    block();
    scope_pop();
    return;
  }
  if(match(TOKEN_KEYWORD_FN)) {
    /* step onto the function name before handing over */
    advance();
    function();
    return;
  }
  if(match(TOKEN_KEYWORD_RETURN)) {
    return_statement();
    return;
  }
  if(match(TOKEN_KEYWORD_IF)) {
    if_statement();
    return;
  }
  if(match(TOKEN_KEYWORD_WHILE)) {
    while_statement();
    return;
  }
  if(match(TOKEN_KEYWORD_PRINT)) {
    print_statement();
    return;
  }
  if(match(TOKEN_KEYWORD_HALT)) {
    emitter.emit_halt();
    return;
  }
  expression();
}
/*
 * Prefix handler for '(' - wraps the inner expression in the emitter's
 * open/close paren hooks and then consumes the ')' if present.
 */
void
grouping()
{
  emitter.emit_open_paren();
  expression();
  emitter.emit_close_paren();

  consume(TOKEN_RPAREN);
}
/* Prefix handler for numeric literals: forwards the lexeme to emit_int. */
void
number()
{
  emitter.emit_int(parser.previous.start, parser.previous.length);
}
/* Prefix handler for string literals: forwards the lexeme to emit_str. */
void
string()
{
  emitter.emit_str(parser.previous.start, parser.previous.length);
}
/*
 * Prefix handler for unary operators. The operand is compiled at PREC_UNARY
 * first; afterwards '-' emits a negate and '!' a logical not. Any other
 * operator token emits nothing.
 */
void
unary()
{
  TokenType op = parser.previous.type;

  parse_precedence(PREC_UNARY);

  if(op == TOKEN_MINUS) {
    emitter.emit_neg();
  } else if(op == TOKEN_BANG) {
    emitter.emit_not();
  }
}
/*
 * Parse-rule table, indexed directly by TokenType (see get_rule()).
 * Each entry is {prefix handler, infix handler, infix precedence}; nil means
 * the token cannot appear in that position.
 *
 * The rows must stay in the same order as the TokenType enum - presumably
 * the order named by the row comments; verify against lexer.h when adding
 * tokens.
 *
 * NOTE(review): the AND/OR/XOR/MOD operators, SLL/SRL and
 * TOKEN_KEYWORD_PRINT list `binary` as infix handler but carry PREC_NONE;
 * given the `precedence <= rule->precedence` loop in parse_precedence() they
 * would only be picked up if PREC_NONE were requested - confirm the intended
 * precedences.
 */
ParseRule rules[] = {
/* TOKEN_ERROR */ {nil, nil, PREC_NONE},
/* TOKEN_EOF */ {nil, nil, PREC_NONE},
/* TOKEN_LPAREN */ {grouping, call, PREC_CALL},
/* TOKEN_RPAREN */ {nil, nil, PREC_NONE},
/* TOKEN_LBRACE */ {nil, nil, PREC_NONE},
/* TOKEN_RBRACE */ {nil, nil, PREC_NONE},
/* TOKEN_LBRACKET */ {nil, nil, PREC_NONE},
/* TOKEN_RBRACKET */ {nil, nil, PREC_NONE},
/* TOKEN_IDENTIFIER */ {variable, nil, PREC_NONE},
/* TOKEN_LITERAL_INT */ {number, nil, PREC_NONE},
/* TOKEN_LITERAL_NAT */ {number, nil, PREC_NONE},
/* TOKEN_LITERAL_REAL */ {number, nil, PREC_NONE},
/* TOKEN_LITERAL_STR */ {string, nil, PREC_NONE},
/* TOKEN_TYPE_I8 */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_I16 */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_INT */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_U8 */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_U16 */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_NAT */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_REAL */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_STR */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_BOOL */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_BYTE */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_VOID */ {nil, nil, PREC_NONE},
/* TOKEN_TYPE_PTR */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_PLEX */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_FN */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_CONST */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_IF */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_IS */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_AS */ {nil, cast_type, PREC_CAST},
/* TOKEN_KEYWORD_ELSE */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_WHILE */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_FOR */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_RETURN */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_USE */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_INIT */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_THIS */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_OPEN */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_READ */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_WRITE */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_STAT */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_CLOSE */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_LOOP */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_DO */ {nil, nil, PREC_NONE},
/* TOKEN_KEYWORD_NIL */ {literal, nil, PREC_NONE},
/* TOKEN_KEYWORD_TRUE */ {literal, nil, PREC_NONE},
/* TOKEN_KEYWORD_FALSE */ {literal, nil, PREC_NONE},
/* TOKEN_OPERATOR_AND */ {nil, binary, PREC_NONE},
/* TOKEN_OPERATOR_OR */ {nil, binary, PREC_NONE},
/* TOKEN_OPERATOR_XOR */ {nil, binary, PREC_NONE},
/* TOKEN_OPERATOR_MOD */ {nil, binary, PREC_NONE},
/* TOKEN_BANG */ {unary, nil, PREC_NONE},
/* TOKEN_BANG_EQ */ {nil, binary, PREC_EQUALITY},
/* TOKEN_EQ */ {nil, nil, PREC_NONE},
/* TOKEN_EQ_EQ */ {nil, binary, PREC_EQUALITY},
/* TOKEN_AND */ {nil, nil, PREC_NONE},
/* TOKEN_AND_AND */ {nil, nil, PREC_NONE},
/* TOKEN_PIPE */ {nil, nil, PREC_NONE},
/* TOKEN_PIPE_PIPE */ {nil, nil, PREC_NONE},
/* TOKEN_QUESTION */ {nil, nil, PREC_NONE},
/* TOKEN_QUESTION_DOT */ {nil, nil, PREC_NONE},
/* TOKEN_PLUS */ {nil, binary, PREC_TERM},
/* TOKEN_MINUS */ {unary, binary, PREC_TERM},
/* TOKEN_STAR */ {nil, binary, PREC_FACTOR},
/* TOKEN_SLASH */ {nil, binary, PREC_FACTOR},
/* TOKEN_MESH */ {nil, nil, PREC_NONE},
/* TOKEN_BIG_MONEY */ {nil, nil, PREC_NONE},
/* TOKEN_GT */ {nil, binary, PREC_COMPARISON},
/* TOKEN_LT */ {nil, binary, PREC_COMPARISON},
/* TOKEN_GTE */ {nil, binary, PREC_COMPARISON},
/* TOKEN_LTE */ {nil, binary, PREC_COMPARISON},
/* TOKEN_DOT */ {nil, nil, PREC_NONE},
/* TOKEN_COMMA */ {nil, nil, PREC_NONE},
/* TOKEN_COLON */ {nil, nil, PREC_NONE},
/* TOKEN_CARET */ {nil, nil, PREC_NONE},
/* TOKEN_SEMICOLON */ {nil, nil, PREC_NONE},
/* TOKEN_ARROW_RIGHT */ {nil, nil, PREC_NONE},
/* TOKEN_SLL */ {nil, binary, PREC_NONE},
/* TOKEN_SRL */ {nil, binary, PREC_NONE},
/* TOKEN_KEYWORD_PRINT*/ {nil, binary, PREC_NONE}};
/*
 * Core of the precedence-climbing parser.
 *
 * Consumes one token and runs its prefix handler (returning silently if it
 * has none). Then, for as long as the upcoming token's rule binds at least
 * as tightly as `precedence`, consumes that token and runs its infix
 * handler.
 */
void
parse_precedence(Precedence precedence)
{
  ParseFn prefix_fn;
  ParseFn infix_fn;

  advance();
  prefix_fn = get_rule(parser.previous.type)->prefix;
  if(prefix_fn == nil) return;
  prefix_fn();

  for(;;) {
    if(get_rule(parser.current.type)->precedence < precedence) break;
    advance();
    infix_fn = get_rule(parser.previous.type)->infix;
    infix_fn();
  }
}
/* Fetch the parse rule for a token type; the table is indexed by the type. */
ParseRule *
get_rule(TokenType type)
{
  return rules + type;
}
/*
 * Emission pass: restart the lexer on `source`, emit the prolog, compile
 * declarations until EOF has been consumed, then emit the epilogue.
 */
void
emit_program(char *source)
{
  init_lexer(source);
  advance();

  emitter.prolog();
  for(;;) {
    if(match(TOKEN_EOF)) break;
    declaration();
  }
  emitter.epilogue();
}
/*
 * Compile `source` in two passes using the given arena and emitter.
 *
 * Pass one (build_symbol_table) records scopes and function symbols; the
 * pass counter is then incremented and the scope index rewound so that pass
 * two (emit_program) can replay the recorded scopes while emitting code.
 *
 * Always returns true.
 */
bool
compile(Arena *a, Emitter e, char *source)
{
  /* install the caller's allocator and backend in the file-level globals */
  arena = a;
  emitter = e;

  /* pass one: symbols and scopes */
  parser.scopes = List_init(arena);
  build_symbol_table(source);

  /* pass two: code emission over the same source */
  parser.pass++;
  parser.scope_idx = 0;
  emit_program(source);

  return true;
}