diff --git a/src/tools/assembler/assembler.c b/src/tools/assembler/assembler.c index ca8ca68..c137e64 100644 --- a/src/tools/assembler/assembler.c +++ b/src/tools/assembler/assembler.c @@ -1,89 +1,22 @@ +#include "../../vm/libc.h" #include "assembler.h" #include -typedef enum { GLOBAL, LOCAL } ScopeType; -typedef enum { - VOID, - BOOL, - I8, - I16, - I32, - U8, - U16, - U32, - F8, - F16, - F32, - STR, - PLEX, - ARRAY, - FUNCTION - } SymbolType; +bool global() { + Token token = nextToken(); + if (token.type == TOKEN_KEYWORD_CONST) { + token = nextToken(); + } -typedef struct field_s { - char *name; - SymbolType type; - u32 offset; - u32 size; -} Field; + if (token.type == TOKEN_TYPE_INT || token.type == TOKEN_TYPE_NAT || + token.type == TOKEN_TYPE_REAL || token.type == TOKEN_TYPE_STR) { + return true; + } -typedef struct function_def_s { - char *name; - SymbolType args[8]; - u8 arg_count; - SymbolType return_type; -} FunctionDef; + return false; +} -typedef struct trait_def_s { - char *name; - Field *fields; - u32 field_count; - FunctionDef *methods; - u32 method_count; -} TraitDef; - -typedef struct plex_def_s { - char *name; - u32 logical_size; - u32 physical_size; - Field *fields; - u32 field_count; - TraitDef *traits; - u32 trait_count; - FunctionDef *methods; - u32 method_count; -} PlexDef; - -typedef struct array_def_s { - SymbolType type; - u32 length; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - struct array_def_s *array; - } ref; -} ArrayDef; - -typedef struct symbol_s { - char *name; - u32 address; - ScopeType scope; - SymbolType type; - u32 logical_size; - u32 physical_size; // logical_size * type_size + 4 - union { - PlexDef *plex; - ArrayDef *array; - FunctionDef *function; - } ref; -} Symbol; - -typedef struct symbol_tab_s { - Symbol *symbols; - int count; - int capacity; -} SymbolTable; +void function() {} void assemble(VM *vm, char *source) { USED(vm); @@ -98,6 +31,109 @@ void assemble(VM *vm, 
char *source) { if (token.type != TOKEN_EOF) { printf("Line %d [%s]: %.*s\n", token.line, tokenTypeToString(token.type), token.length, token.start); + + if (token.type == TOKEN_KEYWORD_GLOBAL) { + if (!global()) { + printf("ERROR at line %d: %.*s\n", token.line, token.length, + token.start); + } + } + + if (token.type == TOKEN_KEYWORD_FN) { + function(); + } + + if (token.type == TOKEN_IDENTIFIER) { + if (streq(token.start, "exit")) { + } else if (streq(token.start, "call")) { + } else if (streq(token.start, "syscall")) { + } else if (streq(token.start, "load_immediate")) { + } else if (streq(token.start, "load_indirect_8")) { + } else if (streq(token.start, "load_indirect_16")) { + } else if (streq(token.start, "load_indirect_32")) { + } else if (streq(token.start, "load_absolute_8")) { + } else if (streq(token.start, "load_absolute_16")) { + } else if (streq(token.start, "load_absolute_32")) { + } else if (streq(token.start, "load_offset_8")) { + } else if (streq(token.start, "load_offset_16")) { + } else if (streq(token.start, "load_offset_32")) { + } else if (streq(token.start, "store_absolute_8")) { + } else if (streq(token.start, "store_absolute_16")) { + } else if (streq(token.start, "store_absolute_32")) { + } else if (streq(token.start, "store_indirect_8")) { + } else if (streq(token.start, "store_indirect_16")) { + } else if (streq(token.start, "store_indirect_32")) { + } else if (streq(token.start, "store_offset_8")) { + } else if (streq(token.start, "store_offset_16")) { + } else if (streq(token.start, "store_offset_32")) { + } else if (streq(token.start, "malloc")) { + } else if (streq(token.start, "malloc_immediate")) { + } else if (streq(token.start, "memset_8")) { + } else if (streq(token.start, "memset_16")) { + } else if (streq(token.start, "memset_32")) { + } else if (streq(token.start, "register_move")) { + } else if (streq(token.start, "add_int")) { + } else if (streq(token.start, "sub_int")) { + } else if (streq(token.start, "mul_int")) { + } 
else if (streq(token.start, "div_int")) { + } else if (streq(token.start, "abs_int")) { + } else if (streq(token.start, "neg_int")) { + } else if (streq(token.start, "add_nat")) { + } else if (streq(token.start, "sub_nat")) { + } else if (streq(token.start, "mul_nat")) { + } else if (streq(token.start, "div_nat")) { + } else if (streq(token.start, "abs_nat")) { + } else if (streq(token.start, "neg_nat")) { + } else if (streq(token.start, "add_real")) { + } else if (streq(token.start, "sub_real")) { + } else if (streq(token.start, "mul_real")) { + } else if (streq(token.start, "div_real")) { + } else if (streq(token.start, "abs_real")) { + } else if (streq(token.start, "neg_real")) { + } else if (streq(token.start, "int_to_real")) { + } else if (streq(token.start, "nat_to_real")) { + } else if (streq(token.start, "real_to_int")) { + } else if (streq(token.start, "real_to_nat")) { + } else if (streq(token.start, "bit_shift_left")) { + } else if (streq(token.start, "bit_shift_right")) { + } else if (streq(token.start, "bit_shift_r_ext")) { + } else if (streq(token.start, "bit_and")) { + } else if (streq(token.start, "bit_or")) { + } else if (streq(token.start, "bit_xor")) { + } else if (streq(token.start, "jump")) { + } else if (streq(token.start, "jump_if_flag")) { + } else if (streq(token.start, "jump_eq_int")) { + } else if (streq(token.start, "jump_neq_int")) { + } else if (streq(token.start, "jump_gt_int")) { + } else if (streq(token.start, "jump_lt_int")) { + } else if (streq(token.start, "jump_le_int")) { + } else if (streq(token.start, "jump_ge_int")) { + } else if (streq(token.start, "jump_eq_nat")) { + } else if (streq(token.start, "jump_neq_nat")) { + } else if (streq(token.start, "jump_gt_nat")) { + } else if (streq(token.start, "jump_lt_nat")) { + } else if (streq(token.start, "jump_le_nat")) { + } else if (streq(token.start, "jump_ge_nat")) { + } else if (streq(token.start, "jump_eq_real")) { + } else if (streq(token.start, "jump_neq_real")) { + } else 
if (streq(token.start, "jump_ge_real")) { + } else if (streq(token.start, "jump_gt_real")) { + } else if (streq(token.start, "jump_lt_real")) { + } else if (streq(token.start, "jump_le_real")) { + } else if (streq(token.start, "string_length")) { + } else if (streq(token.start, "string_eq")) { + } else if (streq(token.start, "string_concat")) { + } else if (streq(token.start, "string_get_char")) { + } else if (streq(token.start, "string_find_char")) { + } else if (streq(token.start, "string_slice")) { + } else if (streq(token.start, "int_to_string")) { + } else if (streq(token.start, "nat_to_string")) { + } else if (streq(token.start, "real_to_string")) { + } else if (streq(token.start, "string_to_int")) { + } else if (streq(token.start, "string_to_nat")) { + } else if (streq(token.start, "string_to_real")) { + } + } } } while (token.type != TOKEN_EOF); } diff --git a/src/tools/assembler/lexer.c b/src/tools/assembler/lexer.c index b634dc5..8c26f61 100644 --- a/src/tools/assembler/lexer.c +++ b/src/tools/assembler/lexer.c @@ -81,19 +81,20 @@ static void skipWhitespace() { case '/': if (peekNext() == '/') { // Single-line comment: skip until newline or end of file - advance(); + advance(); while (peek() != '\n' && !isAtEnd()) advance(); } else if (peekNext() == '*') { // Multi-line comment: skip until '*/' or end of file - advance(); - advance(); + advance(); + advance(); while (!isAtEnd()) { - if (peek() == '\n') lexer.line++; + if (peek() == '\n') + lexer.line++; if (peek() == '*' && peekNext() == '/') { - advance(); - advance(); - break; // Exit loop, comment ended + advance(); + advance(); + break; // Exit loop, comment ended } advance(); } @@ -120,7 +121,15 @@ static TokenType checkKeyword(int start, int length, const char *rest, static TokenType identifierType() { switch (lexer.start[0]) { case 'a': - return checkKeyword(1, 2, "nd", TOKEN_OPERATOR_AND); + if (lexer.current - lexer.start > 1) { + switch (lexer.start[1]) { + case 'n': + return checkKeyword(2, 1, 
"d", TOKEN_OPERATOR_AND); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_AS); + } + } + break; case 'c': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { @@ -130,7 +139,7 @@ static TokenType identifierType() { return checkKeyword(2, 3, "nst", TOKEN_KEYWORD_CONST); } } - break; + break; case 'e': return checkKeyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); case 'f': @@ -149,13 +158,15 @@ static TokenType identifierType() { switch (lexer.start[1]) { case 'f': return checkKeyword(2, 0, "", TOKEN_KEYWORD_IF); + case 's': + return checkKeyword(2, 0, "", TOKEN_KEYWORD_IS); case 'n': if (lexer.current - lexer.start > 2) { switch (lexer.start[2]) { case 'i': - return checkKeyword(3, 2, "t", TOKEN_KEYWORD_INIT); + return checkKeyword(3, 1, "t", TOKEN_KEYWORD_INIT); case 't': - return checkKeyword(3, 1, "", TOKEN_TYPE_INT); + return checkKeyword(3, 0, "", TOKEN_TYPE_INT); } } break; @@ -215,7 +226,7 @@ static TokenType identifierType() { return checkKeyword(2, 1, "r", TOKEN_TYPE_STR); } } - break; + break; case 't': if (lexer.current - lexer.start > 1) { switch (lexer.start[1]) { @@ -244,6 +255,8 @@ static TokenType identifierType() { } } break; + case 'g': + return checkKeyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); } return TOKEN_IDENTIFIER; @@ -267,10 +280,10 @@ static Token number() { while (isDigit(peek())) advance(); - return makeToken(TOKEN_FLOAT_LITERAL); + return makeToken(TOKEN_LITERAL_REAL); } - return makeToken(TOKEN_INT_LITERAL); + return makeToken(TOKEN_LITERAL_INT); } static Token string() { @@ -285,7 +298,7 @@ static Token string() { /* The closing quote. */ advance(); - return makeToken(TOKEN_STRING_LITERAL); + return makeToken(TOKEN_LITERAL_STR); } Token nextToken() { @@ -321,11 +334,17 @@ Token nextToken() { case '.': return makeToken(TOKEN_DOT); case '-': - return makeToken(TOKEN_MINUS); + return makeToken(match('>') ? TOKEN_ARROW_LEFT : TOKEN_MINUS); case '+': return makeToken(TOKEN_PLUS); case '/': return makeToken(TOKEN_SLASH); + case '&': + return makeToken(match('&') ? 
TOKEN_AND_AND : TOKEN_AND); + case '#': + return makeToken(TOKEN_MESH); + case '$': + return makeToken(TOKEN_BIG_MONEY); case '*': return makeToken(TOKEN_STAR); case '!': @@ -343,64 +362,135 @@ Token nextToken() { return errorToken("Unexpected character."); } -const char* tokenTypeToString(TokenType type) { +const char *tokenTypeToString(TokenType type) { switch (type) { - case TOKEN_EOF: return "EOF"; - case TOKEN_IDENTIFIER: return "IDENTIFIER"; - case TOKEN_INT_LITERAL: return "INT_LITERAL"; - case TOKEN_UINT_LITERAL: return "UINT_LITERAL"; - case TOKEN_FLOAT_LITERAL: return "FLOAT_LITERAL"; - case TOKEN_STRING_LITERAL: return "STRING_LITERAL"; - case TOKEN_TYPE_INT: return "TYPE_INT"; - case TOKEN_TYPE_NAT: return "TYPE_NAT"; - case TOKEN_TYPE_REAL: return "TYPE_REAL"; - case TOKEN_TYPE_STR: return "TYPE_STR"; - case TOKEN_KEYWORD_PLEX: return "KEYWORD_PLEX"; - case TOKEN_KEYWORD_FN: return "KEYWORD_FN"; - case TOKEN_KEYWORD_CONST: return "KEYWORD_CONST"; - case TOKEN_KEYWORD_IF: return "KEYWORD_IF"; - case TOKEN_KEYWORD_ELSE: return "KEYWORD_ELSE"; - case TOKEN_KEYWORD_WHILE: return "KEYWORD_WHILE"; - case TOKEN_KEYWORD_FOR: return "KEYWORD_FOR"; - case TOKEN_KEYWORD_RETURN: return "KEYWORD_RETURN"; - case TOKEN_KEYWORD_USE: return "KEYWORD_USE"; - case TOKEN_KEYWORD_INIT: return "KEYWORD_INIT"; - case TOKEN_KEYWORD_THIS: return "KEYWORD_THIS"; - case TOKEN_KEYWORD_OPEN: return "TOKEN_KEYWORD_OPEN"; - case TOKEN_KEYWORD_READ: return "TOKEN_KEYWORD_READ"; - case TOKEN_KEYWORD_WRITE: return "TOKEN_KEYWORD_WRITE"; - case TOKEN_KEYWORD_REFRESH: return "TOKEN_KEYWORD_REFRESH"; - case TOKEN_KEYWORD_CLOSE: return "TOKEN_KEYWORD_CLOSE"; - case TOKEN_KEYWORD_NIL: return "KEYWORD_NIL"; - case TOKEN_KEYWORD_TRUE: return "KEYWORD_TRUE"; - case TOKEN_KEYWORD_FALSE: return "KEYWORD_FALSE"; - case TOKEN_OPERATOR_IS: return "OPERATOR_IS"; - case TOKEN_OPERATOR_NOT: return "OPERATOR_NOT"; - case TOKEN_OPERATOR_AND: return "OPERATOR_AND"; - case TOKEN_OPERATOR_OR: return 
"OPERATOR_OR"; - case TOKEN_BANG: return "BANG"; - case TOKEN_BANG_EQ: return "BANG_EQ"; - case TOKEN_EQ: return "EQ"; - case TOKEN_EQ_EQ: return "EQ_EQ"; - case TOKEN_GT: return "GT"; - case TOKEN_LT: return "LT"; - case TOKEN_GTE: return "GTE"; - case TOKEN_LTE: return "LTE"; - case TOKEN_DOT: return "DOT"; - case TOKEN_COMMA: return "COMMA"; - case TOKEN_COLON: return "COLON"; - case TOKEN_SEMICOLON: return "SEMICOLON"; - case TOKEN_PLUS: return "PLUS"; - case TOKEN_MINUS: return "MINUS"; - case TOKEN_STAR: return "STAR"; - case TOKEN_SLASH: return "SLASH"; - case TOKEN_LPAREN: return "LPAREN"; - case TOKEN_RPAREN: return "RPAREN"; - case TOKEN_LBRACE: return "LBRACE"; - case TOKEN_RBRACE: return "RBRACE"; - case TOKEN_LBRACKET: return "LBRACKET"; - case TOKEN_RBRACKET: return "RBRACKET"; - case TOKEN_ERROR: return "ERROR"; - default: return "UNKNOWN_TOKEN"; + case TOKEN_EOF: + return "EOF"; + case TOKEN_IDENTIFIER: + return "IDENTIFIER"; + case TOKEN_LITERAL_INT: + return "LITERAL_INT"; + case TOKEN_LITERAL_NAT: + return "LITERAL_NAT"; + case TOKEN_LITERAL_REAL: + return "LITERAL_REAL"; + case TOKEN_LITERAL_STR: + return "LITERAL_STR"; + case TOKEN_TYPE_INT: + return "TYPE_INT"; + case TOKEN_TYPE_NAT: + return "TYPE_NAT"; + case TOKEN_TYPE_REAL: + return "TYPE_REAL"; + case TOKEN_TYPE_STR: + return "TYPE_STR"; + case TOKEN_KEYWORD_PLEX: + return "KEYWORD_PLEX"; + case TOKEN_KEYWORD_FN: + return "KEYWORD_FN"; + case TOKEN_KEYWORD_CONST: + return "KEYWORD_CONST"; + case TOKEN_KEYWORD_IF: + return "KEYWORD_IF"; + case TOKEN_KEYWORD_IS: + return "IS"; + case TOKEN_KEYWORD_AS: + return "AS"; + case TOKEN_KEYWORD_ELSE: + return "KEYWORD_ELSE"; + case TOKEN_KEYWORD_WHILE: + return "KEYWORD_WHILE"; + case TOKEN_KEYWORD_FOR: + return "KEYWORD_FOR"; + case TOKEN_KEYWORD_RETURN: + return "KEYWORD_RETURN"; + case TOKEN_KEYWORD_USE: + return "KEYWORD_USE"; + case TOKEN_KEYWORD_INIT: + return "KEYWORD_INIT"; + case TOKEN_KEYWORD_THIS: + return "KEYWORD_THIS"; + case 
TOKEN_KEYWORD_OPEN: + return "TOKEN_KEYWORD_OPEN"; + case TOKEN_KEYWORD_READ: + return "TOKEN_KEYWORD_READ"; + case TOKEN_KEYWORD_WRITE: + return "TOKEN_KEYWORD_WRITE"; + case TOKEN_KEYWORD_REFRESH: + return "TOKEN_KEYWORD_REFRESH"; + case TOKEN_KEYWORD_CLOSE: + return "TOKEN_KEYWORD_CLOSE"; + case TOKEN_KEYWORD_NIL: + return "KEYWORD_NIL"; + case TOKEN_KEYWORD_TRUE: + return "KEYWORD_TRUE"; + case TOKEN_KEYWORD_FALSE: + return "KEYWORD_FALSE"; + case TOKEN_KEYWORD_GLOBAL: + return "KEYWORD_GLOBAL"; + case TOKEN_OPERATOR_NOT: + return "OPERATOR_NOT"; + case TOKEN_OPERATOR_AND: + return "OPERATOR_AND"; + case TOKEN_OPERATOR_OR: + return "OPERATOR_OR"; + case TOKEN_BANG: + return "BANG"; + case TOKEN_BANG_EQ: + return "BANG_EQ"; + case TOKEN_EQ: + return "EQ"; + case TOKEN_EQ_EQ: + return "EQ_EQ"; + case TOKEN_GT: + return "GT"; + case TOKEN_LT: + return "LT"; + case TOKEN_GTE: + return "GTE"; + case TOKEN_LTE: + return "LTE"; + case TOKEN_DOT: + return "DOT"; + case TOKEN_COMMA: + return "COMMA"; + case TOKEN_COLON: + return "COLON"; + case TOKEN_SEMICOLON: + return "SEMICOLON"; + case TOKEN_PLUS: + return "PLUS"; + case TOKEN_MINUS: + return "MINUS"; + case TOKEN_STAR: + return "STAR"; + case TOKEN_SLASH: + return "SLASH"; + case TOKEN_LPAREN: + return "LPAREN"; + case TOKEN_RPAREN: + return "RPAREN"; + case TOKEN_LBRACE: + return "LBRACE"; + case TOKEN_RBRACE: + return "RBRACE"; + case TOKEN_LBRACKET: + return "LBRACKET"; + case TOKEN_RBRACKET: + return "RBRACKET"; + case TOKEN_ARROW_LEFT: + return "ARROW_LEFT"; + case TOKEN_MESH: + return "MESH"; + case TOKEN_BIG_MONEY: + return "BIG_MONEY"; + case TOKEN_AND: + return "AND"; + case TOKEN_AND_AND: + return "AND_AND"; + case TOKEN_ERROR: + return "ERROR"; + default: + return "UNKNOWN_TOKEN"; } -} \ No newline at end of file +} diff --git a/src/tools/assembler/lexer.h b/src/tools/assembler/lexer.h index 086cc31..4b7a8ae 100644 --- a/src/tools/assembler/lexer.h +++ b/src/tools/assembler/lexer.h @@ -4,10 +4,10 @@ 
typedef enum { TOKEN_EOF, TOKEN_IDENTIFIER, - TOKEN_INT_LITERAL, - TOKEN_UINT_LITERAL, - TOKEN_FLOAT_LITERAL, - TOKEN_STRING_LITERAL, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_NAT, + TOKEN_LITERAL_REAL, + TOKEN_LITERAL_STR, TOKEN_TYPE_INT, TOKEN_TYPE_NAT, TOKEN_TYPE_REAL, @@ -16,6 +16,8 @@ typedef enum { TOKEN_KEYWORD_FN, TOKEN_KEYWORD_CONST, TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_IS, + TOKEN_KEYWORD_AS, TOKEN_KEYWORD_ELSE, TOKEN_KEYWORD_WHILE, TOKEN_KEYWORD_FOR, @@ -23,15 +25,15 @@ typedef enum { TOKEN_KEYWORD_USE, TOKEN_KEYWORD_INIT, TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_GLOBAL, TOKEN_KEYWORD_OPEN, TOKEN_KEYWORD_READ, TOKEN_KEYWORD_WRITE, TOKEN_KEYWORD_REFRESH, - TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_CLOSE, TOKEN_KEYWORD_NIL, TOKEN_KEYWORD_TRUE, TOKEN_KEYWORD_FALSE, - TOKEN_OPERATOR_IS, TOKEN_OPERATOR_NOT, TOKEN_OPERATOR_AND, TOKEN_OPERATOR_OR, @@ -39,6 +41,8 @@ typedef enum { TOKEN_BANG_EQ, TOKEN_EQ, TOKEN_EQ_EQ, + TOKEN_AND, + TOKEN_AND_AND, TOKEN_GT, TOKEN_LT, TOKEN_GTE, @@ -51,12 +55,15 @@ typedef enum { TOKEN_MINUS, TOKEN_STAR, TOKEN_SLASH, + TOKEN_MESH, + TOKEN_BIG_MONEY, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_LBRACE, TOKEN_RBRACE, TOKEN_LBRACKET, TOKEN_RBRACKET, + TOKEN_ARROW_LEFT, TOKEN_ERROR } TokenType; @@ -71,4 +78,4 @@ void initLexer(const char *source); Token nextToken(); const char* tokenTypeToString(TokenType type); -#endif \ No newline at end of file +#endif diff --git a/src/tools/compiler/compiler.h b/src/tools/compiler/compiler.h new file mode 100644 index 0000000..21f1a25 --- /dev/null +++ b/src/tools/compiler/compiler.h @@ -0,0 +1,90 @@ +#ifndef UNDAR_COMPILER_H +#define UNDAR_COMPILER_H + +#include "../../vm/common.h" + +typedef enum { GLOBAL, LOCAL } ScopeType; +typedef enum { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + PLEX, + ARRAY, + FUNCTION + } SymbolType; + +typedef struct field_s { + char *name; + SymbolType type; + u32 offset; + u32 size; +} Field; + +typedef struct function_def_s { + char *name; + SymbolType 
args[8]; + u8 arg_count; + SymbolType return_type; +} FunctionDef; + +typedef struct trait_def_s { + char *name; + Field *fields; + u32 field_count; + FunctionDef *methods; + u32 method_count; +} TraitDef; + +typedef struct plex_def_s { + char *name; + u32 logical_size; + u32 physical_size; + Field *fields; + u32 field_count; + TraitDef *traits; + u32 trait_count; + FunctionDef *methods; + u32 method_count; +} PlexDef; + +typedef struct array_def_s { + SymbolType type; + u32 length; + u32 logical_size; + u32 physical_size; // logical_size * type_size + 4 + union { + PlexDef *plex; + struct array_def_s *array; + } ref; +} ArrayDef; + +typedef struct symbol_s { + char *name; + u32 address; + ScopeType scope; + SymbolType type; + u32 logical_size; + u32 physical_size; // logical_size * type_size + 4 + union { + PlexDef *plex; + ArrayDef *array; + FunctionDef *function; + } ref; +} Symbol; + +typedef struct symbol_tab_s { + Symbol *symbols; + int count; + int capacity; +} SymbolTable; + +#endif