diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..ed5b4ad --- /dev/null +++ b/.clang-format @@ -0,0 +1,33 @@ +# Plan 9 coding conventions for C (http://man.9front.org/6/style) +BasedOnStyle: LLVM +IndentWidth: 2 +TabWidth: 2 +UseTab: Always + +SpaceBeforeParens: Never +SpaceBeforeAssignmentOperators: true +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +RemoveBracesLLVM: true + +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: true + AfterNamespace: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +AlwaysBreakAfterReturnType: TopLevelDefinitions +DerivePointerAlignment: false +PointerAlignment: Right +AlignOperands: Align +AlignAfterOpenBracket: Align +SortIncludes: Never +IndentCaseLabels: false diff --git a/lexer.c b/lexer.c index 0d73ff3..3e4914f 100644 --- a/lexer.c +++ b/lexer.c @@ -2,541 +2,571 @@ #include "lexer.h" -typedef struct { - const char *start; - const char *current; - i32 line; -} Lexer; +typedef struct lexer_s Lexer; +struct lexer_s { + const char *start; + const char *current; + i32 line; +}; Lexer lexer; -void init_lexer(const char *source) { - lexer.start = source; - lexer.current = source; - lexer.line = 1; +void +init_lexer(const char *source) +{ + lexer.start = source; + lexer.current = source; + lexer.line = 1; } -static bool is_alpha(char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +static bool +is_alpha(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; } -static bool is_digit(char c) { return c >= '0' && c <= '9'; } - -static bool is_at_end() { return *lexer.current == '\0'; } - -static char advance() { - lexer.current++; - return lexer.current[-1]; +static bool +is_digit(char c) +{ + return c >= '0' && c <= '9'; } -char 
peek() { return *lexer.current; } - -static char peek_next() { - if (is_at_end()) - return '\0'; - return lexer.current[1]; +static bool +is_at_end() +{ + return *lexer.current == '\0'; } -static bool match(char expected) { - if (is_at_end()) - return false; - if (*lexer.current != expected) - return false; - lexer.current++; - return true; +static char +advance() +{ + lexer.current++; + return lexer.current[-1]; } -static Token make_token(TokenType type) { - Token token; - token.type = type; - token.start = lexer.start; - token.length = (i32)(lexer.current - lexer.start); - token.line = lexer.line; - return token; +char +peek() +{ + return *lexer.current; } -static Token error_token(const char *message) { - Token token; - token.type = TOKEN_ERROR; - token.start = message; - token.length = (i32)strlen(message); - token.line = lexer.line; - return token; +static char +peek_next() +{ + if(is_at_end()) return '\0'; + return lexer.current[1]; } -static void skip_whitespace() { - for (;;) { - char c = peek(); - switch (c) { - case ' ': - case '\r': - case '\t': - advance(); - break; - case '\n': - lexer.line++; - advance(); - break; - case '/': - if (peek_next() == '/') { - // Single-line comment: skip until newline or end of file - advance(); - while (peek() != '\n' && !is_at_end()) - advance(); - } else if (peek_next() == '*') { - // Multi-line comment: skip until '*/' or end of file - advance(); - advance(); - while (!is_at_end()) { - if (peek() == '\n') - lexer.line++; - if (peek() == '*' && peek_next() == '/') { - advance(); - advance(); - break; // Exit loop, comment ended - } - advance(); - } - } else { - return; // Not a comment, let tokenization handle it - } - break; - default: - return; - } - } +static bool +match(char expected) +{ + if(is_at_end()) return false; + if(*lexer.current != expected) return false; + lexer.current++; + return true; } -static TokenType check_keyword(i32 start, i32 length, const char *rest, - TokenType type) { - if (lexer.current - 
lexer.start == start + length && - memcmp(lexer.start + start, rest, length) == 0) { - return type; - } - - return TOKEN_IDENTIFIER; +static Token +make_token(TokenType type) +{ + Token token; + token.type = type; + token.start = lexer.start; + token.length = (i32)(lexer.current - lexer.start); + token.line = lexer.line; + return token; } -static TokenType identifierType() { - switch (lexer.start[0]) { - case 'a': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'n': - return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND); - case 's': - return check_keyword(2, 0, "", TOKEN_KEYWORD_AS); - } - } - break; - case 'c': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'l': - return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); - case 'o': - return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST); - } - } - break; - case 'e': - return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); - case 'f': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'a': - return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); - case 'o': - return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR); - case '3': - return check_keyword(1, 1, "2", TOKEN_TYPE_REAL); - } - return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN); - } - break; - case 'i': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'f': - return check_keyword(2, 0, "", TOKEN_KEYWORD_IF); - case 's': - return check_keyword(2, 0, "", TOKEN_KEYWORD_IS); - case '8': - return check_keyword(2, 0, "", TOKEN_TYPE_I8); - case '1': - return check_keyword(2, 1, "6", TOKEN_TYPE_I16); - case '3': - return check_keyword(2, 1, "2", TOKEN_TYPE_INT); - case 'n': - if (lexer.current - lexer.start > 2) { - switch (lexer.start[2]) { - case 'i': - return check_keyword(3, 2, "t", TOKEN_KEYWORD_INIT); - case 't': - return check_keyword(3, 0, "", TOKEN_TYPE_INT); - } - } - break; - } - } - break; - case 'n': - if (lexer.current - lexer.start > 1) { - 
switch (lexer.start[1]) { - case 'a': - return check_keyword(2, 1, "t", TOKEN_TYPE_NAT); - case 'i': - return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL); - } - } - break; - case 'o': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'p': - return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN); - case 'r': - return check_keyword(2, 0, "", TOKEN_OPERATOR_OR); - } - } - break; - case 'p': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { case 't': - return check_keyword(2, 1, "r", TOKEN_TYPE_PTR); - - case 'l': - return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); - } - } - break; - case 'r': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'e': - if (lexer.current - lexer.start > 2) { - switch (lexer.start[2]) { - case 't': - return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); - case 'a': - if (lexer.current - lexer.start > 3) { - switch(lexer.start[3]) { - case 'd': - return check_keyword(4, 0, "", TOKEN_KEYWORD_READ); - case 'l': - return check_keyword(4, 0, "", TOKEN_TYPE_REAL); - } - } - } - } - break; - } - } - break; - case 's': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 't': - if (lexer.current - lexer.start > 2) { - switch (lexer.start[2]) { - case 'r': - return check_keyword(3, 0, "", TOKEN_TYPE_STR); - case 'a': - return check_keyword(3, 1, "t", TOKEN_KEYWORD_STAT); - } - } - } - } - break; - case 't': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'h': - return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS); - case 'r': - return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); - } - } - break; - case 'u': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 's': - return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE); - case '8': - return check_keyword(2, 0, "", TOKEN_TYPE_U8); - case '1': - return check_keyword(2, 1, "6", TOKEN_TYPE_U16); - case '3': - return check_keyword(2, 1, "2", TOKEN_TYPE_NAT); 
- } - } - break; - case 'w': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'h': - return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); - case 'r': - return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); - } - } - break; - case 'b': - if (lexer.current - lexer.start > 1) { - switch (lexer.start[1]) { - case 'y': - return check_keyword(2, 2, "te", TOKEN_TYPE_U8); - case 'o': - return check_keyword(2, 2, "ol", TOKEN_TYPE_U8); - } - } - break; - case 'g': - return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); - case 'l': - return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP); - case 'd': - return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); - case 'v': - return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID); - } - - return TOKEN_IDENTIFIER; +static Token +error_token(const char *message) +{ + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (i32)strlen(message); + token.line = lexer.line; + return token; } -static Token identifier() { - while (is_alpha(peek()) || is_digit(peek())) - advance(); - return make_token(identifierType()); +static void +skip_whitespace() +{ + for(;;) { + char c = peek(); + switch(c) { + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + lexer.line++; + advance(); + break; + case '/': + if(peek_next() == '/') { + // Single-line comment: skip until newline or end of file + advance(); + while(peek() != '\n' && !is_at_end()) advance(); + } else if(peek_next() == '*') { + // Multi-line comment: skip until '*/' or end of file + advance(); + advance(); + while(!is_at_end()) { + if(peek() == '\n') lexer.line++; + if(peek() == '*' && peek_next() == '/') { + advance(); + advance(); + break; // Exit loop, comment ended + } + advance(); + } + } else { + return; // Not a comment, let tokenization handle it + } + break; + default: + return; + } + } } -static Token number() { - while (is_digit(peek())) - advance(); +static TokenType +check_keyword(i32 start, i32 
length, const char *rest, TokenType type) +{ + if(lexer.current - lexer.start == start + length && + memcmp(lexer.start + start, rest, length) == 0) { + return type; + } - /* Look for a fractional part. */ - if (peek() == '.' && is_digit(peek_next())) { - /* Consume the ".". */ - advance(); - - while (is_digit(peek())) - advance(); - - return make_token(TOKEN_LITERAL_REAL); - } - - return make_token(TOKEN_LITERAL_INT); + return TOKEN_IDENTIFIER; } -static Token string() { - while (peek() != '"' && !is_at_end()) { - if (peek() == '\n') - lexer.line++; - advance(); - } +static TokenType +identifierType() +{ + switch(lexer.start[0]) { + case 'a': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'n': + return check_keyword(2, 1, "d", TOKEN_OPERATOR_AND); + case 's': + return check_keyword(2, 0, "", TOKEN_KEYWORD_AS); + } + } + break; + case 'c': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'l': + return check_keyword(2, 3, "ose", TOKEN_KEYWORD_CLOSE); + case 'o': + return check_keyword(2, 3, "nst", TOKEN_KEYWORD_CONST); + } + } + break; + case 'e': + return check_keyword(1, 3, "lse", TOKEN_KEYWORD_ELSE); + case 'f': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'a': + return check_keyword(2, 3, "lse", TOKEN_KEYWORD_FALSE); + case 'o': + return check_keyword(2, 1, "r", TOKEN_KEYWORD_FOR); + case '3': + return check_keyword(2, 1, "2", TOKEN_TYPE_REAL); + } + return check_keyword(1, 7, "unction", TOKEN_KEYWORD_FN); + } + break; + case 'i': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'f': + return check_keyword(2, 0, "", TOKEN_KEYWORD_IF); + case 's': + return check_keyword(2, 0, "", TOKEN_KEYWORD_IS); + case '8': + return check_keyword(2, 0, "", TOKEN_TYPE_I8); + case '1': + return check_keyword(2, 1, "6", TOKEN_TYPE_I16); + case '3': + return check_keyword(2, 1, "2", TOKEN_TYPE_INT); + case 'n': + if(lexer.current - lexer.start > 2) { + switch(lexer.start[2]) { 
+ case 'i': + return check_keyword(3, 2, "t", TOKEN_KEYWORD_INIT); + case 't': + return check_keyword(3, 0, "", TOKEN_TYPE_INT); + } + } + break; + } + } + break; + case 'n': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'a': + return check_keyword(2, 1, "t", TOKEN_TYPE_NAT); + case 'i': + return check_keyword(2, 1, "l", TOKEN_KEYWORD_NIL); + } + } + break; + case 'o': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'p': + return check_keyword(2, 2, "en", TOKEN_KEYWORD_OPEN); + case 'r': + return check_keyword(2, 0, "", TOKEN_OPERATOR_OR); + } + } + break; + case 'p': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 't': + return check_keyword(2, 1, "r", TOKEN_TYPE_PTR); - if (is_at_end()) - return error_token("Unterminated string."); + case 'l': + return check_keyword(2, 2, "ex", TOKEN_KEYWORD_PLEX); + } + } + break; + case 'r': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'e': + if(lexer.current - lexer.start > 2) { + switch(lexer.start[2]) { + case 't': + return check_keyword(3, 3, "urn", TOKEN_KEYWORD_RETURN); + case 'a': + if(lexer.current - lexer.start > 3) { + switch(lexer.start[3]) { + case 'd': + return check_keyword(4, 0, "", TOKEN_KEYWORD_READ); + case 'l': + return check_keyword(4, 0, "", TOKEN_TYPE_REAL); + } + } + } + } + break; + } + } + break; + case 's': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 't': + if(lexer.current - lexer.start > 2) { + switch(lexer.start[2]) { + case 'r': + return check_keyword(3, 0, "", TOKEN_TYPE_STR); + case 'a': + return check_keyword(3, 1, "t", TOKEN_KEYWORD_STAT); + } + } + } + } + break; + case 't': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'h': + return check_keyword(2, 2, "is", TOKEN_KEYWORD_THIS); + case 'r': + return check_keyword(2, 2, "ue", TOKEN_KEYWORD_TRUE); + } + } + break; + case 'u': + if(lexer.current - lexer.start > 1) { + 
switch(lexer.start[1]) { + case 's': + return check_keyword(2, 1, "e", TOKEN_KEYWORD_USE); + case '8': + return check_keyword(2, 0, "", TOKEN_TYPE_U8); + case '1': + return check_keyword(2, 1, "6", TOKEN_TYPE_U16); + case '3': + return check_keyword(2, 1, "2", TOKEN_TYPE_NAT); + } + } + break; + case 'w': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'h': + return check_keyword(2, 3, "ile", TOKEN_KEYWORD_WHILE); + case 'r': + return check_keyword(2, 3, "ite", TOKEN_KEYWORD_WRITE); + } + } + break; + case 'b': + if(lexer.current - lexer.start > 1) { + switch(lexer.start[1]) { + case 'y': + return check_keyword(2, 2, "te", TOKEN_TYPE_U8); + case 'o': + return check_keyword(2, 2, "ol", TOKEN_TYPE_U8); + } + } + break; + case 'g': + return check_keyword(1, 5, "lobal", TOKEN_KEYWORD_GLOBAL); + case 'l': + return check_keyword(1, 3, "oop", TOKEN_KEYWORD_LOOP); + case 'd': + return check_keyword(1, 1, "o", TOKEN_KEYWORD_DO); + case 'v': + return check_keyword(1, 3, "oid", TOKEN_TYPE_VOID); + } - /* The closing quote. */ - advance(); - return make_token(TOKEN_LITERAL_STR); + return TOKEN_IDENTIFIER; } -Token next_token() { - skip_whitespace(); - lexer.start = lexer.current; - - if (is_at_end()) - return make_token(TOKEN_EOF); - - char c = advance(); - if (is_alpha(c)) - return identifier(); - char next = peek(); - if ((c == '-' && is_digit(next)) || is_digit(c)) - return number(); - - switch (c) { - case '(': - return make_token(TOKEN_LPAREN); - case ')': - return make_token(TOKEN_RPAREN); - case '{': - return make_token(TOKEN_LBRACE); - case '}': - return make_token(TOKEN_RBRACE); - case '[': - return make_token(TOKEN_LBRACKET); - case ']': - return make_token(TOKEN_RBRACKET); - case ';': - return make_token(TOKEN_SEMICOLON); - case ',': - return make_token(TOKEN_COMMA); - case '.': - return make_token(TOKEN_DOT); - case '-': - return make_token(match('>') ? 
TOKEN_ARROW_RIGHT : TOKEN_MINUS); - case '+': - return make_token(TOKEN_PLUS); - case '/': - return make_token(TOKEN_SLASH); - case '&': - return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND); - case '#': - return make_token(TOKEN_MESH); - case '$': - return make_token(TOKEN_BIG_MONEY); - case '*': - return make_token(TOKEN_STAR); - case '!': - return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); - case '=': - return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); - case '<': - return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); - case '>': - return make_token(match('=') ? TOKEN_GTE : TOKEN_GT); - case '"': - return string(); - } - - return error_token("Unexpected character."); +static Token +identifier() +{ + while(is_alpha(peek()) || is_digit(peek())) advance(); + return make_token(identifierType()); } -const char *token_type_to_string(TokenType type) { - switch (type) { - case TOKEN_EOF: - return "EOF"; - case TOKEN_IDENTIFIER: - return "IDENTIFIER"; - case TOKEN_LITERAL_INT: - return "LITERAL_INT"; - case TOKEN_LITERAL_NAT: - return "LITERAL_NAT"; - case TOKEN_LITERAL_REAL: - return "LITERAL_REAL"; - case TOKEN_LITERAL_STR: - return "LITERAL_STR"; - case TOKEN_TYPE_INT: - return "TYPE_INT"; - case TOKEN_TYPE_NAT: - return "TYPE_NAT"; - case TOKEN_TYPE_REAL: - return "TYPE_REAL"; - case TOKEN_TYPE_STR: - return "TYPE_STR"; - case TOKEN_TYPE_PTR: - return "TYPE_PTR"; - case TOKEN_KEYWORD_PLEX: - return "KEYWORD_PLEX"; - case TOKEN_KEYWORD_FN: - return "KEYWORD_FN"; - case TOKEN_KEYWORD_CONST: - return "KEYWORD_CONST"; - case TOKEN_KEYWORD_IF: - return "KEYWORD_IF"; - case TOKEN_KEYWORD_IS: - return "IS"; - case TOKEN_KEYWORD_AS: - return "AS"; - case TOKEN_KEYWORD_ELSE: - return "KEYWORD_ELSE"; - case TOKEN_KEYWORD_WHILE: - return "KEYWORD_WHILE"; - case TOKEN_KEYWORD_FOR: - return "KEYWORD_FOR"; - case TOKEN_KEYWORD_RETURN: - return "KEYWORD_RETURN"; - case TOKEN_KEYWORD_USE: - return "KEYWORD_USE"; - case TOKEN_KEYWORD_INIT: - return 
"KEYWORD_INIT"; - case TOKEN_KEYWORD_THIS: - return "KEYWORD_THIS"; - case TOKEN_KEYWORD_OPEN: - return "TOKEN_KEYWORD_OPEN"; - case TOKEN_KEYWORD_READ: - return "TOKEN_KEYWORD_READ"; - case TOKEN_KEYWORD_WRITE: - return "TOKEN_KEYWORD_WRITE"; - case TOKEN_KEYWORD_STAT: - return "TOKEN_KEYWORD_STAT"; - case TOKEN_KEYWORD_CLOSE: - return "TOKEN_KEYWORD_CLOSE"; - case TOKEN_KEYWORD_NIL: - return "KEYWORD_NIL"; - case TOKEN_KEYWORD_TRUE: - return "KEYWORD_TRUE"; - case TOKEN_KEYWORD_FALSE: - return "KEYWORD_FALSE"; - case TOKEN_KEYWORD_GLOBAL: - return "KEYWORD_GLOBAL"; - case TOKEN_OPERATOR_NOT: - return "OPERATOR_NOT"; - case TOKEN_OPERATOR_AND: - return "OPERATOR_AND"; - case TOKEN_OPERATOR_OR: - return "OPERATOR_OR"; - case TOKEN_BANG: - return "BANG"; - case TOKEN_BANG_EQ: - return "BANG_EQ"; - case TOKEN_EQ: - return "EQ"; - case TOKEN_EQ_EQ: - return "EQ_EQ"; - case TOKEN_GT: - return "GT"; - case TOKEN_LT: - return "LT"; - case TOKEN_GTE: - return "GTE"; - case TOKEN_LTE: - return "LTE"; - case TOKEN_DOT: - return "DOT"; - case TOKEN_COMMA: - return "COMMA"; - case TOKEN_COLON: - return "COLON"; - case TOKEN_SEMICOLON: - return "SEMICOLON"; - case TOKEN_PLUS: - return "PLUS"; - case TOKEN_MINUS: - return "MINUS"; - case TOKEN_STAR: - return "STAR"; - case TOKEN_SLASH: - return "SLASH"; - case TOKEN_LPAREN: - return "LPAREN"; - case TOKEN_RPAREN: - return "RPAREN"; - case TOKEN_LBRACE: - return "LBRACE"; - case TOKEN_RBRACE: - return "RBRACE"; - case TOKEN_LBRACKET: - return "LBRACKET"; - case TOKEN_RBRACKET: - return "RBRACKET"; - case TOKEN_ARROW_RIGHT: - return "ARROW_RIGHT"; - case TOKEN_MESH: - return "MESH"; - case TOKEN_BIG_MONEY: - return "BIG_MONEY"; - case TOKEN_AND: - return "AND"; - case TOKEN_AND_AND: - return "AND_AND"; - case TOKEN_ERROR: - return "ERROR"; - default: - return "UNKNOWN_TOKEN"; - } +static Token +number() +{ + while(is_digit(peek())) advance(); + + /* Look for a fractional part. */ + if(peek() == '.' 
&& is_digit(peek_next())) { + /* Consume the ".". */ + advance(); + + while(is_digit(peek())) advance(); + + return make_token(TOKEN_LITERAL_REAL); + } + + return make_token(TOKEN_LITERAL_INT); +} + +static Token +string() +{ + while(peek() != '"' && !is_at_end()) { + if(peek() == '\n') lexer.line++; + advance(); + } + + if(is_at_end()) return error_token("Unterminated string."); + + /* The closing quote. */ + advance(); + return make_token(TOKEN_LITERAL_STR); +} + +Token +next_token() +{ + skip_whitespace(); + lexer.start = lexer.current; + + if(is_at_end()) return make_token(TOKEN_EOF); + + char c = advance(); + if(is_alpha(c)) return identifier(); + char next = peek(); + if((c == '-' && is_digit(next)) || is_digit(c)) return number(); + + switch(c) { + case '(': + return make_token(TOKEN_LPAREN); + case ')': + return make_token(TOKEN_RPAREN); + case '{': + return make_token(TOKEN_LBRACE); + case '}': + return make_token(TOKEN_RBRACE); + case '[': + return make_token(TOKEN_LBRACKET); + case ']': + return make_token(TOKEN_RBRACKET); + case ';': + return make_token(TOKEN_SEMICOLON); + case ',': + return make_token(TOKEN_COMMA); + case '.': + return make_token(TOKEN_DOT); + case '-': + return make_token(match('>') ? TOKEN_ARROW_RIGHT : TOKEN_MINUS); + case '+': + return make_token(TOKEN_PLUS); + case '/': + return make_token(TOKEN_SLASH); + case '&': + return make_token(match('&') ? TOKEN_AND_AND : TOKEN_AND); + case '#': + return make_token(TOKEN_MESH); + case '$': + return make_token(TOKEN_BIG_MONEY); + case '*': + return make_token(TOKEN_STAR); + case '!': + return make_token(match('=') ? TOKEN_BANG_EQ : TOKEN_BANG); + case '=': + return make_token(match('=') ? TOKEN_EQ_EQ : TOKEN_EQ); + case '<': + return make_token(match('=') ? TOKEN_LTE : TOKEN_LT); + case '>': + return make_token(match('=') ? 
TOKEN_GTE : TOKEN_GT); + case '"': + return string(); + } + + return error_token("Unexpected character."); +} + +const char * +token_type_to_string(TokenType type) +{ + switch(type) { + case TOKEN_EOF: + return "EOF"; + case TOKEN_IDENTIFIER: + return "IDENTIFIER"; + case TOKEN_LITERAL_INT: + return "LITERAL_INT"; + case TOKEN_LITERAL_NAT: + return "LITERAL_NAT"; + case TOKEN_LITERAL_REAL: + return "LITERAL_REAL"; + case TOKEN_LITERAL_STR: + return "LITERAL_STR"; + case TOKEN_TYPE_INT: + return "TYPE_INT"; + case TOKEN_TYPE_NAT: + return "TYPE_NAT"; + case TOKEN_TYPE_REAL: + return "TYPE_REAL"; + case TOKEN_TYPE_STR: + return "TYPE_STR"; + case TOKEN_TYPE_PTR: + return "TYPE_PTR"; + case TOKEN_KEYWORD_PLEX: + return "KEYWORD_PLEX"; + case TOKEN_KEYWORD_FN: + return "KEYWORD_FN"; + case TOKEN_KEYWORD_CONST: + return "KEYWORD_CONST"; + case TOKEN_KEYWORD_IF: + return "KEYWORD_IF"; + case TOKEN_KEYWORD_IS: + return "IS"; + case TOKEN_KEYWORD_AS: + return "AS"; + case TOKEN_KEYWORD_ELSE: + return "KEYWORD_ELSE"; + case TOKEN_KEYWORD_WHILE: + return "KEYWORD_WHILE"; + case TOKEN_KEYWORD_FOR: + return "KEYWORD_FOR"; + case TOKEN_KEYWORD_RETURN: + return "KEYWORD_RETURN"; + case TOKEN_KEYWORD_USE: + return "KEYWORD_USE"; + case TOKEN_KEYWORD_INIT: + return "KEYWORD_INIT"; + case TOKEN_KEYWORD_THIS: + return "KEYWORD_THIS"; + case TOKEN_KEYWORD_OPEN: + return "TOKEN_KEYWORD_OPEN"; + case TOKEN_KEYWORD_READ: + return "TOKEN_KEYWORD_READ"; + case TOKEN_KEYWORD_WRITE: + return "TOKEN_KEYWORD_WRITE"; + case TOKEN_KEYWORD_STAT: + return "TOKEN_KEYWORD_STAT"; + case TOKEN_KEYWORD_CLOSE: + return "TOKEN_KEYWORD_CLOSE"; + case TOKEN_KEYWORD_NIL: + return "KEYWORD_NIL"; + case TOKEN_KEYWORD_TRUE: + return "KEYWORD_TRUE"; + case TOKEN_KEYWORD_FALSE: + return "KEYWORD_FALSE"; + case TOKEN_KEYWORD_GLOBAL: + return "KEYWORD_GLOBAL"; + case TOKEN_OPERATOR_NOT: + return "OPERATOR_NOT"; + case TOKEN_OPERATOR_AND: + return "OPERATOR_AND"; + case TOKEN_OPERATOR_OR: + return "OPERATOR_OR"; + 
case TOKEN_BANG: + return "BANG"; + case TOKEN_BANG_EQ: + return "BANG_EQ"; + case TOKEN_EQ: + return "EQ"; + case TOKEN_EQ_EQ: + return "EQ_EQ"; + case TOKEN_GT: + return "GT"; + case TOKEN_LT: + return "LT"; + case TOKEN_GTE: + return "GTE"; + case TOKEN_LTE: + return "LTE"; + case TOKEN_DOT: + return "DOT"; + case TOKEN_COMMA: + return "COMMA"; + case TOKEN_COLON: + return "COLON"; + case TOKEN_SEMICOLON: + return "SEMICOLON"; + case TOKEN_PLUS: + return "PLUS"; + case TOKEN_MINUS: + return "MINUS"; + case TOKEN_STAR: + return "STAR"; + case TOKEN_SLASH: + return "SLASH"; + case TOKEN_LPAREN: + return "LPAREN"; + case TOKEN_RPAREN: + return "RPAREN"; + case TOKEN_LBRACE: + return "LBRACE"; + case TOKEN_RBRACE: + return "RBRACE"; + case TOKEN_LBRACKET: + return "LBRACKET"; + case TOKEN_RBRACKET: + return "RBRACKET"; + case TOKEN_ARROW_RIGHT: + return "ARROW_RIGHT"; + case TOKEN_MESH: + return "MESH"; + case TOKEN_BIG_MONEY: + return "BIG_MONEY"; + case TOKEN_AND: + return "AND"; + case TOKEN_AND_AND: + return "AND_AND"; + case TOKEN_ERROR: + return "ERROR"; + default: + return "UNKNOWN_TOKEN"; + } } diff --git a/lexer.h b/lexer.h index 0543945..b32f5dd 100644 --- a/lexer.h +++ b/lexer.h @@ -4,95 +4,96 @@ #include "libc.h" typedef enum { - TOKEN_ERROR, - TOKEN_EOF, - TOKEN_IDENTIFIER, - TOKEN_LITERAL_INT, - TOKEN_LITERAL_NAT, - TOKEN_LITERAL_REAL, - TOKEN_LITERAL_STR, - TOKEN_TYPE_I8, - TOKEN_TYPE_I16, - TOKEN_TYPE_INT, - TOKEN_TYPE_U8, - TOKEN_TYPE_U16, - TOKEN_TYPE_NAT, - TOKEN_TYPE_REAL, - TOKEN_TYPE_STR, - TOKEN_TYPE_BOOL, - TOKEN_TYPE_VOID, - TOKEN_TYPE_PTR, - TOKEN_KEYWORD_PLEX, - TOKEN_KEYWORD_FN, - TOKEN_KEYWORD_CONST, - TOKEN_KEYWORD_IF, - TOKEN_KEYWORD_IS, - TOKEN_KEYWORD_AS, - TOKEN_KEYWORD_ELSE, - TOKEN_KEYWORD_WHILE, - TOKEN_KEYWORD_FOR, - TOKEN_KEYWORD_RETURN, - TOKEN_KEYWORD_USE, - TOKEN_KEYWORD_INIT, - TOKEN_KEYWORD_THIS, - TOKEN_KEYWORD_GLOBAL, - TOKEN_KEYWORD_OPEN, - TOKEN_KEYWORD_READ, - TOKEN_KEYWORD_WRITE, - TOKEN_KEYWORD_STAT, - 
TOKEN_KEYWORD_CLOSE, - TOKEN_KEYWORD_LOOP, - TOKEN_KEYWORD_DO, - TOKEN_KEYWORD_NIL, - TOKEN_KEYWORD_TRUE, - TOKEN_KEYWORD_FALSE, - TOKEN_OPERATOR_NOT, - TOKEN_OPERATOR_AND, - TOKEN_OPERATOR_OR, - TOKEN_BANG, - TOKEN_BANG_EQ, - TOKEN_EQ, - TOKEN_EQ_EQ, - TOKEN_AND, - TOKEN_AND_AND, - TOKEN_PIPE, - TOKEN_PIPE_PIPE, - TOKEN_QUESTION, - TOKEN_QUESTION_DOT, - TOKEN_PLUS, - TOKEN_MINUS, - TOKEN_STAR, - TOKEN_SLASH, - TOKEN_MESH, - TOKEN_BIG_MONEY, - TOKEN_GT, - TOKEN_LT, - TOKEN_GTE, - TOKEN_LTE, - TOKEN_DOT, - TOKEN_COMMA, - TOKEN_COLON, - TOKEN_CARET, - TOKEN_SEMICOLON, - TOKEN_LPAREN, - TOKEN_RPAREN, - TOKEN_LBRACE, - TOKEN_RBRACE, - TOKEN_LBRACKET, - TOKEN_RBRACKET, - TOKEN_ARROW_RIGHT + TOKEN_ERROR, + TOKEN_EOF, + TOKEN_IDENTIFIER, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_NAT, + TOKEN_LITERAL_REAL, + TOKEN_LITERAL_STR, + TOKEN_TYPE_I8, + TOKEN_TYPE_I16, + TOKEN_TYPE_INT, + TOKEN_TYPE_U8, + TOKEN_TYPE_U16, + TOKEN_TYPE_NAT, + TOKEN_TYPE_REAL, + TOKEN_TYPE_STR, + TOKEN_TYPE_BOOL, + TOKEN_TYPE_VOID, + TOKEN_TYPE_PTR, + TOKEN_KEYWORD_PLEX, + TOKEN_KEYWORD_FN, + TOKEN_KEYWORD_CONST, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_IS, + TOKEN_KEYWORD_AS, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHILE, + TOKEN_KEYWORD_FOR, + TOKEN_KEYWORD_RETURN, + TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_INIT, + TOKEN_KEYWORD_THIS, + TOKEN_KEYWORD_GLOBAL, + TOKEN_KEYWORD_OPEN, + TOKEN_KEYWORD_READ, + TOKEN_KEYWORD_WRITE, + TOKEN_KEYWORD_STAT, + TOKEN_KEYWORD_CLOSE, + TOKEN_KEYWORD_LOOP, + TOKEN_KEYWORD_DO, + TOKEN_KEYWORD_NIL, + TOKEN_KEYWORD_TRUE, + TOKEN_KEYWORD_FALSE, + TOKEN_OPERATOR_NOT, + TOKEN_OPERATOR_AND, + TOKEN_OPERATOR_OR, + TOKEN_BANG, + TOKEN_BANG_EQ, + TOKEN_EQ, + TOKEN_EQ_EQ, + TOKEN_AND, + TOKEN_AND_AND, + TOKEN_PIPE, + TOKEN_PIPE_PIPE, + TOKEN_QUESTION, + TOKEN_QUESTION_DOT, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_MESH, + TOKEN_BIG_MONEY, + TOKEN_GT, + TOKEN_LT, + TOKEN_GTE, + TOKEN_LTE, + TOKEN_DOT, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_CARET, + TOKEN_SEMICOLON, + 
TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_ARROW_RIGHT } TokenType; -typedef struct { - TokenType type; - const char *start; - i32 length; - i32 line; -} Token; +typedef struct token_s Token; +struct token_s { + TokenType type; + const char *start; + i32 length; + i32 line; +}; void init_lexer(const char *source); Token next_token(); -const char* token_type_to_string(TokenType type); +const char *token_type_to_string(TokenType type); char peek(); #endif diff --git a/libc.c b/libc.c index dd14c9f..a48a07f 100644 --- a/libc.c +++ b/libc.c @@ -1,72 +1,96 @@ #include "libc.h" -void mcpy(void *to, void *from, u32 length) { - u8 *src, *dest; - if (to == nil || from == nil) return; +void +mcpy(void *to, void *from, u32 length) +{ + u8 *src, *dest; + if(to == nil || from == nil) return; - src = (u8 *)from; - dest = (u8 *)to; + src = (u8 *)from; + dest = (u8 *)to; - while (length-- > 0) { - *(dest++) = *(src++); - } - return; + while(length-- > 0) *(dest++) = *(src++); + return; } -i32 scpy(char *to, const char *from, u32 length) { - u32 i; - if (to == nil || from == nil) return -1; - if (length == 0) {return 0;} - for (i = 0; i < length - 1 && from[i] != '\0'; i++) { - to[i] = from[i]; - } - to[i] = '\0'; - return 0; +i32 +scpy(char *to, const char *from, u32 length) +{ + u32 i; + if(to == nil || from == nil) return -1; + if(length == 0) return 0; + for(i = 0; i < length - 1 && from[i] != '\0'; i++) to[i] = from[i]; + to[i] = '\0'; + return 0; } -bool seq(const char *s1, const char *s2) { - if (s1 == nil && s2 == nil) return true; - if (s1 == nil || s2 == nil) return false; +bool +seq(const char *s1, const char *s2) +{ + if(s1 == nil && s2 == nil) return true; + if(s1 == nil || s2 == nil) return false; - while (*s1 && *s2) { - if (*s1 != *s2) return false; - s1++; - s2++; - } + while(*s1 && *s2) { + if(*s1 != *s2) return false; + s1++; + s2++; + } - return (*s1 == '\0' && *s2 == '\0'); + return (*s1 == '\0' && 
*s2 == '\0'); } -bool sleq(const char *s1, const char *s2, u32 length) { - u32 i; - if (s1 == nil && s2 == nil) return true; - if (s1 == nil || s2 == nil) return false; +bool +sleq(const char *s1, const char *s2, u32 length) +{ + u32 i; + if(s1 == nil && s2 == nil) return true; + if(s1 == nil || s2 == nil) return false; - i = 0; - while (i < length && *s1 && *s2) { - if (*s1 != *s2) return false; - s1++; - s2++; - i++; - } - if (i == length) return true; - return (*s1 == '\0' && *s2 == '\0'); + i = 0; + while(i < length && *s1 && *s2) { + if(*s1 != *s2) return false; + s1++; + s2++; + i++; + } + if(i == length) return true; + return (*s1 == '\0' && *s2 == '\0'); } -u32 slen(const char *str) { - u32 i; - if (str == nil) {return 0;} - for (i = 0; str[i] != '\0'; i++) { - ; - } - return i; +u32 +slen(const char *str) +{ + u32 i; + if(str == nil) return 0; + for(i = 0; str[i] != '\0'; i++); + return i; } -u32 snlen(const char *str, u32 max_len) { - u32 i; - if (str == nil) {return 0;} - for (i = 0; i < max_len && str[i] != '\0'; i++) { - ; - } - return i; -} \ No newline at end of file +u32 +snlen(const char *str, u32 max_len) +{ + u32 i; + if(str == nil) return 0; + for(i = 0; i < max_len && str[i] != '\0'; i++); + return i; +} + +void * +aalloc(Arena *arena, u32 size) +{ + u32 pos; + if(arena == nil) return nil; + if(arena->count + size > arena->capacity) return nil; + + pos = arena->count; + arena->count += size; + return &arena->tape[pos]; +} + +u32 +afree(Arena *arena) +{ + u32 freed = arena->count; + arena->count = 0; + return freed; +} diff --git a/libc.h b/libc.h index ced5cb4..ffa4230 100644 --- a/libc.h +++ b/libc.h @@ -15,21 +15,21 @@ #ifdef HAVE_STDINT #include <stdint.h> - typedef uint8_t u8; - typedef int8_t i8; - typedef uint16_t u16; - typedef int16_t i16; - typedef uint32_t u32; - typedef int32_t i32; - typedef float f32; +typedef uint8_t u8; +typedef int8_t i8; +typedef uint16_t u16; +typedef int16_t i16; +typedef uint32_t u32; +typedef int32_t i32; +typedef 
float f32; #else - typedef unsigned char u8; - typedef signed char i8; - typedef unsigned short u16; - typedef signed short i16; - typedef unsigned int u32; - typedef signed int i32; - typedef float f32; +typedef unsigned char u8; +typedef signed char i8; +typedef unsigned short u16; +typedef signed short i16; +typedef unsigned int u32; +typedef signed int i32; +typedef float f32; #endif #ifdef HAVE_STDBOOL @@ -44,17 +44,17 @@ typedef u8 bool; #include #define nil NULL #else -#define nil ((void*)0) +#define nil ((void *)0) #endif #define I8_MIN -128 #define I8_MAX 127 #define U8_MAX 255 - + #define I16_MIN -32768 #define I16_MAX 32767 #define U16_MAX 65535 - + #define I32_MIN -2147483648 #define I32_MAX 2147483647 #define U32_MAX 4294967295 @@ -69,11 +69,20 @@ typedef u8 bool; #define USED(x) ((void)(x)) +typedef struct arena_s Arena; +struct arena_s { + u8 *tape; + u32 count; + u32 capacity; +}; + void mcpy(void *dest, void *src, u32 n); -i32 scpy(char* to, const char *from, u32 length); +i32 scpy(char *to, const char *from, u32 length); bool seq(const char *s1, const char *s2); bool sleq(const char *s1, const char *s2, u32 length); u32 slen(const char *str); u32 snlen(const char *str, u32 max_len); +void *aalloc(Arena *arena, u32 size); +u32 afree(Arena *arena); #endif diff --git a/main.c b/main.c index 099815b..6216242 100644 --- a/main.c +++ b/main.c @@ -1,24 +1,26 @@ #include #include -#define EMBED_FILE(name) \ - void emit_##name(const char *filename) { \ - FILE *f = fopen(filename, "wb"); \ - if (f) { \ - fwrite(name, 1, name##_len, f); \ - fclose(f); \ - } \ - } +#define EMBED_FILE(name) \ + void emit_##name(const char *filename) \ + { \ + FILE *f = fopen(filename, "wb"); \ + if(f) { \ + fwrite(name, 1, name##_len, f); \ + fclose(f); \ + } \ + } -int main(int argc, char **argv) { - char *name; +int +main(int argc, char **argv) +{ + char *name; - if (argc > 1) { - name = argv[1]; - } else { - name = "'u'"; - } + if(argc > 1) + name = argv[1]; + else + name = 
"'u'"; - printf("nuqneH %s?\n", name); - return EXIT_SUCCESS; + printf("nuqneH %s?\n", name); + return EXIT_SUCCESS; } diff --git a/parser.c b/parser.c index a160a03..9c3f0c5 100644 --- a/parser.c +++ b/parser.c @@ -1,51 +1,177 @@ #include "parser.h" -bool push(TokenStack *ts, Token t) { - if (ts->count >= ts->capacity) return false; - ts->stack[ts->count++] = t; - return true; +Parser parser; + +bool +advance() +{ + parser.previous = parser.current; + + for(;;) { + parser.current = next_token(); + if(parser.current.type != TOKEN_ERROR) return true; + + return false; + } } -Token pop(TokenStack *ts) { - if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1}; - return ts->stack[--ts->count]; +bool +push(TokenStack *ts, Token t) +{ + if(ts->count >= ts->capacity) return false; + ts->stack[ts->count++] = t; + return true; } -Token top(TokenStack *ts) { - if (ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1}; - return ts->stack[ts->count - 1]; +Token +pop(TokenStack *ts) +{ + if(ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1}; + return ts->stack[--ts->count]; } -bool enqueue(TokenQueue *tq, Token t) { - if (tq->count >= tq->capacity) return false; - - tq->queue[tq->end] = t; - tq->end = (tq->end + 1) % tq->capacity; // Wrap around - tq->count++; - return true; +Token +top(TokenStack *ts) +{ + if(ts->count == 0) return (Token){TOKEN_ERROR, nil, -1, -1}; + return ts->stack[ts->count - 1]; } -Token dequeue(TokenQueue *tq) { - if (tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1}; - - Token t = tq->queue[tq->start]; - tq->start = (tq->start + 1) % tq->capacity; // Wrap around - tq->count--; - return t; +bool +enqueue(TokenQueue *tq, Token t) +{ + if(tq->count >= tq->capacity) return false; + + tq->queue[tq->end] = t; + tq->end = (tq->end + 1) % tq->capacity; // Wrap around + tq->count++; + return true; } -Token peek_queue(TokenQueue *tq) { - if (tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1}; - return tq->queue[tq->start]; +Token 
+dequeue(TokenQueue *tq) +{ + if(tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1}; + + Token t = tq->queue[tq->start]; + tq->start = (tq->start + 1) % tq->capacity; // Wrap around + tq->count--; + return t; } -bool expression() { - +Token +peek_queue(TokenQueue *tq) +{ + if(tq->count == 0) return (Token){TOKEN_ERROR, NULL, -1, -1}; + return tq->queue[tq->start]; } -bool compile(char *source) { - TokenStack operators; - TokenQueue output; - - return true; +u32 +idx_from_arena(Arena *arena, void *p) +{ + return (u32)((u8 *)p - arena->tape); +} + +void * +ptr_from_arena(Arena *arena, u32 i) +{ + return &arena->tape[i]; +} + +ArenaList * +al_create(Arena *arena, u32 size) +{ + ArenaList *meta = aalloc(arena, sizeof(ArenaList)); + if(!meta) return nil; + + meta->size = size + sizeof(u32); + meta->arena = arena; + meta->head = 0; + meta->tail = 0; + + return meta; +} + +void * +al_append(ArenaList *list, void **out_payload) +{ + void *node = aalloc(list->arena, list->size); + if(!node) return nil; + + u32 idx = idx_from_arena(list->arena, node); + void *payload = node; /* Payload starts at offset 0 */ + + void *cdr_ptr = (u8 *)node + (list->size - sizeof(u32)); + + *(u32 *)cdr_ptr = 0; + + if(list->tail != 0) { + void *prev_node = ptr_from_arena(list->arena, list->tail); + void *prev_cdr = (u8 *)prev_node + (list->size - sizeof(u32)); + *(u32 *)prev_cdr = idx; + } else { + list->head = idx; + } + + list->tail = idx; + + if(out_payload) *out_payload = payload; + return payload; +} + +void * +al_head(ArenaList *list) +{ + if(list->head == 0) return nil; + return ptr_from_arena(list->arena, list->head); +} + +void * +al_tail(ArenaList *list) +{ + if(list->tail == 0) return nil; + return ptr_from_arena(list->arena, list->tail); +} + +SymbolLink * +symbol_table_find(ArenaList *table, const char *name) +{ + void *current = al_head(table); + Arena *arena = table->arena; + + while(current != nil) { + SymbolLink *link = (SymbolLink *)current; + + 
if(seq(link->s.name.start, name)) return link; + + u32 next_idx = link->cdr; + current = (next_idx == 0) ? nil : ptr_from_arena(arena, next_idx); + } + return nil; +} + +/**************************************************** + * Parser + ***************************************************/ + +bool +expression() +{ + Token operator_stack[256]; + TokenStack operators = {0}; + operators.stack = operator_stack; + operators.capacity = 256; + + Token output_queue[256]; + TokenQueue output = {0}; + output.queue = output_queue; + output.capacity = 256; + + return true; +} + +bool +compile(char *source) +{ + return true; } diff --git a/parser.h b/parser.h index b9174bb..7b975dd 100644 --- a/parser.h +++ b/parser.h @@ -4,100 +4,92 @@ #include "libc.h" #include "lexer.h" -typedef enum { GLOBAL, LOCAL, VAR } ScopeType; -typedef enum { - VOID, - BOOL, - I8, - I16, - I32, - U8, - U16, - U32, - F8, - F16, - F32, - STR, - PLEX, - ARRAY, - FUNCTION +typedef enum symbol_type_e { + VOID, + BOOL, + I8, + I16, + I32, + U8, + U16, + U32, + F8, + F16, + F32, + STR, + ARRAY, + FUNCTION, + PLEX, + METHOD, + TRAIT, } SymbolType; +typedef struct arena_list_s ArenaList; typedef struct symbol_s Symbol; -typedef struct symbol_tab_s SymbolTable; -typedef struct value_type_s ValueType; -typedef struct plex_fields_tab_s PlexFieldsTable; -typedef struct plex_def_s PlexDef; -typedef struct plex_tab_s PlexTable; -typedef struct scope_s Scope; -typedef struct scope_tab_s ScopeTable; +typedef struct symbol_link_s SymbolLink; typedef struct token_stack_s TokenStack; typedef struct queue_s TokenQueue; +typedef struct parser_s Parser; -struct value_type_s { - SymbolType type; - u32 name; - u32 size; - u32 table_ref; // if it is a heap object -}; - -struct plex_def_s { - u32 name; - u32 size; - u32 field_ref_start; - u32 field_count; -}; - -struct plex_fields_tab_s { - u32 *plex_refs; - ValueType *fields; - u32 count; - u32 capacity; -}; - -struct plex_tab_s { - PlexDef *symbols; - u32 count; - u32 
capacity; -}; - -#define MAX_SYMBOL_NAME_LENGTH 64 struct symbol_s { - char name[MAX_SYMBOL_NAME_LENGTH]; - u8 name_length; - SymbolType type; - ScopeType scope; - u32 ref; // vm->mp if global, vm->pc local, register if var - u32 size; // size of symbol + Token name; + SymbolType type; + u32 size; + i32 scope; + union type_def { + struct trait_def { + u32 field_ref_start; /* reference to field list of symbols */ + u32 methods_ref_start; /* zero if none */ + } trait; + struct plex_def { + u32 field_ref_start; /* reference to field list of symbols */ + u32 methods_ref_start; /* zero if none */ + } plex; + struct function_def { + SymbolType return_type; + u32 arguments_ref_start; /* reference to field list of symbols */ + } function; + struct array_def { + SymbolType type; + u32 length; /* zero means "unbounded" */ + } array; + struct field_def { + u32 offset; + } field; + } def; }; -#define MAX_SYMBOLS 256 -struct symbol_tab_s { - Symbol symbols[MAX_SYMBOLS]; - u8 count; - i32 parent; +struct symbol_link_s { + Symbol s; + u32 cdr; /* zero means "end of list" */ }; -struct scope_tab_s { - SymbolTable *scopes; - u32 count; - u32 capacity; - i32 scope_ref; - u32 depth; +struct arena_list_s { + Arena *arena; + u32 head; + u32 tail; + u32 size; + u32 count; + i32 parent; }; struct token_stack_s { - Token *stack; - i32 capacity; - i32 count; + Token *stack; + i32 capacity; + i32 count; }; struct queue_s { - Token *queue; - i32 capacity; - i32 start; - i32 end; - i32 count; + Token *queue; + i32 capacity; + i32 start; + i32 end; + i32 count; +}; + +struct parser_s { + Token current; + Token previous; }; bool push(TokenStack *ts, Token t); diff --git a/tools/file2header.c b/tools/file2header.c index 8c5e3ac..6c5071a 100644 --- a/tools/file2header.c +++ b/tools/file2header.c @@ -3,64 +3,63 @@ #include #include -int main(int argc, char *argv[]) { - FILE *in; - int c; - long count = 0; - long col = 0; - char *var_name; - char *p; - - if (argc != 2) { - fprintf(stderr, "Usage: 
%s \n", argv[0]); - return 1; - } +int +main(int argc, char *argv[]) +{ + FILE *in; + int c; + long count = 0; + long col = 0; + char *var_name; + char *p; - in = fopen(argv[1], "rb"); - if (!in) { - perror("Error opening input file"); - return 1; - } + if(argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } - var_name = (char *)malloc(strlen(argv[1]) + 1); - if (!var_name) { - perror("Memory allocation failed"); - fclose(in); - return 1; - } - - strcpy(var_name, argv[1]); - - for (p = var_name; *p; ++p) { - if (!isalnum((unsigned char)*p)) { - *p = '_'; - } - } + in = fopen(argv[1], "rb"); + if(!in) { + perror("Error opening input file"); + return 1; + } - printf("unsigned char %s[] = {\n", var_name); + var_name = (char *)malloc(strlen(argv[1]) + 1); + if(!var_name) { + perror("Memory allocation failed"); + fclose(in); + return 1; + } - c = fgetc(in); - while (c != EOF) { - printf(" 0x%02x", c); - count++; + strcpy(var_name, argv[1]); - int next = fgetc(in); - if (next != EOF) { - printf(","); - ungetc(next, in); - if (++col >= 12) { - printf("\n"); - col = 0; - } - } - - c = fgetc(in); - } - - printf("\n};\n"); - printf("unsigned int %s_len = %lu;\n", var_name, count); - free(var_name); - fclose(in); + for(p = var_name; *p; ++p) + if(!isalnum((unsigned char)*p)) *p = '_'; - return 0; + printf("unsigned char %s[] = {\n", var_name); + + c = fgetc(in); + while(c != EOF) { + printf(" 0x%02x", c); + count++; + + int next = fgetc(in); + if(next != EOF) { + printf(","); + ungetc(next, in); + if(++col >= 12) { + printf("\n"); + col = 0; + } + } + + c = fgetc(in); + } + + printf("\n};\n"); + printf("unsigned int %s_len = %lu;\n", var_name, count); + free(var_name); + fclose(in); + + return 0; }