#include "parser.h"

/* Number of buckets in the keyword hash table. */
#define HASHSIZE 150

/* Keyword table: each bucket heads a singly linked chain of TokenMap nodes. */
static TokenMap *hashtab_Token[HASHSIZE];

/*
 * Hash the NUL-terminated string `s` to a bucket index in [0, HASHSIZE).
 */
unsigned int hash_Token(char *s)
{
    unsigned int hashval;

    for (hashval = 0; *s != '\0'; s++)
        hashval = *s + 31 * hashval;
    return hashval % HASHSIZE;
}

/*
 * Find the table entry whose keyword equals `s`.
 * Returns the entry, or NULL when `s` has not been registered.
 */
TokenMap *lookup_Token(char *s)
{
    TokenMap *np;

    for (np = hashtab_Token[hash_Token(s)]; np != NULL; np = np->next) {
        if (strcmp(s, np->keyword) == 0)
            return np;
    }
    return NULL;
}

/*
 * Map the lexeme `s` to its token type.
 * Returns the registered type for a keyword, TOKEN_IDENTIFIER otherwise.
 */
TokenType get_Token(char *s)
{
    TokenMap *np = lookup_Token(s);

    return (np != NULL) ? np->token : TOKEN_IDENTIFIER;
}

/*
 * Return a heap-allocated copy of `s`, or NULL if allocation fails.
 * The caller owns the returned buffer and releases it with free().
 *
 * NOTE(review): this shares its name with the POSIX/C23 library function;
 * it is kept because other translation units may reference it.
 */
char *strdup(const char *s)
{
    size_t len = strlen(s) + 1;
    char *copy = malloc(len);

    if (copy) {
        memcpy(copy, s, len);
    }
    return copy;
}

/*
 * Register `keyword` with type `token`, or update the type if the keyword
 * is already present. The keyword text is copied into the table.
 * Returns the table entry, or NULL if memory could not be allocated (in
 * which case the table is left unchanged).
 */
TokenMap *put_Token(char *keyword, TokenType token)
{
    TokenMap *np = lookup_Token(keyword);

    if (np == NULL) {
        unsigned int hashval;

        np = malloc(sizeof *np);
        if (np == NULL)
            return NULL;
        np->keyword = strdup(keyword);
        if (np->keyword == NULL) {
            /* Do not leak the node when the keyword copy fails. */
            free(np);
            return NULL;
        }
        /* Push the new node on the front of its bucket's chain. */
        hashval = hash_Token(keyword);
        np->next = hashtab_Token[hashval];
        hashtab_Token[hashval] = np;
    }
    np->token = token;
    return np;
}

/*
 * Populate the keyword table with the built-in keywords.
 * Allocation failures reported by put_Token() are not propagated.
 */
void new_TokenMap(void)
{
    put_Token("nil", TOKEN_NULL);
    put_Token("and", TOKEN_AND);
    put_Token("or", TOKEN_OR);
    put_Token("xor", TOKEN_XOR);
    put_Token("mod", TOKEN_MOD);
    put_Token("eq", TOKEN_EQ);
    put_Token("ge", TOKEN_GE);
    put_Token("lt", TOKEN_LT);
    put_Token("le", TOKEN_LE);
    put_Token("ne", TOKEN_NE);
    put_Token("gt", TOKEN_GT);
    put_Token("srl", TOKEN_SHIFTRIGHT);
    put_Token("sll", TOKEN_SHIFTLEFT);
    put_Token("int", TOKEN_INT);
    put_Token("print", TOKEN_PRINT);
}

typedef struct Tokenizer Tokenizer;

/* Scanner state over a NUL-terminated source buffer. */
struct Tokenizer {
    char *start;   /* first character of the lexeme being scanned */
    char *current; /* next character to be consumed */
    int32_t line;  /* current line number, starting at 1 */
};

/* The single tokenizer instance used by every function below. */
Tokenizer tokenizer;

/*
 * Point the tokenizer at the NUL-terminated buffer `src` and reset the line
 * counter to 1. The buffer is not copied, so it must stay valid while tokens
 * that point into it are in use.
 */
void new_Tokenizer(char *src)
{
    tokenizer.start = src;
    tokenizer.current = src;
    tokenizer.line = 1;
}

/* True for characters that may start or continue an identifier. */
static bool isAlpha(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' ||
           c == '\'' || c == '?';
}

/* True for the ASCII decimal digits '0'..'9' only. */
static bool isDecimalDigit(char c)
{
    return c >= '0' && c <= '9';
}

/*
 * True for a decimal digit or '-'.
 *
 * NOTE(review): because '-' is accepted here, identifier() and number() also
 * consume embedded '-' characters (e.g. "5-3" is scanned as one lexeme).
 * Confirm that this is intended before tightening it.
 */
static bool isDigit(char c)
{
    return isDecimalDigit(c) || c == '-';
}

/* True when the scanner is positioned on the terminating NUL. */
static bool isAtEnd(void)
{
    return *tokenizer.current == '\0';
}

/* Build a token of `type` covering the lexeme [start, current). */
static Token makeToken(TokenType type)
{
    Token token;

    token.type = type;
    token.start = tokenizer.start;
    token.length = (int32_t)(tokenizer.current - tokenizer.start);
    token.line = tokenizer.line;
    return token;
}

/*
 * Build a TOKEN_ERROR token whose text is `msg` rather than source text.
 * `msg` is not copied and must outlive the token.
 */
static Token errorToken(char *msg)
{
    Token token;

    token.type = TOKEN_ERROR;
    token.start = msg;
    token.length = (int32_t)strlen(msg);
    token.line = tokenizer.line;
    return token;
}

/* Consume one character and return it. */
static char advance(void)
{
    tokenizer.current++;
    return tokenizer.current[-1];
}

/* Return the next character without consuming it. */
static char peek(void)
{
    return *tokenizer.current;
}

/* Return the character after the next one, or '\0' at end of input. */
static char peekNext(void)
{
    if (isAtEnd())
        return '\0';
    return tokenizer.current[1];
}

/*
 * Skip blanks, newlines (counting lines) and comments. A comment starts at
 * '!' and runs up to, but not including, the end of the line.
 */
static void skipWhitespace(void)
{
    for (;;) {
        char c = peek();

        switch (c) {
        case ' ':
        case '\r':
        case '\t':
            advance();
            break;
        case '\n':
            tokenizer.line++;
            advance();
            break;
        case '!':
            while (peek() != '\n' && !isAtEnd())
                advance();
            break;
        default:
            return;
        }
    }
}

/*
 * Return a heap-allocated, NUL-terminated copy of the current lexeme
 * [start, current), or NULL if allocation fails. The caller frees it.
 */
static char *currentTokenToS(void)
{
    size_t len = (size_t)(tokenizer.current - tokenizer.start);
    /* One extra byte for the terminator. */
    char *str = malloc(len + 1);

    if (str == NULL)
        return NULL;
    memcpy(str, tokenizer.start, len);
    str[len] = '\0';
    return str;
}

/*
 * Classify the current lexeme as a keyword or a plain identifier.
 * If the temporary copy cannot be allocated the lexeme is reported as
 * TOKEN_IDENTIFIER.
 */
static TokenType identifierType(void)
{
    char *lexeme = currentTokenToS();
    TokenType type;

    if (lexeme == NULL)
        return TOKEN_IDENTIFIER;
    type = get_Token(lexeme);
    free(lexeme);
    return type;
}

/* Scan the rest of an identifier or keyword; the first character is consumed. */
static Token identifier(void)
{
    while (isAlpha(peek()) || isDigit(peek()))
        advance();
    return makeToken(identifierType());
}

/*
 * Scan the rest of a numeric literal; the first character is consumed.
 * Produces TOKEN_FLOAT when a fractional part is present, else TOKEN_INT.
 */
static Token number(void)
{
    bool is_float = false;

    while (isDigit(peek()))
        advance();

    /* Look for a fractional part. */
    if (peek() == '.' && isDigit(peekNext())) {
        is_float = true;
        /* Consume the ".". */
        advance();
        while (isDigit(peek()))
            advance();
    }
    return makeToken(is_float ? TOKEN_FLOAT : TOKEN_INT);
}

/*
 * Scan the rest of a string literal; the opening quote is consumed.
 * The resulting lexeme includes both quotes. Newlines inside the literal are
 * counted. Returns an error token if the input ends before the closing quote.
 */
static Token string(void)
{
    while (peek() != '"' && !isAtEnd()) {
        if (peek() == '\n')
            tokenizer.line++;
        advance();
    }
    if (isAtEnd())
        return errorToken("Unterminated string.");

    /* The closing quote. */
    advance();
    return makeToken(TOKEN_STRING);
}

/*
 * Scan and return the next token from the source set by new_Tokenizer().
 * Returns TOKEN_EOF at end of input and a TOKEN_ERROR token for a character
 * that starts no known token.
 */
Token nextToken(void)
{
    char c;

    skipWhitespace();
    tokenizer.start = tokenizer.current;
    if (isAtEnd())
        return makeToken(TOKEN_EOF);

    c = advance();
    if (isAlpha(c))
        return identifier();
    /*
     * isDigit() accepts '-', so without this check a '-' would always be
     * scanned as a number and TOKEN_SUB could never be produced. Treat '-'
     * as the subtraction operator unless a decimal digit follows directly.
     */
    if (c == '-' && !isDecimalDigit(peek()))
        return makeToken(TOKEN_SUB);
    if (isDigit(c))
        return number();

    switch (c) {
    case '(':
        return makeToken(TOKEN_LEFT_PAREN);
    case ')':
        return makeToken(TOKEN_RIGHT_PAREN);
    case '{':
        return makeToken(TOKEN_LEFT_BRACE);
    case '}':
        return makeToken(TOKEN_RIGHT_BRACE);
    case '+':
        return makeToken(TOKEN_ADD);
    case '/':
        return makeToken(TOKEN_DIV);
    case '*':
        return makeToken(TOKEN_MUL);
    case ';':
        return makeToken(TOKEN_SEMICOLON);
    case '"':
        return string();
    default:
        break;
    }
    return errorToken("Unexpected character.");
}

/* Return the printable name of `type`, or NULL for a type with no name here. */
static const char *tokenTypeName(TokenType type)
{
    switch (type) {
    case TOKEN_LEFT_PAREN:  return "TOKEN_LEFT_PAREN";
    case TOKEN_RIGHT_PAREN: return "TOKEN_RIGHT_PAREN";
    case TOKEN_LEFT_BRACE:  return "TOKEN_LEFT_BRACE";
    case TOKEN_RIGHT_BRACE: return "TOKEN_RIGHT_BRACE";
    case TOKEN_IDENTIFIER:  return "TOKEN_IDENTIFIER";
    case TOKEN_STRING:      return "TOKEN_STRING";
    case TOKEN_FLOAT:       return "TOKEN_FLOAT";
    case TOKEN_ERROR:       return "TOKEN_ERROR";
    case TOKEN_FALSE:       return "TOKEN_FALSE";
    case TOKEN_TRUE:        return "TOKEN_TRUE";
    case TOKEN_EOF:         return "TOKEN_EOF";
    case TOKEN_ADD:         return "TOKEN_ADD";
    case TOKEN_SUB:         return "TOKEN_SUB";
    case TOKEN_MUL:         return "TOKEN_MUL";
    case TOKEN_DIV:         return "TOKEN_DIV";
    case TOKEN_MOD:         return "TOKEN_MOD";
    case TOKEN_INT:         return "TOKEN_INT";
    case TOKEN_UINT:        return "TOKEN_UINT";
    case TOKEN_SHIFTRIGHT:  return "TOKEN_SHIFTRIGHT";
    case TOKEN_SHIFTLEFT:   return "TOKEN_SHIFTLEFT";
    case TOKEN_GT:          return "TOKEN_GT";
    case TOKEN_LT:          return "TOKEN_LT";
    case TOKEN_EQ:          return "TOKEN_EQ";
    case TOKEN_GE:          return "TOKEN_GE";
    case TOKEN_LE:          return "TOKEN_LE";
    case TOKEN_NE:          return "TOKEN_NE";
    case TOKEN_NULL:        return "TOKEN_NULL";
    case TOKEN_AND:         return "TOKEN_AND";
    case TOKEN_OR:          return "TOKEN_OR";
    case TOKEN_XOR:         return "TOKEN_XOR";
    case TOKEN_SEMICOLON:   return "TOKEN_SEMICOLON";
    case TOKEN_PRINT:       return "TOKEN_PRINT";
    default:                return NULL;
    }
}

/*
 * Print one line describing `t` to stdout: type name, token text, line.
 * The text is taken from the token itself (t.start / t.length), so the
 * output is correct even if the tokenizer has advanced since `t` was
 * produced; for TOKEN_ERROR the text is the error message. Token types
 * without a known name print nothing.
 */
void debug_printToken(Token t)
{
    const char *name = tokenTypeName(t.type);

    if (name == NULL)
        return;
    /* "%.*s" takes its precision as an int; the text is not NUL-terminated. */
    printf("%s %.*s line_no=%d\n", name, (int)t.length, t.start, (int)t.line);
}