346 lines
7.9 KiB
C
346 lines
7.9 KiB
C
#include "parser.h"
|
|
|
|
/* Number of buckets in the keyword hash table (size of hashtab_Token). */
#define HASHSIZE 150
|
|
/*
 * Bucket heads of the keyword table. Each slot is the head of a singly
 * linked chain of TokenMap nodes (linked through ->next); empty slots are
 * NULL because the array has static storage.
 */
static TokenMap *hashtab_Token[HASHSIZE];
|
|
|
|
unsigned int hash_Token(char *s) {
|
|
unsigned int hashval;
|
|
for (hashval = 0; *s != '\0'; s++)
|
|
hashval = *s + 31 * hashval;
|
|
return hashval % HASHSIZE;
|
|
}
|
|
|
|
TokenMap *lookup_Token(char *s) {
|
|
TokenMap *np;
|
|
for (np = hashtab_Token[hash_Token(s)]; np != NULL; np = np->next)
|
|
if (strcmp(s, np->keyword) == 0)
|
|
return np;
|
|
return NULL;
|
|
}
|
|
|
|
TokenType get_Token(char *s) {
|
|
TokenMap *np;
|
|
for (np = hashtab_Token[hash_Token(s)]; np != NULL; np = np->next)
|
|
if (strcmp(s, np->keyword) == 0)
|
|
return np->token;
|
|
return TOKEN_IDENTIFIER;
|
|
}
|
|
|
|
/*
 * Return a freshly malloc'd copy of the NUL-terminated string `s`.
 *
 * s: string to copy (must not be NULL).
 * Returns the copy, including its terminator, or NULL when the allocation
 * fails. The caller owns the result and releases it with free().
 */
char *strdup(const char *s) {
  size_t nbytes = strlen(s) + 1; /* +1 for the terminating NUL */
  char *dup = malloc(nbytes);

  if (dup == NULL) {
    return NULL;
  }
  memcpy(dup, s, nbytes);
  return dup;
}
|
|
|
|
/*
 * Register `keyword` with token type `token`, or update the token type of
 * an already registered keyword.
 *
 * A new node stores its own copy of `keyword` (made with strdup) and is
 * pushed onto the front of its bucket chain.
 *
 * keyword: NUL-terminated keyword text (must not be NULL).
 * token:   token type to associate with the keyword.
 * Returns the node for `keyword`, or NULL if memory could not be
 * allocated; in that case the table is left unchanged.
 */
TokenMap *put_Token(char *keyword, TokenType token) {
  TokenMap *np = lookup_Token(keyword);

  if (np == NULL) {
    unsigned int hashval;

    np = malloc(sizeof(*np));
    if (np == NULL) {
      return NULL;
    }
    np->keyword = strdup(keyword);
    if (np->keyword == NULL) {
      /* The node is not linked anywhere yet; release it to avoid a leak. */
      free(np);
      return NULL;
    }
    hashval = hash_Token(keyword);
    np->next = hashtab_Token[hashval];
    hashtab_Token[hashval] = np;
  }
  np->token = token;
  return np;
}
|
|
|
|
void new_TokenMap() {
|
|
put_Token("nil", TOKEN_NULL);
|
|
put_Token("and", TOKEN_AND);
|
|
put_Token("or", TOKEN_OR);
|
|
put_Token("xor", TOKEN_XOR);
|
|
put_Token("mod", TOKEN_MOD);
|
|
put_Token("eq", TOKEN_EQ);
|
|
put_Token("ge", TOKEN_GE);
|
|
put_Token("lt", TOKEN_LT);
|
|
put_Token("le", TOKEN_LE);
|
|
put_Token("ne", TOKEN_NE);
|
|
put_Token("gt", TOKEN_GT);
|
|
put_Token("ge", TOKEN_GE);
|
|
put_Token("srl", TOKEN_SHIFTRIGHT);
|
|
put_Token("sll", TOKEN_SHIFTLEFT);
|
|
put_Token("int", TOKEN_INT);
|
|
put_Token("print", TOKEN_PRINT);
|
|
}
|
|
|
|
/*
 * Scanner state.
 *
 * start:   first character of the lexeme currently being scanned.
 * current: next character to be consumed.
 * line:    current line number in the source text.
 */
typedef struct Tokenizer {
  char *start;
  char *current;
  int32_t line;
} Tokenizer;
|
|
|
|
/* The single scanner state read and updated by the tokenizer routines in this file. */
Tokenizer tokenizer;
|
|
|
|
void new_Tokenizer(char *src) {
|
|
tokenizer.start = src;
|
|
tokenizer.current = src;
|
|
tokenizer.line = 1;
|
|
}
|
|
|
|
/*
 * Report whether `c` may appear in an identifier: an ASCII letter, an
 * underscore, an apostrophe, or a question mark.
 */
static bool isAlpha(char c) {
  if (c >= 'a' && c <= 'z') {
    return true;
  }
  if (c >= 'A' && c <= 'Z') {
    return true;
  }
  switch (c) {
  case '_':
  case '\'':
  case '?':
    return true;
  default:
    return false;
  }
}
|
|
|
|
/*
 * Report whether `c` is treated as a digit by the scanner.
 *
 * Besides '0'..'9' this also accepts '-', so a minus sign is consumed as
 * part of number and identifier lexemes.
 */
static bool isDigit(char c) {
  if (c == '-') {
    return true;
  }
  return c >= '0' && c <= '9';
}
|
|
|
|
/* True when the next unread character is the terminating NUL of the source. */
static bool isAtEnd() {
  return tokenizer.current[0] == '\0';
}
|
|
|
|
/*
 * Build a token of the given type from the lexeme delimited by
 * tokenizer.start (inclusive) and tokenizer.current (exclusive).
 *
 * The token points into the source text; nothing is copied. Its line is
 * the tokenizer's current line.
 */
static Token makeToken(TokenType type) {
  Token result;

  result.start = tokenizer.start;
  result.length = (int32_t)(tokenizer.current - tokenizer.start);
  result.line = tokenizer.line;
  result.type = type;
  return result;
}
|
|
|
|
static Token errorToken(char *msg) {
|
|
Token token;
|
|
token.type = TOKEN_ERROR;
|
|
token.start = msg;
|
|
token.length = (int32_t)strlen(msg);
|
|
token.line = tokenizer.line;
|
|
return token;
|
|
}
|
|
|
|
static char advance() {
|
|
tokenizer.current++;
|
|
return tokenizer.current[-1];
|
|
}
|
|
|
|
/* Return the next unread character without consuming it. */
static char peek() {
  return tokenizer.current[0];
}
|
|
|
|
static char peekNext() {
|
|
if (isAtEnd())
|
|
return '\0';
|
|
return tokenizer.current[1];
|
|
}
|
|
|
|
static void skipWhitespace() {
|
|
for (;;) {
|
|
char c = peek();
|
|
switch (c) {
|
|
case ' ':
|
|
case '\r':
|
|
case '\t':
|
|
advance();
|
|
break;
|
|
case '\n':
|
|
tokenizer.line++;
|
|
advance();
|
|
break;
|
|
case '!':
|
|
while (peek() != '\n' && !isAtEnd())
|
|
advance();
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static char *currentTokenToS() {
|
|
int32_t size = tokenizer.current - tokenizer.start;
|
|
char *str = (char *)malloc(sizeof(size));
|
|
strncpy(str, tokenizer.start, size);
|
|
str[size] = '\0';
|
|
return str;
|
|
}
|
|
|
|
static TokenType identifierType() {
|
|
char *check = currentTokenToS();
|
|
TokenType t = get_Token(check);
|
|
free(check);
|
|
return t;
|
|
}
|
|
|
|
/*
 * Scan the remainder of an identifier or keyword. The first character has
 * already been consumed by the caller.
 *
 * Consumes characters for as long as isAlpha() or isDigit() accepts them,
 * then builds a token whose type comes from identifierType().
 */
static Token identifier() {
  for (;;) {
    char c = peek();

    if (!isAlpha(c) && !isDigit(c)) {
      break;
    }
    advance();
  }
  return makeToken(identifierType());
}
|
|
|
|
static Token number() {
|
|
bool is_float = false;
|
|
while (isDigit(peek()))
|
|
advance();
|
|
|
|
/* Look for a fractional part. */
|
|
if (peek() == '.' && isDigit(peekNext())) {
|
|
is_float = true;
|
|
/* Consume the ".". */
|
|
advance();
|
|
|
|
while (isDigit(peek()))
|
|
advance();
|
|
}
|
|
|
|
return makeToken((is_float)
|
|
? TOKEN_FLOAT
|
|
: TOKEN_INT); /* or measure if ends in postscript */
|
|
}
|
|
|
|
static Token string() {
|
|
while (peek() != '"' && !isAtEnd()) {
|
|
if (peek() == '\n')
|
|
tokenizer.line++;
|
|
advance();
|
|
}
|
|
|
|
if (isAtEnd())
|
|
return errorToken("Unterminated string.");
|
|
|
|
/* The closing quote. */
|
|
advance();
|
|
return makeToken(TOKEN_STRING);
|
|
}
|
|
|
|
Token nextToken() {
|
|
skipWhitespace();
|
|
tokenizer.start = tokenizer.current;
|
|
if (isAtEnd())
|
|
return makeToken(TOKEN_EOF);
|
|
|
|
char c = advance();
|
|
if (isAlpha(c))
|
|
return identifier();
|
|
if (isDigit(c))
|
|
return number();
|
|
switch (c) {
|
|
case '(':
|
|
return makeToken(TOKEN_LEFT_PAREN);
|
|
case ')':
|
|
return makeToken(TOKEN_RIGHT_PAREN);
|
|
case '{':
|
|
return makeToken(TOKEN_LEFT_BRACE);
|
|
case '}':
|
|
return makeToken(TOKEN_RIGHT_BRACE);
|
|
case '+':
|
|
return makeToken(TOKEN_ADD);
|
|
case '/':
|
|
return makeToken(TOKEN_DIV);
|
|
case '-':
|
|
return makeToken(TOKEN_SUB);
|
|
case '*':
|
|
return makeToken(TOKEN_MUL);
|
|
case ';':
|
|
return makeToken(TOKEN_SEMICOLON);
|
|
case '"':
|
|
return string();
|
|
}
|
|
|
|
return errorToken("Unexpected character.");
|
|
}
|
|
|
|
void debug_printToken(Token t) {
|
|
char *str = currentTokenToS();
|
|
|
|
switch (t.type) {
|
|
case TOKEN_LEFT_PAREN:
|
|
printf("TOKEN_LEFT_PAREN %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_RIGHT_PAREN:
|
|
printf("TOKEN_RIGHT_PAREN %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_LEFT_BRACE:
|
|
printf("TOKEN_LEFT_BRACE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_RIGHT_BRACE:
|
|
printf("TOKEN_RIGHT_BRACE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_IDENTIFIER:
|
|
printf("TOKEN_IDENTIFIER %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_STRING:
|
|
printf("TOKEN_STRING %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_FLOAT:
|
|
printf("TOKEN_FLOAT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_ERROR:
|
|
printf("TOKEN_ERROR %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_FALSE:
|
|
printf("TOKEN_FALSE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_TRUE:
|
|
printf("TOKEN_TRUE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_EOF:
|
|
printf("TOKEN_EOF %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_ADD:
|
|
printf("TOKEN_ADD %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_SUB:
|
|
printf("TOKEN_SUB %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_MUL:
|
|
printf("TOKEN_MUL %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_DIV:
|
|
printf("TOKEN_DIV %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_MOD:
|
|
printf("TOKEN_MOD %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_INT:
|
|
printf("TOKEN_INT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_UINT:
|
|
printf("TOKEN_UINT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_SHIFTRIGHT:
|
|
printf("TOKEN_SHIFTRIGHT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_SHIFTLEFT:
|
|
printf("TOKEN_SHIFTLEFT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_GT:
|
|
printf("TOKEN_GT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_LT:
|
|
printf("TOKEN_LT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_EQ:
|
|
printf("TOKEN_EQ %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_GE:
|
|
printf("TOKEN_GE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_LE:
|
|
printf("TOKEN_LE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_NE:
|
|
printf("TOKEN_NE %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_NULL:
|
|
printf("TOKEN_NULL %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_AND:
|
|
printf("TOKEN_AND %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_OR:
|
|
printf("TOKEN_OR %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_XOR:
|
|
printf("TOKEN_XOR %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_SEMICOLON:
|
|
printf("TOKEN_SEMICOLON %s line_no=%d\n", str, t.line);
|
|
break;
|
|
case TOKEN_PRINT:
|
|
printf("TOKEN_PRINT %s line_no=%d\n", str, t.line);
|
|
break;
|
|
}
|
|
free(str);
|
|
}
|