undar-lang/src/parser.c

346 lines
7.9 KiB
C

#include "parser.h"
/* Number of buckets in the keyword hash table. */
#define HASHSIZE 150
/* Bucket heads of the keyword table. Each bucket is a singly linked chain of
 * TokenMap entries joined through their `next` field; having static storage,
 * every bucket starts out NULL (empty). */
static TokenMap *hashtab_Token[HASHSIZE];
/*
 * Hash a NUL-terminated string into a bucket index for hashtab_Token.
 *
 * Folds each character into the running value with a multiplier of 31 and
 * reduces the result modulo HASHSIZE, so the return value is always in
 * [0, HASHSIZE).
 */
unsigned int hash_Token(char *s) {
  unsigned int acc = 0;
  const char *p = s;

  while (*p != '\0') {
    acc = acc * 31u + (unsigned int)*p;
    p++;
  }
  return acc % HASHSIZE;
}
/*
 * Find the table entry whose keyword equals `s`.
 *
 * Returns the matching TokenMap node, or NULL when `s` is not registered.
 */
TokenMap *lookup_Token(char *s) {
  TokenMap *entry = hashtab_Token[hash_Token(s)];

  /* Walk the bucket chain until a match is found or the chain runs out. */
  while (entry != NULL && strcmp(s, entry->keyword) != 0)
    entry = entry->next;
  return entry;
}
/*
 * Map a word to its token type.
 *
 * Returns the token registered for `s` in the keyword table, or
 * TOKEN_IDENTIFIER when `s` is not a registered keyword.
 */
TokenType get_Token(char *s) {
  TokenMap *entry = hashtab_Token[hash_Token(s)];

  while (entry != NULL) {
    if (strcmp(s, entry->keyword) == 0)
      return entry->token;
    entry = entry->next;
  }
  return TOKEN_IDENTIFIER;
}
/*
 * Return a freshly malloc'ed copy of the NUL-terminated string `s`, or NULL
 * if the allocation fails. The caller owns the copy and releases it with
 * free().
 *
 * NOTE(review): this shares its name with the POSIX / C23 library function
 * strdup; confirm the local definition is still wanted on the target
 * toolchains.
 */
char *strdup(const char *s) {
  size_t bytes = strlen(s) + 1; /* include the terminating NUL */
  char *dup = malloc(bytes);

  if (dup == NULL)
    return NULL;
  memcpy(dup, s, bytes);
  return dup;
}
/*
 * Register `keyword` with token type `token`, or update the token type if the
 * keyword is already present.
 *
 * A new entry stores its own copy of `keyword` and is pushed onto the front
 * of its bucket chain.
 *
 * Returns the (new or existing) entry, or NULL if memory for a new entry
 * could not be allocated; in that case the table is left unchanged and
 * nothing is leaked.
 */
TokenMap *put_Token(char *keyword, TokenType token) {
  TokenMap *np = lookup_Token(keyword);

  if (np == NULL) {
    unsigned int hashval;

    np = malloc(sizeof *np);
    if (np == NULL)
      return NULL;
    np->keyword = strdup(keyword);
    if (np->keyword == NULL) {
      /* Do not leak the node when only the key copy fails. */
      free(np);
      return NULL;
    }
    hashval = hash_Token(keyword);
    np->next = hashtab_Token[hashval];
    hashtab_Token[hashval] = np;
  }
  np->token = token;
  return np;
}
/*
 * Populate the keyword table with the language's reserved words.
 *
 * Each word is registered exactly once. The results of put_Token are not
 * checked, so a failed allocation leaves that word unregistered and it will
 * later be classified as TOKEN_IDENTIFIER by get_Token.
 */
void new_TokenMap() {
  /* Literals. */
  put_Token("nil", TOKEN_NULL);
  /* Logical operators. */
  put_Token("and", TOKEN_AND);
  put_Token("or", TOKEN_OR);
  put_Token("xor", TOKEN_XOR);
  /* Arithmetic. */
  put_Token("mod", TOKEN_MOD);
  /* Comparisons. */
  put_Token("eq", TOKEN_EQ);
  put_Token("ge", TOKEN_GE);
  put_Token("lt", TOKEN_LT);
  put_Token("le", TOKEN_LE);
  put_Token("ne", TOKEN_NE);
  put_Token("gt", TOKEN_GT);
  /* Shifts. */
  put_Token("srl", TOKEN_SHIFTRIGHT);
  put_Token("sll", TOKEN_SHIFTLEFT);
  /* Types and statements. */
  put_Token("int", TOKEN_INT);
  put_Token("print", TOKEN_PRINT);
}
/* Scanner state for the source text currently being tokenized. */
typedef struct Tokenizer Tokenizer;
struct Tokenizer {
/* First character of the token being scanned; nextToken resets it to
 * `current` after skipping whitespace. */
char *start;
/* Next character to be consumed; moved forward by advance(). */
char *current;
/* Line number, starting at 1 and incremented on each '\n' consumed. */
int32_t line;
};
/* The single tokenizer instance used by every scanning function in this
 * file; (re)initialized by new_Tokenizer. */
Tokenizer tokenizer;
/*
 * Point the global tokenizer at the beginning of `src` and reset the line
 * counter to 1. The tokenizer keeps pointers into `src`; it does not copy it.
 */
void new_Tokenizer(char *src) {
  tokenizer.line = 1;
  tokenizer.current = src;
  tokenizer.start = src;
}
/*
 * True for characters accepted in identifiers: ASCII letters, underscore,
 * apostrophe and question mark.
 */
static bool isAlpha(char c) {
  if (c >= 'a' && c <= 'z')
    return true;
  if (c >= 'A' && c <= 'Z')
    return true;
  return c == '_' || c == '\'' || c == '?';
}
/*
 * True for the decimal digits '0'..'9' and also for '-'.
 * Because '-' is accepted, the identifier and number scanners keep consuming
 * it as part of the current token.
 */
static bool isDigit(char c) {
  if (c == '-')
    return true;
  return c >= '0' && c <= '9';
}
/* True once the scan position sits on the terminating NUL of the source. */
static bool isAtEnd() {
  return tokenizer.current[0] == '\0';
}
/*
 * Build a token of kind `type` covering the text scanned so far, i.e. the
 * span from tokenizer.start up to (not including) tokenizer.current, tagged
 * with the current line number. The token points into the source buffer.
 */
static Token makeToken(TokenType type) {
  Token made;

  made.line = tokenizer.line;
  made.start = tokenizer.start;
  made.length = (int32_t)(tokenizer.current - tokenizer.start);
  made.type = type;
  return made;
}
/*
 * Build a TOKEN_ERROR token whose text is the message `msg` rather than a
 * span of the source. The token stores the `msg` pointer itself, so the
 * message must outlive the token. The current line number is recorded.
 */
static Token errorToken(char *msg) {
  Token failure;

  failure.line = tokenizer.line;
  failure.length = (int32_t)strlen(msg);
  failure.start = msg;
  failure.type = TOKEN_ERROR;
  return failure;
}
/* Consume one character: return the character at the scan position and move
 * the position forward by one. */
static char advance() {
  char consumed = *tokenizer.current;

  tokenizer.current++;
  return consumed;
}
/* Return the character at the scan position without consuming it. */
static char peek() {
  return tokenizer.current[0];
}
/*
 * Return the character one past the scan position without consuming
 * anything, or '\0' when the scan position is already at the end of input
 * (so the byte after the terminator is never read).
 */
static char peekNext() {
  return isAtEnd() ? '\0' : tokenizer.current[1];
}
/*
 * Advance past everything that is not part of a token: spaces, carriage
 * returns, tabs, newlines (each of which bumps the line counter) and
 * comments, which start with '!' and run up to, but not including, the end
 * of the line.
 */
static void skipWhitespace() {
  for (;;) {
    char c = peek();

    if (c == ' ' || c == '\r' || c == '\t') {
      advance();
      continue;
    }
    if (c == '\n') {
      tokenizer.line++;
      advance();
      continue;
    }
    if (c == '!') {
      /* Leave the newline for the next iteration so the line gets counted. */
      while (!isAtEnd() && peek() != '\n')
        advance();
      continue;
    }
    return;
  }
}
/*
 * Return a heap-allocated, NUL-terminated copy of the text of the token
 * currently being scanned (tokenizer.start up to, but not including,
 * tokenizer.current).
 *
 * The buffer is sized from the token length plus one byte for the
 * terminator. The caller owns the result and must free() it. Returns NULL if
 * the allocation fails.
 */
static char *currentTokenToS() {
  size_t size = (size_t)(tokenizer.current - tokenizer.start);
  char *str = malloc(size + 1);

  if (str == NULL)
    return NULL;
  memcpy(str, tokenizer.start, size);
  str[size] = '\0';
  return str;
}
/*
 * Classify the word just scanned: the registered keyword token if the word
 * is in the keyword table, otherwise whatever get_Token reports for an
 * unregistered word. Works on a temporary copy of the word, which is freed
 * before returning.
 */
static TokenType identifierType() {
  char *word = currentTokenToS();
  TokenType kind = get_Token(word);

  free(word);
  return kind;
}
/*
 * Scan the rest of an identifier or keyword. The first character has already
 * been consumed by the caller; this consumes every following character
 * accepted by isAlpha or isDigit and returns the resulting token.
 */
static Token identifier() {
  for (;;) {
    char c = peek();

    if (!isAlpha(c) && !isDigit(c))
      break;
    advance();
  }
  return makeToken(identifierType());
}
/*
 * Scan the rest of a numeric literal whose first character has already been
 * consumed. Produces TOKEN_FLOAT when a '.' followed by a character accepted
 * by isDigit is present, otherwise TOKEN_INT.
 */
static Token number() {
  /* Integer part. */
  while (isDigit(peek()))
    advance();

  /* No fractional part: a '.' only counts when a digit follows it. */
  if (peek() != '.' || !isDigit(peekNext()))
    return makeToken(TOKEN_INT);

  /* Consume the '.' and then the fractional digits. */
  advance();
  while (isDigit(peek()))
    advance();
  return makeToken(TOKEN_FLOAT);
}
/*
 * Scan a string literal whose opening quote has already been consumed.
 * Newlines inside the literal are allowed and counted. Returns TOKEN_STRING
 * spanning both quotes, or an error token if the input ends before the
 * closing quote.
 */
static Token string() {
  for (;;) {
    char c = peek();

    if (c == '"' || isAtEnd())
      break;
    if (c == '\n')
      tokenizer.line++;
    advance();
  }

  if (isAtEnd())
    return errorToken("Unterminated string.");

  /* Step over the closing quote so it is part of the token. */
  advance();
  return makeToken(TOKEN_STRING);
}
/*
 * Scan and return the next token from the source given to new_Tokenizer.
 *
 * Whitespace and '!' comments are skipped first. Returns TOKEN_EOF at the end
 * of input and an error token ("Unexpected character.") for a character that
 * starts no known token.
 *
 * A '-' begins a numeric literal only when it is immediately followed by a
 * decimal digit; otherwise it is the TOKEN_SUB operator. (isDigit accepts
 * '-', so without this check the generic digit test would swallow every '-'
 * and TOKEN_SUB could never be produced.)
 */
Token nextToken() {
  skipWhitespace();
  tokenizer.start = tokenizer.current;
  if (isAtEnd())
    return makeToken(TOKEN_EOF);
  char c = advance();
  if (isAlpha(c))
    return identifier();
  if (c == '-') {
    /* Look at the character after the '-' without consuming it. */
    char following = *tokenizer.current;

    if (following < '0' || following > '9')
      return makeToken(TOKEN_SUB);
    return number();
  }
  if (isDigit(c))
    return number();
  switch (c) {
  case '(':
    return makeToken(TOKEN_LEFT_PAREN);
  case ')':
    return makeToken(TOKEN_RIGHT_PAREN);
  case '{':
    return makeToken(TOKEN_LEFT_BRACE);
  case '}':
    return makeToken(TOKEN_RIGHT_BRACE);
  case '+':
    return makeToken(TOKEN_ADD);
  case '/':
    return makeToken(TOKEN_DIV);
  case '*':
    return makeToken(TOKEN_MUL);
  case ';':
    return makeToken(TOKEN_SEMICOLON);
  case '"':
    return string();
  default:
    break;
  }
  return errorToken("Unexpected character.");
}
/*
 * Print one line describing token `t` to stdout, in the form
 *   TOKEN_NAME <text> line_no=<line>
 *
 * The text printed is the token's own lexeme (t.start, t.length), so the
 * output is correct for error tokens (whose text is the error message) and
 * for tokens printed after the tokenizer has moved on. No memory is
 * allocated. Token types not listed below produce no output.
 */
void debug_printToken(Token t) {
  const char *name = NULL;

  switch (t.type) {
  case TOKEN_LEFT_PAREN:
    name = "TOKEN_LEFT_PAREN";
    break;
  case TOKEN_RIGHT_PAREN:
    name = "TOKEN_RIGHT_PAREN";
    break;
  case TOKEN_LEFT_BRACE:
    name = "TOKEN_LEFT_BRACE";
    break;
  case TOKEN_RIGHT_BRACE:
    name = "TOKEN_RIGHT_BRACE";
    break;
  case TOKEN_IDENTIFIER:
    name = "TOKEN_IDENTIFIER";
    break;
  case TOKEN_STRING:
    name = "TOKEN_STRING";
    break;
  case TOKEN_FLOAT:
    name = "TOKEN_FLOAT";
    break;
  case TOKEN_ERROR:
    name = "TOKEN_ERROR";
    break;
  case TOKEN_FALSE:
    name = "TOKEN_FALSE";
    break;
  case TOKEN_TRUE:
    name = "TOKEN_TRUE";
    break;
  case TOKEN_EOF:
    name = "TOKEN_EOF";
    break;
  case TOKEN_ADD:
    name = "TOKEN_ADD";
    break;
  case TOKEN_SUB:
    name = "TOKEN_SUB";
    break;
  case TOKEN_MUL:
    name = "TOKEN_MUL";
    break;
  case TOKEN_DIV:
    name = "TOKEN_DIV";
    break;
  case TOKEN_MOD:
    name = "TOKEN_MOD";
    break;
  case TOKEN_INT:
    name = "TOKEN_INT";
    break;
  case TOKEN_UINT:
    name = "TOKEN_UINT";
    break;
  case TOKEN_SHIFTRIGHT:
    name = "TOKEN_SHIFTRIGHT";
    break;
  case TOKEN_SHIFTLEFT:
    name = "TOKEN_SHIFTLEFT";
    break;
  case TOKEN_GT:
    name = "TOKEN_GT";
    break;
  case TOKEN_LT:
    name = "TOKEN_LT";
    break;
  case TOKEN_EQ:
    name = "TOKEN_EQ";
    break;
  case TOKEN_GE:
    name = "TOKEN_GE";
    break;
  case TOKEN_LE:
    name = "TOKEN_LE";
    break;
  case TOKEN_NE:
    name = "TOKEN_NE";
    break;
  case TOKEN_NULL:
    name = "TOKEN_NULL";
    break;
  case TOKEN_AND:
    name = "TOKEN_AND";
    break;
  case TOKEN_OR:
    name = "TOKEN_OR";
    break;
  case TOKEN_XOR:
    name = "TOKEN_XOR";
    break;
  case TOKEN_SEMICOLON:
    name = "TOKEN_SEMICOLON";
    break;
  case TOKEN_PRINT:
    name = "TOKEN_PRINT";
    break;
  default:
    /* Unlisted token types are silently ignored. */
    return;
  }

  /* "%.*s" prints exactly t.length characters; the lexeme is not
   * NUL-terminated because it points into the source buffer. */
  printf("%s %.*s line_no=%d\n", name, (int)t.length, t.start, (int)t.line);
}