undar-lang/src/tools/parser.c

227 lines
5.9 KiB
C

#include "parser.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Allocate `size` bytes with malloc(), or terminate the process.
// On allocation failure this prints "Memory allocation failed" to stderr and
// calls exit(1), so a pointer returned from here is never NULL.
static void *safe_malloc(size_t size) {
    void *block = malloc(size);
    if (block != NULL) {
        return block;
    }
    fprintf(stderr, "Memory allocation failed\n");
    exit(1);
}
// Create a childless node holding a private copy of `token`.
//
// token: text to store on the node; NULL is stored as the empty string "".
// line:  line number recorded in node->line.
//
// Returns a heap-allocated node with children == NULL and child_count == 0.
// Both the node and its token copy are released by expr_free().
//
// The token is copied through safe_malloc() rather than strdup() so that an
// out-of-memory condition aborts like every other allocation in this file
// instead of silently leaving node->token NULL.
static ExprNode *expr_node_create(const char *token, int line) {
    const char *text = token ? token : "";
    size_t size = strlen(text) + 1; // +1 for the terminating NUL

    char *copy = safe_malloc(size);
    memcpy(copy, text, size);

    ExprNode *node = safe_malloc(sizeof *node);
    node->token = copy;
    node->children = NULL;
    node->child_count = 0;
    node->line = line;
    return node;
}
// Forward declaration: parse_list() calls parse_expression() for each child
// and parse_expression() calls parse_list() when it sees '(', so the
// prototype has to be visible before parse_list() is defined.
static ExprNode *parse_expression(const char **ptr, int line);
// Advance past leading whitespace.
//
// ptr: NUL-terminated string; must not be NULL.
//
// Returns a pointer to the first character for which isspace() is false, or
// to the terminating NUL when only whitespace remains.
static const char *skip_whitespace(const char *ptr) {
    // <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF). Passing a negative plain char -- e.g. any byte >= 0x80 where
    // char is signed -- is undefined behaviour, hence the cast.
    while (*ptr && isspace((unsigned char)*ptr)) {
        ptr++;
    }
    return ptr;
}
// Read the next token starting at *ptr.
//
// ptr:  in/out cursor into a NUL-terminated buffer. On success it is moved to
//       the first character after the token; on failure it is left unchanged.
// line: line number, used only in the diagnostic message.
//
// After skipping leading whitespace, a token is one of:
//   - a double-quoted string, returned including its quotes. A backslash
//     followed by any character skips that character, so \" does not end the
//     string. If the closing quote is missing the token runs to end of input.
//   - a single '(' or ')'.
//   - a run of characters ending at whitespace, a parenthesis, or end of input.
//
// Returns a heap-allocated NUL-terminated copy of the token that the caller
// must free(), or NULL when nothing but whitespace remains.
static char *parse_token(const char **ptr, int line) {
    const char *start = skip_whitespace(*ptr);
    if (!*start) {
        // Diagnostics go to stderr, like the other parser errors in this file.
        fprintf(stderr, "Error at line:%d\n", line);
        return NULL;
    }

    const char *end = start;
    if (*start == '"') {
        end++; // opening quote
        while (*end && *end != '"') {
            // Step over an escaped character as a pair, unless the backslash
            // is the very last character before the terminating NUL.
            end += (*end == '\\' && end[1]) ? 2 : 1;
        }
        if (*end == '"') {
            end++; // closing quote is part of the token
        }
    } else if (*start == '(' || *start == ')') {
        end++; // parentheses are single-character tokens
    } else {
        // Cast: <ctype.h> functions are undefined for negative char values.
        while (*end && !isspace((unsigned char)*end) && *end != '(' && *end != ')') {
            end++;
        }
    }

    if (end == start) {
        return NULL; // defensive: every branch above consumes at least one char
    }

    size_t len = (size_t)(end - start);
    char *token = safe_malloc(len + 1);
    memcpy(token, start, len);
    token[len] = '\0';
    *ptr = end;
    return token;
}
// Parse a list (expression starting with '(')
static ExprNode *parse_list(const char **ptr, int line) {
// Skip the opening parenthesis
(*ptr)++;
*ptr = skip_whitespace(*ptr);
if (**ptr == ')') {
// Empty list
(*ptr)++;
return expr_node_create("nil", line);
}
// Parse all children first
ExprNode **temp_children = NULL;
size_t temp_count = 0;
while (**ptr && **ptr != ')') {
ExprNode *child = parse_expression(ptr, line);
if (child) {
// Resize temp children array
ExprNode **new_temp = (ExprNode **)safe_malloc(sizeof(ExprNode *) * (temp_count + 1));
// Copy existing children
for (size_t i = 0; i < temp_count; i++) {
new_temp[i] = temp_children[i];
}
// Add new child
new_temp[temp_count] = child;
temp_count++;
// Free old array and update
free(temp_children);
temp_children = new_temp;
}
*ptr = skip_whitespace(*ptr);
}
if (**ptr == ')') {
(*ptr)++; // Skip closing parenthesis
} else {
fprintf(stderr, "Error: Missing closing parenthesis at line %d\n", line);
}
// Create the actual node
ExprNode *node;
if (temp_count > 0 && temp_children[0]->child_count == 0) {
// First child is an atom, use it as the operator
node = expr_node_create(temp_children[0]->token, line);
// Move remaining children
node->child_count = temp_count - 1;
if (node->child_count > 0) {
node->children = (ExprNode **)safe_malloc(sizeof(ExprNode *) * node->child_count);
for (size_t i = 0; i < node->child_count; i++) {
node->children[i] = temp_children[i + 1];
}
}
// Free the first child since we used its token
expr_free(temp_children[0]);
} else {
// No operator or first child is a list
node = expr_node_create("list", line);
node->children = temp_children;
node->child_count = temp_count;
}
if (temp_count == 0) {
free(temp_children);
}
return node;
}
// Parse one expression at *ptr: a parenthesised list or a single atom.
//
// Leading whitespace is skipped first. Returns NULL when the input is
// exhausted or no token could be read; otherwise returns a newly allocated
// node and leaves *ptr just past the consumed text.
static ExprNode *parse_expression(const char **ptr, int line) {
    *ptr = skip_whitespace(*ptr);

    switch (**ptr) {
    case '\0':
        return NULL;
    case '(':
        return parse_list(ptr, line);
    default:
        break;
    }

    // Anything else is an atom: wrap its text in a leaf node.
    char *text = parse_token(ptr, line);
    if (text == NULL) {
        return NULL;
    }
    ExprNode *leaf = expr_node_create(text, line);
    free(text); // the node keeps its own copy
    return leaf;
}
// Parse the first expression found in `source`.
//
// source:     text to parse; need not be NUL-terminated.
// source_len: maximum number of bytes of `source` to examine. Parsing stops
//             at the first NUL byte or after source_len bytes, whichever
//             comes first.
//
// Returns the root of a newly allocated tree that the caller releases with
// expr_free(), or NULL when `source` is NULL, source_len is 0, or the input
// holds only whitespace. Input after the first expression is ignored.
//
// Every node is created with line number 1; nothing in this parser counts
// newlines.
ExprNode *expr_parse(const char *source, size_t source_len) {
    if (!source || source_len == 0) {
        return NULL;
    }

    // The internal parser walks until a NUL terminator, so hand it a bounded,
    // NUL-terminated copy. This keeps it from reading past source_len when
    // the caller's buffer is not terminated. Nodes copy their tokens, so the
    // scratch buffer can be dropped as soon as parsing is done.
    const char *nul = memchr(source, '\0', source_len);
    size_t len = nul ? (size_t)(nul - source) : source_len;

    char *text = safe_malloc(len + 1);
    memcpy(text, source, len);
    text[len] = '\0';

    const char *cursor = text;
    ExprNode *root = parse_expression(&cursor, 1);
    free(text);
    return root;
}
// Release a node together with its whole subtree.
// Frees the token string, every child recursively, the child pointer array,
// and finally the node itself. Passing NULL is a no-op.
void expr_free(ExprNode *node) {
    if (node == NULL) {
        return;
    }

    size_t idx = 0;
    while (idx < node->child_count) {
        expr_free(node->children[idx]);
        idx++;
    }
    free(node->children);
    free(node->token);
    free(node);
}
// Debug helper: dump a tree to stdout, one node per line.
//
// node:   subtree to print; NULL prints nothing.
// indent: number of leading spaces for this node; each level of children is
//         printed with one more.
//
// Nodes without children are labelled "Atom", all others "List".
void expr_print(ExprNode *node, int indent) {
    if (node == NULL) {
        return;
    }

    for (int col = 0; col < indent; ++col) {
        fputs(" ", stdout);
    }

    if (node->child_count == 0) {
        printf("Atom: '%s' (line %d)\n", node->token, node->line);
        return;
    }

    printf("List: '%s' (line %d) [%zu children]\n",
           node->token, node->line, node->child_count);
    const int child_indent = indent + 1;
    for (size_t k = 0; k < node->child_count; ++k) {
        expr_print(node->children[k], child_indent);
    }
}