227 lines
5.9 KiB
C
227 lines
5.9 KiB
C
#include "parser.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
/*
 * Allocate `size` bytes or terminate the process.
 *
 * Never returns NULL: when the allocation fails a message is written to
 * stderr and the program exits with status 1.
 *
 * A zero-byte request is rounded up to one byte. malloc(0) is permitted to
 * return NULL even when memory is available, and that result must not be
 * mistaken for memory exhaustion.
 *
 * The caller owns the returned memory and releases it with free().
 */
static void *safe_malloc(size_t size) {
    void *ptr = malloc(size != 0 ? size : 1);
    if (ptr == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        exit(1);
    }
    return ptr;
}
|
|
|
|
// Helper function to create a new node
|
|
static ExprNode *expr_node_create(const char *token, int line) {
|
|
ExprNode *node = (ExprNode *)safe_malloc(sizeof(ExprNode));
|
|
node->token = strdup(token ? token : "");
|
|
node->children = NULL;
|
|
node->child_count = 0;
|
|
node->line = line;
|
|
return node;
|
|
}
|
|
|
|
// Forward declaration
|
|
// parse_list() calls parse_expression() for every list element, and
// parse_expression() calls parse_list() for nested lists, so this prototype
// has to be visible before parse_list() is defined.
static ExprNode *parse_expression(const char **ptr, int line);
|
|
|
|
/*
 * Return a pointer to the first non-whitespace character at or after `ptr`.
 *
 * ptr: NUL-terminated string. If only whitespace remains, the returned
 *      pointer addresses the terminating NUL.
 */
static const char *skip_whitespace(const char *ptr) {
    /*
     * <ctype.h> functions require an argument representable as unsigned char
     * (or EOF); passing a negative plain char is undefined behaviour, so the
     * byte is converted first.
     */
    while (*ptr != '\0' && isspace((unsigned char)*ptr)) {
        ptr++;
    }
    return ptr;
}
|
|
|
|
/*
 * Extract the next token from *ptr.
 *
 * Leading whitespace is skipped, then one of three token shapes is read:
 *   - a double-quoted string: the token keeps both quotes; a backslash
 *     causes the following character to be skipped over, so an escaped quote
 *     does not end the string. An unterminated string runs to the end of
 *     the input.
 *   - a single '(' or ')'.
 *   - a bare atom: every character up to whitespace, a parenthesis, or the
 *     end of the input.
 *
 * ptr:  in/out cursor; advanced past the token on success, left unchanged
 *       when NULL is returned.
 * line: line number used in the diagnostic.
 *
 * Returns a newly allocated NUL-terminated copy of the token that the caller
 * must free(), or NULL when no token is available.
 */
static char *parse_token(const char **ptr, int line) {
    const char *start = skip_whitespace(*ptr);
    if (*start == '\0') {
        /* NOTE(review): this diagnostic goes to stdout while the other
         * diagnostics in this file go to stderr; left as-is in case the
         * exact output is relied upon. */
        printf("Error at line:%d\n", line);
        return NULL;
    }

    const char *end = start;

    if (*start == '"') {
        end++; /* opening quote */
        while (*end != '\0' && *end != '"') {
            if (*end == '\\' && end[1] != '\0') {
                end += 2; /* backslash plus the character it escapes */
            } else {
                end++;
            }
        }
        if (*end == '"') {
            end++; /* closing quote is part of the token */
        }
    } else if (*start == '(' || *start == ')') {
        end++;
    } else {
        /* Cast before isspace(): a negative plain char is undefined
         * behaviour for the <ctype.h> classification functions. */
        while (*end != '\0' && !isspace((unsigned char)*end) &&
               *end != '(' && *end != ')') {
            end++;
        }
    }

    if (end == start) {
        return NULL;
    }

    size_t len = (size_t)(end - start);
    char *token = safe_malloc(len + 1);
    memcpy(token, start, len);
    token[len] = '\0';

    *ptr = end;
    return token;
}
|
|
|
|
// Parse a list (expression starting with '(')
|
|
static ExprNode *parse_list(const char **ptr, int line) {
|
|
// Skip the opening parenthesis
|
|
(*ptr)++;
|
|
|
|
*ptr = skip_whitespace(*ptr);
|
|
if (**ptr == ')') {
|
|
// Empty list
|
|
(*ptr)++;
|
|
return expr_node_create("nil", line);
|
|
}
|
|
|
|
// Parse all children first
|
|
ExprNode **temp_children = NULL;
|
|
size_t temp_count = 0;
|
|
|
|
while (**ptr && **ptr != ')') {
|
|
ExprNode *child = parse_expression(ptr, line);
|
|
if (child) {
|
|
// Resize temp children array
|
|
ExprNode **new_temp = (ExprNode **)safe_malloc(sizeof(ExprNode *) * (temp_count + 1));
|
|
|
|
// Copy existing children
|
|
for (size_t i = 0; i < temp_count; i++) {
|
|
new_temp[i] = temp_children[i];
|
|
}
|
|
|
|
// Add new child
|
|
new_temp[temp_count] = child;
|
|
temp_count++;
|
|
|
|
// Free old array and update
|
|
free(temp_children);
|
|
temp_children = new_temp;
|
|
}
|
|
|
|
*ptr = skip_whitespace(*ptr);
|
|
}
|
|
|
|
if (**ptr == ')') {
|
|
(*ptr)++; // Skip closing parenthesis
|
|
} else {
|
|
fprintf(stderr, "Error: Missing closing parenthesis at line %d\n", line);
|
|
}
|
|
|
|
// Create the actual node
|
|
ExprNode *node;
|
|
if (temp_count > 0 && temp_children[0]->child_count == 0) {
|
|
// First child is an atom, use it as the operator
|
|
node = expr_node_create(temp_children[0]->token, line);
|
|
// Move remaining children
|
|
node->child_count = temp_count - 1;
|
|
if (node->child_count > 0) {
|
|
node->children = (ExprNode **)safe_malloc(sizeof(ExprNode *) * node->child_count);
|
|
for (size_t i = 0; i < node->child_count; i++) {
|
|
node->children[i] = temp_children[i + 1];
|
|
}
|
|
}
|
|
// Free the first child since we used its token
|
|
expr_free(temp_children[0]);
|
|
} else {
|
|
// No operator or first child is a list
|
|
node = expr_node_create("list", line);
|
|
node->children = temp_children;
|
|
node->child_count = temp_count;
|
|
}
|
|
|
|
if (temp_count == 0) {
|
|
free(temp_children);
|
|
}
|
|
|
|
return node;
|
|
}
|
|
|
|
// Parse an expression (either atom or list)
|
|
static ExprNode *parse_expression(const char **ptr, int line) {
|
|
*ptr = skip_whitespace(*ptr);
|
|
|
|
if (!**ptr) return NULL;
|
|
|
|
if (**ptr == '(') {
|
|
return parse_list(ptr, line);
|
|
} else {
|
|
// Parse atom
|
|
char *token = parse_token(ptr, line);
|
|
if (token) {
|
|
ExprNode *node = expr_node_create(token, line);
|
|
free(token);
|
|
return node;
|
|
}
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
// Main parsing function
|
|
ExprNode *expr_parse(const char *source, size_t source_len) {
|
|
if (!source || source_len == 0) return NULL;
|
|
|
|
const char *ptr = source;
|
|
int line = 1;
|
|
|
|
ptr = skip_whitespace(ptr);
|
|
if (!*ptr) return NULL;
|
|
|
|
return parse_expression(&ptr, line);
|
|
}
|
|
|
|
// Free an Expr AST (and all children)
|
|
void expr_free(ExprNode *node) {
|
|
if (!node) return;
|
|
|
|
free(node->token);
|
|
|
|
for (size_t i = 0; i < node->child_count; i++) {
|
|
expr_free(node->children[i]);
|
|
}
|
|
free(node->children);
|
|
free(node);
|
|
}
|
|
|
|
// Debug: print AST (for dev)
|
|
void expr_print(ExprNode *node, int indent) {
|
|
if (!node) return;
|
|
|
|
for (int i = 0; i < indent; i++) {
|
|
printf(" ");
|
|
}
|
|
|
|
if (node->child_count == 0) {
|
|
// Atom
|
|
printf("Atom: '%s' (line %d)\n", node->token, node->line);
|
|
} else {
|
|
// List
|
|
printf("List: '%s' (line %d) [%zu children]\n",
|
|
node->token, node->line, node->child_count);
|
|
|
|
for (size_t i = 0; i < node->child_count; i++) {
|
|
expr_print(node->children[i], indent + 1);
|
|
}
|
|
}
|
|
} |