From b634d3dac61fc6bb42bafd01eae3dbf6ae8e961d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Sun, 21 Aug 2005 15:49:35 +0000 Subject: [PATCH] First version of libgrammatic with rdp and lr0 parser Recursive descent parser and LR(0) parser are implemented. Table generator for LR(0) is yet to be written. git-archimport-id: cascardo@tlscascardo--private/libgrammatic--dev--0.1--base-0 --- lr0.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lr0.h | 40 +++++++++++ parser.c | 42 ++++++++++++ parser.h | 24 +++++++ rdp.c | 148 ++++++++++++++++++++++++++++++++++++++++ rdp.h | 24 +++++++ 6 files changed, 482 insertions(+) create mode 100644 lr0.c create mode 100644 lr0.h create mode 100644 parser.c create mode 100644 parser.h create mode 100644 rdp.c create mode 100644 rdp.h diff --git a/lr0.c b/lr0.c new file mode 100644 index 0000000..ad40ec4 --- /dev/null +++ b/lr0.c @@ -0,0 +1,204 @@ +#include "lr0.h" +#include + +transition_t* transition_new (gint action, gint state) +{ + transition_t* transition; + transition = g_malloc (sizeof (transition_t)); + transition->action = action; + transition->state = state; + return transition; +} + +static void lr0_push (lr0_t* parser, gint st, gpointer attrib) +{ + state_t* state; + state = g_malloc (sizeof (state_t)); + state->state = st; + state->attrib = attrib; + parser->stack = g_list_prepend (parser->stack, state); +} + +static gboolean lr0_pop (lr0_t* parser, gpointer* attrib) +{ + + GList* l; + state_t* state; + if ((l = g_list_first (parser->stack)) == NULL) + return FALSE; + parser->stack = g_list_remove_link (l, l); + state = (state_t*) l->data; + if (attrib) + *attrib = state->attrib; + g_free (state); + g_list_free (l); + return TRUE; + +} + +lr0_t* lr0_new (nextcb cb, gpointer data) +{ + + lr0_t* parser; + + parser = g_malloc (sizeof (lr0_t)); + parser->cb = cb; + parser->data = data; + + parser->stack = NULL; + parser_push (parser, 0, NULL); + parser->table = g_hash_table_new_full (g_direct_hash, g_direct_equal, + NULL, g_hash_table_destroy); + parser->rules = NULL; + + return parser; + +} + +void lr0_delete (lr0_t* parser) +{ + + GList* l; + + for (l = g_list_first (parser->stack); l != NULL; l = g_list_next (l)) + { + g_free (l->data); + } + + g_list_free (parser->stack); + + g_hash_table_destroy (parser->table); + + g_free (parser); + +} + +void lr0_add (lr0_t* parser, gint state, symbol_t* symbol, + transition_t* transition) +{ + + GHashTable* table; + + if (!g_hash_table_lookup_extended (parser->table, state, + NULL, (gpointer*) &table)) + { + table = g_hash_table_new_full (symbol_hash, symbol_equal, + g_free, g_free); + g_hash_table_insert (parser->table, state, table); + } + + g_hash_table_insert (table, symbol, transition); + +} + +gboolean lr0_lookup (lr0_t* parser, gint state, symbol_t* symbol, + transition_t** transition) +{ + + GHashTable* table; + transition_t* trans; + + if (!g_hash_table_lookup_extended (parser->table, state, + NULL, (gpointer*) &table)) + { + return FALSE; + } + + if (!g_hash_table_lookup_extended (table, symbol, + NULL, (gpointer*) &trans)) + { + return FALSE; + } + + if (transition) + *transition = trans; + + return TRUE; + +} + +gpointer leaf_new (gpointer data) +{ + return g_node_new (data); +} + +gpointer tree_new (rule_t* rule) +{ + return g_node_new (rule); +} + +gpointer tree_add (gpointer tree, gpointer data) +{ + return g_node_prepend (tree, data); +} + +gpointer lr0_build (lr0_t* parser) +{ + + state_t* state; + symbol_t* symbol; + transition_t* transition; + gpointer attrib; + GList* l; + + symbol = g_malloc (sizeof (symbol_t)); + + symbol->value = parser->cb (parser->data, &attrib); + symbol->terminal = FALSE; + + while (1) + { + + l = g_list_first (parser->stack); + state = (state_t*) l->data; + if (!lr0_lookup (parser, state->state, symbol, &transition)) + return NULL; + + if (transition->action == PARSER_SHIFT) + { + lr0_push (parser, transition->state, leaf_new (attrib)); + symbol->value = parser->cb (parser->data, &attrib); + symbol->terminal = FALSE; + } + + else if (transition->action == PARSER_REDUCE) + { + + rule_t* rule; + state_t* state; + transition_t* trans; + GList* l; + gpointer attrib; + + rule = g_list_nth_data (parser->rules, transition->state); + attrib = tree_new (rule); + + for (l = g_list_last (rule->right); + l != NULL; + l = g_list_previous (l)) + { + gpointer attr; + if (!lr0_pop (parser, &attr)) + return NULL; + tree_add (attrib, attr); + } + + l = g_list_first (parser->stack); + state = (state_t*) l->data; + lr0_lookup (parser, state->state, rule->left, &trans); + lr0_push (parser, trans->state, attrib); + + } + + else if (transition->action == PARSER_ACCEPT) + { + l = g_list_first (parser->stack); + state = (state_t*) l->data; + return state->attrib; + } + + } + + return NULL; + +} diff --git a/lr0.h b/lr0.h new file mode 100644 index 0000000..b79590d --- /dev/null +++ b/lr0.h @@ -0,0 +1,40 @@ +#ifndef LR0_H +#define LR0_H + +#include "parser.h" + +typedef enum + { + PARSER_SHIFT, + PARSER_REDUCE, + PARSER_ACCEPT, + } action_t; + +typedef struct +{ + gint state; + gpointer attrib; +} state_t; + +typedef struct +{ + gint action; + gint state; +} transition_t; + +typedef struct +{ + nextcb cb; + gpointer data; + GList* stack; + GHashTable* table; + GList* rules; +} lr0_t; + +transition_t* transition_new (gint, gint); +lr0_t* lr0_new (nextcb, gpointer); +void lr0_delete (lr0_t*); +void lr0_add (lr0_t*, gint, symbol_t*, transition_t*); +gpointer lr0_build (lr0_t*); + +#endif diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..e3f098c --- /dev/null +++ b/parser.c @@ -0,0 +1,42 @@ +#include "parser.h" + +symbol_t* symbol_new (gboolean terminal, gint value) +{ + symbol_t* symbol; + symbol = g_malloc (sizeof (symbol_t)); + symbol->terminal = terminal; + symbol->value = value; + return symbol; +} + +rule_t* rule_new (symbol_t* left) +{ + rule_t* rule; + rule = g_malloc (sizeof (rule_t)); + rule->left = left; + rule->right = NULL; + return rule; +} + +void rule_append (rule_t* rule, symbol_t* right) +{ + rule->right = g_list_append (rule->right, right); +} + +guint symbol_hash (gconstpointer data) +{ + symbol_t* symbol; + symbol = (symbol_t*) data; + return g_direct_hash (symbol->value); +} + +gboolean symbol_equal (gconstpointer data1, gconstpointer data2) +{ + symbol_t* symbol1; + symbol_t* symbol2; + symbol1 = (symbol_t*) data1; + symbol2 = (symbol_t*) data2; + return symbol1->value == symbol2->value && + symbol1->terminal == symbol2->terminal; +} + diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..54ee8d0 --- /dev/null +++ b/parser.h @@ -0,0 +1,24 @@ +#ifndef PARSER_H +#define PARSER_H + +#include + +typedef gint (*nextcb) (gpointer, gpointer*); + +typedef struct +{ + gboolean terminal; + gint value; +} symbol_t; + +typedef struct +{ + symbol_t* left; + GList* right; +} rule_t; + +symbol_t* symbol_new (gboolean, gint); +rule_t* rule_new (symbol_t*); +void rule_append (rule_t*, symbol_t* right); + +#endif diff --git a/rdp.c b/rdp.c new file mode 100644 index 0000000..a6fca7c --- /dev/null +++ b/rdp.c @@ -0,0 +1,148 @@ +#include "rdp.h" +#include + +rdp_t* rdp_new (nextcb cb, gpointer data) +{ + + rdp_t* parser; + + parser = g_malloc (sizeof (rdp_t)); + parser->cb = cb; + parser->data = data; + + parser->rules = NULL; + + parser->buffer = g_list_append (NULL, NULL); + + return parser; + +} + +void rdp_delete (rdp_t* parser) +{ + + g_free (parser); + +} + +gpointer leaf_new (gpointer data) +{ + return g_node_new (data); +} + +gpointer tree_new (rule_t* rule) +{ + return g_node_new (rule); +} + +gpointer tree_add (gpointer tree, gpointer data) +{ + return g_node_append (tree, data); +} + +void tree_delete (gpointer tree) +{ + g_node_destroy (tree); +} + +symbol_t* buffer_next (rdp_t* parser, gpointer* attrib) +{ + + buffer_t* buffer; + + if (parser->buffer->next == NULL) + { + buffer = g_malloc (sizeof (buffer_t)); + buffer->symbol = g_malloc (sizeof (symbol_t)); + buffer->symbol->terminal = TRUE; + buffer->symbol->value = parser->cb (parser->data, &(buffer->attrib)); + g_list_append (parser->buffer, buffer); + } + + parser->buffer = g_list_next (parser->buffer); + buffer = (buffer_t*) parser->buffer->data; + + if (attrib) + *attrib = buffer->attrib; + + return buffer->symbol; + +} + +gboolean rdp_step (rdp_t* parser, symbol_t* symbol, gpointer* attrib) +{ + + GList* l; + GList* buffer; + gpointer attr; + + buffer = parser->buffer; + + if (symbol != NULL && symbol->terminal) + { + symbol_t* s; + s = buffer_next (parser, &attr); + if (!symbol_equal (symbol, s)) + { + parser->buffer = buffer; + return FALSE; + } + *attrib = leaf_new (attr); + return TRUE; + } + + for (l = g_list_first (parser->rules); l != NULL; l = g_list_next (l)) + { + + rule_t* rule; + + rule = (rule_t*) l->data; + + if (symbol == NULL || symbol_equal (symbol, rule->left)) + { + + GList* m; + + *attrib = tree_new (rule); + + for (m = g_list_first (rule->right); m != NULL; m = g_list_next (m)) + { + + symbol_t* s; + + s = (symbol_t*) m->data; + + if (!rdp_step (parser, s, &attr)) + { + parser->buffer = buffer; + break; + } + + tree_add (*attrib, attr); + + } + + if (m == NULL) + return TRUE; + else + tree_delete (*attrib); + + } + + } + + return FALSE; + +} + +gpointer rdp_build (rdp_t* parser) +{ + + gpointer attrib; + + if (rdp_step (parser, NULL, &attrib)) + return attrib; + + return NULL; + +} diff --git a/rdp.h b/rdp.h new file mode 100644 index 0000000..fc700fc --- /dev/null +++ b/rdp.h @@ -0,0 +1,24 @@ +#ifndef RDP_H +#define RDP_H + +#include "parser.h" + +typedef struct +{ + symbol_t* symbol; + gpointer attrib; +} buffer_t; + +typedef struct +{ + nextcb cb; + gpointer data; + GList* rules; + GList* buffer; +} rdp_t; + +rdp_t* rdp_new (nextcb, gpointer); +void rdp_delete (rdp_t*); +gpointer rdp_build (rdp_t*); + +#endif -- 2.20.1