First version of libgrammatic with rdp and lr0 parser cascardo@tlscascardo--private,libgrammatic--dev--0.1--base-0
authorThadeu Lima de Souza Cascardo <cascardo@dcc.ufmg.br>
Sun, 21 Aug 2005 15:49:35 +0000 (15:49 +0000)
committerThadeu Lima de Souza Cascardo <cascardo@dcc.ufmg.br>
Sun, 21 Aug 2005 15:49:35 +0000 (15:49 +0000)
Recursive descent parser and LR(0) parser are implemented. Table
generator for LR(0) is yet to be written.

git-archimport-id: cascardo@tlscascardo--private/libgrammatic--dev--0.1--base-0

lr0.c [new file with mode: 0644]
lr0.h [new file with mode: 0644]
parser.c [new file with mode: 0644]
parser.h [new file with mode: 0644]
rdp.c [new file with mode: 0644]
rdp.h [new file with mode: 0644]

diff --git a/lr0.c b/lr0.c
new file mode 100644 (file)
index 0000000..ad40ec4
--- /dev/null
+++ b/lr0.c
@@ -0,0 +1,204 @@
+#include "lr0.h"
+#include <stdlib.h>
+
+transition_t* transition_new (gint action, gint state)
+{
+  transition_t* transition;
+  transition = g_malloc (sizeof (transition_t));
+  transition->action = action;
+  transition->state = state;
+  return transition;
+}
+
+static void lr0_push (lr0_t* parser, gint st, gpointer attrib)
+{
+  state_t* state;
+  state = g_malloc (sizeof (state_t));
+  state->state = st;
+  state->attrib = attrib;
+  parser->stack = g_list_prepend (parser->stack, state);
+}
+
+static gboolean lr0_pop (lr0_t* parser, gpointer* attrib)
+{
+
+  GList* l;
+  state_t* state;
+  if ((l = g_list_first (parser->stack)) == NULL)
+    return FALSE;
+  parser->stack = g_list_remove_link (l, l);
+  state = (state_t*) l->data;
+  if (attrib)
+    *attrib = state->attrib;
+  g_free (state);
+  g_list_free (l);
+  return TRUE;
+
+}
+
+lr0_t* lr0_new (nextcb cb, gpointer data)
+{
+
+  lr0_t* parser;
+
+  parser = g_malloc (sizeof (lr0_t));
+  parser->cb = cb;
+  parser->data = data;
+
+  parser->stack = NULL;
+  parser_push (parser, 0, NULL);
+  parser->table = g_hash_table_new_full (g_direct_hash, g_direct_equal,
+                                        NULL, g_hash_table_destroy);
+  parser->rules = NULL;
+
+  return parser;
+
+}
+
+void lr0_delete (lr0_t* parser)
+{
+
+  GList* l;
+
+  for (l = g_list_first (parser->stack); l != NULL; l = g_list_next (l))
+    {
+      g_free (l->data);
+    }
+
+  g_list_free (parser->stack);
+
+  g_hash_table_destroy (parser->table);
+
+  g_free (parser);
+
+}
+
+void lr0_add (lr0_t* parser, gint state, symbol_t* symbol,
+                transition_t* transition)
+{
+
+  GHashTable* table;
+
+  if (!g_hash_table_lookup_extended (parser->table, state,
+                                    NULL, (gpointer*) &table))
+    {
+      table = g_hash_table_new_full (symbol_hash, symbol_equal,
+                                    g_free, g_free);
+      g_hash_table_insert (parser->table, state, table);
+    }
+
+  g_hash_table_insert (table, symbol, transition);
+
+}
+
+gboolean lr0_lookup (lr0_t* parser, gint state, symbol_t* symbol,
+                       transition_t** transition)
+{
+
+  GHashTable* table;
+  transition_t* trans;
+
+  if (!g_hash_table_lookup_extended (parser->table, state,
+                                    NULL, (gpointer*) &table))
+    {
+      return FALSE;
+    }
+
+  if (!g_hash_table_lookup_extended (table, symbol,
+                                    NULL, (gpointer*) &trans))
+    {
+      return FALSE;
+    }
+
+  if (transition)
+    *transition = trans;
+
+  return TRUE;
+
+}
+
+gpointer leaf_new (gpointer data)
+{
+  return g_node_new (data);
+}
+
+gpointer tree_new (rule_t* rule)
+{
+  return g_node_new (rule);
+}
+
+gpointer tree_add (gpointer tree, gpointer data)
+{
+  return g_node_prepend (tree, data);
+}
+
+gpointer lr0_build (lr0_t* parser)
+{
+
+  state_t* state;
+  symbol_t* symbol;
+  transition_t* transition;
+  gpointer attrib;
+  GList* l;
+
+  symbol = g_malloc (sizeof (symbol_t));
+
+  symbol->value = parser->cb (parser->data, &attrib);
+  symbol->terminal = FALSE;
+
+  while (1)
+    {
+
+      l = g_list_first (parser->stack);
+      state = (state_t*) l->data;
+      if (!lr0_lookup (parser, state->state, symbol, &transition))
+       return NULL;
+
+      if (transition->action == PARSER_SHIFT)
+       {
+         lr0_push (parser, transition->state, leaf_new (attrib));
+         symbol->value = parser->cb (parser->data, &attrib);
+         symbol->terminal = FALSE;
+       }
+
+      else if (transition->action == PARSER_REDUCE)
+       {
+
+         rule_t* rule;
+         state_t* state;
+         transition_t* trans;
+         GList* l;
+         gpointer attrib;
+
+         rule = g_list_nth_data (parser->rules, transition->state);
+         attrib = tree_new (rule);
+
+         for (l = g_list_last (rule->right);
+              l != NULL;
+              l = g_list_previous (l))
+           {
+             gpointer attr;
+             if (!lr0_pop (parser, &attr))
+               return NULL;
+             tree_add (attrib, attr);
+           }
+
+         l = g_list_first (parser->stack);
+         state = (state_t*) l->data;
+         lr0_lookup (parser, state->state, rule->left, &trans);
+         lr0_push (parser, trans->state, attrib);
+
+       }
+
+      else if (transition->action == PARSER_ACCEPT)
+       {
+         l = g_list_first (parser->stack);
+         state = (state_t*) l->data;
+         return state->attrib;
+       }
+
+    }
+
+  return NULL;
+
+}
diff --git a/lr0.h b/lr0.h
new file mode 100644 (file)
index 0000000..b79590d
--- /dev/null
+++ b/lr0.h
@@ -0,0 +1,40 @@
+#ifndef LR0_H
+#define LR0_H
+
+#include "parser.h"
+
+typedef enum
+  {
+    PARSER_SHIFT,
+    PARSER_REDUCE,
+    PARSER_ACCEPT,
+  } action_t;
+
+typedef struct
+{
+  gint state;
+  gpointer attrib;
+} state_t;
+
+typedef struct
+{
+  gint action;
+  gint state;
+} transition_t;
+
+typedef struct
+{
+  nextcb cb;
+  gpointer data;
+  GList* stack;
+  GHashTable* table;
+  GList* rules;
+} lr0_t;
+
+transition_t* transition_new (gint, gint);
+lr0_t* lr0_new (nextcb, gpointer);
+void lr0_delete (lr0_t*);
+void lr0_add (lr0_t*, gint, symbol_t*, transition_t*);
+gpointer lr0_build (lr0_t*);
+
+#endif
diff --git a/parser.c b/parser.c
new file mode 100644 (file)
index 0000000..e3f098c
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,42 @@
+#include "parser.h"
+
+symbol_t* symbol_new (gboolean terminal, gint value)
+{
+  symbol_t* symbol;
+  symbol = g_malloc (sizeof (symbol_t));
+  symbol->terminal = terminal;
+  symbol->value = value;
+  return symbol;
+}
+
+rule_t* rule_new (symbol_t* left)
+{
+  rule_t* rule;
+  rule = g_malloc (sizeof (rule_t));
+  rule->left = left;
+  rule->right = NULL;
+  return rule;
+}
+
+void rule_append (rule_t* rule, symbol_t* right)
+{
+  rule->right = g_list_append (rule->right, right);
+}
+
+guint symbol_hash (gconstpointer data)
+{
+  symbol_t* symbol;
+  symbol = (symbol_t*) data;
+  return g_direct_hash (symbol->value);
+}
+
+gboolean symbol_equal (gconstpointer data1, gconstpointer data2)
+{
+  symbol_t* symbol1;
+  symbol_t* symbol2;
+  symbol1 = (symbol_t*) data1;
+  symbol2 = (symbol_t*) data2;
+  return symbol1->value == symbol2->value &&
+    symbol1->terminal == symbol2->terminal;
+}
+
diff --git a/parser.h b/parser.h
new file mode 100644 (file)
index 0000000..54ee8d0
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,24 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include <glib.h>
+
+typedef gint (*nextcb) (gpointer, gpointer*);
+
+typedef struct
+{
+  gboolean terminal;
+  gint value;
+} symbol_t;
+
+typedef struct
+{
+  symbol_t* left;
+  GList* right;
+} rule_t;
+
+symbol_t* symbol_new (gboolean, gint);
+rule_t* rule_new (symbol_t*);
+void rule_append (rule_t*, symbol_t* right);
+
+#endif
diff --git a/rdp.c b/rdp.c
new file mode 100644 (file)
index 0000000..a6fca7c
--- /dev/null
+++ b/rdp.c
@@ -0,0 +1,148 @@
+#include "rdp.h"
+#include <stdlib.h>
+
+rdp_t* rdp_new (nextcb cb, gpointer data)
+{
+
+  rdp_t* parser;
+
+  parser = g_malloc (sizeof (rdp_t));
+  parser->cb = cb;
+  parser->data = data;
+
+  parser->rules = NULL;
+
+  parser->buffer = g_list_append (NULL, NULL);
+
+  return parser;
+
+}
+
+void rdp_delete (rdp_t* parser)
+{
+
+  g_free (parser);
+
+}
+
+gpointer leaf_new (gpointer data)
+{
+  return g_node_new (data);
+}
+
+gpointer tree_new (rule_t* rule)
+{
+  return g_node_new (rule);
+}
+
+gpointer tree_add (gpointer tree, gpointer data)
+{
+  return g_node_append (tree, data);
+}
+
+void tree_delete (gpointer tree)
+{
+  g_node_destroy (tree);
+}
+
+symbol_t* buffer_next (rdp_t* parser, gpointer* attrib)
+{
+
+  buffer_t* buffer;
+
+  if (parser->buffer->next == NULL)
+    {
+      buffer = g_malloc (sizeof (buffer_t));
+      buffer->symbol = g_malloc (sizeof (symbol_t));
+      buffer->symbol->terminal = TRUE;
+      buffer->symbol->value = parser->cb (parser->data, &(buffer->attrib));
+      g_list_append (parser->buffer, buffer);
+    }
+
+  parser->buffer = g_list_next (parser->buffer);
+  buffer = (buffer_t*) parser->buffer->data;
+
+  if (attrib)
+    *attrib = buffer->attrib;
+
+  return buffer->symbol;
+      
+}
+
+gboolean rdp_step (rdp_t* parser, symbol_t* symbol, gpointer* attrib)
+{
+
+  GList* l;
+  GList* buffer;
+  gpointer attr;
+
+  buffer = parser->buffer;
+
+  if (symbol != NULL && symbol->terminal)
+    {
+      symbol_t* s;
+      s = buffer_next (parser, &attr);
+      if (!symbol_equal (symbol, s))
+       {
+         parser->buffer = buffer;
+         return FALSE;
+       }
+      *attrib = leaf_new (attr);
+      return TRUE;
+    }
+
+  for (l = g_list_first (parser->rules); l != NULL; l = g_list_next (l))
+    {
+
+      rule_t* rule;
+
+      rule = (rule_t*) l->data;
+
+      if (symbol == NULL || symbol_equal (symbol, rule->left))
+       {
+
+         GList* m;
+
+         *attrib = tree_new (rule);
+
+         for (m = g_list_first (rule->right); m != NULL; m = g_list_next (m))
+           {
+
+             symbol_t* s;
+
+             s = (symbol_t*) m->data;
+
+             if (!rdp_step (parser, s, &attr))
+               {
+                 parser->buffer = buffer;
+                 break;
+               }
+
+             tree_add (*attrib, attr);
+
+           }
+
+         if (m == NULL)
+           return TRUE;
+         else
+           tree_delete (*attrib);
+
+       }
+
+    }
+
+  return FALSE;
+
+}
+
+gpointer rdp_build (rdp_t* parser)
+{
+
+  gpointer attrib;
+
+  if (rdp_step (parser, NULL, &attrib))
+    return attrib;
+
+  return NULL;
+
+}
diff --git a/rdp.h b/rdp.h
new file mode 100644 (file)
index 0000000..fc700fc
--- /dev/null
+++ b/rdp.h
@@ -0,0 +1,24 @@
+#ifndef RDP_H
+#define RDP_H
+
+#include "parser.h"
+
+typedef struct
+{
+  symbol_t* symbol;
+  gpointer attrib;
+} buffer_t;
+
+typedef struct
+{
+  nextcb cb;
+  gpointer data;
+  GList* rules;
+  GList* buffer;
+} rdp_t;
+
+rdp_t* rdp_new (nextcb, gpointer);
+void rdp_delete (rdp_t*);
+gpointer rdp_build (rdp_t*);
+
+#endif