From: Thadeu Lima de Souza Cascardo Date: Thu, 29 Sep 2005 17:09:52 +0000 (+0000) Subject: Added file to compute LR(0) collection of item sets X-Git-Tag: cascardo@tlscascardo--private,libgrammatic--lr1--0.1--base-0~5 X-Git-Url: http://git.cascardo.info/?p=cascardo%2Fgrammar.git;a=commitdiff_plain;h=3a6785fdd5c2b67ba5c79d29ece8ff83e14eba3c Added file to compute LR(0) collection of item sets item.c file contains functions and data types that allow the representation and building of a collection of item sets. Those include the CLOSURE and GOTO functions. git-archimport-id: cascardo@tlscascardo--private/libgrammatic--dev--0.1--patch-13 --- diff --git a/item.c b/item.c new file mode 100644 index 0000000..5a41dca --- /dev/null +++ b/item.c @@ -0,0 +1,366 @@ +#include + +typedef struct +{ + symbol_t* left; + rule_t* right; + GList* dot; +} item_t; + +item_t* item_new (symbol_t* left, rule_t* right) +{ + item_t* item; + item = g_malloc (sizeof (item_t)); + item->left = left; + item->right = right; + item->dot = grammar_get_rule (right); + return item; +} + +item_t* item_copy (item_t* item) +{ + item_t* newitem; + int n; + newitem = item_new (symbol_copy (item->left), rule_copy (item->right)); + n = g_list_position (grammar_get_rule (item->right), item->dot); + newitem->dot = g_list_nth (grammar_get_rule (newitem->right), n); + return newitem; +} + +gint item_cmp (const item_t* a, const item_t* b) +{ + int c; + int na; + int nb; + if ((c = symbol_cmp (a->left, b->left)) != 0) + return c; + if ((c = rule_cmp (a->right, b->right)) != 0) + return c; + na = g_list_position (grammar_get_rule (a->right), a->dot); + nb = g_list_position (grammar_get_rule (b->right), b->dot); + if (na < nb) + return -1; + else if (na > nb) + return 1; + return 0; +} + +gboolean item_equal (gconstpointer data1, gconstpointer data2) +{ + return (item_cmp (data1, data2) == 0); +} + +guint item_hash (gconstpointer data) +{ + item_t* item; + guint hash; + item = (item_t*) data; + hash = rule_hash (item->right) * 37 + symbol_hash (item->left); + return hash; +} + +void item_delete (item_t* item) +{ + g_free (item->left); + rule_delete (item->right); + g_free (item); +} + +GHashTable* item_set_new () +{ + return g_hash_table_new_full (item_hash, item_equal, item_delete, NULL); +} + +gboolean item_set_add (GHashTable* item_set, item_t* item) +{ + if (!g_hash_table_lookup_extended (item_set, item, NULL, NULL)) + { + g_hash_table_insert (item_set, item, NULL); + return TRUE; + } + return FALSE; +} + +gboolean item_set_find (gpointer key, gpointer val, gpointer data) +{ + return !g_hash_table_lookup_extended (data, key, NULL, NULL); +} + +gboolean item_set_equal (gconstpointer a, gconstpointer b) +{ + if (g_hash_table_size (a) != g_hash_table_size (b)) + return FALSE; + return (g_hash_table_find (a, item_set_find, b) == NULL); +} + +void hash_item (gpointer key, gpointer val, gpointer data) +{ + guint* hash; + hash = (guint*) data; + *hash = *hash * 37 + item_hash (key); +} + +guint item_set_hash (gconstpointer a) +{ + guint hash = 0; + g_hash_table_foreach (a, hash_item, &hash); + return hash; +} + +void item_set_add_each (gpointer key, gpointer val, gpointer data) +{ + item_set_add (data, item_copy (key)); +} + +GHashTable* item_set_copy (GHashTable* item_set) +{ + GHashTable* newitem_set; + newitem_set = item_set_new (); + g_hash_table_foreach (item_set, item_set_add_each, newitem_set); + return newitem_set; +} + +void item_set_closure_step (GHashTable* item_set, Grammar* grammar, + item_t* item) +{ + if (item->dot != NULL) + { + symbol_t* symbol; + symbol = (symbol_t*) item->dot->data; + if (symbol->terminal == FALSE) + { + GList* rules; + rules = grammar_get_rules (grammar, symbol); + while (rules != NULL) + { + rule_t* rule; + item_t* newitem; + rule = rule_copy (rules->data); + newitem = item_new (symbol_copy (symbol), rule); + if (!item_set_add (item_set, newitem)) + item_delete (newitem); + rules = g_list_next (rules); + } + } + } +} + +void put_key_on_list (gpointer key, gpointer val, gpointer data) +{ + GList** l; + l = (GList**) data; + *l = g_list_prepend (*l, key); +} + +GHashTable* item_set_closure (GHashTable* item_set, Grammar* grammar) +{ + int size; + int last_size; + GList* l; + size = g_hash_table_size (item_set); + do + { + last_size = size; + l = NULL; + g_hash_table_foreach (item_set, put_key_on_list, &l); + while (l != NULL) + { + item_set_closure_step (item_set, grammar, l->data); + l = g_list_next (l); + } + g_list_free (l); + size = g_hash_table_size (item_set); + } while (last_size < size); + return item_set; +} + +GHashTable* item_set_goto (GHashTable* item_set, Grammar* grammar, + symbol_t* symbol) +{ + GList* l; + GHashTable* newitem_set; + newitem_set = item_set_new (); + l = NULL; + g_hash_table_foreach (item_set, put_key_on_list, &l); + while (l != NULL) + { + item_t* item; + item = item_copy (l->data); + if (item->dot == NULL || !symbol_equal (item->dot->data, symbol)) + { + item_delete (item); + } + else + { + item->dot = g_list_next (item->dot); + if (!item_set_add (newitem_set, item)) + item_delete (item); + } + l = g_list_next (l); + } + return item_set_closure (newitem_set, grammar); +} + + +/* + * This part is a little tricky. We can simply take an approach similar + * to the goto approach, adding a new set for every other set and grammar + * symbol every time we get a new set. + * Although a similar argument about the goto may apply (which is a very + * good reason to optimize it), the case for the collection is worse, since + * we will be building a new set for the number of sets in the collection + * times the number of symbol grammar. (Not building in fact, but computing + * it). + * Here is an approach that may work: + * For each *new* item set, we compute its gotos. Each goto is looked up + * in the collection. If it's a new item set, it's included in a list of + * new item sets. To compute the gotos of an item set, we should pick only + * the grammar symbols that appear in the items' dots. That solves the + * problem of getting a list of all the grammar symbols. + * So, there will be a list of item sets that we must iterate through. We + * add each new list to the hash table too, so we can look it up when we + * get a new one. When the list is finished, we are done and do not have to + * check the hash table size for each iteration. + * So, we need the following functions: + * One to check if an item set is already there and add it if it's not. + * One to get the symbols hash table, so we can put the computed goto + * there anyway. So, that's an add and a lookup. + */ + +/* + * TODO: associate a number to each item set. + * Use a structure to the collection with a counter. + * Use another structure to the value in the hash table for each item set. + * This structure will have the symbol hash table (for the goto) and the + * number associated with the item set. + * In fact, the counter may be the hash table size. + */ + +typedef struct +{ + GHashTable* symbols; + gint code; +} state_t; + +state_t* state_new (gint code) +{ + state_t* state; + state = g_malloc (sizeof (state_t)); + state->code = code; + state->symbols = g_hash_table_new_full (symbol_hash, symbol_equal, + g_free, NULL); + return state; +} + +void state_delete (state_t* state) +{ + g_hash_table_destroy (state->symbols); + g_free (state); +} + +GHashTable* item_collection_new () +{ + return g_hash_table_new_full (item_set_hash, item_set_equal, + g_hash_table_destroy, state_delete); +} + +gboolean item_collection_add (GHashTable* collection, GHashTable* item_set) +{ + if (!g_hash_table_lookup_extended (collection, item_set, NULL, NULL)) + { + state_t* state; + state = state_new (g_hash_table_size (collection)); + g_hash_table_insert (collection, item_set, state); + return TRUE; + } + return FALSE; +} + +state_t* item_collection_lookup (GHashTable* collection, + GHashTable* item_set) +{ + state_t* state; + if (!g_hash_table_lookup_extended (collection, item_set, + NULL, (gpointer*)&state)) + { + return NULL; + } + return state; +} + +GHashTable* item_collection_goto (GHashTable* collection, Grammar* grammar, + GHashTable* item_set, symbol_t* symbol) +{ + state_t* state; + state_t* goto_state; + GHashTable* newitem_set; + GHashTable* goto_item_set; + GHashTable* return_item_set; + newitem_set = item_set_copy (item_set); + if (!item_collection_add (collection, newitem_set)) + { + g_hash_table_destroy (newitem_set); + } + state = item_collection_lookup (collection, item_set); + if (g_hash_table_lookup_extended (state->symbols, symbol, NULL, NULL)) + { + return NULL; + } + goto_item_set = item_set_goto (item_set, grammar, symbol); + if (!item_collection_add (collection, goto_item_set)) + return_item_set = NULL; + else + return_item_set = goto_item_set; + goto_state = item_collection_lookup (collection, goto_item_set); + g_hash_table_insert (state->symbols, symbol, + GINT_TO_POINTER (goto_state->code)); + return return_item_set; +} + +void item_set_collection (Grammar* grammar, symbol_t* start) +{ + GHashTable* collection; + GHashTable* item_set; + item_t* item; + rule_t* rule; + GList* new_item_sets; + rule = rule_new (); + rule_append (rule, symbol_copy (start)); + item = item_new (symbol_new (FALSE, -1), rule); + item_set = item_set_new (); + item_set_add (item_set, item); + item_set_closure (item_set, grammar); + collection = g_hash_table_new_full (item_set_hash, item_set_equal, + g_hash_table_destroy, NULL); + item_collection_add (collection, item_set); + new_item_sets = g_list_append (NULL, item_set); + while (new_item_sets != NULL) + { + GHashTable* next_item_set; + GHashTable* new_item_set; + GList* items; + next_item_set = (GHashTable*) new_item_sets->data; + items = NULL; + g_hash_table_foreach (next_item_set, put_key_on_list, &items); + while (items != NULL) + { + item_t* item; + symbol_t* symbol; + item = (item_t*) items->data; + if (item->dot != NULL) + { + symbol = (symbol_t*) item->dot->data; + if ((new_item_set = + item_collection_goto (collection, grammar, + next_item_set, symbol)) != NULL) + { + g_list_append (new_item_sets, new_item_set); + } + } + items = g_list_next (items); + } + g_list_free (items); + new_item_sets = g_list_next (new_item_sets); + } + g_list_free (new_item_sets); +}