2 * Copyright (C) 2005 Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 item_t* item_new (symbol_t* left, rule_t* right, symbol_t* lookahead)
31 item = g_malloc (sizeof (item_t));
34 item->dot = grammar_get_rule (right);
35 item->lookahead = lookahead;
39 item_t* item_copy (item_t* item)
43 newitem = item_new (symbol_copy (item->left), rule_copy (item->right),
44 symbol_copy (item->lookahead));
45 n = g_list_position (grammar_get_rule (item->right), item->dot);
46 newitem->dot = g_list_nth (grammar_get_rule (newitem->right), n);
50 gint item_cmp (const item_t* a, const item_t* b)
55 if ((c = symbol_cmp (a->left, b->left)) != 0)
57 if ((c = rule_cmp (a->right, b->right)) != 0)
59 if ((c = symbol_cmp (a->lookahead, b->lookahead)) != 0)
61 na = g_list_position (grammar_get_rule (a->right), a->dot);
62 nb = g_list_position (grammar_get_rule (b->right), b->dot);
70 gboolean item_equal (gconstpointer data1, gconstpointer data2)
72 return (item_cmp (data1, data2) == 0);
75 guint item_hash (gconstpointer data)
79 item = (item_t*) data;
80 hash = rule_hash (item->right) * 37 + symbol_hash (item->left);
81 hash = hash * 37 + symbol_hash (item->lookahead);
85 void item_delete (item_t* item)
88 rule_delete (item->right);
89 g_free (item->lookahead);
94 void item_print (item_t* item)
97 fprintf (stdout, "%s -> ", g_quark_to_string (item->left->value));
98 l = grammar_get_rule (item->right);
102 symbol = (symbol_t*) l->data;
105 fprintf (stdout, ".");
107 fprintf (stdout, " %s ", g_quark_to_string (symbol->value));
110 if (item->dot == NULL)
112 fprintf (stdout, ".");
114 fprintf (stdout, ", %s\n", g_quark_to_string (item->lookahead->value));
118 GHashTable* item_set_new ()
120 return g_hash_table_new_full (item_hash, item_equal, item_delete, NULL);
123 gboolean item_set_add (GHashTable* item_set, item_t* item)
125 if (!g_hash_table_lookup_extended (item_set, item, NULL, NULL))
127 g_hash_table_insert (item_set, item, NULL);
133 static void put_key_on_list (gpointer key, gpointer val, gpointer data)
137 *l = g_list_prepend (*l, key);
140 gboolean item_set_equal (gconstpointer a, gconstpointer b)
144 if (g_hash_table_size (a) != g_hash_table_size (b))
147 g_hash_table_foreach (a, put_key_on_list, (gpointer) &l);
149 while (l != NULL && equal == TRUE)
151 equal = g_hash_table_lookup_extended (b, l->data, NULL, NULL);
158 void hash_item (gpointer key, gpointer val, gpointer data)
161 hash = (guint*) data;
162 *hash = *hash * 37 + item_hash (key);
165 guint item_set_hash (gconstpointer a)
168 g_hash_table_foreach (a, hash_item, &hash);
172 void item_set_add_each (gpointer key, gpointer val, gpointer data)
174 item_set_add (data, item_copy (key));
177 GHashTable* item_set_copy (GHashTable* item_set)
179 GHashTable* newitem_set;
180 newitem_set = item_set_new ();
181 g_hash_table_foreach (item_set, item_set_add_each, newitem_set);
186 void item_set_print_each (gpointer key, gpointer val, gpointer data)
191 void item_set_print (GHashTable* item_set)
193 g_hash_table_foreach (item_set, item_set_print_each, NULL);
197 rule_t* rule_new_item (item_t* item)
203 l = g_list_next (item->dot);
206 rule_append (rule, symbol_copy (l->data));
209 rule_append (rule, symbol_copy (item->lookahead));
214 void item_set_closure_step (GHashTable* item_set, grammar_t* grammar,
215 GHashTable* first, item_t* item)
217 if (item->dot != NULL)
220 symbol = (symbol_t*) item->dot->data;
221 if (symbol->terminal == FALSE)
226 rule = rule_new_item (item);
227 terminals = first_rule (first, rule);
229 rules = grammar_get_rules (grammar, symbol);
230 while (rules != NULL)
233 lookahead = terminals;
234 while (lookahead != NULL)
238 rule = rule_copy (rules->data);
239 newitem = item_new (symbol_copy (symbol), rule,
240 symbol_copy (lookahead->data));
241 if (!item_set_add (item_set, newitem))
242 item_delete (newitem);
243 lookahead = g_list_next (lookahead);
245 rules = g_list_next (rules);
247 g_list_free (terminals);
252 GHashTable* item_set_closure (GHashTable* item_set, grammar_t* grammar,
258 size = g_hash_table_size (item_set);
263 g_hash_table_foreach (item_set, put_key_on_list, &l);
266 item_set_closure_step (item_set, grammar, first, l->data);
270 size = g_hash_table_size (item_set);
271 } while (last_size < size);
275 GHashTable* item_set_goto (GHashTable* item_set, grammar_t* grammar,
276 GHashTable* first, symbol_t* symbol)
279 GHashTable* newitem_set;
280 newitem_set = item_set_new ();
282 g_hash_table_foreach (item_set, put_key_on_list, &l);
286 item = item_copy (l->data);
287 if (item->dot == NULL || !symbol_equal (item->dot->data, symbol))
293 item->dot = g_list_next (item->dot);
294 if (!item_set_add (newitem_set, item))
299 return item_set_closure (newitem_set, grammar, first);
304 * This part is a little tricky. We can simply take an approach similar
305 * to the goto approach, adding a new set for every other set and grammar
306 * symbol every time we get a new set.
307 * Although a similar argument about the goto may apply (which is a very
308 * good reason to optimize it), the case for the collection is worse, since
309 * we will be building a new set for the number of sets in the collection
310 * times the number of symbol grammar. (Not building in fact, but computing
312 * Here is an approach that may work:
313 * For each *new* item set, we compute its gotos. Each goto is looked up
314 * in the collection. If it's a new item set, it's included in a list of
315 * new item sets. To compute the gotos of an item set, we should pick only
316 * the grammar symbols that appear in the items' dots. That solves the
317 * problem of getting a list of all the grammar symbols.
318 * So, there will be a list of item sets that we must iterate through. We
319 * add each new list to the hash table too, so we can look it up when we
320 * get a new one. When the list is finished, we are done and do not have to
321 * check the hash table size for each iteration.
322 * So, we need the following functions:
323 * One to check if an item set is already there and add it if it's not.
324 * One to get the symbols hash table, so we can put the computed goto
325 * there anyway. So, that's an add and a lookup.
329 * TODO: associate a number to each item set.
330 * Use a structure to the collection with a counter.
331 * Use another structure to the value in the hash table for each item set.
332 * This structure will have the symbol hash table (for the goto) and the
333 * number associated with the item set.
334 * In fact, the counter may be the hash table size.
337 GHashTable* item_collection_new ()
339 return g_hash_table_new_full (item_set_hash, item_set_equal,
340 g_hash_table_destroy, g_hash_table_destroy);
343 gboolean item_collection_add (GHashTable* collection, GHashTable* item_set,
346 if (!g_hash_table_lookup_extended (collection, item_set,
347 (gpointer*)key, NULL))
350 symbols = g_hash_table_new_full (symbol_hash, symbol_equal,
352 g_hash_table_insert (collection, item_set, symbols);
358 GHashTable* item_collection_lookup (GHashTable* collection,
359 GHashTable* item_set)
362 if (!g_hash_table_lookup_extended (collection, item_set,
363 NULL, (gpointer*)&symbols))
370 #define HASH_ITEM_SET(item_set) (item_set)
372 void item_collection_print_each (gpointer key, gpointer val, gpointer data)
374 GHashTable* item_set;
375 item_set = (GHashTable*) key;
376 fprintf (stdout, "Item %p:\n", HASH_ITEM_SET(key));
377 item_set_print (item_set);
378 fprintf (stdout, "\n");
381 void item_set_print_goto (gpointer key, gpointer val, gpointer data)
384 symbol = (symbol_t*) key;
385 fprintf (stdout, "GOTO (%p, %s) =\t %p\n", HASH_ITEM_SET(data),
386 g_quark_to_string (symbol->value), HASH_ITEM_SET(val));
389 void item_collection_print_goto (gpointer key, gpointer val, gpointer data)
392 symbols = (GHashTable*) val;
393 g_hash_table_foreach (symbols, item_set_print_goto, key);
394 fprintf (stdout, "\n");
397 void item_collection_print (GHashTable* collection)
399 g_hash_table_foreach (collection, item_collection_print_each, NULL);
400 g_hash_table_foreach (collection, item_collection_print_goto, NULL);
404 GHashTable* item_collection_goto (GHashTable* collection, grammar_t* grammar,
405 GHashTable* first, GHashTable* item_set,
410 GHashTable* newitem_set;
411 GHashTable* goto_item_set;
412 GHashTable* old_item_set;
415 * Is it a new item set? Why should we insert a copy of it? Do we
416 * destroy the original later?
418 * TODO: Check why this code was once here. WTF! Caused some damn
422 newitem_set = item_set_copy (item_set);
423 if (!item_collection_add (collection, newitem_set, NULL))
425 g_hash_table_destroy (newitem_set);
430 * Is there a computed goto from this item set with this symbol
433 symbols = item_collection_lookup (collection, item_set);
434 if (g_hash_table_lookup_extended (symbols, symbol, NULL, NULL))
440 * If the computed goto already exists in the table, retrieve it and
441 * add the entry in the table pointing to it instead of the newly
442 * computed item set. Destroy the new copy. We don't need it. Only
443 * return the computed goto if it's a new item set.
445 goto_item_set = item_set_goto (item_set, grammar, first, symbol);
446 if (!item_collection_add (collection, goto_item_set, &old_item_set))
448 g_hash_table_insert (symbols, symbol, old_item_set);
449 g_hash_table_destroy (goto_item_set);
454 g_hash_table_insert (symbols, symbol, goto_item_set);
455 return goto_item_set;
459 void get_symbol_at_dot (gpointer key, gpointer val, gpointer data)
464 item = (item_t*) key;
465 symbols = (GHashTable*) data;
466 if (item->dot != NULL)
468 symbol = item->dot->data;
469 if (!g_hash_table_lookup_extended (symbols, symbol, NULL, NULL))
471 g_hash_table_insert (symbols, symbol, NULL);
476 /* Get the symbols at the dot in this item set */
477 GList* item_set_symbols_at_dot (GHashTable* item_set)
482 symbols = g_hash_table_new_full (symbol_hash, symbol_equal,
484 g_hash_table_foreach (item_set, get_symbol_at_dot, symbols);
485 g_hash_table_foreach (symbols, put_key_on_list, &symbols_list);
486 g_hash_table_destroy (symbols);
490 GHashTable* item_set_collection (grammar_t* grammar, GHashTable* first,
494 GHashTable* collection;
495 GHashTable* item_set;
498 GList* new_item_sets;
500 /* Augmented grammar has start symbol S' -> S */
502 rule_append (rule, symbol_copy (start));
504 /* First item set is the closure of the item set with S' -> . S , $ */
505 item = item_new (symbol_new (FALSE, -1), rule, symbol_new (TRUE, 0));
506 item_set = item_set_new ();
507 item_set_add (item_set, item);
508 item_set_closure (item_set, grammar, first);
510 /* Insert first item set in the collection of item sets and start working */
511 collection = g_hash_table_new_full (item_set_hash, item_set_equal,
512 g_hash_table_destroy, NULL);
513 item_collection_add (collection, item_set, NULL);
515 new_item_sets = g_list_append (NULL, item_set);
517 while (new_item_sets != NULL)
519 GHashTable* next_item_set;
520 GHashTable* new_item_set;
523 * For each item, compute the goto for the symbol at its
524 * dot. Should be done only for each symbol at a dot in the
525 * whole item set. Only new item sets are put in the new item
526 * sets list. Check item_collection_goto.
528 * TODO: Only do this for each symbol at a dot in the item set.
530 next_item_set = (GHashTable*) new_item_sets->data;
531 symbols = item_set_symbols_at_dot (next_item_set);
532 while (symbols != NULL)
535 symbol = (symbol_t*) symbols->data;
537 item_collection_goto (collection, grammar, first,
538 next_item_set, symbol)) != NULL)
540 g_list_append (new_item_sets, new_item_set);
542 symbols = g_list_next (symbols);
544 g_list_free (symbols);
545 new_item_sets = g_list_next (new_item_sets);
548 g_list_free (new_item_sets);
551 item_collection_print (collection);