8 item_t* item_new (symbol_t* left, rule_t* right, symbol_t* lookahead)
11 item = g_malloc (sizeof (item_t));
14 item->dot = grammar_get_rule (right);
15 item->lookahead = lookahead;
19 item_t* item_copy (item_t* item)
23 newitem = item_new (symbol_copy (item->left), rule_copy (item->right),
24 symbol_copy (item->lookahead));
25 n = g_list_position (grammar_get_rule (item->right), item->dot);
26 newitem->dot = g_list_nth (grammar_get_rule (newitem->right), n);
30 gint item_cmp (const item_t* a, const item_t* b)
35 if ((c = symbol_cmp (a->left, b->left)) != 0)
37 if ((c = rule_cmp (a->right, b->right)) != 0)
39 if ((c = symbol_cmp (a->lookahead, b->lookahead)) != 0)
41 na = g_list_position (grammar_get_rule (a->right), a->dot);
42 nb = g_list_position (grammar_get_rule (b->right), b->dot);
50 gboolean item_equal (gconstpointer data1, gconstpointer data2)
52 return (item_cmp (data1, data2) == 0);
55 guint item_hash (gconstpointer data)
59 item = (item_t*) data;
60 hash = rule_hash (item->right) * 37 + symbol_hash (item->left);
61 hash = hash * 37 + symbol_hash (item->lookahead);
65 void item_delete (item_t* item)
68 rule_delete (item->right);
69 g_free (item->lookahead);
74 void item_print (item_t* item)
77 fprintf (stdout, "%s -> ", g_quark_to_string (item->left->value));
78 l = grammar_get_rule (item->right);
82 symbol = (symbol_t*) l->data;
85 fprintf (stdout, ".");
87 fprintf (stdout, " %s ", g_quark_to_string (symbol->value));
90 if (item->dot == NULL)
92 fprintf (stdout, ".");
94 fprintf (stdout, ", %s\n", g_quark_to_string (item->lookahead->value));
98 GHashTable* item_set_new ()
100 return g_hash_table_new_full (item_hash, item_equal, item_delete, NULL);
103 gboolean item_set_add (GHashTable* item_set, item_t* item)
105 if (!g_hash_table_lookup_extended (item_set, item, NULL, NULL))
107 g_hash_table_insert (item_set, item, NULL);
113 static void put_key_on_list (gpointer key, gpointer val, gpointer data)
117 *l = g_list_prepend (*l, key);
120 gboolean item_set_equal (gconstpointer a, gconstpointer b)
124 if (g_hash_table_size (a) != g_hash_table_size (b))
127 g_hash_table_foreach (a, put_key_on_list, (gpointer) &l);
129 while (l != NULL && equal == TRUE)
131 equal = g_hash_table_lookup_extended (b, l->data, NULL, NULL);
138 void hash_item (gpointer key, gpointer val, gpointer data)
141 hash = (guint*) data;
142 *hash = *hash * 37 + item_hash (key);
145 guint item_set_hash (gconstpointer a)
148 g_hash_table_foreach (a, hash_item, &hash);
152 void item_set_add_each (gpointer key, gpointer val, gpointer data)
154 item_set_add (data, item_copy (key));
157 GHashTable* item_set_copy (GHashTable* item_set)
159 GHashTable* newitem_set;
160 newitem_set = item_set_new ();
161 g_hash_table_foreach (item_set, item_set_add_each, newitem_set);
166 void item_set_print_each (gpointer key, gpointer val, gpointer data)
171 void item_set_print (GHashTable* item_set)
173 g_hash_table_foreach (item_set, item_set_print_each, NULL);
177 rule_t* rule_new_item (item_t* item)
183 l = g_list_next (item->dot);
186 rule_append (rule, symbol_copy (l->data));
189 rule_append (rule, symbol_copy (item->lookahead));
194 void item_set_closure_step (GHashTable* item_set, grammar_t* grammar,
195 GHashTable* first, item_t* item)
197 if (item->dot != NULL)
200 symbol = (symbol_t*) item->dot->data;
201 if (symbol->terminal == FALSE)
206 rule = rule_new_item (item);
207 terminals = first_rule (first, rule);
209 rules = grammar_get_rules (grammar, symbol);
210 while (rules != NULL)
213 lookahead = terminals;
214 while (lookahead != NULL)
218 rule = rule_copy (rules->data);
219 newitem = item_new (symbol_copy (symbol), rule,
220 symbol_copy (lookahead->data));
221 if (!item_set_add (item_set, newitem))
222 item_delete (newitem);
223 lookahead = g_list_next (lookahead);
225 rules = g_list_next (rules);
227 g_list_free (terminals);
232 GHashTable* item_set_closure (GHashTable* item_set, grammar_t* grammar,
238 size = g_hash_table_size (item_set);
243 g_hash_table_foreach (item_set, put_key_on_list, &l);
246 item_set_closure_step (item_set, grammar, first, l->data);
250 size = g_hash_table_size (item_set);
251 } while (last_size < size);
255 GHashTable* item_set_goto (GHashTable* item_set, grammar_t* grammar,
256 GHashTable* first, symbol_t* symbol)
259 GHashTable* newitem_set;
260 newitem_set = item_set_new ();
262 g_hash_table_foreach (item_set, put_key_on_list, &l);
266 item = item_copy (l->data);
267 if (item->dot == NULL || !symbol_equal (item->dot->data, symbol))
273 item->dot = g_list_next (item->dot);
274 if (!item_set_add (newitem_set, item))
279 return item_set_closure (newitem_set, grammar, first);
284 * This part is a little tricky. We can simply take an approach similar
285 * to the goto approach, adding a new set for every other set and grammar
286 * symbol every time we get a new set.
287 * Although a similar argument about the goto may apply (which is a very
288 * good reason to optimize it), the case for the collection is worse, since
289 * we will be building a new set for the number of sets in the collection
290 * times the number of symbol grammar. (Not building in fact, but computing
292 * Here is an approach that may work:
293 * For each *new* item set, we compute its gotos. Each goto is looked up
294 * in the collection. If it's a new item set, it's included in a list of
295 * new item sets. To compute the gotos of an item set, we should pick only
296 * the grammar symbols that appear in the items' dots. That solves the
297 * problem of getting a list of all the grammar symbols.
298 * So, there will be a list of item sets that we must iterate through. We
299 * add each new list to the hash table too, so we can look it up when we
300 * get a new one. When the list is finished, we are done and do not have to
301 * check the hash table size for each iteration.
302 * So, we need the following functions:
303 * One to check if an item set is already there and add it if it's not.
304 * One to get the symbols hash table, so we can put the computed goto
305 * there anyway. So, that's an add and a lookup.
309 * TODO: associate a number to each item set.
310 * Use a structure to the collection with a counter.
311 * Use another structure to the value in the hash table for each item set.
312 * This structure will have the symbol hash table (for the goto) and the
313 * number associated with the item set.
314 * In fact, the counter may be the hash table size.
317 GHashTable* item_collection_new ()
319 return g_hash_table_new_full (item_set_hash, item_set_equal,
320 g_hash_table_destroy, g_hash_table_destroy);
323 gboolean item_collection_add (GHashTable* collection, GHashTable* item_set,
326 if (!g_hash_table_lookup_extended (collection, item_set,
327 (gpointer*)key, NULL))
330 symbols = g_hash_table_new_full (symbol_hash, symbol_equal,
332 g_hash_table_insert (collection, item_set, symbols);
338 GHashTable* item_collection_lookup (GHashTable* collection,
339 GHashTable* item_set)
342 if (!g_hash_table_lookup_extended (collection, item_set,
343 NULL, (gpointer*)&symbols))
350 #define HASH_ITEM_SET(item_set) (item_set)
352 void item_collection_print_each (gpointer key, gpointer val, gpointer data)
354 GHashTable* item_set;
355 item_set = (GHashTable*) key;
356 fprintf (stdout, "Item %p:\n", HASH_ITEM_SET(key));
357 item_set_print (item_set);
358 fprintf (stdout, "\n");
361 void item_set_print_goto (gpointer key, gpointer val, gpointer data)
364 symbol = (symbol_t*) key;
365 fprintf (stdout, "GOTO (%p, %s) =\t %p\n", HASH_ITEM_SET(data),
366 g_quark_to_string (symbol->value), HASH_ITEM_SET(val));
369 void item_collection_print_goto (gpointer key, gpointer val, gpointer data)
372 symbols = (GHashTable*) val;
373 g_hash_table_foreach (symbols, item_set_print_goto, key);
374 fprintf (stdout, "\n");
377 void item_collection_print (GHashTable* collection)
379 g_hash_table_foreach (collection, item_collection_print_each, NULL);
380 g_hash_table_foreach (collection, item_collection_print_goto, NULL);
384 GHashTable* item_collection_goto (GHashTable* collection, grammar_t* grammar,
385 GHashTable* first, GHashTable* item_set,
390 GHashTable* newitem_set;
391 GHashTable* goto_item_set;
392 GHashTable* old_item_set;
395 * Is it a new item set? Why should we insert a copy of it? Do we
396 * destroy the original later?
398 * TODO: Check why this code was once here. WTF! Caused some damn
402 newitem_set = item_set_copy (item_set);
403 if (!item_collection_add (collection, newitem_set, NULL))
405 g_hash_table_destroy (newitem_set);
410 * Is there a computed goto from this item set with this symbol
413 symbols = item_collection_lookup (collection, item_set);
414 if (g_hash_table_lookup_extended (symbols, symbol, NULL, NULL))
420 * If the computed goto already exists in the table, retrieve it and
421 * add the entry in the table pointing to it instead of the newly
422 * computed item set. Destroy the new copy. We don't need it. Only
423 * return the computed goto if it's a new item set.
425 goto_item_set = item_set_goto (item_set, grammar, first, symbol);
426 if (!item_collection_add (collection, goto_item_set, &old_item_set))
428 g_hash_table_insert (symbols, symbol, old_item_set);
429 g_hash_table_destroy (goto_item_set);
434 g_hash_table_insert (symbols, symbol, goto_item_set);
435 return goto_item_set;
439 void get_symbol_at_dot (gpointer key, gpointer val, gpointer data)
444 item = (item_t*) key;
445 symbols = (GHashTable*) data;
446 if (item->dot != NULL)
448 symbol = item->dot->data;
449 if (!g_hash_table_lookup_extended (symbols, symbol, NULL, NULL))
451 g_hash_table_insert (symbols, symbol, NULL);
456 /* Get the symbols at the dot in this item set */
457 GList* item_set_symbols_at_dot (GHashTable* item_set)
462 symbols = g_hash_table_new_full (symbol_hash, symbol_equal,
464 g_hash_table_foreach (item_set, get_symbol_at_dot, symbols);
465 g_hash_table_foreach (symbols, put_key_on_list, &symbols_list);
466 g_hash_table_destroy (symbols);
470 GHashTable* item_set_collection (grammar_t* grammar, GHashTable* first,
474 GHashTable* collection;
475 GHashTable* item_set;
478 GList* new_item_sets;
480 /* Augmented grammar has start symbol S' -> S */
482 rule_append (rule, symbol_copy (start));
484 /* First item set is the closure of the item set with S' -> . S , $ */
485 item = item_new (symbol_new (FALSE, -1), rule, symbol_new (TRUE, 0));
486 item_set = item_set_new ();
487 item_set_add (item_set, item);
488 item_set_closure (item_set, grammar, first);
490 /* Insert first item set in the collection of item sets and start working */
491 collection = g_hash_table_new_full (item_set_hash, item_set_equal,
492 g_hash_table_destroy, NULL);
493 item_collection_add (collection, item_set, NULL);
495 new_item_sets = g_list_append (NULL, item_set);
497 while (new_item_sets != NULL)
499 GHashTable* next_item_set;
500 GHashTable* new_item_set;
503 * For each item, compute the goto for the symbol at its
504 * dot. Should be done only for each symbol at a dot in the
505 * whole item set. Only new item sets are put in the new item
506 * sets list. Check item_collection_goto.
508 * TODO: Only do this for each symbol at a dot in the item set.
510 next_item_set = (GHashTable*) new_item_sets->data;
511 symbols = item_set_symbols_at_dot (next_item_set);
512 while (symbols != NULL)
515 symbol = (symbol_t*) symbols->data;
517 item_collection_goto (collection, grammar, first,
518 next_item_set, symbol)) != NULL)
520 g_list_append (new_item_sets, new_item_set);
522 symbols = g_list_next (symbols);
524 g_list_free (symbols);
525 new_item_sets = g_list_next (new_item_sets);
528 g_list_free (new_item_sets);
531 item_collection_print (collection);