13 first_set_t* first_set_new ()
15 first_set_t* first_set;
16 first_set = g_malloc (sizeof (first_set_t));
17 first_set->has_empty = FALSE;
18 first_set->terminals = g_hash_table_new_full (symbol_hash, symbol_equal,
20 first_set->rules = NULL;
21 first_set->recursive = NULL;
25 void first_set_delete (first_set_t* first_set)
28 g_hash_table_destroy (first_set->terminals);
32 rule_delete (l->data);
35 g_list_free (first_set->rules);
36 l = first_set->recursive;
39 rule_delete (l->data);
42 g_list_free (first_set->recursive);
46 void first_set_insert_terminal (first_set_t* first_set, symbol_t* symbol)
48 g_hash_table_insert (first_set->terminals, symbol, NULL);
51 void first_set_insert (gpointer key, gpointer val, gpointer data)
53 g_hash_table_insert (data, symbol_copy (key), NULL);
56 GHashTable* first_set_union (GHashTable* a, GHashTable* b)
58 g_hash_table_foreach (b, first_set_insert, a);
62 void first_advance_recursive (first_set_t* first_set, symbol_t* symbol)
66 rules = first_set->recursive;
69 * For each recursive rule, remove recursiveness and insert any new
70 * rule to list of non-recursive rules.
78 symbol_t* first_symbol;
81 symbols = grammar_get_rule (rule);
83 assert (symbols != NULL);
85 first_symbol = symbols->data;
87 assert (first_symbol->value == symbol->value);
89 /* Remove any leading symbols equal to the recursive nonterminal */
90 while (first_symbol->value == symbol->value)
92 first_symbol = rule_pop (rule);
94 /* If no symbol remains after removing leading recursive
95 * nonterminal, it may produce the empty string */
96 if (first_symbol == NULL)
98 first_set->has_empty = TRUE;
102 /* If after removing leading recursive nonterminal, there
103 * is a terminal, it is in the first set of the nonterminal */
104 else if (first_symbol->terminal)
106 first_set_insert_terminal (first_set,
107 symbol_copy (first_symbol));
112 * If after removing leading recursive nonterminal, there
113 * is a nonterminal, the first of the remaining sequence
114 * of symbols is in the first set of the recursive
117 else if (first_symbol->value != symbol->value)
119 first_set->rules = g_list_append (first_set->rules, rule);
124 rules = g_list_next (rules);
128 g_list_free (first_set->recursive);
129 first_set->recursive = NULL;
133 void first_prepare (gpointer key, gpointer val, gpointer data)
140 first_set_t* first_set;
142 symbol = (symbol_t*) key;
143 rules = *(GList**) val;
144 first = (GHashTable*) data;
146 assert (symbol->terminal == FALSE);
148 /* For each nonterminal in the grammar, there is a first set */
149 first_set = first_set_new ();
150 g_hash_table_insert (first, symbol_copy (symbol), first_set);
153 * For each rule for the nonterminal in the grammar, it will either
154 * produce the empty string directly, start with a terminal that will
155 * be included in the first set, start with a different nonterminal or
156 * be a recursive rule.
158 while (rules != NULL)
163 symbol_t* first_symbol;
166 symbols = grammar_get_rule (rule);
170 first_set->has_empty = TRUE;
174 first_symbol = symbols->data;
175 if (first_symbol->terminal)
177 first_set_insert_terminal (first_set,
178 symbol_copy (first_symbol));
180 else if (first_symbol->value == symbol->value)
182 first_set->recursive = g_list_prepend (first_set->recursive,
187 first_set->rules = g_list_prepend (first_set->rules,
192 rules = g_list_next (rules);
197 * If there any no non-recursive rules starting with a nonterminal
198 * recursiveness may be easily eliminated.
200 if (first_set->rules == NULL && first_set->recursive != NULL)
203 * If the nonterminal may produce the empty string, each recursive
204 * rule will lead to a new rule with the leading recursive nonterminal
207 if (first_set->has_empty)
209 first_advance_recursive (first_set, symbol);
211 /* If it does not produce the empty string, every recursive rule
212 * may be simply removed. All terminals in first set are known at
218 l = first_set->recursive;
221 rule_delete (l->data);
224 g_list_free (first_set->recursive);
225 first_set->recursive = NULL;
227 assert (first_set->recursive == NULL);
232 void first_iterate (gpointer key, gpointer val, gpointer data)
236 first_set_t* first_set;
242 symbol = (symbol_t*) key;
243 first_set = (first_set_t*) val;
244 first = (GHashTable*) data;
246 assert (symbol->terminal == FALSE);
248 /* If there are no non-recursive rules starting with a
249 * nonterminal, there's not much to do. Only remove all
250 * the recursive rules or advance them as described in
253 if (first_set->rules == NULL)
255 if (first_set->recursive != NULL)
257 if (first_set->has_empty)
259 first_advance_recursive (first_set, symbol);
264 l = first_set->recursive;
267 rule_delete (l->data);
270 g_list_free (first_set->recursive);
271 first_set->recursive = NULL;
273 assert (first_set->recursive == NULL);
280 rules = first_set->rules;
284 * For each rule, try to eliminate it by checking the first set of
285 * the nonterminal with starts it. Rules may have been added by
286 * previous iterations or the advancement of recursive rules.
287 * So, trivial checks like empty rules and rules starting with
288 * terminals should be checked too.
290 while (rules != NULL)
295 symbol_t* first_symbol;
298 symbols = grammar_get_rule (rule);
300 /* If it is an empty rule, nonterminal may produce the empty
304 first_set->has_empty = TRUE;
310 first_set_t* next_set;
312 first_symbol = symbols->data;
314 /* If it starts with a terminal, include it in the first
315 * set and remove the rule. */
316 if (first_symbol->terminal)
318 first_set_insert_terminal (first_set,
319 symbol_copy (first_symbol));
322 /* Get the first set of the nonterminal which starts the rule */
323 else if (g_hash_table_lookup_extended (first, first_symbol, NULL,
324 (gpointer*) &next_set))
327 * If nonterminal produces the empty string, the first set
328 * of the sequence of symbols next to it is in the first
329 * set of the nonterminal we are treating. Add a new rule
330 * corresponding to this sequence of symbols. This is the
331 * rule which says: if there is a production A -> y1 y2 yk
332 * and the empty string is in the first of y1, then first
333 * of y2 yk is in first of A.
335 if (next_set->has_empty)
338 new_rule = rule_copy (rule);
340 new_rules = g_list_prepend (new_rules, new_rule);
342 /* The terminals in the first of the leading nonterminal in
343 * the rule are in the first of our current nonterminal */
344 first_set_union (first_set->terminals,
345 next_set->terminals);
347 * If there are other rules starting with a nonterminal
348 * for this nonterminal, they should be copied to the
349 * current nonterminal, so we can detect and treat indirect
350 * recursiveness properly.
352 if (next_set->rules != NULL)
354 /* FIXME: not much of an advance here, watch for
355 * this in a possible unstopabble loop */
356 /* FIXME: copy recursive rules too, with care */
358 next_rules = next_set->rules;
359 while (next_rules != NULL)
361 rule_t* next_rule = rule_copy (next_rules->data);
363 next_symbols = grammar_get_rule (next_rule);
365 * If the rule is empty, both terminals produce the
366 * empty set. This will be detected for the other
367 * nonterminal later. Optimization could be done
370 if (next_symbols == NULL)
372 first_set->has_empty = TRUE;
373 rule_delete (next_rule);
377 symbol_t* next_symbol;
378 next_symbol = next_symbols->data;
379 /* TODO: Check this assertion is correct. */
380 assert (next_symbol->terminal == FALSE);
381 /* This is an indirect recursive rule. */
382 if (next_symbol->value == symbol->value)
384 first_set->recursive =
385 g_list_prepend (first_set->recursive,
388 /* Next time we treat current nonterminal,
389 * we will check this rule directly. */
392 new_rules = g_list_prepend (new_rules, next_rule);
395 next_rules = g_list_next (next_rules);
399 * If nonterminal has recursive rules, we should expect
400 * more rules to add here and so we will check it again
401 * in the next iteration. Keep the rule. If there is only
402 * indirect recursiveness, we will detect it through the
403 * rules we have added before. We should remove it in this
404 * case so we make advance.
406 if (next_set->recursive != NULL)
408 new_rules = g_list_prepend (new_rules, rule);
422 rules = g_list_next (rules);
426 g_list_free (first_set->rules);
427 first_set->rules = new_rules;
433 void first_check (gpointer key, gpointer val, gpointer data)
437 first_set_t* first_set;
440 symbol = (symbol_t*) key;
441 first_set = (first_set_t*) val;
442 stop = (gboolean*) data;
444 assert (symbol->terminal == FALSE);
446 /* If there is anything besides terminals in the first set,
447 * we must iterate more and remove all of them. */
448 if (first_set->rules != NULL || first_set->recursive != NULL)
456 * Calculate the first set for every nonterminal symbol in the grammar.
457 * We should iterate through the rules for each nonterminal until only
458 * terminals are known to be in the first set of it.
460 GHashTable* grammar_first (Grammar* grammar)
464 first = g_hash_table_new_full (symbol_hash, symbol_equal,
467 g_hash_table_foreach (grammar->grammar, first_prepare, first);
470 g_hash_table_foreach (first, first_iterate, first);
472 g_hash_table_foreach (first, first_check, &stop);
473 } while (stop == FALSE);
477 void put_key_on_list (gpointer key, gpointer val, gpointer data)
481 *l = g_list_prepend (*l, symbol_copy (key));
484 GList* first_get (GHashTable* first, symbol_t* symbol)
487 first_set_t* first_set;
490 if (g_hash_table_lookup_extended (first, symbol, NULL,
491 (gpointer*) &first_set))
493 g_hash_table_foreach (first->terminals, put_key_on_list, &l);