#include
#include
#include
typedef struct
{
gboolean has_empty;
GHashTable* terminals;
GList* rules;
GList* recursive;
} first_set_t;
first_set_t* first_set_new ()
{
first_set_t* first_set;
first_set = g_malloc (sizeof (first_set_t));
first_set->has_empty = FALSE;
first_set->terminals = g_hash_table_new_full (symbol_hash, symbol_equal,
g_free, NULL);
first_set->rules = NULL;
first_set->recursive = NULL;
return first_set;
}
void first_set_delete (first_set_t* first_set)
{
GList* l;
g_hash_table_destroy (first_set->terminals);
l = first_set->rules;
while (l != NULL)
{
rule_delete (l->data);
l = g_list_next (l);
}
g_list_free (first_set->rules);
l = first_set->recursive;
while (l != NULL)
{
rule_delete (l->data);
l = g_list_next (l);
}
g_list_free (first_set->recursive);
g_free (first_set);
}
void first_set_insert_terminal (first_set_t* first_set, symbol_t* symbol)
{
g_hash_table_insert (first_set->terminals, symbol, NULL);
}
void first_set_insert (gpointer key, gpointer val, gpointer data)
{
g_hash_table_insert (data, symbol_copy (key), NULL);
}
GHashTable* first_set_union (GHashTable* a, GHashTable* b)
{
g_hash_table_foreach (b, first_set_insert, a);
return a;
}
void first_advance_recursive (first_set_t* first_set, symbol_t* symbol)
{
GList* rules;
rules = first_set->recursive;
/*
* For each recursive rule, remove recursiveness and insert any new
* rule to list of non-recursive rules.
*/
while (rules != NULL)
{
rule_t* rule;
GList* symbols;
symbol_t* first_symbol;
rule = rules->data;
symbols = grammar_get_rule (rule);
assert (symbols != NULL);
first_symbol = symbols->data;
assert (first_symbol->value == symbol->value);
/* Remove any leading symbols equal to the recursive nonterminal */
while (first_symbol->value == symbol->value)
{
first_symbol = rule_pop (rule);
/* If no symbol remains after removing leading recursive
* nonterminal, it may produce the empty string */
if (first_symbol == NULL)
{
first_set->has_empty = TRUE;
rule_delete (rule);
break;
}
/* If after removing leading recursive nonterminal, there
* is a terminal, it is in the first set of the nonterminal */
else if (first_symbol->terminal)
{
first_set_insert_terminal (first_set,
symbol_copy (first_symbol));
rule_delete (rule);
break;
}
/*
* If after removing leading recursive nonterminal, there
* is a nonterminal, the first of the remaining sequence
* of symbols is in the first set of the recursive
* nonterminal
*/
else if (first_symbol->value != symbol->value)
{
first_set->rules = g_list_append (first_set->rules, rule);
break;
}
}
rules = g_list_next (rules);
}
g_list_free (first_set->recursive);
first_set->recursive = NULL;
}
void first_prepare (gpointer key, gpointer val, gpointer data)
{
symbol_t* symbol;
GList* rules;
GHashTable* first;
first_set_t* first_set;
symbol = (symbol_t*) key;
rules = *(GList**) val;
first = (GHashTable*) data;
assert (symbol->terminal == FALSE);
/* For each nonterminal in the grammar, there is a first set */
first_set = first_set_new ();
g_hash_table_insert (first, symbol_copy (symbol), first_set);
/*
* For each rule for the nonterminal in the grammar, it will either
* produce the empty string directly, start with a terminal that will
* be included in the first set, start with a different nonterminal or
* be a recursive rule.
*/
while (rules != NULL)
{
rule_t* rule;
GList* symbols;
symbol_t* first_symbol;
rule = rules->data;
symbols = grammar_get_rule (rule);
if (symbols == NULL)
{
first_set->has_empty = TRUE;
}
else
{
first_symbol = symbols->data;
if (first_symbol->terminal)
{
first_set_insert_terminal (first_set,
symbol_copy (first_symbol));
}
else if (first_symbol->value == symbol->value)
{
first_set->recursive = g_list_prepend (first_set->recursive,
rule_copy (rule));
}
else
{
first_set->rules = g_list_prepend (first_set->rules,
rule_copy (rule));
}
}
rules = g_list_next (rules);
}
/*
* If there any no non-recursive rules starting with a nonterminal
* recursiveness may be easily eliminated.
*/
if (first_set->rules == NULL && first_set->recursive != NULL)
{
/*
* If the nonterminal may produce the empty string, each recursive
* rule will lead to a new rule with the leading recursive nonterminal
* removed.
*/
if (first_set->has_empty)
{
first_advance_recursive (first_set, symbol);
}
/* If it does not produce the empty string, every recursive rule
* may be simply removed. All terminals in first set are known at
* this point.
*/
else
{
GList* l;
l = first_set->recursive;
while (l != NULL)
{
rule_delete (l->data);
l = g_list_next (l);
}
g_list_free (first_set->recursive);
first_set->recursive = NULL;
}
assert (first_set->recursive == NULL);
}
}
void first_iterate (gpointer key, gpointer val, gpointer data)
{
symbol_t* symbol;
first_set_t* first_set;
GHashTable* first;
GList* rules;
GList* new_rules;
symbol = (symbol_t*) key;
first_set = (first_set_t*) val;
first = (GHashTable*) data;
assert (symbol->terminal == FALSE);
/* If there are no non-recursive rules starting with a
* nonterminal, there's not much to do. Only remove all
* the recursive rules or advance them as described in
* the first_prepare.
*/
if (first_set->rules == NULL)
{
if (first_set->recursive != NULL)
{
if (first_set->has_empty)
{
first_advance_recursive (first_set, symbol);
}
else
{
GList* l;
l = first_set->recursive;
while (l != NULL)
{
rule_delete (l->data);
l = g_list_next (l);
}
g_list_free (first_set->recursive);
first_set->recursive = NULL;
}
assert (first_set->recursive == NULL);
}
return;
}
else
{
rules = first_set->rules;
new_rules = NULL;
/*
* For each rule, try to eliminate it by checking the first set of
* the nonterminal with starts it. Rules may have been added by
* previous iterations or the advancement of recursive rules.
* So, trivial checks like empty rules and rules starting with
* terminals should be checked too.
*/
while (rules != NULL)
{
rule_t* rule;
GList* symbols;
symbol_t* first_symbol;
rule = rules->data;
symbols = grammar_get_rule (rule);
/* If it is an empty rule, nonterminal may produce the empty
* string */
if (symbols == NULL)
{
first_set->has_empty = TRUE;
rule_delete (rule);
}
else
{
first_set_t* next_set;
first_symbol = symbols->data;
/* If it starts with a terminal, include it in the first
* set and remove the rule. */
if (first_symbol->terminal)
{
first_set_insert_terminal (first_set,
symbol_copy (first_symbol));
rule_delete (rule);
}
/* Get the first set of the nonterminal which starts the rule */
else if (g_hash_table_lookup_extended (first, first_symbol, NULL,
(gpointer*) &next_set))
{
/*
* If nonterminal produces the empty string, the first set
* of the sequence of symbols next to it is in the first
* set of the nonterminal we are treating. Add a new rule
* corresponding to this sequence of symbols. This is the
* rule which says: if there is a production A -> y1 y2 yk
* and the empty string is in the first of y1, then first
* of y2 yk is in first of A.
*/
if (next_set->has_empty)
{
rule_t* new_rule;
new_rule = rule_copy (rule);
rule_pop (new_rule);
new_rules = g_list_prepend (new_rules, new_rule);
}
/* The terminals in the first of the leading nonterminal in
* the rule are in the first of our current nonterminal */
first_set_union (first_set->terminals,
next_set->terminals);
/*
* If there are other rules starting with a nonterminal
* for this nonterminal, they should be copied to the
* current nonterminal, so we can detect and treat indirect
* recursiveness properly.
*/
if (next_set->rules != NULL)
{
/* FIXME: not much of an advance here, watch for
* this in a possible unstopabble loop */
/* FIXME: copy recursive rules too, with care */
GList* next_rules;
next_rules = next_set->rules;
while (next_rules != NULL)
{
rule_t* next_rule = rule_copy (next_rules->data);
GList* next_symbols;
next_symbols = grammar_get_rule (next_rule);
/*
* If the rule is empty, both terminals produce the
* empty set. This will be detected for the other
* nonterminal later. Optimization could be done
* here. TODO
*/
if (next_symbols == NULL)
{
first_set->has_empty = TRUE;
rule_delete (next_rule);
}
else
{
symbol_t* next_symbol;
next_symbol = next_symbols->data;
/*
* This rule starts with a terminal. Ignore it.
* We could add it as an optimization. TODO
*/
if (next_symbol->terminal)
{
rule_delete (next_rule);
}
/* This is an indirect recursive rule. */
else if (next_symbol->value == symbol->value)
{
first_set->recursive =
g_list_prepend (first_set->recursive,
next_rule);
}
/* Next time we treat current nonterminal,
* we will check this rule directly. */
else
{
new_rules = g_list_prepend (new_rules, next_rule);
}
}
next_rules = g_list_next (next_rules);
}
}
/*
* If nonterminal has recursive rules, we should expect
* more rules to add here and so we will check it again
* in the next iteration. Keep the rule. If there is only
* indirect recursiveness, we will detect it through the
* rules we have added before. We should remove it in this
* case so we make advance.
*/
if (next_set->recursive != NULL)
{
new_rules = g_list_prepend (new_rules, rule);
}
else
{
rule_delete (rule);
}
}
else
{
assert (TRUE);
}
}
rules = g_list_next (rules);
}
g_list_free (first_set->rules);
first_set->rules = new_rules;
}
}
void first_check (gpointer key, gpointer val, gpointer data)
{
symbol_t* symbol;
first_set_t* first_set;
gboolean* stop;
symbol = (symbol_t*) key;
first_set = (first_set_t*) val;
stop = (gboolean*) data;
assert (symbol->terminal == FALSE);
/* If there is anything besides terminals in the first set,
* we must iterate more and remove all of them. */
if (first_set->rules != NULL || first_set->recursive != NULL)
{
*stop = FALSE;
}
}
/*
* Calculate the first set for every nonterminal symbol in the grammar.
* We should iterate through the rules for each nonterminal until only
* terminals are known to be in the first set of it.
*/
GHashTable* grammar_first (grammar_t* grammar)
{
GHashTable* first;
gboolean stop;
first = g_hash_table_new_full (symbol_hash, symbol_equal,
g_free,
first_set_delete);
g_hash_table_foreach (grammar->grammar, first_prepare, first);
do
{
g_hash_table_foreach (first, first_iterate, first);
stop = TRUE;
g_hash_table_foreach (first, first_check, &stop);
} while (stop == FALSE);
return first;
}
static void put_key_on_list (gpointer key, gpointer val, gpointer data)
{
GList** l;
l = (GList**) data;
*l = g_list_prepend (*l, symbol_copy (key));
}
GList* first_get (GHashTable* first, symbol_t* symbol)
{
first_set_t* first_set;
GList* l;
l = NULL;
if (g_hash_table_lookup_extended (first, symbol, NULL,
(gpointer*) &first_set))
{
if (first_set->has_empty)
l = g_list_prepend (l, symbol_new (TRUE, 0));
g_hash_table_foreach (first_set->terminals, put_key_on_list, &l);
}
return l;
}
GList* first_rule (GHashTable* first, rule_t* rule)
{
GHashTable* terminals;
GList* symbols;
GList* l;
l = NULL;
symbols = grammar_get_rule (rule);
terminals = g_hash_table_new_full (symbol_hash, symbol_equal, g_free, NULL);
while (symbols != NULL)
{
first_set_t* first_set;
symbol_t* symbol;
symbol = (symbol_t*) symbols->data;
if (symbol->terminal)
{
g_hash_table_insert (terminals, symbol_copy (symbol), NULL);
break;
}
if (!g_hash_table_lookup_extended (first, symbol, NULL,
(gpointer*) &first_set))
{
g_hash_table_destroy (terminals);
return NULL;
}
first_set_union (terminals, first_set->terminals);
if (first_set->has_empty == FALSE)
break;
symbols = g_list_next (symbols);
}
if (symbols == NULL)
l = g_list_prepend (l, symbol_new (TRUE, 0));
g_hash_table_foreach (terminals, put_key_on_list, &l);
g_hash_table_destroy (terminals);
return l;
}
void first_print_symbol (gpointer key, gpointer val, gpointer data)
{
symbol_t* symbol;
symbol = (symbol_t*) key;
fprintf (stdout, "%s\n", g_quark_to_string (symbol->value));
}
void first_print_set (gpointer key, gpointer val, gpointer data)
{
symbol_t* symbol;
first_set_t* first_set;
symbol = (symbol_t*) key;
first_set = (first_set_t*) val;
fprintf (stdout, "FIRST of %s:\n", g_quark_to_string (symbol->value));
if (first_set->has_empty)
fprintf (stdout, "empty symbol\n");
g_hash_table_foreach (first_set->terminals, first_print_symbol, NULL);
}
void first_print (GHashTable* first)
{
g_hash_table_foreach (first, first_print_set, NULL);
}