1 #include "Teuchos_make_lalr1_parser.hpp"
10 #include "Teuchos_vector.hpp"
11 #include "Teuchos_Graph.hpp"
12 #include "Teuchos_stack.hpp"
13 #include "Teuchos_set.hpp"
29 Config::Config(
int p,
int d):
35 StateConfig::StateConfig(
int s,
int cis):
41 void swap(StateInProgress& a, StateInProgress& b) {
43 swap(a.configs, b.configs);
44 swap(a.actions, b.actions);
48 static Configs make_configs(Grammar
const& g) {
50 for (
int i = 0; i <
size(g.productions); ++i) {
51 const Grammar::Production& production = at(g.productions, i);
52 for (
int j = 0; j <=
size(production.rhs); ++j) {
53 configs.push_back(Config(i,j));
59 static Graph get_left_hand_sides_to_start_configs(
60 Configs
const& cs, Grammar
const& grammar) {
61 Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
62 for (
int c_i = 0; c_i <
size(cs); ++c_i) {
63 const Config& c = at(cs, c_i);
64 if (c.dot > 0)
continue;
65 int p_i = c.production;
66 const Grammar::Production& p = at(grammar.productions, p_i);
67 add_edge(lhs2sc, p.lhs, c_i);
73 typedef StateInProgress
const* Value;
74 bool operator()(Value
const& a, Value
const& b)
const {
75 return a->configs < b->configs;
79 typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
81 static void close(StateInProgress& state,
82 Configs
const& cs, Grammar
const& grammar,
83 Graph
const& lhs2sc) {
84 std::queue<int> config_q;
85 std::set<int> config_set;
86 for (std::vector<int>::const_iterator it = state.configs.begin();
87 it != state.configs.end(); ++it) {
89 config_q.push(config_i);
91 config_set.insert(config_i);
93 while (!config_q.empty()) {
94 int config_i = config_q.front(); config_q.pop();
95 const Config& config = at(cs, config_i);
96 int prod_i = config.production;
97 const Grammar::Production& prod = at(grammar.productions, prod_i);
98 if (config.dot ==
size(prod.rhs))
continue;
99 int symbol_after_dot = at(prod.rhs, config.dot);
100 if (is_terminal(grammar, symbol_after_dot))
continue;
101 const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
102 for (NodeEdges::const_iterator it = edges.begin();
103 it != edges.end(); ++it) {
105 if (!config_set.count(sc)) {
106 config_set.insert(sc);
111 state.configs.assign(config_set.begin(), config_set.end());
114 static void add_back(StatesInProgress& sips, StateInProgress& sip) {
116 StateInProgressPtr
ptr(
new StateInProgress());
121 static void add_reduction_actions(StatesInProgress& states,
122 Configs
const& cs, Grammar
const& grammar) {
123 for (StatesInProgress::iterator it = states.begin();
124 it != states.end(); ++it) {
125 StateInProgressPtr& state_uptr = *it;
126 StateInProgress& state = *state_uptr;
127 for (std::vector<int>::const_iterator it2 = state.configs.begin();
128 it2 != state.configs.end(); ++it2) {
130 const Config& config = at(cs, config_i);
131 int prod_i = config.production;
132 const Grammar::Production& prod = at(grammar.productions, prod_i);
133 if (config.dot !=
size(prod.rhs))
continue;
134 ActionInProgress reduction;
135 reduction.action.kind = ACTION_REDUCE;
136 reduction.action.production = config.production;
137 state.actions.push_back(reduction);
142 static void set_lr0_contexts(
143 StatesInProgress& states,
144 Grammar
const& grammar) {
145 for (StatesInProgress::iterator it = states.begin();
146 it != states.end(); ++it) {
147 StateInProgressPtr& state_uptr = *it;
148 StateInProgress& state = *state_uptr;
149 for (StateInProgress::Actions::iterator it2 = state.actions.begin();
150 it2 != state.actions.end(); ++it2) {
151 ActionInProgress& action = *it2;
152 if (action.action.kind != ACTION_REDUCE)
continue;
153 if (action.action.production == get_accept_production(grammar)) {
154 action.context.insert(get_end_terminal(grammar));
156 for (
int terminal = 0; terminal < grammar.nterminals; ++terminal) {
157 action.context.insert(terminal);
164 static StatesInProgress make_lr0_parser(Configs
const& cs, Grammar
const& grammar,
165 Graph
const& lhs2sc) {
166 StatesInProgress states;
167 StatePtr2StateIndex state_ptrs2idxs;
168 std::queue<int> state_q;
170 StateInProgress start_state;
171 int accept_nt = get_accept_nonterminal(grammar);
173 int start_accept_config = get_edges(lhs2sc, accept_nt).front();
174 start_state.configs.push_back(start_accept_config);
175 close(start_state, cs, grammar, lhs2sc);
176 int start_state_i =
size(states);
177 state_q.push(start_state_i);
178 add_back(states, start_state);
179 state_ptrs2idxs[states.back().get()] = start_state_i;
181 while (!state_q.empty()) {
182 int state_i = state_q.front(); state_q.pop();
183 StateInProgress& state = *at(states, state_i);
184 std::set<int> transition_symbols;
185 for (std::vector<int>::const_iterator it = state.configs.begin();
186 it != state.configs.end(); ++it) {
188 const Config& config = at(cs, config_i);
189 int prod_i = config.production;
190 const Grammar::Production& prod = at(grammar.productions, prod_i);
191 if (config.dot ==
size(prod.rhs))
continue;
192 int symbol_after_dot = at(prod.rhs, config.dot);
193 transition_symbols.insert(symbol_after_dot);
195 for (std::set<int>::const_iterator it = transition_symbols.begin();
196 it != transition_symbols.end(); ++it) {
197 int transition_symbol = *it;
198 StateInProgress next_state;
199 for (std::vector<int>::const_iterator it2 = state.configs.begin();
200 it2 != state.configs.end(); ++it2) {
202 const Config& config = at(cs, config_i);
203 int prod_i = config.production;
204 const Grammar::Production& prod = at(grammar.productions, prod_i);
205 if (config.dot ==
size(prod.rhs))
continue;
206 int symbol_after_dot = at(prod.rhs, config.dot);
207 if (symbol_after_dot != transition_symbol)
continue;
209 int next_config_i = config_i + 1;
210 next_state.configs.push_back(next_config_i);
212 close(next_state, cs, grammar, lhs2sc);
213 StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
215 if (it2 == state_ptrs2idxs.end()) {
216 next_state_i =
size(states);
217 state_q.push(next_state_i);
218 add_back(states, next_state);
219 state_ptrs2idxs[states.back().get()] = next_state_i;
221 next_state_i = it2->second;
223 ActionInProgress transition;
224 transition.action.kind = ACTION_SHIFT;
225 transition.action.next_state = next_state_i;
226 transition.context.insert(transition_symbol);
227 state.actions.push_back(transition);
230 add_reduction_actions(states, cs, grammar);
231 set_lr0_contexts(states, grammar);
235 static Graph get_productions_by_lhs(Grammar
const& grammar) {
236 int nsymbols = grammar.nsymbols;
237 Graph lhs2prods = make_graph_with_nnodes(nsymbols);
238 for (
int prod_i = 0; prod_i <
size(grammar.productions); ++prod_i) {
239 const Grammar::Production& prod = at(grammar.productions, prod_i);
240 add_edge(lhs2prods, prod.lhs, prod_i);
248 static Graph get_symbol_graph(Grammar
const& grammar, Graph
const& lhs2prods) {
249 int nsymbols = grammar.nsymbols;
250 Graph out = make_graph_with_nnodes(nsymbols);
251 for (
int lhs = 0; lhs < nsymbols; ++lhs) {
252 std::set<int> dependees;
253 for (
int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
254 int prod_i = at(lhs2prods, lhs, i);
255 const Grammar::Production& prod = at(grammar.productions, prod_i);
256 for (
int j = 0; j <
size(prod.rhs); ++j) {
257 int rhs_symb = at(prod.rhs, j);
258 dependees.insert(rhs_symb);
261 at(out, lhs).assign(dependees.begin(), dependees.end());
273 enum { FIRST_NULL = -425 };
274 typedef std::set<int> FirstSet;
276 static void print_set(std::set<int>
const& set, Grammar
const& grammar) {
278 for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
279 if (it != set.begin()) std::cerr <<
", ";
281 if (symb == FIRST_NULL) std::cerr <<
"null";
283 const std::string& symb_name = at(grammar.symbol_names, symb);
284 if (symb_name ==
",") std::cerr <<
"','";
285 else std::cerr << symb_name;
291 static FirstSet get_first_set_of_string(std::vector<int>
const&
string,
292 std::vector<FirstSet>
const& first_sets) {
297 for (i = 0; i <
size(
string); ++i) {
298 int symbol = at(
string, i);
299 bool has_null =
false;
300 const FirstSet& first_set = at(first_sets, symbol);
301 for (FirstSet::const_iterator it = first_set.begin();
302 it != first_set.end(); ++it) {
303 int first_symbol = *it;
304 if (first_symbol == FIRST_NULL) has_null =
true;
305 else out.insert(first_symbol);
307 if (!has_null)
break;
309 if (i ==
size(
string)) out.insert(FIRST_NULL);
316 Event(
int as,
int d):
325 static std::vector<FirstSet> compute_first_sets(Grammar
const& grammar,
327 if (verbose) std::cerr <<
"computing FIRST sets...\n";
328 std::queue<Event> event_q;
329 int nsymbols = grammar.nsymbols;
330 std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
331 Graph lhs2prods = get_productions_by_lhs(grammar);
332 for (
int symbol = 0; symbol < nsymbols; ++symbol) {
333 if (is_terminal(grammar, symbol)) {
334 event_q.push(Event(symbol, symbol));
336 for (
int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
337 int prod_i = at(lhs2prods, symbol, i);
338 const Grammar::Production& prod = at(grammar.productions, prod_i);
339 if (prod.rhs.empty()) {
340 event_q.push(Event(FIRST_NULL, symbol));
346 Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
347 Graph dependees2dependers = make_transpose(dependers2dependees);
348 while (!event_q.empty()) {
349 Event
event = event_q.front(); event_q.pop();
350 int added_symb =
event.added_symbol;
351 int dependee =
event.dependee;
352 FirstSet& dependee_firsts = at(first_sets, dependee);
354 if (dependee_firsts.count(added_symb))
continue;
355 dependee_firsts.insert(added_symb);
356 for (
int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
357 int depender = at(dependees2dependers, dependee, i);
359 const FirstSet& depender_firsts = at(first_sets, depender);
360 for (
int j = 0; j < count_edges(lhs2prods, depender); ++j) {
361 int prod_i = at(lhs2prods, depender, j);
362 const Grammar::Production& prod = at(grammar.productions, prod_i);
363 FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
364 for (FirstSet::iterator it = rhs_first_set.begin();
365 it != rhs_first_set.end(); ++it) {
366 int rhs_first_symb = *it;
367 if (!depender_firsts.count(rhs_first_symb)) {
368 event_q.push(Event(rhs_first_symb, depender));
375 for (
int symb = 0; symb < nsymbols; ++symb) {
376 const std::string& symb_name = at(grammar.symbol_names, symb);
377 std::cerr <<
"FIRST(" << symb_name <<
") = {";
378 const FirstSet& c = at(first_sets, symb);
379 for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
380 if (it != c.begin()) std::cerr <<
", ";
381 int first_symb = *it;
382 if (first_symb == FIRST_NULL) {
385 const std::string& first_name = at(grammar.symbol_names, first_symb);
386 std::cerr << first_name;
396 StateConfigs form_state_configs(StatesInProgress
const& states) {
398 for (
int i = 0; i <
size(states); ++i) {
399 StateInProgress& state = *at(states, i);
400 for (
int j = 0; j <
size(state.configs); ++j) {
401 out.push_back(StateConfig(i, j));
407 Graph form_states_to_state_configs(StateConfigs
const& scs,
408 StatesInProgress
const& states) {
409 Graph out = make_graph_with_nnodes(
size(states));
410 for (
int i = 0; i <
size(scs); ++i) {
411 const StateConfig& sc = at(scs, i);
412 at(out, sc.state).push_back(i);
417 static std::string escape_dot(std::string
const& s) {
419 for (std::size_t i = 0; i < s.size(); ++i) {
421 if (c ==
'\\' || c ==
'|' || c ==
'\"' || c ==
'<' || c ==
'>' || c ==
'{' || c ==
'}') {
424 }
else if (c ==
'.') {
437 std::string
const& filepath,
438 ParserInProgress
const& pip,
442 const StatesInProgress& sips = pip.states;
443 const Configs& cs = pip.configs;
444 GrammarPtr grammar = pip.grammar;
445 const Graph& states2scs = pip.states2state_configs;
446 os <<
"writing GraphViz file \"" << filepath <<
"\"\n";
447 os <<
"process with:\n";
448 os <<
" dot -Tpdf -o \"" << filepath <<
".pdf\" \"" << filepath <<
"\"\n";
449 std::ofstream file(filepath.c_str());
451 file <<
"digraph {\n";
453 file <<
"rankdir = \"LR\"\n";
455 for (
int s_i = 0; s_i <
size(sips); ++s_i) {
456 const StateInProgress& state = *at(sips, s_i);
457 file << s_i <<
" [\n";
458 file <<
"label = \"";
459 file <<
"State " << s_i <<
"\\l";
460 for (
int cis_i = 0; cis_i <
size(state.configs); ++cis_i) {
461 int c_i = at(state.configs, cis_i);
462 const Config& config = at(cs, c_i);
463 const Grammar::Production& prod = at(grammar->productions, config.production);
464 int sc_i = at(states2scs, s_i, cis_i);
465 file << sc_i <<
": ";
466 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
467 file << escape_dot(lhs_name) <<
" ::= ";
468 for (
int rhs_i = 0; rhs_i <=
size(prod.rhs); ++rhs_i) {
469 if (rhs_i == config.dot) file <<
" .";
470 if (rhs_i <
size(prod.rhs)) {
471 int rhs_symb = at(prod.rhs, rhs_i);
472 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
473 file <<
" " << escape_dot(rhs_symb_name);
476 if (config.dot ==
size(prod.rhs)) {
479 for (
int a_i = 0; a_i <
size(state.actions); ++a_i) {
480 const ActionInProgress& action = at(state.actions, a_i);
481 if (action.action.kind == ACTION_REDUCE &&
482 action.action.production == config.production) {
484 const Context& ac = action.context;
485 for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
486 if (it != ac.begin()) file <<
", ";
488 const std::string& symb_name = at(grammar->symbol_names, symb);
489 file << escape_dot(symb_name);
494 "BUG: missing reduce action in state " << s_i <<
" !!!\n");
500 file <<
"shape = \"record\"\n";
502 for (
int a_i = 0; a_i <
size(state.actions); ++a_i) {
503 const ActionInProgress& action = at(state.actions, a_i);
504 if (action.action.kind == ACTION_SHIFT) {
505 int symb = *(action.context.begin());
506 const std::string& symb_name = at(grammar->symbol_names, symb);
507 int next = action.action.next_state;
508 file << s_i <<
" -> " << next <<
" [\n";
509 file <<
"label = \"" << escape_dot(symb_name) <<
"\"\n";
517 static Graph make_immediate_predecessor_graph(
518 StateConfigs
const& scs,
519 StatesInProgress
const& states,
520 Graph
const& states2scs,
522 GrammarPtr grammar) {
523 Graph out = make_graph_with_nnodes(
size(scs));
524 for (
int s_i = 0; s_i <
size(states); ++s_i) {
525 const StateInProgress& state = *at(states, s_i);
526 for (
int cis_i = 0; cis_i <
size(state.configs); ++cis_i) {
527 int config_i = at(state.configs, cis_i);
528 const Config& config = at(cs, config_i);
529 const Grammar::Production& prod = at(grammar->productions, config.production);
530 int dot = config.dot;
531 if (dot ==
size(prod.rhs))
continue;
532 int s = at(prod.rhs, dot);
533 if (is_terminal(*grammar, s))
continue;
534 for (
int cis_j = 0; cis_j <
size(state.configs); ++cis_j) {
535 int config_j = at(state.configs, cis_j);
536 const Config& config2 = at(cs, config_j);
537 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
538 if (prod2.lhs == s) {
539 int sc_i = at(states2scs, s_i, cis_i);
540 int sc_j = at(states2scs, s_i, cis_j);
541 add_edge(out, sc_j, sc_i);
549 static Graph find_transition_predecessors(
550 StateConfigs
const& scs,
551 StatesInProgress
const& states,
552 Graph
const& states2scs,
554 GrammarPtr grammar) {
555 Graph out = make_graph_with_nnodes(
size(scs));
556 for (
int state_i = 0; state_i <
size(states); ++state_i) {
557 const StateInProgress& state = *at(states, state_i);
558 for (
int a_i = 0; a_i <
size(state.actions); ++a_i) {
559 const ActionInProgress& action = at(state.actions, a_i);
560 if (action.action.kind != ACTION_SHIFT)
continue;
562 int symbol = *(action.context.begin());
563 int state_j = action.action.next_state;
564 const StateInProgress& state2 = *at(states, state_j);
565 for (
int cis_i = 0; cis_i <
size(state.configs); ++cis_i) {
566 int config_i = at(state.configs, cis_i);
567 const Config& config = at(cs, config_i);
568 for (
int cis_j = 0; cis_j <
size(state2.configs); ++cis_j) {
569 int config_j = at(state2.configs, cis_j);
570 const Config& config2 = at(cs, config_j);
571 if (config.production == config2.production &&
572 config.dot + 1 == config2.dot) {
573 const Grammar::Production& prod = at(grammar->productions, config.production);
574 int rhs_symbol = at(prod.rhs, config.dot);
575 if (rhs_symbol == symbol) {
576 int sc_i = at(states2scs, state_i, cis_i);
577 int sc_j = at(states2scs, state_j, cis_j);
578 add_edge(out, sc_j, sc_i);
588 static Graph make_originator_graph(
589 StateConfigs
const& scs,
590 StatesInProgress
const& states,
591 Graph
const& states2scs,
593 GrammarPtr grammar) {
594 Graph out = make_graph_with_nnodes(
size(scs));
595 Graph ipg = make_immediate_predecessor_graph(
596 scs, states, states2scs, cs, grammar);
597 Graph tpg = find_transition_predecessors(
598 scs, states, states2scs, cs, grammar);
599 for (
int sc_i = 0; sc_i <
size(scs); ++sc_i) {
600 std::set<int> originators;
608 while (!tpq.empty()) {
609 int tpp = tpq.front(); tpq.pop();
610 for (
int i = 0; i < count_edges(tpg, tpp); ++i) {
611 int tpc = at(tpg, tpp, i);
612 if (tps.count(tpc))
continue;
616 for (
int i = 0; i < count_edges(ipg, tpp); ++i) {
617 int ip_i = at(ipg, tpp, i);
618 originators.insert(ip_i);
621 at(out, sc_i).assign(originators.begin(), originators.end());
626 static std::vector<int> get_follow_string(
628 StateConfigs
const& scs,
629 StatesInProgress
const& states,
631 GrammarPtr grammar) {
632 const StateConfig& sc = at(scs, sc_addr);
633 const StateInProgress& state = *at(states, sc.state);
634 int config_i = at(state.configs, sc.config_in_state);
635 const Config& config = at(cs, config_i);
636 const Grammar::Production& prod = at(grammar->productions, config.production);
637 int out_size =
size(prod.rhs) - (config.dot + 1);
638 std::vector<int> out;
640 if (out_size < 1)
return out;
641 reserve(out, out_size);
642 for (
int i = config.dot + 1; i <
size(prod.rhs); ++i) {
643 out.push_back(at(prod.rhs, i));
648 static void print_string(std::vector<int>
const& str, GrammarPtr grammar) {
650 for (
int i = 0; i <
size(str); ++i) {
651 int symb = at(str, i);
652 const std::string& symb_name = at(grammar->symbol_names, symb);
653 std::cerr << symb_name;
658 static bool has_non_null_terminal_descendant(FirstSet
const& first_set) {
659 if (first_set.empty())
return false;
660 if (first_set.size() > 1)
return true;
661 return *(first_set.begin()) != FIRST_NULL;
664 static Context get_contexts(FirstSet first_set) {
665 FirstSet::iterator it = first_set.find(FIRST_NULL);
666 if (it != first_set.end()) first_set.erase(it);
670 enum { MARKER = -433 };
671 enum { ZERO = -100 };
673 static void print_stack(std::vector<int>
const& stack) {
674 for (
int i = 0; i <
size(stack); ++i) {
675 int symb = at(stack, i);
676 if (symb == MARKER) std::cerr <<
" M";
677 else if (symb == ZERO) std::cerr <<
" Z";
678 else std::cerr <<
" " << symb;
683 static void move_markers(
684 std::vector<int>& lane,
685 std::vector<int>& in_lane,
690 int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
693 for (
int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
694 if (at(lane, i) == MARKER) {
701 top_addr = lane.back();
702 lane.resize(lane.size() - 1);
704 for (
int i = 0; i < r; ++i) lane.push_back(MARKER);
706 lane.push_back(top_addr);
707 at(in_lane, top_addr) =
size(lane) - 1;
711 typedef std::vector<Context> Contexts;
713 static void context_adding_routine(
714 std::vector<int>
const& lane,
716 Context& contexts_generated,
722 std::cerr <<
" CONTEXT ADDING ROUTINE\n";
723 std::cerr <<
" LANE:";
725 std::cerr <<
" $\\zeta$-POINTER = " << zeta_pointer <<
'\n';
727 for (
int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
728 int v_r = at(lane, r);
729 if (verbose) std::cerr <<
" r = " << r <<
", $v_r$ = ";
732 if (v_r == MARKER) std::cerr <<
"marker\n";
733 else if (v_r == ZERO) std::cerr <<
"zero\n";
737 int tau_r_addr = v_r;
739 std::cerr <<
"$\\tau_r$ = " << tau_r_addr <<
'\n';
740 std::cerr <<
" CONTEXTS_GENERATED = ";
741 print_set(contexts_generated, *grammar);
742 std::cerr <<
"\n CONTEXTS_$\\tau_r$ = ";
743 print_set(at(contexts, tau_r_addr), *grammar);
744 std::cerr <<
"\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
746 subtract_from(contexts_generated, at(contexts, tau_r_addr));
748 std::cerr <<
"\n CONTEXTS_GENERATED = ";
749 print_set(contexts_generated, *grammar);
750 std::cerr <<
"\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
752 unite_with(at(contexts, tau_r_addr), contexts_generated);
754 std::cerr <<
"\n CONTEXTS_$\\tau_r$ = ";
755 print_set(at(contexts, tau_r_addr), *grammar);
761 static void deal_with_tests_failed(
762 int& num_originators_failed,
763 int& first_originator_failed,
766 std::vector<int>& lane,
767 std::vector<int>& in_lane,
769 std::vector<int>& stack,
772 if (verbose) std::cerr <<
" Dealing with test failures\n";
773 if (num_originators_failed == 0) {
774 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the first originator of " << zeta_addr <<
" to fail the tests\n";
775 first_originator_failed = zeta_prime_addr;
776 if (verbose) std::cerr <<
" pushing " << zeta_prime_addr <<
" onto LANE:\n ";
777 lane.push_back(zeta_prime_addr);
778 if (verbose) print_stack(lane);
779 at(in_lane, zeta_prime_addr) =
size(lane) - 1;
780 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") <- ON\n";
782 if (verbose) std::cerr <<
" TESTS_FAILED <- ON\n";
783 }
else if (num_originators_failed == 1) {
784 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the second originator of " << zeta_addr <<
" to fail the tests\n";
785 int zeta_double_prime_addr = first_originator_failed;
786 if (verbose) std::cerr <<
" the first was " << zeta_double_prime_addr <<
'\n';
789 if (verbose) std::cerr <<
" pop LANE, push {marker, " << zeta_double_prime_addr <<
"} onto it:\n ";
790 lane.resize(lane.size() - 1);
791 lane.push_back(MARKER);
792 lane.push_back(zeta_double_prime_addr);
793 if (verbose) print_stack(lane);
794 if (verbose) std::cerr <<
" push {marker, " << zeta_prime_addr <<
"} onto STACK:\n ";
795 stack.push_back(MARKER);
796 stack.push_back(zeta_prime_addr);
797 if (verbose) print_stack(stack);
799 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the third or later originator of " << zeta_addr <<
" to fail the tests\n";
800 if (verbose) std::cerr <<
" pushing " << zeta_prime_addr <<
" onto STACK:\n ";
801 stack.push_back(zeta_prime_addr);
802 if (verbose) print_stack(stack);
804 ++num_originators_failed;
807 static void heuristic_propagation_of_context_sets(
810 std::vector<bool>& complete,
811 StateConfigs
const& scs,
812 StatesInProgress
const& states,
813 Graph
const& states2scs,
817 const StateConfig& tau = at(scs, tau_addr);
818 const StateInProgress& state = *at(states, tau.state);
819 int config_i = at(state.configs, tau.config_in_state);
820 const Config& config = at(cs, config_i);
821 if (config.dot != 0)
return;
822 const Grammar::Production& prod = at(grammar->productions, config.production);
823 for (
int cis_j = 0; cis_j <
size(state.configs); ++cis_j) {
824 int config_j = at(state.configs, cis_j);
825 if (config_j == config_i)
continue;
826 const Config& config2 = at(cs, config_j);
827 if (config2.dot != 0)
continue;
828 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
829 if (prod.lhs != prod2.lhs)
continue;
830 int tau_prime_addr = at(states2scs, tau.state, cis_j);
831 at(contexts, tau_prime_addr) = at(contexts, tau_addr);
832 at(complete, tau_prime_addr) =
true;
838 static void compute_context_set(
841 std::vector<bool>& complete,
842 StateConfigs
const& scs,
843 Graph
const& originator_graph,
844 StatesInProgress
const& states,
845 Graph
const& states2scs,
847 std::vector<FirstSet>
const& first_sets,
850 ParserInProgress
const& pip
852 if (verbose) std::cerr <<
"Computing context set for $\\zeta_j$ = " << zeta_j_addr <<
"...\n";
853 if (verbose) std::cerr <<
"BEGIN PROGRAM\n";
854 if (at(complete, zeta_j_addr)) {
855 if (verbose) std::cerr << zeta_j_addr <<
" was already complete!\nEND PROGRAM\n\n";
858 std::vector<int> stack;
860 std::vector<int> lane;
861 std::vector<int> in_lane = make_vector<int>(
size(scs), -1);
862 lane.push_back(zeta_j_addr);
863 at(in_lane, zeta_j_addr) =
size(lane) - 1;
864 bool tests_failed =
false;
865 Context contexts_generated;
867 std::cerr <<
"Initial LANE:";
872 int zeta_addr = lane.back();
874 std::cerr <<
"Top of LANE is $\\zeta$ = " << zeta_addr <<
'\n';
876 int zeta_pointer =
size(lane) - 1;
877 if (verbose) std::cerr <<
"$\\zeta$-POINTER <- " << zeta_pointer <<
'\n';
878 int num_originators_failed = 0;
879 int first_originator_failed = -1;
880 if (verbose) std::cerr <<
"DO_LOOP:\n";
882 for (
int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
883 int zeta_prime_addr = at(originator_graph, zeta_addr, i);
885 std::cerr <<
"Next originator of $\\zeta$ = " << zeta_addr <<
" is $\\zeta'$ = " << zeta_prime_addr <<
'\n';
887 std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
889 std::cerr <<
" FOLLOW string of $\\zeta'$ = " << zeta_prime_addr <<
" is ";
890 print_string(gamma, grammar);
893 FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
895 std::cerr <<
" FIRST set of ";
896 print_string(gamma, grammar);
898 print_set(gamma_first, *grammar);
901 if (has_non_null_terminal_descendant(gamma_first)) {
904 print_string(gamma, grammar);
905 std::cerr <<
" has a non-null terminal descendant\n";
907 contexts_generated = get_contexts(gamma_first);
909 std::cerr <<
" CONTEXTS_GENERATED = ";
910 print_set(contexts_generated, *grammar);
911 std::cerr <<
" = 1-heads of non-null descendants of ";
912 print_string(gamma, grammar);
915 if (gamma_first.count(FIRST_NULL)) {
918 print_string(gamma, grammar);
919 std::cerr <<
" has a null terminal descendant\n";
921 if (at(complete, zeta_prime_addr)) {
922 unite_with(contexts_generated, at(contexts, zeta_prime_addr));
923 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
925 }
else if (-1 == at(in_lane, zeta_prime_addr)) {
926 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
929 deal_with_tests_failed(num_originators_failed, first_originator_failed,
930 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
933 print_graphviz(
"ambiguous.dot", pip,
true, std::cerr);
934 std::stringstream ss;
935 ss <<
"error: grammar is ambiguous.\n";
936 ss <<
"zeta_j is " << zeta_j_addr <<
", zeta is " << zeta_addr <<
", and zeta prime is " << zeta_prime_addr <<
'\n';
937 throw ParserBuildFail(ss.str());
940 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
946 print_string(gamma, grammar);
947 std::cerr <<
" does not have a non-null terminal descendant\n";
949 if (at(complete, zeta_prime_addr)) {
950 if (verbose) std::cerr <<
" COMPLETE(" << zeta_prime_addr <<
") is ON\n";
951 contexts_generated = at(contexts, zeta_prime_addr);
952 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
955 if (verbose) std::cerr <<
" COMPLETE(" << zeta_prime_addr <<
") is OFF\n";
956 if (-1 != at(in_lane, zeta_prime_addr)) {
957 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") is ON\n";
958 move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
959 contexts_generated = at(contexts, zeta_prime_addr);
960 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
963 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") is OFF\n";
964 deal_with_tests_failed(num_originators_failed, first_originator_failed,
965 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
971 if (verbose) std::cerr <<
"END DO_LOOP\n";
974 std::cerr <<
" TESTS_FAILED was on, turning it off and going to next configuration\n";
976 tests_failed =
false;
979 bool keep_lane_popping =
true;
980 if (verbose) std::cerr <<
" Start LANE popping\n";
981 while (keep_lane_popping) {
984 std::cerr <<
" LANE:";
987 if (at(lane,
size(lane) - 1) == MARKER) {
988 if (verbose) std::cerr <<
" Top of LANE is a marker\n";
989 if (verbose) std::cerr <<
" Start STACK popping\n";
993 std::cerr <<
" STACK:";
995 std::cerr <<
" LANE:";
998 if (stack.back() == MARKER) {
999 if (verbose) std::cerr <<
" Top of STACK is a marker, pop STACK and LANE\n";
1000 resize(stack,
size(stack) - 1);
1001 resize(lane,
size(lane) - 1);
1003 }
else if (at(complete, stack.back())) {
1004 if (verbose) std::cerr <<
" Top of STACK is has COMPLETE flag, pop STACK\n";
1005 resize(stack,
size(stack) - 1);
1008 int addr = stack.back();
1009 if (verbose) std::cerr <<
" Top of STACK is " << addr <<
", pop STACK\n";
1010 resize(stack,
size(stack) - 1);
1011 if (verbose) std::cerr <<
" Push " << addr <<
" onto LANE\n";
1012 lane.push_back(addr);
1013 if (verbose) std::cerr <<
" IN_LANE(" << addr <<
") <- ON\n";
1014 at(in_lane, addr) =
size(lane) - 1;
1015 keep_lane_popping =
false;
1019 }
else if (at(lane,
size(lane) - 1) == ZERO) {
1020 if (verbose) std::cerr <<
" Top of LANE is a zero\n";
1021 if (verbose) std::cerr <<
" Pop LANE\n";
1022 resize(lane,
size(lane) - 1);
1025 int tau_addr = lane.back();
1026 if (verbose) std::cerr <<
" Top of LANE is $\\tau$ = " << tau_addr <<
"\n";
1027 at(in_lane, tau_addr) = -1;
1028 if (verbose) std::cerr <<
" IN_LANE(" << tau_addr <<
") <- OFF\n";
1029 at(complete, tau_addr) =
true;
1030 if (verbose) std::cerr <<
" COMPLETE(" << tau_addr <<
") <- ON\n";
1031 if (verbose) std::cerr <<
" HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1032 heuristic_propagation_of_context_sets(
1033 tau_addr, contexts, complete,
1034 scs, states, states2scs, cs, grammar);
1035 if (
size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1036 if (verbose) std::cerr <<
"END PROGRAM\n\n";
1039 if (verbose) std::cerr <<
" Pop LANE\n";
1040 resize(lane,
size(lane) - 1);
1047 static std::vector<bool> determine_adequate_states(
1048 StatesInProgress
const& states,
1052 std::vector<bool> out = make_vector<bool>(
size(states));
1053 for (
int s_i = 0; s_i <
size(states); ++s_i) {
1054 const StateInProgress& state = *at(states, s_i);
1055 bool state_is_adequate =
true;
1056 for (
int a_i = 0; a_i <
size(state.actions); ++a_i) {
1057 const ActionInProgress& action = at(state.actions, a_i);
1058 if (action.action.kind == ACTION_SHIFT &&
1059 is_nonterminal(*grammar, *(action.context.begin()))) {
1062 for (
int a_j = a_i + 1; a_j <
size(state.actions); ++a_j) {
1063 const ActionInProgress& action2 = at(state.actions, a_j);
1064 if (action2.action.kind == ACTION_SHIFT &&
1065 is_nonterminal(*grammar, *(action2.context.begin()))) {
1068 if (intersects(action2.context, action.context)) {
1070 const ActionInProgress* ap1 = &action;
1071 const ActionInProgress* ap2 = &action2;
1072 if (ap1->action.kind == ACTION_SHIFT) {
1073 std::swap(ap1, ap2);
1076 os <<
"shift-reduce conflict in state " << s_i <<
":\n";
1078 const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1079 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1080 os << lhs_name <<
" ::=";
1081 for (
int rhs_i = 0; rhs_i <
size(prod.rhs); ++rhs_i) {
1082 int rhs_symb = at(prod.rhs, rhs_i);
1083 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1084 os <<
" " << rhs_symb_name;
1086 int shift_symb = *(ap2->context.begin());
1087 const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1088 os <<
"\nshift " << shift_name <<
'\n';
1090 state_is_adequate =
false;
1094 if (!state_is_adequate)
break;
1096 at(out, s_i) = state_is_adequate;
1098 if (verbose) os <<
'\n';
1102 ParserInProgress draft_lalr1_parser(GrammarPtr grammar,
bool verbose) {
1103 ParserInProgress out;
1104 Configs& cs = out.configs;
1105 StatesInProgress& states = out.states;
1106 StateConfigs& scs = out.state_configs;
1107 Graph& states2scs = out.states2state_configs;
1108 out.grammar = grammar;
1109 cs = make_configs(*grammar);
1110 Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1111 if (verbose) std::cerr <<
"Building LR(0) parser\n";
1112 states = make_lr0_parser(cs, *grammar, lhs2cs);
1113 scs = form_state_configs(states);
1114 states2scs = form_states_to_state_configs(scs, states);
1115 if (verbose) print_graphviz(
"lr0.dot", out,
true, std::cerr);
1116 if (verbose) std::cerr <<
"Checking adequacy of LR(0) machine\n";
1117 std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1119 if (*(std::min_element(adequate.begin(), adequate.end()))) {
1120 if (verbose) std::cerr <<
"The grammar is LR(0)!\n";
1123 std::vector<bool> complete = make_vector<bool>(
size(scs),
false);
1124 std::vector<Context> contexts = make_vector<Context>(
size(scs));
1125 int accept_prod_i = get_accept_production(*grammar);
1128 for (
int sc_i = 0; sc_i <
size(scs); ++sc_i) {
1129 StateConfig& sc = at(scs, sc_i);
1130 StateInProgress& state = *at(states, sc.state);
1131 int config_i = at(state.configs, sc.config_in_state);
1132 Config& config = at(cs, config_i);
1133 if (config.production == accept_prod_i) {
1134 at(complete, sc_i) =
true;
1135 at(contexts, sc_i).insert(get_end_terminal(*grammar));
1138 Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1139 if (verbose) std::cerr <<
"Originator Graph:\n";
1140 if (verbose) std::cerr << og <<
'\n';
1141 std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1144 for (
int s_i = 0; s_i <
size(states); ++s_i) {
1145 if (at(adequate, s_i))
continue;
1146 StateInProgress& state = *at(states, s_i);
1147 for (
int cis_i = 0; cis_i <
size(state.configs); ++cis_i) {
1148 int config_i = at(state.configs, cis_i);
1149 const Config& config = at(cs, config_i);
1150 const Grammar::Production& prod = at(grammar->productions, config.production);
1151 if (config.dot !=
size(prod.rhs))
continue;
1152 int zeta_j_addr = at(states2scs, s_i, cis_i);
1153 compute_context_set(zeta_j_addr, contexts, complete, scs,
1154 og, states, states2scs, cs, first_sets, grammar, verbose, out);
1159 for (
int s_i = 0; s_i <
size(states); ++s_i) {
1160 StateInProgress& state = *at(states, s_i);
1161 for (
int cis_i = 0; cis_i <
size(state.configs); ++cis_i) {
1162 int sc_i = at(states2scs, s_i, cis_i);
1163 if (!at(complete, sc_i))
continue;
1164 int config_i = at(state.configs, cis_i);
1165 Config& config = at(cs, config_i);
1166 const Grammar::Production& prod = at(grammar->productions, config.production);
1167 if (config.dot !=
size(prod.rhs))
continue;
1168 for (
int a_i = 0; a_i <
size(state.actions); ++a_i) {
1169 ActionInProgress& action = at(state.actions, a_i);
1170 if (action.action.kind == ACTION_REDUCE &&
1171 action.action.production == config.production) {
1172 action.context = at(contexts, sc_i);
1177 if (verbose) std::cerr <<
"Checking adequacy of LALR(1) machine\n";
1178 adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1179 if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1180 std::stringstream ss;
1181 ss <<
"error: The grammar is not LALR(1).\n";
1182 determine_adequate_states(states, grammar,
true, ss);
1183 print_graphviz(
"error.dot", out,
true, ss);
1184 std::string s = ss.str();
1185 throw ParserBuildFail(s);
1187 if (verbose) std::cerr <<
"The grammar is LALR(1)!\n";
1188 if (verbose) print_graphviz(
"lalr1.dot", out,
true, std::cerr);
1192 Parser accept_parser(ParserInProgress
const& pip) {
1193 const StatesInProgress& sips = pip.states;
1194 GrammarPtr grammar = pip.grammar;
1195 Parser out(grammar,
size(sips));
1196 for (
int s_i = 0; s_i <
size(sips); ++s_i) {
1199 for (
int s_i = 0; s_i <
size(sips); ++s_i) {
1200 const StateInProgress& sip = *at(sips, s_i);
1201 for (
int a_i = 0; a_i <
size(sip.actions); ++a_i) {
1202 const ActionInProgress& action = at(sip.actions, a_i);
1203 if (action.action.kind == ACTION_SHIFT &&
1204 is_nonterminal(*grammar, *(action.context.begin()))) {
1205 int nt = as_nonterminal(*grammar, *(action.context.begin()));
1206 add_nonterminal_action(out, s_i, nt, action.action.next_state);
1208 for (Context::const_iterator it = action.context.begin();
1209 it != action.context.end(); ++it) {
1212 add_terminal_action(out, s_i, terminal, action.action);
1220 ParserBuildFail::ParserBuildFail(
const std::string& msg):
1221 std::invalid_argument(msg) {
1225 ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1226 return accept_parser(pip);
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
int size(const Comm< Ordinal > &comm)
Get the number of processes in the communicator.
Ptr< T > ptr(T *p)
Create a pointer to an object from a raw pointer.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
void swap(Teuchos::any &a, Teuchos::any &b)
Special swap for other code to find via Argument Dependent Lookup.