10 #include "Teuchos_make_lalr1_parser.hpp"
19 #include "Teuchos_vector.hpp"
20 #include "Teuchos_Graph.hpp"
21 #include "Teuchos_stack.hpp"
22 #include "Teuchos_set.hpp"
38 Config::Config(
int p,
int d):
44 StateConfig::StateConfig(
int s,
int cis):
50 void swap(StateInProgress& a, StateInProgress& b) {
52 swap(a.configs, b.configs);
53 swap(a.actions, b.actions);
57 static Configs make_configs(Grammar
const& g) {
59 for (
int i = 0; i < Teuchos::size(g.productions); ++i) {
60 const Grammar::Production& production = at(g.productions, i);
61 for (
int j = 0; j <= Teuchos::size(production.rhs); ++j) {
62 configs.push_back(Config(i,j));
68 static Graph get_left_hand_sides_to_start_configs(
69 Configs
const& cs, Grammar
const& grammar) {
70 Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
71 for (
int c_i = 0; c_i < Teuchos::size(cs); ++c_i) {
72 const Config& c = at(cs, c_i);
73 if (c.dot > 0)
continue;
74 int p_i = c.production;
75 const Grammar::Production& p = at(grammar.productions, p_i);
76 add_edge(lhs2sc, p.lhs, c_i);
82 typedef StateInProgress
const* Value;
83 bool operator()(Value
const& a, Value
const& b)
const {
84 return a->configs < b->configs;
88 typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
90 static void close(StateInProgress& state,
91 Configs
const& cs, Grammar
const& grammar,
92 Graph
const& lhs2sc) {
93 std::queue<int> config_q;
94 std::set<int> config_set;
95 for (std::vector<int>::const_iterator it = state.configs.begin();
96 it != state.configs.end(); ++it) {
98 config_q.push(config_i);
100 config_set.insert(config_i);
102 while (!config_q.empty()) {
103 int config_i = config_q.front(); config_q.pop();
104 const Config& config = at(cs, config_i);
105 int prod_i = config.production;
106 const Grammar::Production& prod = at(grammar.productions, prod_i);
107 if (config.dot == Teuchos::size(prod.rhs))
continue;
108 int symbol_after_dot = at(prod.rhs, config.dot);
109 if (is_terminal(grammar, symbol_after_dot))
continue;
110 const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
111 for (NodeEdges::const_iterator it = edges.begin();
112 it != edges.end(); ++it) {
114 if (!config_set.count(sc)) {
115 config_set.insert(sc);
120 state.configs.assign(config_set.begin(), config_set.end());
123 static void add_back(StatesInProgress& sips, StateInProgress& sip) {
125 StateInProgressPtr
ptr(
new StateInProgress());
130 static void add_reduction_actions(StatesInProgress& states,
131 Configs
const& cs, Grammar
const& grammar) {
132 for (StatesInProgress::iterator it = states.begin();
133 it != states.end(); ++it) {
134 StateInProgressPtr& state_uptr = *it;
135 StateInProgress& state = *state_uptr;
136 for (std::vector<int>::const_iterator it2 = state.configs.begin();
137 it2 != state.configs.end(); ++it2) {
139 const Config& config = at(cs, config_i);
140 int prod_i = config.production;
141 const Grammar::Production& prod = at(grammar.productions, prod_i);
142 if (config.dot != Teuchos::size(prod.rhs))
continue;
143 ActionInProgress reduction;
144 reduction.action.kind = ACTION_REDUCE;
145 reduction.action.production = config.production;
146 state.actions.push_back(reduction);
151 static void set_lr0_contexts(
152 StatesInProgress& states,
153 Grammar
const& grammar) {
154 for (StatesInProgress::iterator it = states.begin();
155 it != states.end(); ++it) {
156 StateInProgressPtr& state_uptr = *it;
157 StateInProgress& state = *state_uptr;
158 for (StateInProgress::Actions::iterator it2 = state.actions.begin();
159 it2 != state.actions.end(); ++it2) {
160 ActionInProgress& action = *it2;
161 if (action.action.kind != ACTION_REDUCE)
continue;
162 if (action.action.production == get_accept_production(grammar)) {
163 action.context.insert(get_end_terminal(grammar));
165 for (
int terminal = 0; terminal < grammar.nterminals; ++terminal) {
166 action.context.insert(terminal);
173 static StatesInProgress make_lr0_parser(Configs
const& cs, Grammar
const& grammar,
174 Graph
const& lhs2sc) {
175 StatesInProgress states;
176 StatePtr2StateIndex state_ptrs2idxs;
177 std::queue<int> state_q;
179 StateInProgress start_state;
180 int accept_nt = get_accept_nonterminal(grammar);
182 int start_accept_config = get_edges(lhs2sc, accept_nt).front();
183 start_state.configs.push_back(start_accept_config);
184 close(start_state, cs, grammar, lhs2sc);
185 int start_state_i = Teuchos::size(states);
186 state_q.push(start_state_i);
187 add_back(states, start_state);
188 state_ptrs2idxs[states.back().get()] = start_state_i;
190 while (!state_q.empty()) {
191 int state_i = state_q.front(); state_q.pop();
192 StateInProgress& state = *at(states, state_i);
193 std::set<int> transition_symbols;
194 for (std::vector<int>::const_iterator it = state.configs.begin();
195 it != state.configs.end(); ++it) {
197 const Config& config = at(cs, config_i);
198 int prod_i = config.production;
199 const Grammar::Production& prod = at(grammar.productions, prod_i);
200 if (config.dot == Teuchos::size(prod.rhs))
continue;
201 int symbol_after_dot = at(prod.rhs, config.dot);
202 transition_symbols.insert(symbol_after_dot);
204 for (std::set<int>::const_iterator it = transition_symbols.begin();
205 it != transition_symbols.end(); ++it) {
206 int transition_symbol = *it;
207 StateInProgress next_state;
208 for (std::vector<int>::const_iterator it2 = state.configs.begin();
209 it2 != state.configs.end(); ++it2) {
211 const Config& config = at(cs, config_i);
212 int prod_i = config.production;
213 const Grammar::Production& prod = at(grammar.productions, prod_i);
214 if (config.dot == Teuchos::size(prod.rhs))
continue;
215 int symbol_after_dot = at(prod.rhs, config.dot);
216 if (symbol_after_dot != transition_symbol)
continue;
218 int next_config_i = config_i + 1;
219 next_state.configs.push_back(next_config_i);
221 close(next_state, cs, grammar, lhs2sc);
222 StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
224 if (it2 == state_ptrs2idxs.end()) {
225 next_state_i = Teuchos::size(states);
226 state_q.push(next_state_i);
227 add_back(states, next_state);
228 state_ptrs2idxs[states.back().get()] = next_state_i;
230 next_state_i = it2->second;
232 ActionInProgress transition;
233 transition.action.kind = ACTION_SHIFT;
234 transition.action.next_state = next_state_i;
235 transition.context.insert(transition_symbol);
236 state.actions.push_back(transition);
239 add_reduction_actions(states, cs, grammar);
240 set_lr0_contexts(states, grammar);
244 static Graph get_productions_by_lhs(Grammar
const& grammar) {
245 int nsymbols = grammar.nsymbols;
246 Graph lhs2prods = make_graph_with_nnodes(nsymbols);
247 for (
int prod_i = 0; prod_i < Teuchos::size(grammar.productions); ++prod_i) {
248 const Grammar::Production& prod = at(grammar.productions, prod_i);
249 add_edge(lhs2prods, prod.lhs, prod_i);
257 static Graph get_symbol_graph(Grammar
const& grammar, Graph
const& lhs2prods) {
258 int nsymbols = grammar.nsymbols;
259 Graph out = make_graph_with_nnodes(nsymbols);
260 for (
int lhs = 0; lhs < nsymbols; ++lhs) {
261 std::set<int> dependees;
262 for (
int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
263 int prod_i = at(lhs2prods, lhs, i);
264 const Grammar::Production& prod = at(grammar.productions, prod_i);
265 for (
int j = 0; j < Teuchos::size(prod.rhs); ++j) {
266 int rhs_symb = at(prod.rhs, j);
267 dependees.insert(rhs_symb);
270 at(out, lhs).assign(dependees.begin(), dependees.end());
282 enum { FIRST_NULL = -425 };
283 typedef std::set<int> FirstSet;
285 static void print_set(std::set<int>
const& set, Grammar
const& grammar) {
287 for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
288 if (it != set.begin()) std::cerr <<
", ";
290 if (symb == FIRST_NULL) std::cerr <<
"null";
292 const std::string& symb_name = at(grammar.symbol_names, symb);
293 if (symb_name ==
",") std::cerr <<
"','";
294 else std::cerr << symb_name;
300 static FirstSet get_first_set_of_string(std::vector<int>
const&
string,
301 std::vector<FirstSet>
const& first_sets) {
306 for (i = 0; i < Teuchos::size(
string); ++i) {
307 int symbol = at(
string, i);
308 bool has_null =
false;
309 const FirstSet& first_set = at(first_sets, symbol);
310 for (FirstSet::const_iterator it = first_set.begin();
311 it != first_set.end(); ++it) {
312 int first_symbol = *it;
313 if (first_symbol == FIRST_NULL) has_null =
true;
314 else out.insert(first_symbol);
316 if (!has_null)
break;
318 if (i == Teuchos::size(
string)) out.insert(FIRST_NULL);
325 Event(
int as,
int d):
334 static std::vector<FirstSet> compute_first_sets(Grammar
const& grammar,
336 if (verbose) std::cerr <<
"computing FIRST sets...\n";
337 std::queue<Event> event_q;
338 int nsymbols = grammar.nsymbols;
339 std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
340 Graph lhs2prods = get_productions_by_lhs(grammar);
341 for (
int symbol = 0; symbol < nsymbols; ++symbol) {
342 if (is_terminal(grammar, symbol)) {
343 event_q.push(Event(symbol, symbol));
345 for (
int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
346 int prod_i = at(lhs2prods, symbol, i);
347 const Grammar::Production& prod = at(grammar.productions, prod_i);
348 if (prod.rhs.empty()) {
349 event_q.push(Event(FIRST_NULL, symbol));
355 Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
356 Graph dependees2dependers = make_transpose(dependers2dependees);
357 while (!event_q.empty()) {
358 Event
event = event_q.front(); event_q.pop();
359 int added_symb =
event.added_symbol;
360 int dependee =
event.dependee;
361 FirstSet& dependee_firsts = at(first_sets, dependee);
363 if (dependee_firsts.count(added_symb))
continue;
364 dependee_firsts.insert(added_symb);
365 for (
int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
366 int depender = at(dependees2dependers, dependee, i);
368 const FirstSet& depender_firsts = at(first_sets, depender);
369 for (
int j = 0; j < count_edges(lhs2prods, depender); ++j) {
370 int prod_i = at(lhs2prods, depender, j);
371 const Grammar::Production& prod = at(grammar.productions, prod_i);
372 FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
373 for (FirstSet::iterator it = rhs_first_set.begin();
374 it != rhs_first_set.end(); ++it) {
375 int rhs_first_symb = *it;
376 if (!depender_firsts.count(rhs_first_symb)) {
377 event_q.push(Event(rhs_first_symb, depender));
384 for (
int symb = 0; symb < nsymbols; ++symb) {
385 const std::string& symb_name = at(grammar.symbol_names, symb);
386 std::cerr <<
"FIRST(" << symb_name <<
") = {";
387 const FirstSet& c = at(first_sets, symb);
388 for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
389 if (it != c.begin()) std::cerr <<
", ";
390 int first_symb = *it;
391 if (first_symb == FIRST_NULL) {
394 const std::string& first_name = at(grammar.symbol_names, first_symb);
395 std::cerr << first_name;
405 StateConfigs form_state_configs(StatesInProgress
const& states) {
407 for (
int i = 0; i < Teuchos::size(states); ++i) {
408 StateInProgress& state = *at(states, i);
409 for (
int j = 0; j < Teuchos::size(state.configs); ++j) {
410 out.push_back(StateConfig(i, j));
416 Graph form_states_to_state_configs(StateConfigs
const& scs,
417 StatesInProgress
const& states) {
418 Graph out = make_graph_with_nnodes(Teuchos::size(states));
419 for (
int i = 0; i < Teuchos::size(scs); ++i) {
420 const StateConfig& sc = at(scs, i);
421 at(out, sc.state).push_back(i);
426 static std::string escape_dot(std::string
const& s) {
428 for (std::size_t i = 0; i < s.size(); ++i) {
430 if (c ==
'\\' || c ==
'|' || c ==
'\"' || c ==
'<' || c ==
'>' || c ==
'{' || c ==
'}') {
433 }
else if (c ==
'.') {
446 std::string
const& filepath,
447 ParserInProgress
const& pip,
451 const StatesInProgress& sips = pip.states;
452 const Configs& cs = pip.configs;
453 GrammarPtr grammar = pip.grammar;
454 const Graph& states2scs = pip.states2state_configs;
455 os <<
"writing GraphViz file \"" << filepath <<
"\"\n";
456 os <<
"process with:\n";
457 os <<
" dot -Tpdf -o \"" << filepath <<
".pdf\" \"" << filepath <<
"\"\n";
458 std::ofstream file(filepath.c_str());
460 file <<
"digraph {\n";
462 file <<
"rankdir = \"LR\"\n";
464 for (
int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
465 const StateInProgress& state = *at(sips, s_i);
466 file << s_i <<
" [\n";
467 file <<
"label = \"";
468 file <<
"State " << s_i <<
"\\l";
469 for (
int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
470 int c_i = at(state.configs, cis_i);
471 const Config& config = at(cs, c_i);
472 const Grammar::Production& prod = at(grammar->productions, config.production);
473 int sc_i = at(states2scs, s_i, cis_i);
474 file << sc_i <<
": ";
475 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
476 file << escape_dot(lhs_name) <<
" ::= ";
477 for (
int rhs_i = 0; rhs_i <= Teuchos::size(prod.rhs); ++rhs_i) {
478 if (rhs_i == config.dot) file <<
" .";
479 if (rhs_i < Teuchos::size(prod.rhs)) {
480 int rhs_symb = at(prod.rhs, rhs_i);
481 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
482 file <<
" " << escape_dot(rhs_symb_name);
485 if (config.dot == Teuchos::size(prod.rhs)) {
488 for (
int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
489 const ActionInProgress& action = at(state.actions, a_i);
490 if (action.action.kind == ACTION_REDUCE &&
491 action.action.production == config.production) {
493 const Context& ac = action.context;
494 for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
495 if (it != ac.begin()) file <<
", ";
497 const std::string& symb_name = at(grammar->symbol_names, symb);
498 file << escape_dot(symb_name);
503 "BUG: missing reduce action in state " << s_i <<
" !!!\n");
509 file <<
"shape = \"record\"\n";
511 for (
int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
512 const ActionInProgress& action = at(state.actions, a_i);
513 if (action.action.kind == ACTION_SHIFT) {
514 int symb = *(action.context.begin());
515 const std::string& symb_name = at(grammar->symbol_names, symb);
516 int next = action.action.next_state;
517 file << s_i <<
" -> " << next <<
" [\n";
518 file <<
"label = \"" << escape_dot(symb_name) <<
"\"\n";
526 static Graph make_immediate_predecessor_graph(
527 StateConfigs
const& scs,
528 StatesInProgress
const& states,
529 Graph
const& states2scs,
531 GrammarPtr grammar) {
532 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
533 for (
int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
534 const StateInProgress& state = *at(states, s_i);
535 for (
int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
536 int config_i = at(state.configs, cis_i);
537 const Config& config = at(cs, config_i);
538 const Grammar::Production& prod = at(grammar->productions, config.production);
539 int dot = config.dot;
540 if (dot == Teuchos::size(prod.rhs))
continue;
541 int s = at(prod.rhs, dot);
542 if (is_terminal(*grammar, s))
continue;
543 for (
int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
544 int config_j = at(state.configs, cis_j);
545 const Config& config2 = at(cs, config_j);
546 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
547 if (prod2.lhs == s) {
548 int sc_i = at(states2scs, s_i, cis_i);
549 int sc_j = at(states2scs, s_i, cis_j);
550 add_edge(out, sc_j, sc_i);
558 static Graph find_transition_predecessors(
559 StateConfigs
const& scs,
560 StatesInProgress
const& states,
561 Graph
const& states2scs,
563 GrammarPtr grammar) {
564 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
565 for (
int state_i = 0; state_i < Teuchos::size(states); ++state_i) {
566 const StateInProgress& state = *at(states, state_i);
567 for (
int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
568 const ActionInProgress& action = at(state.actions, a_i);
569 if (action.action.kind != ACTION_SHIFT)
continue;
571 int symbol = *(action.context.begin());
572 int state_j = action.action.next_state;
573 const StateInProgress& state2 = *at(states, state_j);
574 for (
int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
575 int config_i = at(state.configs, cis_i);
576 const Config& config = at(cs, config_i);
577 for (
int cis_j = 0; cis_j < Teuchos::size(state2.configs); ++cis_j) {
578 int config_j = at(state2.configs, cis_j);
579 const Config& config2 = at(cs, config_j);
580 if (config.production == config2.production &&
581 config.dot + 1 == config2.dot) {
582 const Grammar::Production& prod = at(grammar->productions, config.production);
583 int rhs_symbol = at(prod.rhs, config.dot);
584 if (rhs_symbol == symbol) {
585 int sc_i = at(states2scs, state_i, cis_i);
586 int sc_j = at(states2scs, state_j, cis_j);
587 add_edge(out, sc_j, sc_i);
597 static Graph make_originator_graph(
598 StateConfigs
const& scs,
599 StatesInProgress
const& states,
600 Graph
const& states2scs,
602 GrammarPtr grammar) {
603 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
604 Graph ipg = make_immediate_predecessor_graph(
605 scs, states, states2scs, cs, grammar);
606 Graph tpg = find_transition_predecessors(
607 scs, states, states2scs, cs, grammar);
608 for (
int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
609 std::set<int> originators;
617 while (!tpq.empty()) {
618 int tpp = tpq.front(); tpq.pop();
619 for (
int i = 0; i < count_edges(tpg, tpp); ++i) {
620 int tpc = at(tpg, tpp, i);
621 if (tps.count(tpc))
continue;
625 for (
int i = 0; i < count_edges(ipg, tpp); ++i) {
626 int ip_i = at(ipg, tpp, i);
627 originators.insert(ip_i);
630 at(out, sc_i).assign(originators.begin(), originators.end());
635 static std::vector<int> get_follow_string(
637 StateConfigs
const& scs,
638 StatesInProgress
const& states,
640 GrammarPtr grammar) {
641 const StateConfig& sc = at(scs, sc_addr);
642 const StateInProgress& state = *at(states, sc.state);
643 int config_i = at(state.configs, sc.config_in_state);
644 const Config& config = at(cs, config_i);
645 const Grammar::Production& prod = at(grammar->productions, config.production);
646 int out_size = Teuchos::size(prod.rhs) - (config.dot + 1);
647 std::vector<int> out;
649 if (out_size < 1)
return out;
650 reserve(out, out_size);
651 for (
int i = config.dot + 1; i < Teuchos::size(prod.rhs); ++i) {
652 out.push_back(at(prod.rhs, i));
657 static void print_string(std::vector<int>
const& str, GrammarPtr grammar) {
659 for (
int i = 0; i < Teuchos::size(str); ++i) {
660 int symb = at(str, i);
661 const std::string& symb_name = at(grammar->symbol_names, symb);
662 std::cerr << symb_name;
667 static bool has_non_null_terminal_descendant(FirstSet
const& first_set) {
668 if (first_set.empty())
return false;
669 if (first_set.size() > 1)
return true;
670 return *(first_set.begin()) != FIRST_NULL;
673 static Context get_contexts(FirstSet first_set) {
674 FirstSet::iterator it = first_set.find(FIRST_NULL);
675 if (it != first_set.end()) first_set.erase(it);
679 enum { MARKER = -433 };
680 enum { ZERO = -100 };
682 static void print_stack(std::vector<int>
const& stack) {
683 for (
int i = 0; i < Teuchos::size(stack); ++i) {
684 int symb = at(stack, i);
685 if (symb == MARKER) std::cerr <<
" M";
686 else if (symb == ZERO) std::cerr <<
" Z";
687 else std::cerr <<
" " << symb;
692 static void move_markers(
693 std::vector<int>& lane,
694 std::vector<int>& in_lane,
699 int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
702 for (
int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
703 if (at(lane, i) == MARKER) {
710 top_addr = lane.back();
711 lane.resize(lane.size() - 1);
713 for (
int i = 0; i < r; ++i) lane.push_back(MARKER);
715 lane.push_back(top_addr);
716 at(in_lane, top_addr) = Teuchos::size(lane) - 1;
720 typedef std::vector<Context> Contexts;
722 static void context_adding_routine(
723 std::vector<int>
const& lane,
725 Context& contexts_generated,
731 std::cerr <<
" CONTEXT ADDING ROUTINE\n";
732 std::cerr <<
" LANE:";
734 std::cerr <<
" $\\zeta$-POINTER = " << zeta_pointer <<
'\n';
736 for (
int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
737 int v_r = at(lane, r);
738 if (verbose) std::cerr <<
" r = " << r <<
", $v_r$ = ";
741 if (v_r == MARKER) std::cerr <<
"marker\n";
742 else if (v_r == ZERO) std::cerr <<
"zero\n";
746 int tau_r_addr = v_r;
748 std::cerr <<
"$\\tau_r$ = " << tau_r_addr <<
'\n';
749 std::cerr <<
" CONTEXTS_GENERATED = ";
750 print_set(contexts_generated, *grammar);
751 std::cerr <<
"\n CONTEXTS_$\\tau_r$ = ";
752 print_set(at(contexts, tau_r_addr), *grammar);
753 std::cerr <<
"\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
755 subtract_from(contexts_generated, at(contexts, tau_r_addr));
757 std::cerr <<
"\n CONTEXTS_GENERATED = ";
758 print_set(contexts_generated, *grammar);
759 std::cerr <<
"\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
761 unite_with(at(contexts, tau_r_addr), contexts_generated);
763 std::cerr <<
"\n CONTEXTS_$\\tau_r$ = ";
764 print_set(at(contexts, tau_r_addr), *grammar);
770 static void deal_with_tests_failed(
771 int& num_originators_failed,
772 int& first_originator_failed,
775 std::vector<int>& lane,
776 std::vector<int>& in_lane,
778 std::vector<int>& stack,
781 if (verbose) std::cerr <<
" Dealing with test failures\n";
782 if (num_originators_failed == 0) {
783 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the first originator of " << zeta_addr <<
" to fail the tests\n";
784 first_originator_failed = zeta_prime_addr;
785 if (verbose) std::cerr <<
" pushing " << zeta_prime_addr <<
" onto LANE:\n ";
786 lane.push_back(zeta_prime_addr);
787 if (verbose) print_stack(lane);
788 at(in_lane, zeta_prime_addr) = Teuchos::size(lane) - 1;
789 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") <- ON\n";
791 if (verbose) std::cerr <<
" TESTS_FAILED <- ON\n";
792 }
else if (num_originators_failed == 1) {
793 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the second originator of " << zeta_addr <<
" to fail the tests\n";
794 int zeta_double_prime_addr = first_originator_failed;
795 if (verbose) std::cerr <<
" the first was " << zeta_double_prime_addr <<
'\n';
796 TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 1) == zeta_double_prime_addr);
798 if (verbose) std::cerr <<
" pop LANE, push {marker, " << zeta_double_prime_addr <<
"} onto it:\n ";
799 lane.resize(lane.size() - 1);
800 lane.push_back(MARKER);
801 lane.push_back(zeta_double_prime_addr);
802 if (verbose) print_stack(lane);
803 if (verbose) std::cerr <<
" push {marker, " << zeta_prime_addr <<
"} onto STACK:\n ";
804 stack.push_back(MARKER);
805 stack.push_back(zeta_prime_addr);
806 if (verbose) print_stack(stack);
808 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the third or later originator of " << zeta_addr <<
" to fail the tests\n";
809 if (verbose) std::cerr <<
" pushing " << zeta_prime_addr <<
" onto STACK:\n ";
810 stack.push_back(zeta_prime_addr);
811 if (verbose) print_stack(stack);
813 ++num_originators_failed;
816 static void heuristic_propagation_of_context_sets(
819 std::vector<bool>& complete,
820 StateConfigs
const& scs,
821 StatesInProgress
const& states,
822 Graph
const& states2scs,
826 const StateConfig& tau = at(scs, tau_addr);
827 const StateInProgress& state = *at(states, tau.state);
828 int config_i = at(state.configs, tau.config_in_state);
829 const Config& config = at(cs, config_i);
830 if (config.dot != 0)
return;
831 const Grammar::Production& prod = at(grammar->productions, config.production);
832 for (
int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
833 int config_j = at(state.configs, cis_j);
834 if (config_j == config_i)
continue;
835 const Config& config2 = at(cs, config_j);
836 if (config2.dot != 0)
continue;
837 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
838 if (prod.lhs != prod2.lhs)
continue;
839 int tau_prime_addr = at(states2scs, tau.state, cis_j);
840 at(contexts, tau_prime_addr) = at(contexts, tau_addr);
841 at(complete, tau_prime_addr) =
true;
847 static void compute_context_set(
850 std::vector<bool>& complete,
851 StateConfigs
const& scs,
852 Graph
const& originator_graph,
853 StatesInProgress
const& states,
854 Graph
const& states2scs,
856 std::vector<FirstSet>
const& first_sets,
859 ParserInProgress
const& pip
861 if (verbose) std::cerr <<
"Computing context set for $\\zeta_j$ = " << zeta_j_addr <<
"...\n";
862 if (verbose) std::cerr <<
"BEGIN PROGRAM\n";
863 if (at(complete, zeta_j_addr)) {
864 if (verbose) std::cerr << zeta_j_addr <<
" was already complete!\nEND PROGRAM\n\n";
867 std::vector<int> stack;
869 std::vector<int> lane;
870 std::vector<int> in_lane = make_vector<int>(Teuchos::size(scs), -1);
871 lane.push_back(zeta_j_addr);
872 at(in_lane, zeta_j_addr) = Teuchos::size(lane) - 1;
873 bool tests_failed =
false;
874 Context contexts_generated;
876 std::cerr <<
"Initial LANE:";
881 int zeta_addr = lane.back();
883 std::cerr <<
"Top of LANE is $\\zeta$ = " << zeta_addr <<
'\n';
885 int zeta_pointer = Teuchos::size(lane) - 1;
886 if (verbose) std::cerr <<
"$\\zeta$-POINTER <- " << zeta_pointer <<
'\n';
887 int num_originators_failed = 0;
888 int first_originator_failed = -1;
889 if (verbose) std::cerr <<
"DO_LOOP:\n";
891 for (
int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
892 int zeta_prime_addr = at(originator_graph, zeta_addr, i);
894 std::cerr <<
"Next originator of $\\zeta$ = " << zeta_addr <<
" is $\\zeta'$ = " << zeta_prime_addr <<
'\n';
896 std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
898 std::cerr <<
" FOLLOW string of $\\zeta'$ = " << zeta_prime_addr <<
" is ";
899 print_string(gamma, grammar);
902 FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
904 std::cerr <<
" FIRST set of ";
905 print_string(gamma, grammar);
907 print_set(gamma_first, *grammar);
910 if (has_non_null_terminal_descendant(gamma_first)) {
913 print_string(gamma, grammar);
914 std::cerr <<
" has a non-null terminal descendant\n";
916 contexts_generated = get_contexts(gamma_first);
918 std::cerr <<
" CONTEXTS_GENERATED = ";
919 print_set(contexts_generated, *grammar);
920 std::cerr <<
" = 1-heads of non-null descendants of ";
921 print_string(gamma, grammar);
924 if (gamma_first.count(FIRST_NULL)) {
927 print_string(gamma, grammar);
928 std::cerr <<
" has a null terminal descendant\n";
930 if (at(complete, zeta_prime_addr)) {
931 unite_with(contexts_generated, at(contexts, zeta_prime_addr));
932 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
934 }
else if (-1 == at(in_lane, zeta_prime_addr)) {
935 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
938 deal_with_tests_failed(num_originators_failed, first_originator_failed,
939 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
942 print_graphviz(
"ambiguous.dot", pip,
true, std::cerr);
943 std::stringstream ss;
944 ss <<
"error: grammar is ambiguous.\n";
945 ss <<
"zeta_j is " << zeta_j_addr <<
", zeta is " << zeta_addr <<
", and zeta prime is " << zeta_prime_addr <<
'\n';
946 throw ParserBuildFail(ss.str());
949 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
955 print_string(gamma, grammar);
956 std::cerr <<
" does not have a non-null terminal descendant\n";
958 if (at(complete, zeta_prime_addr)) {
959 if (verbose) std::cerr <<
" COMPLETE(" << zeta_prime_addr <<
") is ON\n";
960 contexts_generated = at(contexts, zeta_prime_addr);
961 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
964 if (verbose) std::cerr <<
" COMPLETE(" << zeta_prime_addr <<
") is OFF\n";
965 if (-1 != at(in_lane, zeta_prime_addr)) {
966 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") is ON\n";
967 move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
968 contexts_generated = at(contexts, zeta_prime_addr);
969 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
972 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") is OFF\n";
973 deal_with_tests_failed(num_originators_failed, first_originator_failed,
974 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
980 if (verbose) std::cerr <<
"END DO_LOOP\n";
983 std::cerr <<
" TESTS_FAILED was on, turning it off and going to next configuration\n";
985 tests_failed =
false;
988 bool keep_lane_popping =
true;
989 if (verbose) std::cerr <<
" Start LANE popping\n";
990 while (keep_lane_popping) {
993 std::cerr <<
" LANE:";
996 if (at(lane, Teuchos::size(lane) - 1) == MARKER) {
997 if (verbose) std::cerr <<
" Top of LANE is a marker\n";
998 if (verbose) std::cerr <<
" Start STACK popping\n";
1002 std::cerr <<
" STACK:";
1004 std::cerr <<
" LANE:";
1007 if (stack.back() == MARKER) {
1008 if (verbose) std::cerr <<
" Top of STACK is a marker, pop STACK and LANE\n";
1009 resize(stack, Teuchos::size(stack) - 1);
1010 resize(lane, Teuchos::size(lane) - 1);
1012 }
else if (at(complete, stack.back())) {
1013 if (verbose) std::cerr <<
" Top of STACK is has COMPLETE flag, pop STACK\n";
1014 resize(stack, Teuchos::size(stack) - 1);
1017 int addr = stack.back();
1018 if (verbose) std::cerr <<
" Top of STACK is " << addr <<
", pop STACK\n";
1019 resize(stack, Teuchos::size(stack) - 1);
1020 if (verbose) std::cerr <<
" Push " << addr <<
" onto LANE\n";
1021 lane.push_back(addr);
1022 if (verbose) std::cerr <<
" IN_LANE(" << addr <<
") <- ON\n";
1023 at(in_lane, addr) = Teuchos::size(lane) - 1;
1024 keep_lane_popping =
false;
1028 }
else if (at(lane, Teuchos::size(lane) - 1) == ZERO) {
1029 if (verbose) std::cerr <<
" Top of LANE is a zero\n";
1030 if (verbose) std::cerr <<
" Pop LANE\n";
1031 resize(lane, Teuchos::size(lane) - 1);
1034 int tau_addr = lane.back();
1035 if (verbose) std::cerr <<
" Top of LANE is $\\tau$ = " << tau_addr <<
"\n";
1036 at(in_lane, tau_addr) = -1;
1037 if (verbose) std::cerr <<
" IN_LANE(" << tau_addr <<
") <- OFF\n";
1038 at(complete, tau_addr) =
true;
1039 if (verbose) std::cerr <<
" COMPLETE(" << tau_addr <<
") <- ON\n";
1040 if (verbose) std::cerr <<
" HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1041 heuristic_propagation_of_context_sets(
1042 tau_addr, contexts, complete,
1043 scs, states, states2scs, cs, grammar);
1044 if (Teuchos::size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1045 if (verbose) std::cerr <<
"END PROGRAM\n\n";
1048 if (verbose) std::cerr <<
" Pop LANE\n";
1049 resize(lane, Teuchos::size(lane) - 1);
1056 static std::vector<bool> determine_adequate_states(
1057 StatesInProgress
const& states,
1061 std::vector<bool> out = make_vector<bool>(Teuchos::size(states));
1062 for (
int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1063 const StateInProgress& state = *at(states, s_i);
1064 bool state_is_adequate =
true;
1065 for (
int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1066 const ActionInProgress& action = at(state.actions, a_i);
1067 if (action.action.kind == ACTION_SHIFT &&
1068 is_nonterminal(*grammar, *(action.context.begin()))) {
1071 for (
int a_j = a_i + 1; a_j < Teuchos::size(state.actions); ++a_j) {
1072 const ActionInProgress& action2 = at(state.actions, a_j);
1073 if (action2.action.kind == ACTION_SHIFT &&
1074 is_nonterminal(*grammar, *(action2.context.begin()))) {
1077 if (intersects(action2.context, action.context)) {
1079 const ActionInProgress* ap1 = &action;
1080 const ActionInProgress* ap2 = &action2;
1081 if (ap1->action.kind == ACTION_SHIFT) {
1082 std::swap(ap1, ap2);
1085 os <<
"shift-reduce conflict in state " << s_i <<
":\n";
1087 const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1088 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1089 os << lhs_name <<
" ::=";
1090 for (
int rhs_i = 0; rhs_i < Teuchos::size(prod.rhs); ++rhs_i) {
1091 int rhs_symb = at(prod.rhs, rhs_i);
1092 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1093 os <<
" " << rhs_symb_name;
1095 int shift_symb = *(ap2->context.begin());
1096 const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1097 os <<
"\nshift " << shift_name <<
'\n';
1099 state_is_adequate =
false;
1103 if (!state_is_adequate)
break;
1105 at(out, s_i) = state_is_adequate;
1107 if (verbose) os <<
'\n';
1111 ParserInProgress draft_lalr1_parser(GrammarPtr grammar,
bool verbose) {
1112 ParserInProgress out;
1113 Configs& cs = out.configs;
1114 StatesInProgress& states = out.states;
1115 StateConfigs& scs = out.state_configs;
1116 Graph& states2scs = out.states2state_configs;
1117 out.grammar = grammar;
1118 cs = make_configs(*grammar);
1119 Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1120 if (verbose) std::cerr <<
"Building LR(0) parser\n";
1121 states = make_lr0_parser(cs, *grammar, lhs2cs);
1122 scs = form_state_configs(states);
1123 states2scs = form_states_to_state_configs(scs, states);
1124 if (verbose) print_graphviz(
"lr0.dot", out,
true, std::cerr);
1125 if (verbose) std::cerr <<
"Checking adequacy of LR(0) machine\n";
1126 std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1128 if (*(std::min_element(adequate.begin(), adequate.end()))) {
1129 if (verbose) std::cerr <<
"The grammar is LR(0)!\n";
1132 std::vector<bool> complete = make_vector<bool>(Teuchos::size(scs),
false);
1133 std::vector<Context> contexts = make_vector<Context>(Teuchos::size(scs));
1134 int accept_prod_i = get_accept_production(*grammar);
1137 for (
int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
1138 StateConfig& sc = at(scs, sc_i);
1139 StateInProgress& state = *at(states, sc.state);
1140 int config_i = at(state.configs, sc.config_in_state);
1141 Config& config = at(cs, config_i);
1142 if (config.production == accept_prod_i) {
1143 at(complete, sc_i) =
true;
1144 at(contexts, sc_i).insert(get_end_terminal(*grammar));
1147 Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1148 if (verbose) std::cerr <<
"Originator Graph:\n";
1149 if (verbose) std::cerr << og <<
'\n';
1150 std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1153 for (
int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1154 if (at(adequate, s_i))
continue;
1155 StateInProgress& state = *at(states, s_i);
1156 for (
int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1157 int config_i = at(state.configs, cis_i);
1158 const Config& config = at(cs, config_i);
1159 const Grammar::Production& prod = at(grammar->productions, config.production);
1160 if (config.dot != Teuchos::size(prod.rhs))
continue;
1161 int zeta_j_addr = at(states2scs, s_i, cis_i);
1162 compute_context_set(zeta_j_addr, contexts, complete, scs,
1163 og, states, states2scs, cs, first_sets, grammar, verbose, out);
1168 for (
int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1169 StateInProgress& state = *at(states, s_i);
1170 for (
int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1171 int sc_i = at(states2scs, s_i, cis_i);
1172 if (!at(complete, sc_i))
continue;
1173 int config_i = at(state.configs, cis_i);
1174 Config& config = at(cs, config_i);
1175 const Grammar::Production& prod = at(grammar->productions, config.production);
1176 if (config.dot != Teuchos::size(prod.rhs))
continue;
1177 for (
int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1178 ActionInProgress& action = at(state.actions, a_i);
1179 if (action.action.kind == ACTION_REDUCE &&
1180 action.action.production == config.production) {
1181 action.context = at(contexts, sc_i);
1186 if (verbose) std::cerr <<
"Checking adequacy of LALR(1) machine\n";
1187 adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1188 if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1189 std::stringstream ss;
1190 ss <<
"error: The grammar is not LALR(1).\n";
1191 determine_adequate_states(states, grammar,
true, ss);
1192 print_graphviz(
"error.dot", out,
true, ss);
1193 std::string s = ss.str();
1194 throw ParserBuildFail(s);
1196 if (verbose) std::cerr <<
"The grammar is LALR(1)!\n";
1197 if (verbose) print_graphviz(
"lalr1.dot", out,
true, std::cerr);
1201 Parser accept_parser(ParserInProgress
const& pip) {
1202 const StatesInProgress& sips = pip.states;
1203 GrammarPtr grammar = pip.grammar;
1204 Parser out(grammar, Teuchos::size(sips));
1205 for (
int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1208 for (
int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1209 const StateInProgress& sip = *at(sips, s_i);
1210 for (
int a_i = 0; a_i < Teuchos::size(sip.actions); ++a_i) {
1211 const ActionInProgress& action = at(sip.actions, a_i);
1212 if (action.action.kind == ACTION_SHIFT &&
1213 is_nonterminal(*grammar, *(action.context.begin()))) {
1214 int nt = as_nonterminal(*grammar, *(action.context.begin()));
1215 add_nonterminal_action(out, s_i, nt, action.action.next_state);
1217 for (Context::const_iterator it = action.context.begin();
1218 it != action.context.end(); ++it) {
1221 add_terminal_action(out, s_i, terminal, action.action);
1229 ParserBuildFail::ParserBuildFail(
const std::string& msg):
1230 std::invalid_argument(msg) {
1234 ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1235 return accept_parser(pip);
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Ptr< T > ptr(T *p)
Create a pointer to an object from a raw pointer.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
void swap(Teuchos::any &a, Teuchos::any &b)
Special swap for other code to find via Argument Dependent Lookup.