Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_make_lalr1_parser.cpp
1 #include "Teuchos_make_lalr1_parser.hpp"
2 
3 #include <map>
4 #include <iostream>
5 #include <cstdlib>
6 #include <queue>
7 #include <algorithm>
8 #include <fstream>
9 
10 #include "Teuchos_vector.hpp"
11 #include "Teuchos_Graph.hpp"
12 #include "Teuchos_stack.hpp"
13 #include "Teuchos_set.hpp"
14 
15 namespace Teuchos {
16 
17 /* The LALR(1) parser construction implemented here is based on
18  David Pager's work:
19 
20  Pager, David.
21  "The lane-tracing algorithm for constructing LR (k) parsers
22  and ways of enhancing its efficiency."
23  Information Sciences 12.1 (1977): 19-42.
24 
25  The identifiers used in this code are consistent with the terminology
26  in that paper, except where we bring in FIRST set terminology, which
27  Pager doesn't go into detail about. */
28 
29 Config::Config(int p, int d):
30  production(p),
31  dot(d)
32 {
33 }
34 
35 StateConfig::StateConfig(int s, int cis):
36  state(s),
37  config_in_state(cis)
38 {
39 }
40 
41 void swap(StateInProgress& a, StateInProgress& b) {
42  using std::swap;
43  swap(a.configs, b.configs);
44  swap(a.actions, b.actions);
45 }
46 
47 // expand the grammar productions into marked productions
48 static Configs make_configs(Grammar const& g) {
49  Configs configs;
50  for (int i = 0; i < size(g.productions); ++i) {
51  const Grammar::Production& production = at(g.productions, i);
52  for (int j = 0; j <= size(production.rhs); ++j) {
53  configs.push_back(Config(i,j));
54  }
55  }
56  return configs;
57 }
58 
59 static Graph get_left_hand_sides_to_start_configs(
60  Configs const& cs, Grammar const& grammar) {
61  Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
62  for (int c_i = 0; c_i < size(cs); ++c_i) {
63  const Config& c = at(cs, c_i);
64  if (c.dot > 0) continue;
65  int p_i = c.production;
66  const Grammar::Production& p = at(grammar.productions, p_i);
67  add_edge(lhs2sc, p.lhs, c_i);
68  }
69  return lhs2sc;
70 }
71 
72 struct StateCompare {
73  typedef StateInProgress const* Value;
74  bool operator()(Value const& a, Value const& b) const {
75  return a->configs < b->configs;
76  }
77 };
78 
79 typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
80 
81 static void close(StateInProgress& state,
82  Configs const& cs, Grammar const& grammar,
83  Graph const& lhs2sc) {
84  std::queue<int> config_q;
85  std::set<int> config_set;
86  for (std::vector<int>::const_iterator it = state.configs.begin();
87  it != state.configs.end(); ++it) {
88  int config_i = *it;
89  config_q.push(config_i);
90  TEUCHOS_ASSERT(!config_set.count(config_i));
91  config_set.insert(config_i);
92  }
93  while (!config_q.empty()) {
94  int config_i = config_q.front(); config_q.pop();
95  const Config& config = at(cs, config_i);
96  int prod_i = config.production;
97  const Grammar::Production& prod = at(grammar.productions, prod_i);
98  if (config.dot == size(prod.rhs)) continue;
99  int symbol_after_dot = at(prod.rhs, config.dot);
100  if (is_terminal(grammar, symbol_after_dot)) continue;
101  const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
102  for (NodeEdges::const_iterator it = edges.begin();
103  it != edges.end(); ++it) {
104  int sc = *it;
105  if (!config_set.count(sc)) {
106  config_set.insert(sc);
107  config_q.push(sc);
108  }
109  }
110  }
111  state.configs.assign(config_set.begin(), config_set.end());
112 }
113 
114 static void add_back(StatesInProgress& sips, StateInProgress& sip) {
115  using std::swap;
116  StateInProgressPtr ptr(new StateInProgress());
117  swap(*ptr, sip);
118  sips.push_back(ptr);
119 }
120 
121 static void add_reduction_actions(StatesInProgress& states,
122  Configs const& cs, Grammar const& grammar) {
123  for (StatesInProgress::iterator it = states.begin();
124  it != states.end(); ++it) {
125  StateInProgressPtr& state_uptr = *it;
126  StateInProgress& state = *state_uptr;
127  for (std::vector<int>::const_iterator it2 = state.configs.begin();
128  it2 != state.configs.end(); ++it2) {
129  int config_i = *it2;
130  const Config& config = at(cs, config_i);
131  int prod_i = config.production;
132  const Grammar::Production& prod = at(grammar.productions, prod_i);
133  if (config.dot != size(prod.rhs)) continue;
134  ActionInProgress reduction;
135  reduction.action.kind = ACTION_REDUCE;
136  reduction.action.production = config.production;
137  state.actions.push_back(reduction);
138  }
139  }
140 }
141 
142 static void set_lr0_contexts(
143  StatesInProgress& states,
144  Grammar const& grammar) {
145  for (StatesInProgress::iterator it = states.begin();
146  it != states.end(); ++it) {
147  StateInProgressPtr& state_uptr = *it;
148  StateInProgress& state = *state_uptr;
149  for (StateInProgress::Actions::iterator it2 = state.actions.begin();
150  it2 != state.actions.end(); ++it2) {
151  ActionInProgress& action = *it2;
152  if (action.action.kind != ACTION_REDUCE) continue;
153  if (action.action.production == get_accept_production(grammar)) {
154  action.context.insert(get_end_terminal(grammar));
155  } else {
156  for (int terminal = 0; terminal < grammar.nterminals; ++terminal) {
157  action.context.insert(terminal);
158  }
159  }
160  }
161  }
162 }
163 
164 static StatesInProgress make_lr0_parser(Configs const& cs, Grammar const& grammar,
165  Graph const& lhs2sc) {
166  StatesInProgress states;
167  StatePtr2StateIndex state_ptrs2idxs;
168  std::queue<int> state_q;
169  { /* start state */
170  StateInProgress start_state;
171  int accept_nt = get_accept_nonterminal(grammar);
172  /* there should only be one start configuration for the accept symbol */
173  int start_accept_config = get_edges(lhs2sc, accept_nt).front();
174  start_state.configs.push_back(start_accept_config);
175  close(start_state, cs, grammar, lhs2sc);
176  int start_state_i = size(states);
177  state_q.push(start_state_i);
178  add_back(states, start_state);
179  state_ptrs2idxs[states.back().get()] = start_state_i;
180  }
181  while (!state_q.empty()) {
182  int state_i = state_q.front(); state_q.pop();
183  StateInProgress& state = *at(states, state_i);
184  std::set<int> transition_symbols;
185  for (std::vector<int>::const_iterator it = state.configs.begin();
186  it != state.configs.end(); ++it) {
187  int config_i = *it;
188  const Config& config = at(cs, config_i);
189  int prod_i = config.production;
190  const Grammar::Production& prod = at(grammar.productions, prod_i);
191  if (config.dot == size(prod.rhs)) continue;
192  int symbol_after_dot = at(prod.rhs, config.dot);
193  transition_symbols.insert(symbol_after_dot);
194  }
195  for (std::set<int>::const_iterator it = transition_symbols.begin();
196  it != transition_symbols.end(); ++it) {
197  int transition_symbol = *it;
198  StateInProgress next_state;
199  for (std::vector<int>::const_iterator it2 = state.configs.begin();
200  it2 != state.configs.end(); ++it2) {
201  int config_i = *it2;
202  const Config& config = at(cs, config_i);
203  int prod_i = config.production;
204  const Grammar::Production& prod = at(grammar.productions, prod_i);
205  if (config.dot == size(prod.rhs)) continue;
206  int symbol_after_dot = at(prod.rhs, config.dot);
207  if (symbol_after_dot != transition_symbol) continue;
208  /* transition successor should just be the next index */
209  int next_config_i = config_i + 1;
210  next_state.configs.push_back(next_config_i);
211  }
212  close(next_state, cs, grammar, lhs2sc);
213  StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
214  int next_state_i;
215  if (it2 == state_ptrs2idxs.end()) {
216  next_state_i = size(states);
217  state_q.push(next_state_i);
218  add_back(states, next_state);
219  state_ptrs2idxs[states.back().get()] = next_state_i;
220  } else {
221  next_state_i = it2->second;
222  }
223  ActionInProgress transition;
224  transition.action.kind = ACTION_SHIFT;
225  transition.action.next_state = next_state_i;
226  transition.context.insert(transition_symbol);
227  state.actions.push_back(transition);
228  }
229  }
230  add_reduction_actions(states, cs, grammar);
231  set_lr0_contexts(states, grammar);
232  return states;
233 }
234 
235 static Graph get_productions_by_lhs(Grammar const& grammar) {
236  int nsymbols = grammar.nsymbols;
237  Graph lhs2prods = make_graph_with_nnodes(nsymbols);
238  for (int prod_i = 0; prod_i < size(grammar.productions); ++prod_i) {
239  const Grammar::Production& prod = at(grammar.productions, prod_i);
240  add_edge(lhs2prods, prod.lhs, prod_i);
241  }
242  return lhs2prods;
243 }
244 
245 /* compute a graph where symbols are graph nodes, and
246  there exists an edge (A, B) if B appears in the RHS of
247  any production in which A is the LHS */
248 static Graph get_symbol_graph(Grammar const& grammar, Graph const& lhs2prods) {
249  int nsymbols = grammar.nsymbols;
250  Graph out = make_graph_with_nnodes(nsymbols);
251  for (int lhs = 0; lhs < nsymbols; ++lhs) {
252  std::set<int> dependees;
253  for (int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
254  int prod_i = at(lhs2prods, lhs, i);
255  const Grammar::Production& prod = at(grammar.productions, prod_i);
256  for (int j = 0; j < size(prod.rhs); ++j) {
257  int rhs_symb = at(prod.rhs, j);
258  dependees.insert(rhs_symb);
259  }
260  }
261  at(out, lhs).assign(dependees.begin(), dependees.end());
262  }
263  return out;
264 }
265 
266 /* the "FIRST" set, i.e. the set of 1-heads of non-null terminal descendants of
267  some string.
268  As suggested by Westley Weimer here:
269  https://www.cs.virginia.edu/~weimer/2008-415/reading/FirstFollowLL.pdf
270  we will also use the FIRST set for determining whether the string has
271  a null terminal descendant, indicated by the prescence of a special
272  FIRST_NULL symbol in the FIRST set */
273 enum { FIRST_NULL = -425 };
274 typedef std::set<int> FirstSet;
275 
276 static void print_set(std::set<int> const& set, Grammar const& grammar) {
277  std::cerr << "{";
278  for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
279  if (it != set.begin()) std::cerr << ", ";
280  int symb = *it;
281  if (symb == FIRST_NULL) std::cerr << "null";
282  else {
283  const std::string& symb_name = at(grammar.symbol_names, symb);
284  if (symb_name == ",") std::cerr << "','";
285  else std::cerr << symb_name;
286  }
287  }
288  std::cerr << "}";
289 }
290 
291 static FirstSet get_first_set_of_string(std::vector<int> const& string,
292  std::vector<FirstSet> const& first_sets) {
293  FirstSet out;
294  /* walk the string, stop when any symbol is found that doesn't
295  have a null terminal descendant */
296  int i;
297  for (i = 0; i < size(string); ++i) {
298  int symbol = at(string, i);
299  bool has_null = false;
300  const FirstSet& first_set = at(first_sets, symbol);
301  for (FirstSet::const_iterator it = first_set.begin();
302  it != first_set.end(); ++it) {
303  int first_symbol = *it;
304  if (first_symbol == FIRST_NULL) has_null = true;
305  else out.insert(first_symbol);
306  }
307  if (!has_null) break;
308  }
309  if (i == size(string)) out.insert(FIRST_NULL);
310  return out;
311 }
312 
313 struct Event {
314  int added_symbol;
315  int dependee;
316  Event(int as, int d):
317  added_symbol(as),
318  dependee(d)
319  {}
320 };
321 
322 /* figure out the FIRST sets for each non-terminal in the grammar.
323  I couldn't find a super-efficient way to do this, so here is a
324  free-for-all event-driven implementation. */
325 static std::vector<FirstSet> compute_first_sets(Grammar const& grammar,
326  bool verbose) {
327  if (verbose) std::cerr << "computing FIRST sets...\n";
328  std::queue<Event> event_q;
329  int nsymbols = grammar.nsymbols;
330  std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
331  Graph lhs2prods = get_productions_by_lhs(grammar);
332  for (int symbol = 0; symbol < nsymbols; ++symbol) {
333  if (is_terminal(grammar, symbol)) {
334  event_q.push(Event(symbol, symbol));
335  } else {
336  for (int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
337  int prod_i = at(lhs2prods, symbol, i);
338  const Grammar::Production& prod = at(grammar.productions, prod_i);
339  if (prod.rhs.empty()) {
340  event_q.push(Event(FIRST_NULL, symbol));
341  break;
342  }
343  }
344  }
345  }
346  Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
347  Graph dependees2dependers = make_transpose(dependers2dependees);
348  while (!event_q.empty()) {
349  Event event = event_q.front(); event_q.pop();
350  int added_symb = event.added_symbol;
351  int dependee = event.dependee;
352  FirstSet& dependee_firsts = at(first_sets, dependee);
353  /* hopefully we don't get too many duplicate events piled up... */
354  if (dependee_firsts.count(added_symb)) continue;
355  dependee_firsts.insert(added_symb);
356  for (int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
357  int depender = at(dependees2dependers, dependee, i);
358  TEUCHOS_ASSERT(is_nonterminal(grammar, depender));
359  const FirstSet& depender_firsts = at(first_sets, depender);
360  for (int j = 0; j < count_edges(lhs2prods, depender); ++j) {
361  int prod_i = at(lhs2prods, depender, j);
362  const Grammar::Production& prod = at(grammar.productions, prod_i);
363  FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
364  for (FirstSet::iterator it = rhs_first_set.begin();
365  it != rhs_first_set.end(); ++it) {
366  int rhs_first_symb = *it;
367  if (!depender_firsts.count(rhs_first_symb)) {
368  event_q.push(Event(rhs_first_symb, depender));
369  }
370  }
371  }
372  }
373  }
374  if (verbose) {
375  for (int symb = 0; symb < nsymbols; ++symb) {
376  const std::string& symb_name = at(grammar.symbol_names, symb);
377  std::cerr << "FIRST(" << symb_name << ") = {";
378  const FirstSet& c = at(first_sets, symb);
379  for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
380  if (it != c.begin()) std::cerr << ", ";
381  int first_symb = *it;
382  if (first_symb == FIRST_NULL) {
383  std::cerr << "null";
384  } else {
385  const std::string& first_name = at(grammar.symbol_names, first_symb);
386  std::cerr << first_name;
387  }
388  }
389  std::cerr << "}\n";
390  }
391  std::cerr << '\n';
392  }
393  return first_sets;
394 }
395 
396 StateConfigs form_state_configs(StatesInProgress const& states) {
397  StateConfigs out;
398  for (int i = 0; i < size(states); ++i) {
399  StateInProgress& state = *at(states, i);
400  for (int j = 0; j < size(state.configs); ++j) {
401  out.push_back(StateConfig(i, j));
402  }
403  }
404  return out;
405 }
406 
407 Graph form_states_to_state_configs(StateConfigs const& scs,
408  StatesInProgress const& states) {
409  Graph out = make_graph_with_nnodes(size(states));
410  for (int i = 0; i < size(scs); ++i) {
411  const StateConfig& sc = at(scs, i);
412  at(out, sc.state).push_back(i);
413  }
414  return out;
415 }
416 
417 static std::string escape_dot(std::string const& s) {
418  std::string out;
419  for (std::size_t i = 0; i < s.size(); ++i) {
420  char c = s[i];
421  if (c == '\\' || c == '|' || c == '\"' || c == '<' || c == '>' || c == '{' || c == '}') {
422  out.push_back('\\');
423  out.push_back(c);
424  } else if (c == '.') {
425  out = "'";
426  out += s;
427  out += "'";
428  return out;
429  } else {
430  out.push_back(c);
431  }
432  }
433  return out;
434 }
435 
436 void print_graphviz(
437  std::string const& filepath,
438  ParserInProgress const& pip,
439  bool /* verbose */,
440  std::ostream& os
441  ) {
442  const StatesInProgress& sips = pip.states;
443  const Configs& cs = pip.configs;
444  GrammarPtr grammar = pip.grammar;
445  const Graph& states2scs = pip.states2state_configs;
446  os << "writing GraphViz file \"" << filepath << "\"\n";
447  os << "process with:\n";
448  os << " dot -Tpdf -o \"" << filepath << ".pdf\" \"" << filepath << "\"\n";
449  std::ofstream file(filepath.c_str());
450  TEUCHOS_ASSERT(file.is_open());
451  file << "digraph {\n";
452  file << "graph [\n";
453  file << "rankdir = \"LR\"\n";
454  file << "]\n";
455  for (int s_i = 0; s_i < size(sips); ++s_i) {
456  const StateInProgress& state = *at(sips, s_i);
457  file << s_i << " [\n";
458  file << "label = \"";
459  file << "State " << s_i << "\\l";
460  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
461  int c_i = at(state.configs, cis_i);
462  const Config& config = at(cs, c_i);
463  const Grammar::Production& prod = at(grammar->productions, config.production);
464  int sc_i = at(states2scs, s_i, cis_i);
465  file << sc_i << ": ";
466  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
467  file << escape_dot(lhs_name) << " ::= ";
468  for (int rhs_i = 0; rhs_i <= size(prod.rhs); ++rhs_i) {
469  if (rhs_i == config.dot) file << " .";
470  if (rhs_i < size(prod.rhs)) {
471  int rhs_symb = at(prod.rhs, rhs_i);
472  const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
473  file << " " << escape_dot(rhs_symb_name);
474  }
475  }
476  if (config.dot == size(prod.rhs)) {
477  file << ", \\{";
478  bool found = false;
479  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
480  const ActionInProgress& action = at(state.actions, a_i);
481  if (action.action.kind == ACTION_REDUCE &&
482  action.action.production == config.production) {
483  found = true;
484  const Context& ac = action.context;
485  for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
486  if (it != ac.begin()) file << ", ";
487  int symb = *it;
488  const std::string& symb_name = at(grammar->symbol_names, symb);
489  file << escape_dot(symb_name);
490  }
491  }
492  }
493  TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error,
494  "BUG: missing reduce action in state " << s_i << " !!!\n");
495  file << "\\}";
496  }
497  file << "\\l";
498  }
499  file << "\"\n";
500  file << "shape = \"record\"\n";
501  file << "]\n";
502  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
503  const ActionInProgress& action = at(state.actions, a_i);
504  if (action.action.kind == ACTION_SHIFT) {
505  int symb = *(action.context.begin());
506  const std::string& symb_name = at(grammar->symbol_names, symb);
507  int next = action.action.next_state;
508  file << s_i << " -> " << next << " [\n";
509  file << "label = \"" << escape_dot(symb_name) << "\"\n";
510  file << "]\n";
511  }
512  }
513  }
514  file << "}\n";
515 }
516 
517 static Graph make_immediate_predecessor_graph(
518  StateConfigs const& scs,
519  StatesInProgress const& states,
520  Graph const& states2scs,
521  Configs const& cs,
522  GrammarPtr grammar) {
523  Graph out = make_graph_with_nnodes(size(scs));
524  for (int s_i = 0; s_i < size(states); ++s_i) {
525  const StateInProgress& state = *at(states, s_i);
526  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
527  int config_i = at(state.configs, cis_i);
528  const Config& config = at(cs, config_i);
529  const Grammar::Production& prod = at(grammar->productions, config.production);
530  int dot = config.dot;
531  if (dot == size(prod.rhs)) continue;
532  int s = at(prod.rhs, dot);
533  if (is_terminal(*grammar, s)) continue;
534  for (int cis_j = 0; cis_j < size(state.configs); ++cis_j) {
535  int config_j = at(state.configs, cis_j);
536  const Config& config2 = at(cs, config_j);
537  const Grammar::Production& prod2 = at(grammar->productions, config2.production);
538  if (prod2.lhs == s) {
539  int sc_i = at(states2scs, s_i, cis_i);
540  int sc_j = at(states2scs, s_i, cis_j);
541  add_edge(out, sc_j, sc_i);
542  }
543  }
544  }
545  }
546  return out;
547 }
548 
549 static Graph find_transition_predecessors(
550  StateConfigs const& scs,
551  StatesInProgress const& states,
552  Graph const& states2scs,
553  Configs const& cs,
554  GrammarPtr grammar) {
555  Graph out = make_graph_with_nnodes(size(scs));
556  for (int state_i = 0; state_i < size(states); ++state_i) {
557  const StateInProgress& state = *at(states, state_i);
558  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
559  const ActionInProgress& action = at(state.actions, a_i);
560  if (action.action.kind != ACTION_SHIFT) continue;
561  TEUCHOS_ASSERT(action.context.size() == 1);
562  int symbol = *(action.context.begin());
563  int state_j = action.action.next_state;
564  const StateInProgress& state2 = *at(states, state_j);
565  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
566  int config_i = at(state.configs, cis_i);
567  const Config& config = at(cs, config_i);
568  for (int cis_j = 0; cis_j < size(state2.configs); ++cis_j) {
569  int config_j = at(state2.configs, cis_j);
570  const Config& config2 = at(cs, config_j);
571  if (config.production == config2.production &&
572  config.dot + 1 == config2.dot) {
573  const Grammar::Production& prod = at(grammar->productions, config.production);
574  int rhs_symbol = at(prod.rhs, config.dot);
575  if (rhs_symbol == symbol) {
576  int sc_i = at(states2scs, state_i, cis_i);
577  int sc_j = at(states2scs, state_j, cis_j);
578  add_edge(out, sc_j, sc_i);
579  }
580  }
581  }
582  }
583  }
584  }
585  return out;
586 }
587 
588 static Graph make_originator_graph(
589  StateConfigs const& scs,
590  StatesInProgress const& states,
591  Graph const& states2scs,
592  Configs const& cs,
593  GrammarPtr grammar) {
594  Graph out = make_graph_with_nnodes(size(scs));
595  Graph ipg = make_immediate_predecessor_graph(
596  scs, states, states2scs, cs, grammar);
597  Graph tpg = find_transition_predecessors(
598  scs, states, states2scs, cs, grammar);
599  for (int sc_i = 0; sc_i < size(scs); ++sc_i) {
600  std::set<int> originators;
601  /* breadth-first search through the Transition
602  Precessor Graph (tpg), followed by a single hop
603  along the Immediate Predecessor Graph (ipg) */
604  std::queue<int> tpq;
605  std::set<int> tps;
606  tpq.push(sc_i);
607  tps.insert(sc_i);
608  while (!tpq.empty()) {
609  int tpp = tpq.front(); tpq.pop();
610  for (int i = 0; i < count_edges(tpg, tpp); ++i) {
611  int tpc = at(tpg, tpp, i);
612  if (tps.count(tpc)) continue;
613  tpq.push(tpc);
614  tps.insert(tpc);
615  }
616  for (int i = 0; i < count_edges(ipg, tpp); ++i) {
617  int ip_i = at(ipg, tpp, i);
618  originators.insert(ip_i);
619  }
620  }
621  at(out, sc_i).assign(originators.begin(), originators.end());
622  }
623  return out;
624 }
625 
626 static std::vector<int> get_follow_string(
627  int sc_addr,
628  StateConfigs const& scs,
629  StatesInProgress const& states,
630  Configs const& cs,
631  GrammarPtr grammar) {
632  const StateConfig& sc = at(scs, sc_addr);
633  const StateInProgress& state = *at(states, sc.state);
634  int config_i = at(state.configs, sc.config_in_state);
635  const Config& config = at(cs, config_i);
636  const Grammar::Production& prod = at(grammar->productions, config.production);
637  int out_size = size(prod.rhs) - (config.dot + 1);
638  std::vector<int> out;
639  /* out_size can be negative */
640  if (out_size < 1) return out;
641  reserve(out, out_size);
642  for (int i = config.dot + 1; i < size(prod.rhs); ++i) {
643  out.push_back(at(prod.rhs, i));
644  }
645  return out;
646 }
647 
648 static void print_string(std::vector<int> const& str, GrammarPtr grammar) {
649  std::cerr << "\"";
650  for (int i = 0; i < size(str); ++i) {
651  int symb = at(str, i);
652  const std::string& symb_name = at(grammar->symbol_names, symb);
653  std::cerr << symb_name;
654  }
655  std::cerr << "\"";
656 }
657 
658 static bool has_non_null_terminal_descendant(FirstSet const& first_set) {
659  if (first_set.empty()) return false;
660  if (first_set.size() > 1) return true;
661  return *(first_set.begin()) != FIRST_NULL;
662 }
663 
664 static Context get_contexts(FirstSet first_set) {
665  FirstSet::iterator it = first_set.find(FIRST_NULL);
666  if (it != first_set.end()) first_set.erase(it);
667  return first_set;
668 }
669 
670 enum { MARKER = -433 };
671 enum { ZERO = -100 }; // actual zero is a valid index for us
672 
673 static void print_stack(std::vector<int> const& stack) {
674  for (int i = 0; i < size(stack); ++i) {
675  int symb = at(stack, i);
676  if (symb == MARKER) std::cerr << " M";
677  else if (symb == ZERO) std::cerr << " Z";
678  else std::cerr << " " << symb;
679  }
680  std::cerr << '\n';
681 }
682 
683 static void move_markers(
684  std::vector<int>& lane,
685  std::vector<int>& in_lane,
686  int zeta_prime_addr,
687  int zeta_pointer,
688  bool tests_failed
689  ) {
690  int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
691  TEUCHOS_ASSERT(loc_of_zeta_prime != -1);
692  int r = 0;
693  for (int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
694  if (at(lane, i) == MARKER) {
695  ++r;
696  at(lane, i) = ZERO;
697  }
698  }
699  int top_addr = -1;
700  if (tests_failed) {
701  top_addr = lane.back();
702  lane.resize(lane.size() - 1); // pop
703  }
704  for (int i = 0; i < r; ++i) lane.push_back(MARKER);
705  if (tests_failed) {
706  lane.push_back(top_addr);
707  at(in_lane, top_addr) = size(lane) - 1;
708  }
709 }
710 
711 typedef std::vector<Context> Contexts;
712 
713 static void context_adding_routine(
714  std::vector<int> const& lane,
715  int zeta_pointer,
716  Context& contexts_generated,
717  Contexts& contexts,
718  bool verbose,
719  GrammarPtr grammar
720  ) {
721  if (verbose) {
722  std::cerr << " CONTEXT ADDING ROUTINE\n";
723  std::cerr << " LANE:";
724  print_stack(lane);
725  std::cerr << " $\\zeta$-POINTER = " << zeta_pointer << '\n';
726  }
727  for (int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
728  int v_r = at(lane, r);
729  if (verbose) std::cerr << " r = " << r << ", $v_r$ = ";
730  if (v_r < 0) {
731  if (verbose) {
732  if (v_r == MARKER) std::cerr << "marker\n";
733  else if (v_r == ZERO) std::cerr << "zero\n";
734  }
735  continue;
736  }
737  int tau_r_addr = v_r;
738  if (verbose) {
739  std::cerr << "$\\tau_r$ = " << tau_r_addr << '\n';
740  std::cerr << " CONTEXTS_GENERATED = ";
741  print_set(contexts_generated, *grammar);
742  std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
743  print_set(at(contexts, tau_r_addr), *grammar);
744  std::cerr << "\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
745  }
746  subtract_from(contexts_generated, at(contexts, tau_r_addr));
747  if (verbose) {
748  std::cerr << "\n CONTEXTS_GENERATED = ";
749  print_set(contexts_generated, *grammar);
750  std::cerr << "\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
751  }
752  unite_with(at(contexts, tau_r_addr), contexts_generated);
753  if (verbose) {
754  std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
755  print_set(at(contexts, tau_r_addr), *grammar);
756  std::cerr << "\n";
757  }
758  }
759 }
760 
761 static void deal_with_tests_failed(
762  int& num_originators_failed,
763  int& first_originator_failed,
764  int zeta_prime_addr,
765  bool& tests_failed,
766  std::vector<int>& lane,
767  std::vector<int>& in_lane,
768  int zeta_addr,
769  std::vector<int>& stack,
770  bool verbose
771  ) {
772  if (verbose) std::cerr << " Dealing with test failures\n";
773  if (num_originators_failed == 0) {
774  if (verbose) std::cerr << " " << zeta_prime_addr << " is the first originator of " << zeta_addr << " to fail the tests\n";
775  first_originator_failed = zeta_prime_addr;
776  if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto LANE:\n ";
777  lane.push_back(zeta_prime_addr);
778  if (verbose) print_stack(lane);
779  at(in_lane, zeta_prime_addr) = size(lane) - 1;
780  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") <- ON\n";
781  tests_failed = true;
782  if (verbose) std::cerr << " TESTS_FAILED <- ON\n";
783  } else if (num_originators_failed == 1) {
784  if (verbose) std::cerr << " " << zeta_prime_addr << " is the second originator of " << zeta_addr << " to fail the tests\n";
785  int zeta_double_prime_addr = first_originator_failed;
786  if (verbose) std::cerr << " the first was " << zeta_double_prime_addr << '\n';
787  TEUCHOS_ASSERT(at(lane, size(lane) - 1) == zeta_double_prime_addr);
788  TEUCHOS_ASSERT(at(lane, size(lane) - 2) == zeta_addr);
789  if (verbose) std::cerr << " pop LANE, push {marker, " << zeta_double_prime_addr << "} onto it:\n ";
790  lane.resize(lane.size() - 1);
791  lane.push_back(MARKER);
792  lane.push_back(zeta_double_prime_addr);
793  if (verbose) print_stack(lane);
794  if (verbose) std::cerr << " push {marker, " << zeta_prime_addr << "} onto STACK:\n ";
795  stack.push_back(MARKER);
796  stack.push_back(zeta_prime_addr);
797  if (verbose) print_stack(stack);
798  } else {
799  if (verbose) std::cerr << " " << zeta_prime_addr << " is the third or later originator of " << zeta_addr << " to fail the tests\n";
800  if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto STACK:\n ";
801  stack.push_back(zeta_prime_addr);
802  if (verbose) print_stack(stack);
803  }
804  ++num_originators_failed;
805 }
806 
807 static void heuristic_propagation_of_context_sets(
808  int tau_addr,
809  Contexts& contexts,
810  std::vector<bool>& complete,
811  StateConfigs const& scs,
812  StatesInProgress const& states,
813  Graph const& states2scs,
814  Configs const& cs,
815  GrammarPtr grammar
816  ) {
817  const StateConfig& tau = at(scs, tau_addr);
818  const StateInProgress& state = *at(states, tau.state);
819  int config_i = at(state.configs, tau.config_in_state);
820  const Config& config = at(cs, config_i);
821  if (config.dot != 0) return;
822  const Grammar::Production& prod = at(grammar->productions, config.production);
823  for (int cis_j = 0; cis_j < size(state.configs); ++cis_j) {
824  int config_j = at(state.configs, cis_j);
825  if (config_j == config_i) continue;
826  const Config& config2 = at(cs, config_j);
827  if (config2.dot != 0) continue;
828  const Grammar::Production& prod2 = at(grammar->productions, config2.production);
829  if (prod.lhs != prod2.lhs) continue;
830  int tau_prime_addr = at(states2scs, tau.state, cis_j);
831  at(contexts, tau_prime_addr) = at(contexts, tau_addr);
832  at(complete, tau_prime_addr) = true;
833  }
834 }
835 
836 /* Here it is! The magical algorithm described by a flowchart in
837  Figure 7 of David Pager's paper. */
838 static void compute_context_set(
839  int zeta_j_addr,
840  Contexts& contexts,
841  std::vector<bool>& complete,
842  StateConfigs const& scs,
843  Graph const& originator_graph,
844  StatesInProgress const& states,
845  Graph const& states2scs,
846  Configs const& cs,
847  std::vector<FirstSet> const& first_sets,
848  GrammarPtr grammar,
849  bool verbose,
850  ParserInProgress const& pip
851  ) {
852  if (verbose) std::cerr << "Computing context set for $\\zeta_j$ = " << zeta_j_addr << "...\n";
853  if (verbose) std::cerr << "BEGIN PROGRAM\n";
854  if (at(complete, zeta_j_addr)) {
855  if (verbose) std::cerr << zeta_j_addr << " was already complete!\nEND PROGRAM\n\n";
856  return;
857  }
858  std::vector<int> stack;
859  // need random access, inner insert, which std::stack doesn't provide
860  std::vector<int> lane;
861  std::vector<int> in_lane = make_vector<int>(size(scs), -1);
862  lane.push_back(zeta_j_addr);
863  at(in_lane, zeta_j_addr) = size(lane) - 1;
864  bool tests_failed = false;
865  Context contexts_generated;
866  if (verbose) {
867  std::cerr << "Initial LANE:";
868  print_stack(lane);
869  }
870  while (true) {
871  TEUCHOS_ASSERT(!lane.empty());
872  int zeta_addr = lane.back();
873  if (verbose) {
874  std::cerr << "Top of LANE is $\\zeta$ = " << zeta_addr << '\n';
875  }
876  int zeta_pointer = size(lane) - 1;
877  if (verbose) std::cerr << "$\\zeta$-POINTER <- " << zeta_pointer << '\n';
878  int num_originators_failed = 0;
879  int first_originator_failed = -1;
880  if (verbose) std::cerr << "DO_LOOP:\n";
881  /* DO_LOOP */
882  for (int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
883  int zeta_prime_addr = at(originator_graph, zeta_addr, i);
884  if (verbose) {
885  std::cerr << "Next originator of $\\zeta$ = " << zeta_addr << " is $\\zeta'$ = " << zeta_prime_addr << '\n';
886  }
887  std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
888  if (verbose) {
889  std::cerr << " FOLLOW string of $\\zeta'$ = " << zeta_prime_addr << " is ";
890  print_string(gamma, grammar);
891  std::cerr << '\n';
892  }
893  FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
894  if (verbose) {
895  std::cerr << " FIRST set of ";
896  print_string(gamma, grammar);
897  std::cerr << " is ";
898  print_set(gamma_first, *grammar);
899  std::cerr << "\n";
900  }
901  if (has_non_null_terminal_descendant(gamma_first)) { // test A
902  if (verbose) {
903  std::cerr << " ";
904  print_string(gamma, grammar);
905  std::cerr << " has a non-null terminal descendant\n";
906  }
907  contexts_generated = get_contexts(gamma_first);
908  if (verbose) {
909  std::cerr << " CONTEXTS_GENERATED = ";
910  print_set(contexts_generated, *grammar);
911  std::cerr << " = 1-heads of non-null descendants of ";
912  print_string(gamma, grammar);
913  std::cerr << '\n';
914  }
915  if (gamma_first.count(FIRST_NULL)) {
916  if (verbose) {
917  std::cerr << " ";
918  print_string(gamma, grammar);
919  std::cerr << " has a null terminal descendant\n";
920  }
921  if (at(complete, zeta_prime_addr)) {
922  unite_with(contexts_generated, at(contexts, zeta_prime_addr));
923  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
924  verbose, grammar);
925  } else if (-1 == at(in_lane, zeta_prime_addr)) {
926  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
927  verbose, grammar);
928  /* TRACE_FURTHER */
929  deal_with_tests_failed(num_originators_failed, first_originator_failed,
930  zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
931  verbose);
932  } else {
933  print_graphviz("ambiguous.dot", pip, true, std::cerr);
934  std::stringstream ss;
935  ss << "error: grammar is ambiguous.\n";
936  ss << "zeta_j is " << zeta_j_addr << ", zeta is " << zeta_addr << ", and zeta prime is " << zeta_prime_addr << '\n';
937  throw ParserBuildFail(ss.str());
938  }
939  } else {
940  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
941  verbose, grammar);
942  }
943  } else {
944  if (verbose) {
945  std::cerr << " ";
946  print_string(gamma, grammar);
947  std::cerr << " does not have a non-null terminal descendant\n";
948  }
949  if (at(complete, zeta_prime_addr)) { // test B
950  if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is ON\n";
951  contexts_generated = at(contexts, zeta_prime_addr);
952  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
953  verbose, grammar);
954  } else {
955  if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is OFF\n";
956  if (-1 != at(in_lane, zeta_prime_addr)) { // test C
957  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is ON\n";
958  move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
959  contexts_generated = at(contexts, zeta_prime_addr);
960  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
961  verbose, grammar);
962  } else {
963  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is OFF\n";
964  deal_with_tests_failed(num_originators_failed, first_originator_failed,
965  zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
966  verbose);
967  }
968  }
969  }
970  } /* END DO_LOOP */
971  if (verbose) std::cerr << "END DO_LOOP\n";
972  if (tests_failed) {
973  if (verbose) {
974  std::cerr << " TESTS_FAILED was on, turning it off and going to next configuration\n";
975  }
976  tests_failed = false;
977  continue;
978  }
979  bool keep_lane_popping = true;
980  if (verbose) std::cerr << " Start LANE popping\n";
981  while (keep_lane_popping) { // LANE popping loop
982  TEUCHOS_ASSERT(!lane.empty());
983  if (verbose) {
984  std::cerr << " LANE:";
985  print_stack(lane);
986  }
987  if (at(lane, size(lane) - 1) == MARKER) {
988  if (verbose) std::cerr << " Top of LANE is a marker\n";
989  if (verbose) std::cerr << " Start STACK popping\n";
990  while (true) { // STACK popping loop
991  TEUCHOS_ASSERT(!stack.empty());
992  if (verbose) {
993  std::cerr << " STACK:";
994  print_stack(stack);
995  std::cerr << " LANE:";
996  print_stack(lane);
997  }
998  if (stack.back() == MARKER) {
999  if (verbose) std::cerr << " Top of STACK is a marker, pop STACK and LANE\n";
1000  resize(stack, size(stack) - 1);
1001  resize(lane, size(lane) - 1);
1002  break; // out of STACK popping, back into LANE popping
1003  } else if (at(complete, stack.back())) {
1004  if (verbose) std::cerr << " Top of STACK is has COMPLETE flag, pop STACK\n";
1005  resize(stack, size(stack) - 1);
1006  // back into STACK popping
1007  } else {
1008  int addr = stack.back();
1009  if (verbose) std::cerr << " Top of STACK is " << addr << ", pop STACK\n";
1010  resize(stack, size(stack) - 1);
1011  if (verbose) std::cerr << " Push " << addr << " onto LANE\n";
1012  lane.push_back(addr);
1013  if (verbose) std::cerr << " IN_LANE(" << addr << ") <- ON\n";
1014  at(in_lane, addr) = size(lane) - 1;
1015  keep_lane_popping = false;
1016  break; // out of STACK and LANE popping, into top-level loop
1017  } // end STACK top checks
1018  } // end STACK popping loop
1019  } else if (at(lane, size(lane) - 1) == ZERO) {
1020  if (verbose) std::cerr << " Top of LANE is a zero\n";
1021  if (verbose) std::cerr << " Pop LANE\n";
1022  resize(lane, size(lane) - 1); // pop LANE
1023  // back to top of LANE popping loop
1024  } else { // top of LANE neither marker nor zero
1025  int tau_addr = lane.back();
1026  if (verbose) std::cerr << " Top of LANE is $\\tau$ = " << tau_addr << "\n";
1027  at(in_lane, tau_addr) = -1;
1028  if (verbose) std::cerr << " IN_LANE(" << tau_addr << ") <- OFF\n";
1029  at(complete, tau_addr) = true;
1030  if (verbose) std::cerr << " COMPLETE(" << tau_addr << ") <- ON\n";
1031  if (verbose) std::cerr << " HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1032  heuristic_propagation_of_context_sets(
1033  tau_addr, contexts, complete,
1034  scs, states, states2scs, cs, grammar);
1035  if (size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1036  if (verbose) std::cerr << "END PROGRAM\n\n";
1037  return;
1038  }
1039  if (verbose) std::cerr << " Pop LANE\n";
1040  resize(lane, size(lane) - 1); // pop LANE
1041  // back to top of LANE popping loop
1042  } // end top of LANE checks
1043  } // end LANE popping loop
1044  } // end top-level while(1) loop
1045 }
1046 
1047 static std::vector<bool> determine_adequate_states(
1048  StatesInProgress const& states,
1049  GrammarPtr grammar,
1050  bool verbose,
1051  std::ostream& os) {
1052  std::vector<bool> out = make_vector<bool>(size(states));
1053  for (int s_i = 0; s_i < size(states); ++s_i) {
1054  const StateInProgress& state = *at(states, s_i);
1055  bool state_is_adequate = true;
1056  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
1057  const ActionInProgress& action = at(state.actions, a_i);
1058  if (action.action.kind == ACTION_SHIFT &&
1059  is_nonterminal(*grammar, *(action.context.begin()))) {
1060  continue;
1061  }
1062  for (int a_j = a_i + 1; a_j < size(state.actions); ++a_j) {
1063  const ActionInProgress& action2 = at(state.actions, a_j);
1064  if (action2.action.kind == ACTION_SHIFT &&
1065  is_nonterminal(*grammar, *(action2.context.begin()))) {
1066  continue;
1067  }
1068  if (intersects(action2.context, action.context)) {
1069  if (verbose) {
1070  const ActionInProgress* ap1 = &action;
1071  const ActionInProgress* ap2 = &action2;
1072  if (ap1->action.kind == ACTION_SHIFT) {
1073  std::swap(ap1, ap2);
1074  }
1075  TEUCHOS_ASSERT(ap1->action.kind == ACTION_REDUCE);
1076  os << "shift-reduce conflict in state " << s_i << ":\n";
1077  os << "reduce ";
1078  const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1079  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1080  os << lhs_name << " ::=";
1081  for (int rhs_i = 0; rhs_i < size(prod.rhs); ++rhs_i) {
1082  int rhs_symb = at(prod.rhs, rhs_i);
1083  const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1084  os << " " << rhs_symb_name;
1085  }
1086  int shift_symb = *(ap2->context.begin());
1087  const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1088  os << "\nshift " << shift_name << '\n';
1089  }
1090  state_is_adequate = false;
1091  break;
1092  }
1093  }
1094  if (!state_is_adequate) break;
1095  }
1096  at(out, s_i) = state_is_adequate;
1097  }
1098  if (verbose) os << '\n';
1099  return out;
1100 }
1101 
1102 ParserInProgress draft_lalr1_parser(GrammarPtr grammar, bool verbose) {
1103  ParserInProgress out;
1104  Configs& cs = out.configs;
1105  StatesInProgress& states = out.states;
1106  StateConfigs& scs = out.state_configs;
1107  Graph& states2scs = out.states2state_configs;
1108  out.grammar = grammar;
1109  cs = make_configs(*grammar);
1110  Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1111  if (verbose) std::cerr << "Building LR(0) parser\n";
1112  states = make_lr0_parser(cs, *grammar, lhs2cs);
1113  scs = form_state_configs(states);
1114  states2scs = form_states_to_state_configs(scs, states);
1115  if (verbose) print_graphviz("lr0.dot", out, true, std::cerr);
1116  if (verbose) std::cerr << "Checking adequacy of LR(0) machine\n";
1117  std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1118  std::cerr);
1119  if (*(std::min_element(adequate.begin(), adequate.end()))) {
1120  if (verbose) std::cerr << "The grammar is LR(0)!\n";
1121  return out;
1122  }
1123  std::vector<bool> complete = make_vector<bool>(size(scs), false);
1124  std::vector<Context> contexts = make_vector<Context>(size(scs));
1125  int accept_prod_i = get_accept_production(*grammar);
1126  /* initialize the accepting state-configs as described in
1127  footnote 8 at the bottom of page 37 */
1128  for (int sc_i = 0; sc_i < size(scs); ++sc_i) {
1129  StateConfig& sc = at(scs, sc_i);
1130  StateInProgress& state = *at(states, sc.state);
1131  int config_i = at(state.configs, sc.config_in_state);
1132  Config& config = at(cs, config_i);
1133  if (config.production == accept_prod_i) {
1134  at(complete, sc_i) = true;
1135  at(contexts, sc_i).insert(get_end_terminal(*grammar));
1136  }
1137  }
1138  Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1139  if (verbose) std::cerr << "Originator Graph:\n";
1140  if (verbose) std::cerr << og << '\n';
1141  std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1142  /* compute context sets for all state-configs associated with reduction
1143  actions that are part of an inadequate state */
1144  for (int s_i = 0; s_i < size(states); ++s_i) {
1145  if (at(adequate, s_i)) continue;
1146  StateInProgress& state = *at(states, s_i);
1147  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
1148  int config_i = at(state.configs, cis_i);
1149  const Config& config = at(cs, config_i);
1150  const Grammar::Production& prod = at(grammar->productions, config.production);
1151  if (config.dot != size(prod.rhs)) continue;
1152  int zeta_j_addr = at(states2scs, s_i, cis_i);
1153  compute_context_set(zeta_j_addr, contexts, complete, scs,
1154  og, states, states2scs, cs, first_sets, grammar, verbose, out);
1155  }
1156  }
1157  /* update the context sets for all reduction state-configs
1158  which are marked complete, even if they aren't in inadequate states */
1159  for (int s_i = 0; s_i < size(states); ++s_i) {
1160  StateInProgress& state = *at(states, s_i);
1161  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
1162  int sc_i = at(states2scs, s_i, cis_i);
1163  if (!at(complete, sc_i)) continue;
1164  int config_i = at(state.configs, cis_i);
1165  Config& config = at(cs, config_i);
1166  const Grammar::Production& prod = at(grammar->productions, config.production);
1167  if (config.dot != size(prod.rhs)) continue;
1168  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
1169  ActionInProgress& action = at(state.actions, a_i);
1170  if (action.action.kind == ACTION_REDUCE &&
1171  action.action.production == config.production) {
1172  action.context = at(contexts, sc_i);
1173  }
1174  }
1175  }
1176  }
1177  if (verbose) std::cerr << "Checking adequacy of LALR(1) machine\n";
1178  adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1179  if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1180  std::stringstream ss;
1181  ss << "error: The grammar is not LALR(1).\n";
1182  determine_adequate_states(states, grammar, true, ss);
1183  print_graphviz("error.dot", out, true, ss);
1184  std::string s = ss.str();
1185  throw ParserBuildFail(s);
1186  }
1187  if (verbose) std::cerr << "The grammar is LALR(1)!\n";
1188  if (verbose) print_graphviz("lalr1.dot", out, true, std::cerr);
1189  return out;
1190 }
1191 
1192 Parser accept_parser(ParserInProgress const& pip) {
1193  const StatesInProgress& sips = pip.states;
1194  GrammarPtr grammar = pip.grammar;
1195  Parser out(grammar, size(sips));
1196  for (int s_i = 0; s_i < size(sips); ++s_i) {
1197  add_state(out);
1198  }
1199  for (int s_i = 0; s_i < size(sips); ++s_i) {
1200  const StateInProgress& sip = *at(sips, s_i);
1201  for (int a_i = 0; a_i < size(sip.actions); ++a_i) {
1202  const ActionInProgress& action = at(sip.actions, a_i);
1203  if (action.action.kind == ACTION_SHIFT &&
1204  is_nonterminal(*grammar, *(action.context.begin()))) {
1205  int nt = as_nonterminal(*grammar, *(action.context.begin()));
1206  add_nonterminal_action(out, s_i, nt, action.action.next_state);
1207  } else {
1208  for (Context::const_iterator it = action.context.begin();
1209  it != action.context.end(); ++it) {
1210  int terminal = *it;
1211  TEUCHOS_ASSERT(is_terminal(*grammar, terminal));
1212  add_terminal_action(out, s_i, terminal, action.action);
1213  }
1214  }
1215  }
1216  }
1217  return out;
1218 }
1219 
1220 ParserBuildFail::ParserBuildFail(const std::string& msg):
1221  std::invalid_argument(msg) {
1222 }
1223 
1224 Parser make_lalr1_parser(GrammarPtr grammar, bool verbose) {
1225  ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1226  return accept_parser(pip);
1227 }
1228 
1229 }
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
int size(const Comm< Ordinal > &comm)
Get the number of processes in the communicator.
Ptr< T > ptr(T *p)
Create a pointer to an object from a raw pointer.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
void swap(Teuchos::any &a, Teuchos::any &b)
Special swap for other code to find via Argument Dependent Lookup.