Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_make_lalr1_parser.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_make_lalr1_parser.hpp"
11 
12 #include <map>
13 #include <iostream>
14 #include <cstdlib>
15 #include <queue>
16 #include <algorithm>
17 #include <fstream>
18 
19 #include "Teuchos_vector.hpp"
20 #include "Teuchos_Graph.hpp"
21 #include "Teuchos_stack.hpp"
22 #include "Teuchos_set.hpp"
23 
24 namespace Teuchos {
25 
26 /* The LALR(1) parser construction implemented here is based on
27  David Pager's work:
28 
29  Pager, David.
30  "The lane-tracing algorithm for constructing LR (k) parsers
31  and ways of enhancing its efficiency."
32  Information Sciences 12.1 (1977): 19-42.
33 
34  The identifiers used in this code are consistent with the terminology
35  in that paper, except where we bring in FIRST set terminology, which
36  Pager doesn't go into detail about. */
37 
38 Config::Config(int p, int d):
39  production(p),
40  dot(d)
41 {
42 }
43 
44 StateConfig::StateConfig(int s, int cis):
45  state(s),
46  config_in_state(cis)
47 {
48 }
49 
50 void swap(StateInProgress& a, StateInProgress& b) {
51  using std::swap;
52  swap(a.configs, b.configs);
53  swap(a.actions, b.actions);
54 }
55 
56 // expand the grammar productions into marked productions
57 static Configs make_configs(Grammar const& g) {
58  Configs configs;
59  for (int i = 0; i < Teuchos::size(g.productions); ++i) {
60  const Grammar::Production& production = at(g.productions, i);
61  for (int j = 0; j <= Teuchos::size(production.rhs); ++j) {
62  configs.push_back(Config(i,j));
63  }
64  }
65  return configs;
66 }
67 
68 static Graph get_left_hand_sides_to_start_configs(
69  Configs const& cs, Grammar const& grammar) {
70  Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
71  for (int c_i = 0; c_i < Teuchos::size(cs); ++c_i) {
72  const Config& c = at(cs, c_i);
73  if (c.dot > 0) continue;
74  int p_i = c.production;
75  const Grammar::Production& p = at(grammar.productions, p_i);
76  add_edge(lhs2sc, p.lhs, c_i);
77  }
78  return lhs2sc;
79 }
80 
81 struct StateCompare {
82  typedef StateInProgress const* Value;
83  bool operator()(Value const& a, Value const& b) const {
84  return a->configs < b->configs;
85  }
86 };
87 
88 typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
89 
90 static void close(StateInProgress& state,
91  Configs const& cs, Grammar const& grammar,
92  Graph const& lhs2sc) {
93  std::queue<int> config_q;
94  std::set<int> config_set;
95  for (std::vector<int>::const_iterator it = state.configs.begin();
96  it != state.configs.end(); ++it) {
97  int config_i = *it;
98  config_q.push(config_i);
99  TEUCHOS_ASSERT(!config_set.count(config_i));
100  config_set.insert(config_i);
101  }
102  while (!config_q.empty()) {
103  int config_i = config_q.front(); config_q.pop();
104  const Config& config = at(cs, config_i);
105  int prod_i = config.production;
106  const Grammar::Production& prod = at(grammar.productions, prod_i);
107  if (config.dot == Teuchos::size(prod.rhs)) continue;
108  int symbol_after_dot = at(prod.rhs, config.dot);
109  if (is_terminal(grammar, symbol_after_dot)) continue;
110  const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
111  for (NodeEdges::const_iterator it = edges.begin();
112  it != edges.end(); ++it) {
113  int sc = *it;
114  if (!config_set.count(sc)) {
115  config_set.insert(sc);
116  config_q.push(sc);
117  }
118  }
119  }
120  state.configs.assign(config_set.begin(), config_set.end());
121 }
122 
123 static void add_back(StatesInProgress& sips, StateInProgress& sip) {
124  using std::swap;
125  StateInProgressPtr ptr(new StateInProgress());
126  swap(*ptr, sip);
127  sips.push_back(ptr);
128 }
129 
130 static void add_reduction_actions(StatesInProgress& states,
131  Configs const& cs, Grammar const& grammar) {
132  for (StatesInProgress::iterator it = states.begin();
133  it != states.end(); ++it) {
134  StateInProgressPtr& state_uptr = *it;
135  StateInProgress& state = *state_uptr;
136  for (std::vector<int>::const_iterator it2 = state.configs.begin();
137  it2 != state.configs.end(); ++it2) {
138  int config_i = *it2;
139  const Config& config = at(cs, config_i);
140  int prod_i = config.production;
141  const Grammar::Production& prod = at(grammar.productions, prod_i);
142  if (config.dot != Teuchos::size(prod.rhs)) continue;
143  ActionInProgress reduction;
144  reduction.action.kind = ACTION_REDUCE;
145  reduction.action.production = config.production;
146  state.actions.push_back(reduction);
147  }
148  }
149 }
150 
151 static void set_lr0_contexts(
152  StatesInProgress& states,
153  Grammar const& grammar) {
154  for (StatesInProgress::iterator it = states.begin();
155  it != states.end(); ++it) {
156  StateInProgressPtr& state_uptr = *it;
157  StateInProgress& state = *state_uptr;
158  for (StateInProgress::Actions::iterator it2 = state.actions.begin();
159  it2 != state.actions.end(); ++it2) {
160  ActionInProgress& action = *it2;
161  if (action.action.kind != ACTION_REDUCE) continue;
162  if (action.action.production == get_accept_production(grammar)) {
163  action.context.insert(get_end_terminal(grammar));
164  } else {
165  for (int terminal = 0; terminal < grammar.nterminals; ++terminal) {
166  action.context.insert(terminal);
167  }
168  }
169  }
170  }
171 }
172 
173 static StatesInProgress make_lr0_parser(Configs const& cs, Grammar const& grammar,
174  Graph const& lhs2sc) {
175  StatesInProgress states;
176  StatePtr2StateIndex state_ptrs2idxs;
177  std::queue<int> state_q;
178  { /* start state */
179  StateInProgress start_state;
180  int accept_nt = get_accept_nonterminal(grammar);
181  /* there should only be one start configuration for the accept symbol */
182  int start_accept_config = get_edges(lhs2sc, accept_nt).front();
183  start_state.configs.push_back(start_accept_config);
184  close(start_state, cs, grammar, lhs2sc);
185  int start_state_i = Teuchos::size(states);
186  state_q.push(start_state_i);
187  add_back(states, start_state);
188  state_ptrs2idxs[states.back().get()] = start_state_i;
189  }
190  while (!state_q.empty()) {
191  int state_i = state_q.front(); state_q.pop();
192  StateInProgress& state = *at(states, state_i);
193  std::set<int> transition_symbols;
194  for (std::vector<int>::const_iterator it = state.configs.begin();
195  it != state.configs.end(); ++it) {
196  int config_i = *it;
197  const Config& config = at(cs, config_i);
198  int prod_i = config.production;
199  const Grammar::Production& prod = at(grammar.productions, prod_i);
200  if (config.dot == Teuchos::size(prod.rhs)) continue;
201  int symbol_after_dot = at(prod.rhs, config.dot);
202  transition_symbols.insert(symbol_after_dot);
203  }
204  for (std::set<int>::const_iterator it = transition_symbols.begin();
205  it != transition_symbols.end(); ++it) {
206  int transition_symbol = *it;
207  StateInProgress next_state;
208  for (std::vector<int>::const_iterator it2 = state.configs.begin();
209  it2 != state.configs.end(); ++it2) {
210  int config_i = *it2;
211  const Config& config = at(cs, config_i);
212  int prod_i = config.production;
213  const Grammar::Production& prod = at(grammar.productions, prod_i);
214  if (config.dot == Teuchos::size(prod.rhs)) continue;
215  int symbol_after_dot = at(prod.rhs, config.dot);
216  if (symbol_after_dot != transition_symbol) continue;
217  /* transition successor should just be the next index */
218  int next_config_i = config_i + 1;
219  next_state.configs.push_back(next_config_i);
220  }
221  close(next_state, cs, grammar, lhs2sc);
222  StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
223  int next_state_i;
224  if (it2 == state_ptrs2idxs.end()) {
225  next_state_i = Teuchos::size(states);
226  state_q.push(next_state_i);
227  add_back(states, next_state);
228  state_ptrs2idxs[states.back().get()] = next_state_i;
229  } else {
230  next_state_i = it2->second;
231  }
232  ActionInProgress transition;
233  transition.action.kind = ACTION_SHIFT;
234  transition.action.next_state = next_state_i;
235  transition.context.insert(transition_symbol);
236  state.actions.push_back(transition);
237  }
238  }
239  add_reduction_actions(states, cs, grammar);
240  set_lr0_contexts(states, grammar);
241  return states;
242 }
243 
244 static Graph get_productions_by_lhs(Grammar const& grammar) {
245  int nsymbols = grammar.nsymbols;
246  Graph lhs2prods = make_graph_with_nnodes(nsymbols);
247  for (int prod_i = 0; prod_i < Teuchos::size(grammar.productions); ++prod_i) {
248  const Grammar::Production& prod = at(grammar.productions, prod_i);
249  add_edge(lhs2prods, prod.lhs, prod_i);
250  }
251  return lhs2prods;
252 }
253 
254 /* compute a graph where symbols are graph nodes, and
255  there exists an edge (A, B) if B appears in the RHS of
256  any production in which A is the LHS */
257 static Graph get_symbol_graph(Grammar const& grammar, Graph const& lhs2prods) {
258  int nsymbols = grammar.nsymbols;
259  Graph out = make_graph_with_nnodes(nsymbols);
260  for (int lhs = 0; lhs < nsymbols; ++lhs) {
261  std::set<int> dependees;
262  for (int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
263  int prod_i = at(lhs2prods, lhs, i);
264  const Grammar::Production& prod = at(grammar.productions, prod_i);
265  for (int j = 0; j < Teuchos::size(prod.rhs); ++j) {
266  int rhs_symb = at(prod.rhs, j);
267  dependees.insert(rhs_symb);
268  }
269  }
270  at(out, lhs).assign(dependees.begin(), dependees.end());
271  }
272  return out;
273 }
274 
275 /* the "FIRST" set, i.e. the set of 1-heads of non-null terminal descendants of
276  some string.
277  As suggested by Westley Weimer here:
278  https://www.cs.virginia.edu/~weimer/2008-415/reading/FirstFollowLL.pdf
279  we will also use the FIRST set for determining whether the string has
280  a null terminal descendant, indicated by the prescence of a special
281  FIRST_NULL symbol in the FIRST set */
282 enum { FIRST_NULL = -425 };
283 typedef std::set<int> FirstSet;
284 
285 static void print_set(std::set<int> const& set, Grammar const& grammar) {
286  std::cerr << "{";
287  for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
288  if (it != set.begin()) std::cerr << ", ";
289  int symb = *it;
290  if (symb == FIRST_NULL) std::cerr << "null";
291  else {
292  const std::string& symb_name = at(grammar.symbol_names, symb);
293  if (symb_name == ",") std::cerr << "','";
294  else std::cerr << symb_name;
295  }
296  }
297  std::cerr << "}";
298 }
299 
300 static FirstSet get_first_set_of_string(std::vector<int> const& string,
301  std::vector<FirstSet> const& first_sets) {
302  FirstSet out;
303  /* walk the string, stop when any symbol is found that doesn't
304  have a null terminal descendant */
305  int i;
306  for (i = 0; i < Teuchos::size(string); ++i) {
307  int symbol = at(string, i);
308  bool has_null = false;
309  const FirstSet& first_set = at(first_sets, symbol);
310  for (FirstSet::const_iterator it = first_set.begin();
311  it != first_set.end(); ++it) {
312  int first_symbol = *it;
313  if (first_symbol == FIRST_NULL) has_null = true;
314  else out.insert(first_symbol);
315  }
316  if (!has_null) break;
317  }
318  if (i == Teuchos::size(string)) out.insert(FIRST_NULL);
319  return out;
320 }
321 
322 struct Event {
323  int added_symbol;
324  int dependee;
325  Event(int as, int d):
326  added_symbol(as),
327  dependee(d)
328  {}
329 };
330 
331 /* figure out the FIRST sets for each non-terminal in the grammar.
332  I couldn't find a super-efficient way to do this, so here is a
333  free-for-all event-driven implementation. */
334 static std::vector<FirstSet> compute_first_sets(Grammar const& grammar,
335  bool verbose) {
336  if (verbose) std::cerr << "computing FIRST sets...\n";
337  std::queue<Event> event_q;
338  int nsymbols = grammar.nsymbols;
339  std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
340  Graph lhs2prods = get_productions_by_lhs(grammar);
341  for (int symbol = 0; symbol < nsymbols; ++symbol) {
342  if (is_terminal(grammar, symbol)) {
343  event_q.push(Event(symbol, symbol));
344  } else {
345  for (int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
346  int prod_i = at(lhs2prods, symbol, i);
347  const Grammar::Production& prod = at(grammar.productions, prod_i);
348  if (prod.rhs.empty()) {
349  event_q.push(Event(FIRST_NULL, symbol));
350  break;
351  }
352  }
353  }
354  }
355  Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
356  Graph dependees2dependers = make_transpose(dependers2dependees);
357  while (!event_q.empty()) {
358  Event event = event_q.front(); event_q.pop();
359  int added_symb = event.added_symbol;
360  int dependee = event.dependee;
361  FirstSet& dependee_firsts = at(first_sets, dependee);
362  /* hopefully we don't get too many duplicate events piled up... */
363  if (dependee_firsts.count(added_symb)) continue;
364  dependee_firsts.insert(added_symb);
365  for (int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
366  int depender = at(dependees2dependers, dependee, i);
367  TEUCHOS_ASSERT(is_nonterminal(grammar, depender));
368  const FirstSet& depender_firsts = at(first_sets, depender);
369  for (int j = 0; j < count_edges(lhs2prods, depender); ++j) {
370  int prod_i = at(lhs2prods, depender, j);
371  const Grammar::Production& prod = at(grammar.productions, prod_i);
372  FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
373  for (FirstSet::iterator it = rhs_first_set.begin();
374  it != rhs_first_set.end(); ++it) {
375  int rhs_first_symb = *it;
376  if (!depender_firsts.count(rhs_first_symb)) {
377  event_q.push(Event(rhs_first_symb, depender));
378  }
379  }
380  }
381  }
382  }
383  if (verbose) {
384  for (int symb = 0; symb < nsymbols; ++symb) {
385  const std::string& symb_name = at(grammar.symbol_names, symb);
386  std::cerr << "FIRST(" << symb_name << ") = {";
387  const FirstSet& c = at(first_sets, symb);
388  for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
389  if (it != c.begin()) std::cerr << ", ";
390  int first_symb = *it;
391  if (first_symb == FIRST_NULL) {
392  std::cerr << "null";
393  } else {
394  const std::string& first_name = at(grammar.symbol_names, first_symb);
395  std::cerr << first_name;
396  }
397  }
398  std::cerr << "}\n";
399  }
400  std::cerr << '\n';
401  }
402  return first_sets;
403 }
404 
405 StateConfigs form_state_configs(StatesInProgress const& states) {
406  StateConfigs out;
407  for (int i = 0; i < Teuchos::size(states); ++i) {
408  StateInProgress& state = *at(states, i);
409  for (int j = 0; j < Teuchos::size(state.configs); ++j) {
410  out.push_back(StateConfig(i, j));
411  }
412  }
413  return out;
414 }
415 
416 Graph form_states_to_state_configs(StateConfigs const& scs,
417  StatesInProgress const& states) {
418  Graph out = make_graph_with_nnodes(Teuchos::size(states));
419  for (int i = 0; i < Teuchos::size(scs); ++i) {
420  const StateConfig& sc = at(scs, i);
421  at(out, sc.state).push_back(i);
422  }
423  return out;
424 }
425 
426 static std::string escape_dot(std::string const& s) {
427  std::string out;
428  for (std::size_t i = 0; i < s.size(); ++i) {
429  char c = s[i];
430  if (c == '\\' || c == '|' || c == '\"' || c == '<' || c == '>' || c == '{' || c == '}') {
431  out.push_back('\\');
432  out.push_back(c);
433  } else if (c == '.') {
434  out = "'";
435  out += s;
436  out += "'";
437  return out;
438  } else {
439  out.push_back(c);
440  }
441  }
442  return out;
443 }
444 
445 void print_graphviz(
446  std::string const& filepath,
447  ParserInProgress const& pip,
448  bool /* verbose */,
449  std::ostream& os
450  ) {
451  const StatesInProgress& sips = pip.states;
452  const Configs& cs = pip.configs;
453  GrammarPtr grammar = pip.grammar;
454  const Graph& states2scs = pip.states2state_configs;
455  os << "writing GraphViz file \"" << filepath << "\"\n";
456  os << "process with:\n";
457  os << " dot -Tpdf -o \"" << filepath << ".pdf\" \"" << filepath << "\"\n";
458  std::ofstream file(filepath.c_str());
459  TEUCHOS_ASSERT(file.is_open());
460  file << "digraph {\n";
461  file << "graph [\n";
462  file << "rankdir = \"LR\"\n";
463  file << "]\n";
464  for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
465  const StateInProgress& state = *at(sips, s_i);
466  file << s_i << " [\n";
467  file << "label = \"";
468  file << "State " << s_i << "\\l";
469  for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
470  int c_i = at(state.configs, cis_i);
471  const Config& config = at(cs, c_i);
472  const Grammar::Production& prod = at(grammar->productions, config.production);
473  int sc_i = at(states2scs, s_i, cis_i);
474  file << sc_i << ": ";
475  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
476  file << escape_dot(lhs_name) << " ::= ";
477  for (int rhs_i = 0; rhs_i <= Teuchos::size(prod.rhs); ++rhs_i) {
478  if (rhs_i == config.dot) file << " .";
479  if (rhs_i < Teuchos::size(prod.rhs)) {
480  int rhs_symb = at(prod.rhs, rhs_i);
481  const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
482  file << " " << escape_dot(rhs_symb_name);
483  }
484  }
485  if (config.dot == Teuchos::size(prod.rhs)) {
486  file << ", \\{";
487  bool found = false;
488  for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
489  const ActionInProgress& action = at(state.actions, a_i);
490  if (action.action.kind == ACTION_REDUCE &&
491  action.action.production == config.production) {
492  found = true;
493  const Context& ac = action.context;
494  for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
495  if (it != ac.begin()) file << ", ";
496  int symb = *it;
497  const std::string& symb_name = at(grammar->symbol_names, symb);
498  file << escape_dot(symb_name);
499  }
500  }
501  }
502  TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error,
503  "BUG: missing reduce action in state " << s_i << " !!!\n");
504  file << "\\}";
505  }
506  file << "\\l";
507  }
508  file << "\"\n";
509  file << "shape = \"record\"\n";
510  file << "]\n";
511  for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
512  const ActionInProgress& action = at(state.actions, a_i);
513  if (action.action.kind == ACTION_SHIFT) {
514  int symb = *(action.context.begin());
515  const std::string& symb_name = at(grammar->symbol_names, symb);
516  int next = action.action.next_state;
517  file << s_i << " -> " << next << " [\n";
518  file << "label = \"" << escape_dot(symb_name) << "\"\n";
519  file << "]\n";
520  }
521  }
522  }
523  file << "}\n";
524 }
525 
526 static Graph make_immediate_predecessor_graph(
527  StateConfigs const& scs,
528  StatesInProgress const& states,
529  Graph const& states2scs,
530  Configs const& cs,
531  GrammarPtr grammar) {
532  Graph out = make_graph_with_nnodes(Teuchos::size(scs));
533  for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
534  const StateInProgress& state = *at(states, s_i);
535  for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
536  int config_i = at(state.configs, cis_i);
537  const Config& config = at(cs, config_i);
538  const Grammar::Production& prod = at(grammar->productions, config.production);
539  int dot = config.dot;
540  if (dot == Teuchos::size(prod.rhs)) continue;
541  int s = at(prod.rhs, dot);
542  if (is_terminal(*grammar, s)) continue;
543  for (int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
544  int config_j = at(state.configs, cis_j);
545  const Config& config2 = at(cs, config_j);
546  const Grammar::Production& prod2 = at(grammar->productions, config2.production);
547  if (prod2.lhs == s) {
548  int sc_i = at(states2scs, s_i, cis_i);
549  int sc_j = at(states2scs, s_i, cis_j);
550  add_edge(out, sc_j, sc_i);
551  }
552  }
553  }
554  }
555  return out;
556 }
557 
558 static Graph find_transition_predecessors(
559  StateConfigs const& scs,
560  StatesInProgress const& states,
561  Graph const& states2scs,
562  Configs const& cs,
563  GrammarPtr grammar) {
564  Graph out = make_graph_with_nnodes(Teuchos::size(scs));
565  for (int state_i = 0; state_i < Teuchos::size(states); ++state_i) {
566  const StateInProgress& state = *at(states, state_i);
567  for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
568  const ActionInProgress& action = at(state.actions, a_i);
569  if (action.action.kind != ACTION_SHIFT) continue;
570  TEUCHOS_ASSERT(action.context.size() == 1);
571  int symbol = *(action.context.begin());
572  int state_j = action.action.next_state;
573  const StateInProgress& state2 = *at(states, state_j);
574  for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
575  int config_i = at(state.configs, cis_i);
576  const Config& config = at(cs, config_i);
577  for (int cis_j = 0; cis_j < Teuchos::size(state2.configs); ++cis_j) {
578  int config_j = at(state2.configs, cis_j);
579  const Config& config2 = at(cs, config_j);
580  if (config.production == config2.production &&
581  config.dot + 1 == config2.dot) {
582  const Grammar::Production& prod = at(grammar->productions, config.production);
583  int rhs_symbol = at(prod.rhs, config.dot);
584  if (rhs_symbol == symbol) {
585  int sc_i = at(states2scs, state_i, cis_i);
586  int sc_j = at(states2scs, state_j, cis_j);
587  add_edge(out, sc_j, sc_i);
588  }
589  }
590  }
591  }
592  }
593  }
594  return out;
595 }
596 
597 static Graph make_originator_graph(
598  StateConfigs const& scs,
599  StatesInProgress const& states,
600  Graph const& states2scs,
601  Configs const& cs,
602  GrammarPtr grammar) {
603  Graph out = make_graph_with_nnodes(Teuchos::size(scs));
604  Graph ipg = make_immediate_predecessor_graph(
605  scs, states, states2scs, cs, grammar);
606  Graph tpg = find_transition_predecessors(
607  scs, states, states2scs, cs, grammar);
608  for (int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
609  std::set<int> originators;
610  /* breadth-first search through the Transition
611  Precessor Graph (tpg), followed by a single hop
612  along the Immediate Predecessor Graph (ipg) */
613  std::queue<int> tpq;
614  std::set<int> tps;
615  tpq.push(sc_i);
616  tps.insert(sc_i);
617  while (!tpq.empty()) {
618  int tpp = tpq.front(); tpq.pop();
619  for (int i = 0; i < count_edges(tpg, tpp); ++i) {
620  int tpc = at(tpg, tpp, i);
621  if (tps.count(tpc)) continue;
622  tpq.push(tpc);
623  tps.insert(tpc);
624  }
625  for (int i = 0; i < count_edges(ipg, tpp); ++i) {
626  int ip_i = at(ipg, tpp, i);
627  originators.insert(ip_i);
628  }
629  }
630  at(out, sc_i).assign(originators.begin(), originators.end());
631  }
632  return out;
633 }
634 
635 static std::vector<int> get_follow_string(
636  int sc_addr,
637  StateConfigs const& scs,
638  StatesInProgress const& states,
639  Configs const& cs,
640  GrammarPtr grammar) {
641  const StateConfig& sc = at(scs, sc_addr);
642  const StateInProgress& state = *at(states, sc.state);
643  int config_i = at(state.configs, sc.config_in_state);
644  const Config& config = at(cs, config_i);
645  const Grammar::Production& prod = at(grammar->productions, config.production);
646  int out_size = Teuchos::size(prod.rhs) - (config.dot + 1);
647  std::vector<int> out;
648  /* out_size can be negative */
649  if (out_size < 1) return out;
650  reserve(out, out_size);
651  for (int i = config.dot + 1; i < Teuchos::size(prod.rhs); ++i) {
652  out.push_back(at(prod.rhs, i));
653  }
654  return out;
655 }
656 
657 static void print_string(std::vector<int> const& str, GrammarPtr grammar) {
658  std::cerr << "\"";
659  for (int i = 0; i < Teuchos::size(str); ++i) {
660  int symb = at(str, i);
661  const std::string& symb_name = at(grammar->symbol_names, symb);
662  std::cerr << symb_name;
663  }
664  std::cerr << "\"";
665 }
666 
667 static bool has_non_null_terminal_descendant(FirstSet const& first_set) {
668  if (first_set.empty()) return false;
669  if (first_set.size() > 1) return true;
670  return *(first_set.begin()) != FIRST_NULL;
671 }
672 
673 static Context get_contexts(FirstSet first_set) {
674  FirstSet::iterator it = first_set.find(FIRST_NULL);
675  if (it != first_set.end()) first_set.erase(it);
676  return first_set;
677 }
678 
679 enum { MARKER = -433 };
680 enum { ZERO = -100 }; // actual zero is a valid index for us
681 
682 static void print_stack(std::vector<int> const& stack) {
683  for (int i = 0; i < Teuchos::size(stack); ++i) {
684  int symb = at(stack, i);
685  if (symb == MARKER) std::cerr << " M";
686  else if (symb == ZERO) std::cerr << " Z";
687  else std::cerr << " " << symb;
688  }
689  std::cerr << '\n';
690 }
691 
692 static void move_markers(
693  std::vector<int>& lane,
694  std::vector<int>& in_lane,
695  int zeta_prime_addr,
696  int zeta_pointer,
697  bool tests_failed
698  ) {
699  int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
700  TEUCHOS_ASSERT(loc_of_zeta_prime != -1);
701  int r = 0;
702  for (int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
703  if (at(lane, i) == MARKER) {
704  ++r;
705  at(lane, i) = ZERO;
706  }
707  }
708  int top_addr = -1;
709  if (tests_failed) {
710  top_addr = lane.back();
711  lane.resize(lane.size() - 1); // pop
712  }
713  for (int i = 0; i < r; ++i) lane.push_back(MARKER);
714  if (tests_failed) {
715  lane.push_back(top_addr);
716  at(in_lane, top_addr) = Teuchos::size(lane) - 1;
717  }
718 }
719 
720 typedef std::vector<Context> Contexts;
721 
722 static void context_adding_routine(
723  std::vector<int> const& lane,
724  int zeta_pointer,
725  Context& contexts_generated,
726  Contexts& contexts,
727  bool verbose,
728  GrammarPtr grammar
729  ) {
730  if (verbose) {
731  std::cerr << " CONTEXT ADDING ROUTINE\n";
732  std::cerr << " LANE:";
733  print_stack(lane);
734  std::cerr << " $\\zeta$-POINTER = " << zeta_pointer << '\n';
735  }
736  for (int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
737  int v_r = at(lane, r);
738  if (verbose) std::cerr << " r = " << r << ", $v_r$ = ";
739  if (v_r < 0) {
740  if (verbose) {
741  if (v_r == MARKER) std::cerr << "marker\n";
742  else if (v_r == ZERO) std::cerr << "zero\n";
743  }
744  continue;
745  }
746  int tau_r_addr = v_r;
747  if (verbose) {
748  std::cerr << "$\\tau_r$ = " << tau_r_addr << '\n';
749  std::cerr << " CONTEXTS_GENERATED = ";
750  print_set(contexts_generated, *grammar);
751  std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
752  print_set(at(contexts, tau_r_addr), *grammar);
753  std::cerr << "\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
754  }
755  subtract_from(contexts_generated, at(contexts, tau_r_addr));
756  if (verbose) {
757  std::cerr << "\n CONTEXTS_GENERATED = ";
758  print_set(contexts_generated, *grammar);
759  std::cerr << "\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
760  }
761  unite_with(at(contexts, tau_r_addr), contexts_generated);
762  if (verbose) {
763  std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
764  print_set(at(contexts, tau_r_addr), *grammar);
765  std::cerr << "\n";
766  }
767  }
768 }
769 
770 static void deal_with_tests_failed(
771  int& num_originators_failed,
772  int& first_originator_failed,
773  int zeta_prime_addr,
774  bool& tests_failed,
775  std::vector<int>& lane,
776  std::vector<int>& in_lane,
777  int zeta_addr,
778  std::vector<int>& stack,
779  bool verbose
780  ) {
781  if (verbose) std::cerr << " Dealing with test failures\n";
782  if (num_originators_failed == 0) {
783  if (verbose) std::cerr << " " << zeta_prime_addr << " is the first originator of " << zeta_addr << " to fail the tests\n";
784  first_originator_failed = zeta_prime_addr;
785  if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto LANE:\n ";
786  lane.push_back(zeta_prime_addr);
787  if (verbose) print_stack(lane);
788  at(in_lane, zeta_prime_addr) = Teuchos::size(lane) - 1;
789  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") <- ON\n";
790  tests_failed = true;
791  if (verbose) std::cerr << " TESTS_FAILED <- ON\n";
792  } else if (num_originators_failed == 1) {
793  if (verbose) std::cerr << " " << zeta_prime_addr << " is the second originator of " << zeta_addr << " to fail the tests\n";
794  int zeta_double_prime_addr = first_originator_failed;
795  if (verbose) std::cerr << " the first was " << zeta_double_prime_addr << '\n';
796  TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 1) == zeta_double_prime_addr);
797  TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 2) == zeta_addr);
798  if (verbose) std::cerr << " pop LANE, push {marker, " << zeta_double_prime_addr << "} onto it:\n ";
799  lane.resize(lane.size() - 1);
800  lane.push_back(MARKER);
801  lane.push_back(zeta_double_prime_addr);
802  if (verbose) print_stack(lane);
803  if (verbose) std::cerr << " push {marker, " << zeta_prime_addr << "} onto STACK:\n ";
804  stack.push_back(MARKER);
805  stack.push_back(zeta_prime_addr);
806  if (verbose) print_stack(stack);
807  } else {
808  if (verbose) std::cerr << " " << zeta_prime_addr << " is the third or later originator of " << zeta_addr << " to fail the tests\n";
809  if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto STACK:\n ";
810  stack.push_back(zeta_prime_addr);
811  if (verbose) print_stack(stack);
812  }
813  ++num_originators_failed;
814 }
815 
816 static void heuristic_propagation_of_context_sets(
817  int tau_addr,
818  Contexts& contexts,
819  std::vector<bool>& complete,
820  StateConfigs const& scs,
821  StatesInProgress const& states,
822  Graph const& states2scs,
823  Configs const& cs,
824  GrammarPtr grammar
825  ) {
826  const StateConfig& tau = at(scs, tau_addr);
827  const StateInProgress& state = *at(states, tau.state);
828  int config_i = at(state.configs, tau.config_in_state);
829  const Config& config = at(cs, config_i);
830  if (config.dot != 0) return;
831  const Grammar::Production& prod = at(grammar->productions, config.production);
832  for (int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
833  int config_j = at(state.configs, cis_j);
834  if (config_j == config_i) continue;
835  const Config& config2 = at(cs, config_j);
836  if (config2.dot != 0) continue;
837  const Grammar::Production& prod2 = at(grammar->productions, config2.production);
838  if (prod.lhs != prod2.lhs) continue;
839  int tau_prime_addr = at(states2scs, tau.state, cis_j);
840  at(contexts, tau_prime_addr) = at(contexts, tau_addr);
841  at(complete, tau_prime_addr) = true;
842  }
843 }
844 
845 /* Here it is! The magical algorithm described by a flowchart in
846  Figure 7 of David Pager's paper. */
847 static void compute_context_set(
848  int zeta_j_addr,
849  Contexts& contexts,
850  std::vector<bool>& complete,
851  StateConfigs const& scs,
852  Graph const& originator_graph,
853  StatesInProgress const& states,
854  Graph const& states2scs,
855  Configs const& cs,
856  std::vector<FirstSet> const& first_sets,
857  GrammarPtr grammar,
858  bool verbose,
859  ParserInProgress const& pip
860  ) {
861  if (verbose) std::cerr << "Computing context set for $\\zeta_j$ = " << zeta_j_addr << "...\n";
862  if (verbose) std::cerr << "BEGIN PROGRAM\n";
863  if (at(complete, zeta_j_addr)) {
864  if (verbose) std::cerr << zeta_j_addr << " was already complete!\nEND PROGRAM\n\n";
865  return;
866  }
867  std::vector<int> stack;
868  // need random access, inner insert, which std::stack doesn't provide
869  std::vector<int> lane;
870  std::vector<int> in_lane = make_vector<int>(Teuchos::size(scs), -1);
871  lane.push_back(zeta_j_addr);
872  at(in_lane, zeta_j_addr) = Teuchos::size(lane) - 1;
873  bool tests_failed = false;
874  Context contexts_generated;
875  if (verbose) {
876  std::cerr << "Initial LANE:";
877  print_stack(lane);
878  }
879  while (true) {
880  TEUCHOS_ASSERT(!lane.empty());
881  int zeta_addr = lane.back();
882  if (verbose) {
883  std::cerr << "Top of LANE is $\\zeta$ = " << zeta_addr << '\n';
884  }
885  int zeta_pointer = Teuchos::size(lane) - 1;
886  if (verbose) std::cerr << "$\\zeta$-POINTER <- " << zeta_pointer << '\n';
887  int num_originators_failed = 0;
888  int first_originator_failed = -1;
889  if (verbose) std::cerr << "DO_LOOP:\n";
890  /* DO_LOOP */
891  for (int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
892  int zeta_prime_addr = at(originator_graph, zeta_addr, i);
893  if (verbose) {
894  std::cerr << "Next originator of $\\zeta$ = " << zeta_addr << " is $\\zeta'$ = " << zeta_prime_addr << '\n';
895  }
896  std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
897  if (verbose) {
898  std::cerr << " FOLLOW string of $\\zeta'$ = " << zeta_prime_addr << " is ";
899  print_string(gamma, grammar);
900  std::cerr << '\n';
901  }
902  FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
903  if (verbose) {
904  std::cerr << " FIRST set of ";
905  print_string(gamma, grammar);
906  std::cerr << " is ";
907  print_set(gamma_first, *grammar);
908  std::cerr << "\n";
909  }
910  if (has_non_null_terminal_descendant(gamma_first)) { // test A
911  if (verbose) {
912  std::cerr << " ";
913  print_string(gamma, grammar);
914  std::cerr << " has a non-null terminal descendant\n";
915  }
916  contexts_generated = get_contexts(gamma_first);
917  if (verbose) {
918  std::cerr << " CONTEXTS_GENERATED = ";
919  print_set(contexts_generated, *grammar);
920  std::cerr << " = 1-heads of non-null descendants of ";
921  print_string(gamma, grammar);
922  std::cerr << '\n';
923  }
924  if (gamma_first.count(FIRST_NULL)) {
925  if (verbose) {
926  std::cerr << " ";
927  print_string(gamma, grammar);
928  std::cerr << " has a null terminal descendant\n";
929  }
930  if (at(complete, zeta_prime_addr)) {
931  unite_with(contexts_generated, at(contexts, zeta_prime_addr));
932  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
933  verbose, grammar);
934  } else if (-1 == at(in_lane, zeta_prime_addr)) {
935  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
936  verbose, grammar);
937  /* TRACE_FURTHER */
938  deal_with_tests_failed(num_originators_failed, first_originator_failed,
939  zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
940  verbose);
941  } else {
942  print_graphviz("ambiguous.dot", pip, true, std::cerr);
943  std::stringstream ss;
944  ss << "error: grammar is ambiguous.\n";
945  ss << "zeta_j is " << zeta_j_addr << ", zeta is " << zeta_addr << ", and zeta prime is " << zeta_prime_addr << '\n';
946  throw ParserBuildFail(ss.str());
947  }
948  } else {
949  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
950  verbose, grammar);
951  }
952  } else {
953  if (verbose) {
954  std::cerr << " ";
955  print_string(gamma, grammar);
956  std::cerr << " does not have a non-null terminal descendant\n";
957  }
958  if (at(complete, zeta_prime_addr)) { // test B
959  if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is ON\n";
960  contexts_generated = at(contexts, zeta_prime_addr);
961  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
962  verbose, grammar);
963  } else {
964  if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is OFF\n";
965  if (-1 != at(in_lane, zeta_prime_addr)) { // test C
966  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is ON\n";
967  move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
968  contexts_generated = at(contexts, zeta_prime_addr);
969  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
970  verbose, grammar);
971  } else {
972  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is OFF\n";
973  deal_with_tests_failed(num_originators_failed, first_originator_failed,
974  zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
975  verbose);
976  }
977  }
978  }
979  } /* END DO_LOOP */
980  if (verbose) std::cerr << "END DO_LOOP\n";
981  if (tests_failed) {
982  if (verbose) {
983  std::cerr << " TESTS_FAILED was on, turning it off and going to next configuration\n";
984  }
985  tests_failed = false;
986  continue;
987  }
988  bool keep_lane_popping = true;
989  if (verbose) std::cerr << " Start LANE popping\n";
990  while (keep_lane_popping) { // LANE popping loop
991  TEUCHOS_ASSERT(!lane.empty());
992  if (verbose) {
993  std::cerr << " LANE:";
994  print_stack(lane);
995  }
996  if (at(lane, Teuchos::size(lane) - 1) == MARKER) {
997  if (verbose) std::cerr << " Top of LANE is a marker\n";
998  if (verbose) std::cerr << " Start STACK popping\n";
999  while (true) { // STACK popping loop
1000  TEUCHOS_ASSERT(!stack.empty());
1001  if (verbose) {
1002  std::cerr << " STACK:";
1003  print_stack(stack);
1004  std::cerr << " LANE:";
1005  print_stack(lane);
1006  }
1007  if (stack.back() == MARKER) {
1008  if (verbose) std::cerr << " Top of STACK is a marker, pop STACK and LANE\n";
1009  resize(stack, Teuchos::size(stack) - 1);
1010  resize(lane, Teuchos::size(lane) - 1);
1011  break; // out of STACK popping, back into LANE popping
1012  } else if (at(complete, stack.back())) {
1013  if (verbose) std::cerr << " Top of STACK is has COMPLETE flag, pop STACK\n";
1014  resize(stack, Teuchos::size(stack) - 1);
1015  // back into STACK popping
1016  } else {
1017  int addr = stack.back();
1018  if (verbose) std::cerr << " Top of STACK is " << addr << ", pop STACK\n";
1019  resize(stack, Teuchos::size(stack) - 1);
1020  if (verbose) std::cerr << " Push " << addr << " onto LANE\n";
1021  lane.push_back(addr);
1022  if (verbose) std::cerr << " IN_LANE(" << addr << ") <- ON\n";
1023  at(in_lane, addr) = Teuchos::size(lane) - 1;
1024  keep_lane_popping = false;
1025  break; // out of STACK and LANE popping, into top-level loop
1026  } // end STACK top checks
1027  } // end STACK popping loop
1028  } else if (at(lane, Teuchos::size(lane) - 1) == ZERO) {
1029  if (verbose) std::cerr << " Top of LANE is a zero\n";
1030  if (verbose) std::cerr << " Pop LANE\n";
1031  resize(lane, Teuchos::size(lane) - 1); // pop LANE
1032  // back to top of LANE popping loop
1033  } else { // top of LANE neither marker nor zero
1034  int tau_addr = lane.back();
1035  if (verbose) std::cerr << " Top of LANE is $\\tau$ = " << tau_addr << "\n";
1036  at(in_lane, tau_addr) = -1;
1037  if (verbose) std::cerr << " IN_LANE(" << tau_addr << ") <- OFF\n";
1038  at(complete, tau_addr) = true;
1039  if (verbose) std::cerr << " COMPLETE(" << tau_addr << ") <- ON\n";
1040  if (verbose) std::cerr << " HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1041  heuristic_propagation_of_context_sets(
1042  tau_addr, contexts, complete,
1043  scs, states, states2scs, cs, grammar);
1044  if (Teuchos::size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1045  if (verbose) std::cerr << "END PROGRAM\n\n";
1046  return;
1047  }
1048  if (verbose) std::cerr << " Pop LANE\n";
1049  resize(lane, Teuchos::size(lane) - 1); // pop LANE
1050  // back to top of LANE popping loop
1051  } // end top of LANE checks
1052  } // end LANE popping loop
1053  } // end top-level while(1) loop
1054 }
1055 
1056 static std::vector<bool> determine_adequate_states(
1057  StatesInProgress const& states,
1058  GrammarPtr grammar,
1059  bool verbose,
1060  std::ostream& os) {
1061  std::vector<bool> out = make_vector<bool>(Teuchos::size(states));
1062  for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1063  const StateInProgress& state = *at(states, s_i);
1064  bool state_is_adequate = true;
1065  for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1066  const ActionInProgress& action = at(state.actions, a_i);
1067  if (action.action.kind == ACTION_SHIFT &&
1068  is_nonterminal(*grammar, *(action.context.begin()))) {
1069  continue;
1070  }
1071  for (int a_j = a_i + 1; a_j < Teuchos::size(state.actions); ++a_j) {
1072  const ActionInProgress& action2 = at(state.actions, a_j);
1073  if (action2.action.kind == ACTION_SHIFT &&
1074  is_nonterminal(*grammar, *(action2.context.begin()))) {
1075  continue;
1076  }
1077  if (intersects(action2.context, action.context)) {
1078  if (verbose) {
1079  const ActionInProgress* ap1 = &action;
1080  const ActionInProgress* ap2 = &action2;
1081  if (ap1->action.kind == ACTION_SHIFT) {
1082  std::swap(ap1, ap2);
1083  }
1084  TEUCHOS_ASSERT(ap1->action.kind == ACTION_REDUCE);
1085  os << "shift-reduce conflict in state " << s_i << ":\n";
1086  os << "reduce ";
1087  const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1088  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1089  os << lhs_name << " ::=";
1090  for (int rhs_i = 0; rhs_i < Teuchos::size(prod.rhs); ++rhs_i) {
1091  int rhs_symb = at(prod.rhs, rhs_i);
1092  const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1093  os << " " << rhs_symb_name;
1094  }
1095  int shift_symb = *(ap2->context.begin());
1096  const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1097  os << "\nshift " << shift_name << '\n';
1098  }
1099  state_is_adequate = false;
1100  break;
1101  }
1102  }
1103  if (!state_is_adequate) break;
1104  }
1105  at(out, s_i) = state_is_adequate;
1106  }
1107  if (verbose) os << '\n';
1108  return out;
1109 }
1110 
1111 ParserInProgress draft_lalr1_parser(GrammarPtr grammar, bool verbose) {
1112  ParserInProgress out;
1113  Configs& cs = out.configs;
1114  StatesInProgress& states = out.states;
1115  StateConfigs& scs = out.state_configs;
1116  Graph& states2scs = out.states2state_configs;
1117  out.grammar = grammar;
1118  cs = make_configs(*grammar);
1119  Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1120  if (verbose) std::cerr << "Building LR(0) parser\n";
1121  states = make_lr0_parser(cs, *grammar, lhs2cs);
1122  scs = form_state_configs(states);
1123  states2scs = form_states_to_state_configs(scs, states);
1124  if (verbose) print_graphviz("lr0.dot", out, true, std::cerr);
1125  if (verbose) std::cerr << "Checking adequacy of LR(0) machine\n";
1126  std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1127  std::cerr);
1128  if (*(std::min_element(adequate.begin(), adequate.end()))) {
1129  if (verbose) std::cerr << "The grammar is LR(0)!\n";
1130  return out;
1131  }
1132  std::vector<bool> complete = make_vector<bool>(Teuchos::size(scs), false);
1133  std::vector<Context> contexts = make_vector<Context>(Teuchos::size(scs));
1134  int accept_prod_i = get_accept_production(*grammar);
1135  /* initialize the accepting state-configs as described in
1136  footnote 8 at the bottom of page 37 */
1137  for (int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
1138  StateConfig& sc = at(scs, sc_i);
1139  StateInProgress& state = *at(states, sc.state);
1140  int config_i = at(state.configs, sc.config_in_state);
1141  Config& config = at(cs, config_i);
1142  if (config.production == accept_prod_i) {
1143  at(complete, sc_i) = true;
1144  at(contexts, sc_i).insert(get_end_terminal(*grammar));
1145  }
1146  }
1147  Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1148  if (verbose) std::cerr << "Originator Graph:\n";
1149  if (verbose) std::cerr << og << '\n';
1150  std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1151  /* compute context sets for all state-configs associated with reduction
1152  actions that are part of an inadequate state */
1153  for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1154  if (at(adequate, s_i)) continue;
1155  StateInProgress& state = *at(states, s_i);
1156  for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1157  int config_i = at(state.configs, cis_i);
1158  const Config& config = at(cs, config_i);
1159  const Grammar::Production& prod = at(grammar->productions, config.production);
1160  if (config.dot != Teuchos::size(prod.rhs)) continue;
1161  int zeta_j_addr = at(states2scs, s_i, cis_i);
1162  compute_context_set(zeta_j_addr, contexts, complete, scs,
1163  og, states, states2scs, cs, first_sets, grammar, verbose, out);
1164  }
1165  }
1166  /* update the context sets for all reduction state-configs
1167  which are marked complete, even if they aren't in inadequate states */
1168  for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1169  StateInProgress& state = *at(states, s_i);
1170  for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1171  int sc_i = at(states2scs, s_i, cis_i);
1172  if (!at(complete, sc_i)) continue;
1173  int config_i = at(state.configs, cis_i);
1174  Config& config = at(cs, config_i);
1175  const Grammar::Production& prod = at(grammar->productions, config.production);
1176  if (config.dot != Teuchos::size(prod.rhs)) continue;
1177  for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1178  ActionInProgress& action = at(state.actions, a_i);
1179  if (action.action.kind == ACTION_REDUCE &&
1180  action.action.production == config.production) {
1181  action.context = at(contexts, sc_i);
1182  }
1183  }
1184  }
1185  }
1186  if (verbose) std::cerr << "Checking adequacy of LALR(1) machine\n";
1187  adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1188  if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1189  std::stringstream ss;
1190  ss << "error: The grammar is not LALR(1).\n";
1191  determine_adequate_states(states, grammar, true, ss);
1192  print_graphviz("error.dot", out, true, ss);
1193  std::string s = ss.str();
1194  throw ParserBuildFail(s);
1195  }
1196  if (verbose) std::cerr << "The grammar is LALR(1)!\n";
1197  if (verbose) print_graphviz("lalr1.dot", out, true, std::cerr);
1198  return out;
1199 }
1200 
1201 Parser accept_parser(ParserInProgress const& pip) {
1202  const StatesInProgress& sips = pip.states;
1203  GrammarPtr grammar = pip.grammar;
1204  Parser out(grammar, Teuchos::size(sips));
1205  for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1206  add_state(out);
1207  }
1208  for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1209  const StateInProgress& sip = *at(sips, s_i);
1210  for (int a_i = 0; a_i < Teuchos::size(sip.actions); ++a_i) {
1211  const ActionInProgress& action = at(sip.actions, a_i);
1212  if (action.action.kind == ACTION_SHIFT &&
1213  is_nonterminal(*grammar, *(action.context.begin()))) {
1214  int nt = as_nonterminal(*grammar, *(action.context.begin()));
1215  add_nonterminal_action(out, s_i, nt, action.action.next_state);
1216  } else {
1217  for (Context::const_iterator it = action.context.begin();
1218  it != action.context.end(); ++it) {
1219  int terminal = *it;
1220  TEUCHOS_ASSERT(is_terminal(*grammar, terminal));
1221  add_terminal_action(out, s_i, terminal, action.action);
1222  }
1223  }
1224  }
1225  }
1226  return out;
1227 }
1228 
1229 ParserBuildFail::ParserBuildFail(const std::string& msg):
1230  std::invalid_argument(msg) {
1231 }
1232 
1233 Parser make_lalr1_parser(GrammarPtr grammar, bool verbose) {
1234  ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1235  return accept_parser(pip);
1236 }
1237 
1238 }
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Ptr< T > ptr(T *p)
Create a pointer to an object from a raw pointer.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
void swap(Teuchos::any &a, Teuchos::any &b)
Special swap for other code to find via Argument Dependent Lookup.