Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_Reader.cpp
#include "Teuchos_Reader.hpp"

#include <iostream>
#include <sstream>
#include <fstream>
#include <ios>
#include <cstdlib>
#include <set>

#include "Teuchos_string.hpp"
#include "Teuchos_vector.hpp"
#include "Teuchos_Parser.hpp"

namespace Teuchos {

namespace {

void print_indicator(std::ostream& os, std::string const& above, std::size_t pos) {
  for (std::size_t i = 0; i < pos; ++i) {
    if (above.at(i) == '\t') os << '\t';
    else os << ' ';
  }
  os << "^\n";
}

void print_underline(std::ostream& os, std::string const& above, std::size_t start, std::size_t end) {
  for (std::size_t i = 0; i < start; ++i) {
    if (above.at(i) == '\t') os << '\t';
    else os << ' ';
  }
  for (std::size_t i = start; i < end; ++i) os << '~';
  os << '\n';
}

} // end anonymous namespace

Reader::IndentStackEntry::IndentStackEntry(std::size_t l, std::size_t s, std::size_t e):
  line(l),start_length(s),end_length(e) {
}

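/* Core shift/reduce driver: looks up the parser action for the current token
   and either shifts it (which consumes the token and ends the loop), applies
   a reduction (which does not consume the token), or reports a parse error
   listing the terminals that would have been accepted in this state. */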
void Reader::at_token(std::istream& stream) {
  bool done = false;
  /* this can loop arbitrarily as reductions are made,
     because they don't consume the token */
  while (!done) {
    const Action& parser_action = get_action(parser, parser_state, lexer_token);
    if (parser_action.kind == ACTION_NONE) {
      std::stringstream ss;
      ss << "error: Parser failure at line " << line;
      ss << " column " << column << " of " << stream_name << '\n';
      error_print_line(stream, ss);
      std::set<std::string> expect_names;
      for (int expect_token = 0;
           expect_token < grammar->nterminals; ++expect_token) {
        const Action& expect_action = get_action(parser, parser_state, expect_token);
        if (expect_action.kind != ACTION_NONE) {
          expect_names.insert(at(grammar->symbol_names, expect_token));
        }
      }
      ss << "Expected one of {";
      for (std::set<std::string>::iterator it = expect_names.begin();
           it != expect_names.end(); ++it) {
        if (it != expect_names.begin()) ss << ", ";
        if (*it == ",") ss << "','";
        else ss << *it;
      }
      ss << "}\n";
      ss << "Got: " << at(grammar->symbol_names, lexer_token) << '\n';
      ss << "Lexer text: \"" << lexer_text << "\"\n";
      ss << "Parser was in state " << parser_state << '\n';
      throw ParserFail(ss.str());
    } else if (parser_action.kind == ACTION_SHIFT) {
      if (sensing_indent) {
        symbol_indentation_stack.push_back(indent_text.size());
      }
      Teuchos::any shift_result;
      this->at_shift(shift_result, lexer_token, lexer_text);
      add_back(value_stack, shift_result);
      done = true;
    } else if (parser_action.kind == ACTION_REDUCE) {
      if (parser_action.production == get_accept_production(*grammar)) {
        did_accept = true;
        return;
      }
      const Grammar::Production& prod = at(grammar->productions, parser_action.production);
      reduction_rhs.clear();
      for (int i = 0; i < size(prod.rhs); ++i) {
        add_back(reduction_rhs, at(value_stack, size(value_stack) - size(prod.rhs) + i));
      }
      resize(value_stack, size(value_stack) - size(prod.rhs));
      Teuchos::any reduce_result;
      try {
        this->at_reduce(reduce_result, parser_action.production, reduction_rhs);
      } catch (const ParserFail& e) {
        std::stringstream ss;
        ss << "error: Parser failure at line " << line;
        ss << " column " << column << " of " << stream_name << '\n';
        error_print_line(stream, ss);
        ss << '\n' << e.what();
        throw ParserFail(ss.str());
      }
      add_back(value_stack, reduce_result);
      if (sensing_indent) {
        if (size(prod.rhs)) {
          resize(symbol_indentation_stack,
              (size(symbol_indentation_stack) + 1)
              - size(prod.rhs));
        } else {
          symbol_indentation_stack.push_back(symbol_indentation_stack.back());
        }
      }
    } else {
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "SERIOUS BUG: Action::kind enum value not in range\n");
    }
    parser_state = execute_action(parser, parser_stack, parser_action);
  }
}

void Reader::indent_mismatch() {
  TEUCHOS_ASSERT(!indent_stack.empty());
  const IndentStackEntry& top = indent_stack.back();
  std::stringstream ss;
  ss << "error: Indentation characters beginning line " << line << " of " << stream_name
     << " don't match those beginning line " << top.line << '\n';
  ss << "It is strongly recommended not to mix tabs and spaces in indentation-sensitive formats\n";
  throw ParserFail(ss.str());
}

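/* Wraps at_token() with indentation sensing: when the current token is the
   newline token, the leading whitespace of the new line is compared against
   the current indentation text, and synthetic INDENT/DEDENT tokens are fed to
   the parser as the indentation grows or shrinks. Inconsistent indentation
   characters (e.g. mixed tabs and spaces) trigger indent_mismatch(). */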
void Reader::at_token_indent(std::istream& stream) {
  if (!sensing_indent || lexer_token != tables->indent_info.newline_token) {
    at_token(stream);
    return;
  }
  std::size_t last_newline_pos = lexer_text.find_last_of("\n");
  if (last_newline_pos == std::string::npos) {
    throw ParserFail("INDENT token did not contain a newline '\\n' !\n");
  }
  std::string lexer_indent = lexer_text.substr(last_newline_pos + 1, std::string::npos);
  // the at_token call is allowed to do anything to lexer_text
  at_token(stream);
  lexer_text.clear();
  std::size_t minlen = std::min(lexer_indent.length(), indent_text.length());
  if (lexer_indent.length() > indent_text.length()) {
    if (0 != lexer_indent.compare(0, indent_text.length(), indent_text)) {
      indent_mismatch();
    }
    indent_stack.push_back(IndentStackEntry(line, indent_text.length(), lexer_indent.length()));
    indent_text = lexer_indent;
    lexer_token = tables->indent_info.indent_token;
    at_token(stream);
  } else if (lexer_indent.length() < indent_text.length()) {
    if (0 != indent_text.compare(0, lexer_indent.length(), lexer_indent)) {
      indent_mismatch();
    }
    while (!indent_stack.empty()) {
      const IndentStackEntry& top = indent_stack.back();
      if (top.end_length <= minlen) break;
      indent_stack.pop_back();
      lexer_token = tables->indent_info.dedent_token;
      at_token(stream);
    }
    indent_text = lexer_indent;
  } else {
    if (0 != lexer_indent.compare(indent_text)) {
      indent_mismatch();
    }
  }
}

void Reader::backtrack_to_last_accept(std::istream& stream) {
  /* all the last_accept and backtracking is driven by
     the "accept the longest match" rule */
  line = last_lexer_accept_line;
  column = last_lexer_accept_column;
  line_text = last_lexer_accept_line_text;
  while (lexer_text.size() > last_lexer_accept) {
    bool ok = !stream.unget().fail();
    TEUCHOS_ASSERT(ok);
    resize(lexer_text, size(lexer_text) - 1);
  }
}

void Reader::reset_lexer_state() {
  lexer_state = 0;
  lexer_text.clear();
  lexer_token = -1;
}

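/* Called when the lexer automaton reaches a dead state: if no accepting state
   was ever reached, the text cannot be tokenized and an error is reported;
   otherwise the stream is rewound to the longest accepted match, that token
   is handed to the parser, and the lexer state is reset for the next token. */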
void Reader::at_lexer_end(std::istream& stream) {
  if (lexer_token == -1) {
    std::stringstream ss;
    if (lexer_text.find('\n') == std::string::npos) {
      ss << "error: Could not tokenize this (line " << line;
      ss << " column " << column << " of " << stream_name << "):\n";
      ss << line_text << '\n';
      TEUCHOS_ASSERT(line_text.size() >= lexer_text.size());
      print_underline(ss, line_text, line_text.size() - lexer_text.size(), line_text.size());
    } else {
      ss << "error: Could not tokenize this (ends at line " << line;
      ss << " column " << column << " of " << stream_name << "):\n";
      ss << lexer_text << '\n';
    }
    throw ParserFail(ss.str());
  }
  backtrack_to_last_accept(stream);
  at_token_indent(stream);
  reset_lexer_state();
}

Reader::Reader(ReaderTablesPtr tables_in):
  tables(tables_in),
  parser(tables->parser),
  lexer(tables->lexer),
  grammar(get_grammar(parser))
{
  TEUCHOS_ASSERT(get_determinism(lexer));
}

void Reader::update_position(char c) {
  if (c == '\n') {
    ++line;
    column = 1;
    line_text.clear();
  } else {
    ++column;
  }
}

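/* For diagnostics only: reads the remainder of the offending input line so
   the full line can be echoed, then prints a caret under the column that had
   been consumed before the error. */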
void Reader::error_print_line(std::istream& is, std::ostream& os) {
  std::size_t oldpos = line_text.size();
  char c;
  while (is.get(c)) {
    if (c == '\n' || c == '\r') break;
    line_text.push_back(c);
  }
  if (line_text.empty()) return;
  os << line_text << '\n';
  if (oldpos > 0) print_indicator(os, line_text, oldpos - 1);
}

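/* Main entry point: resets all lexer and parser state, then reads the stream
   one character at a time, stepping the lexer automaton and feeding completed
   tokens to the shift/reduce parser until the end terminal is accepted.
   The single value left on the value stack is swapped into 'result'. */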
void Reader::read_stream(any& result, std::istream& stream, std::string const& stream_name_in) {
  using std::swap;
  line = 1;
  column = 1;
  lexer_state = 0;
  lexer_text.clear();
  line_text.clear();
  lexer_token = -1;
  parser_state = 0;
  parser_stack.clear();
  parser_stack.push_back(parser_state);
  value_stack.clear();
  did_accept = false;
  stream_name = stream_name_in;
  if (tables->indent_info.is_sensitive) {
    sensing_indent = true;
    indent_text.clear();
    indent_stack.clear();
  } else {
    sensing_indent = false;
  }
  char c;
  while (stream.get(c)) {
    if (!is_symbol(c)) {
      std::stringstream ss;
      ss << "error: Unexpected character code " << int(c);
      ss << " at line " << line << " column " << column;
      ss << " of " << stream_name << '\n';
      error_print_line(stream, ss);
      throw ParserFail(ss.str());
    }
    line_text.push_back(c);
    lexer_text.push_back(c);
    int lexer_symbol = get_symbol(c);
    lexer_state = step(lexer, lexer_state, lexer_symbol);
    if (lexer_state == -1) {
      at_lexer_end(stream);
    } else {
      int token = accepts(lexer, lexer_state);
      update_position(c);
      if (token != -1) {
        lexer_token = token;
        last_lexer_accept = lexer_text.size();
        last_lexer_accept_line = line;
        last_lexer_accept_column = column;
        last_lexer_accept_line_text = line_text;
      }
    }
  }
  if (last_lexer_accept < lexer_text.size()) {
    std::stringstream ss;
    std::string bad_str = lexer_text.substr(last_lexer_accept, std::string::npos);
    ss << "error: Could not tokenize \"" << bad_str;
    ss << "\" at end of " << stream_name << '\n';
    throw ParserFail(ss.str());
  }
  at_lexer_end(stream);
  lexer_token = get_end_terminal(*grammar);
  at_token(stream);
  TEUCHOS_TEST_FOR_EXCEPTION(!did_accept, std::logic_error,
      "The EOF terminal was accepted but the root nonterminal was not reduced\n"
      "This indicates a bug in Teuchos::Reader\n");
  TEUCHOS_ASSERT(value_stack.size() == 1);
  swap(result, value_stack.back());
}

void Reader::read_string(any& result, std::string const& string, std::string const& string_name) {
  std::istringstream stream(string);
  read_stream(result, stream, string_name);
}

void Reader::read_file(any& result, std::string const& file_name) {
  std::ifstream stream(file_name.c_str());
  TEUCHOS_TEST_FOR_EXCEPTION(!stream.is_open(),
      ParserFail,
      "Could not open file " << file_name);
  read_stream(result, stream, file_name);
}

void Reader::at_shift(any&, int, std::string&) {
}

void Reader::at_reduce(any&, int, std::vector<any>&) {
}

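/* DebugReader overrides the SHIFT/REDUCE hooks to print a trace of every
   parser action to the given output stream, accumulating the text consumed
   by each symbol so each reduction shows the portion of input it covers. */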
DebugReader::DebugReader(ReaderTablesPtr tables_in, std::ostream& os_in):
  Reader(tables_in),os(os_in)
{
}

void DebugReader::at_shift(any& result, int token, std::string& text) {
  std::string& text_escaped = make_any_ref<std::string>(result);
  for (std::size_t i = 0; i < text.size(); ++i) {
    char c = text[i];
    switch (c) {
      case '\n': text_escaped.append("\\n"); break;
      case '\t': text_escaped.append("\\t"); break;
      case '\r': text_escaped.append("\\r"); break;
      default: text_escaped.push_back(c);
    }
  }
  os << "SHIFT (" << at(grammar->symbol_names, token) << ")[" << text_escaped << "]\n";
}

void DebugReader::at_reduce(any& result, int prod_i, std::vector<any>& rhs) {
  os << "REDUCE";
  std::string& lhs_text = make_any_ref<std::string>(result);
  const Grammar::Production& prod = at(grammar->productions, prod_i);
  for (int i = 0; i < size(prod.rhs); ++i) {
    const std::string& rhs_name = at(grammar->symbol_names, at(prod.rhs, i));
    const std::string& rhs_text = any_ref_cast<std::string>(at(rhs, i));
    os << " (" << rhs_name << ")[" << rhs_text << "]";
    lhs_text.append(rhs_text);
  }
  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
  os << " -> (" << lhs_name << ")[" << lhs_text << "]\n";
}

} // namespace Teuchos
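A minimal usage sketch (separate from Teuchos_Reader.cpp above, not part of the Teuchos sources): it shows how the DebugReader defined here can trace SHIFT/REDUCE actions while parsing a string. The caller is assumed to already have a ReaderTablesPtr from the application's grammar and lexer table construction; the helper name parse_and_trace and the stream label "example-input" are illustrative only.

#include <iostream>
#include <string>

#include "Teuchos_Parser.hpp"
#include "Teuchos_Reader.hpp"

// Illustrative helper (hypothetical name): 'tables' must be built elsewhere
// by whatever grammar/lexer table constructor the application uses.
void parse_and_trace(Teuchos::ReaderTablesPtr tables, std::string const& text) {
  Teuchos::DebugReader reader(tables, std::cerr);  // echoes every SHIFT/REDUCE
  Teuchos::any result;
  try {
    reader.read_string(result, text, "example-input");
  } catch (Teuchos::ParserFail const& e) {
    // ParserFail carries the line/column diagnostics formatted by Reader
    std::cerr << e.what() << '\n';
  }
}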