Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_Reader.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_Reader.hpp"
11 
12 #include <iostream>
13 #include <sstream>
14 #include <fstream>
15 #include <ios>
16 #include <cstdlib>
17 #include <set>
18 
19 #include "Teuchos_string.hpp"
20 #include "Teuchos_vector.hpp"
21 #include "Teuchos_Parser.hpp"
22 
23 namespace Teuchos {
24 
25 namespace {
26 
/* Writes one line to `os` that places a '^' beneath character index `pos`
   of `above`, assuming `above` was printed on the preceding line.
   Every tab among the first `pos` characters of `above` is echoed as a tab
   (anything else becomes a single space) so the caret stays aligned however
   tabs are rendered. `above.at()` throws std::out_of_range if `pos` exceeds
   the length of `above`. */
void print_indicator(std::ostream& os, std::string const& above, std::size_t pos) {
  std::size_t col = 0;
  while (col < pos) {
    const char pad = (above.at(col) == '\t') ? '\t' : ' ';
    os << pad;
    ++col;
  }
  os << "^\n";
}
34 
/* Writes one line to `os` that underlines the character range
   [start, end) of `above` with '~', assuming `above` was printed on the
   preceding line. The first `start` characters are padded with a tab where
   `above` has a tab and a space otherwise. Nothing is underlined when
   `end <= start`. `above.at()` throws std::out_of_range if `start` exceeds
   the length of `above`. */
void print_underline(std::ostream& os, std::string const& above, std::size_t start, std::size_t end) {
  std::size_t col = 0;
  for (; col < start; ++col) {
    os << ((above.at(col) == '\t') ? '\t' : ' ');
  }
  /* col == start here; keep going up to (not including) end */
  for (; col < end; ++col) {
    os << '~';
  }
  os << '\n';
}
43 
44 } // end anonymous namespace
45 
46 Reader::IndentStackEntry::IndentStackEntry(std::size_t l, std::size_t s, std::size_t e):
47  line(l),start_length(s),end_length(e) {
48 }
49 
50 void Reader::at_token(std::istream& stream) {
51  bool done = false;
52  /* this can loop arbitrarily as reductions are made,
53  because they don't consume the token */
54  while (!done) {
55  const Action& parser_action = get_action(parser, parser_state, lexer_token);
56  if (parser_action.kind == ACTION_NONE) {
57  std::stringstream ss;
58  ss << "error: Parser failure at line " << line;
59  ss << " column " << column << " of " << stream_name << '\n';
60  error_print_line(stream, ss);
61  std::set<std::string> expect_names;
62  for (int expect_token = 0;
63  expect_token < grammar->nterminals; ++expect_token) {
64  const Action& expect_action = get_action(parser, parser_state, expect_token);
65  if (expect_action.kind != ACTION_NONE) {
66  expect_names.insert(at(grammar->symbol_names, expect_token));
67  }
68  }
69  ss << "Expected one of {";
70  for (std::set<std::string>::iterator it = expect_names.begin();
71  it != expect_names.end(); ++it) {
72  if (it != expect_names.begin()) ss << ", ";
73  if (*it == ",") ss << "','";
74  else ss << *it;
75  }
76  ss << "}\n";
77  ss << "Got: " << at(grammar->symbol_names, lexer_token) << '\n';
78  ss << "Lexer text: \"" << lexer_text << "\"\n";
79  ss << "Parser was in state " << parser_state << '\n';
80  throw ParserFail(ss.str());
81  } else if (parser_action.kind == ACTION_SHIFT) {
82  if (sensing_indent) {
83  symbol_indentation_stack.push_back(indent_text.size());
84  }
85  Teuchos::any shift_result;
86  this->at_shift(shift_result, lexer_token, lexer_text);
87  add_back(value_stack, shift_result);
88  done = true;
89  } else if (parser_action.kind == ACTION_REDUCE) {
90  if (parser_action.production == get_accept_production(*grammar)) {
91  did_accept = true;
92  return;
93  }
94  const Grammar::Production& prod = at(grammar->productions, parser_action.production);
95  reduction_rhs.clear();
96  for (int i = 0; i < Teuchos::size(prod.rhs); ++i) {
97  add_back(reduction_rhs, at(value_stack, Teuchos::size(value_stack) - Teuchos::size(prod.rhs) + i));
98  }
99  resize(value_stack, Teuchos::size(value_stack) - Teuchos::size(prod.rhs));
100  Teuchos::any reduce_result;
101  try {
102  this->at_reduce(reduce_result, parser_action.production, reduction_rhs);
103  } catch (const ParserFail& e) {
104  std::stringstream ss;
105  ss << "error: Parser failure at line " << line;
106  ss << " column " << column << " of " << stream_name << '\n';
107  error_print_line(stream, ss);
108  ss << '\n' << e.what();
109  throw ParserFail(ss.str());
110  }
111  add_back(value_stack, reduce_result);
112  if (sensing_indent) {
113  if (Teuchos::size(prod.rhs)) {
114  resize(symbol_indentation_stack,
115  (Teuchos::size(symbol_indentation_stack) + 1)
116  - Teuchos::size(prod.rhs));
117  } else {
118  symbol_indentation_stack.push_back(symbol_indentation_stack.back());
119  }
120  }
121  } else {
122  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
123  "SERIOUS BUG: Action::kind enum value not in range\n");
124  }
125  parser_state = execute_action(parser, parser_stack, parser_action);
126  }
127 }
128 
129 void Reader::indent_mismatch() {
130  TEUCHOS_ASSERT(!indent_stack.empty());
131  const IndentStackEntry& top = indent_stack.back();
132  std::stringstream ss;
133  ss << "error: Indentation characters beginning line " << line << " of " << stream_name
134  << " don't match those beginning line " << top.line << '\n';
135  ss << "It is strongly recommended not to mix tabs and spaces in indentation-sensitive formats\n";
136  throw ParserFail(ss.str());
137 }
138 
139 void Reader::at_token_indent(std::istream& stream) {
140  if (!sensing_indent || lexer_token != tables->indent_info.newline_token) {
141  at_token(stream);
142  return;
143  }
144  std::size_t last_newline_pos = lexer_text.find_last_of("\n");
145  if (last_newline_pos == std::string::npos) {
146  throw ParserFail("INDENT token did not contain a newline '\\n' !\n");
147  }
148  std::string lexer_indent = lexer_text.substr(last_newline_pos + 1, std::string::npos);
149  // the at_token call is allowed to do anything to lexer_text
150  at_token(stream);
151  lexer_text.clear();
152  std::size_t minlen = std::min(lexer_indent.length(), indent_text.length());
153  if (lexer_indent.length() > indent_text.length()) {
154  if (0 != lexer_indent.compare(0, indent_text.length(), indent_text)) {
155  indent_mismatch();
156  }
157  indent_stack.push_back(IndentStackEntry(line, indent_text.length(), lexer_indent.length()));
158  indent_text = lexer_indent;
159  lexer_token = tables->indent_info.indent_token;
160  at_token(stream);
161  } else if (lexer_indent.length() < indent_text.length()) {
162  if (0 != indent_text.compare(0, lexer_indent.length(), lexer_indent)) {
163  indent_mismatch();
164  }
165  while (!indent_stack.empty()) {
166  const IndentStackEntry& top = indent_stack.back();
167  if (top.end_length <= minlen) break;
168  indent_stack.pop_back();
169  lexer_token = tables->indent_info.dedent_token;
170  at_token(stream);
171  }
172  indent_text = lexer_indent;
173  } else {
174  if (0 != lexer_indent.compare(indent_text)) {
175  indent_mismatch();
176  }
177  }
178 }
179 
180 void Reader::backtrack_to_last_accept(std::istream& stream) {
181  /* all the last_accept and backtracking is driven by
182  the "accept the longest match" rule */
183  line = last_lexer_accept_line;
184  column = last_lexer_accept_column;
185  line_text = last_lexer_accept_line_text;
186  while (lexer_text.size() > last_lexer_accept) {
187  bool ok = !stream.unget().fail();
188  TEUCHOS_ASSERT(ok);
189  resize(lexer_text, Teuchos::size(lexer_text) - 1);
190  }
191 }
192 
193 void Reader::reset_lexer_state() {
194  lexer_state = 0;
195  lexer_text.clear();
196  lexer_token = -1;
197 }
198 
199 void Reader::at_lexer_end(std::istream& stream) {
200  if (lexer_token == -1) {
201  std::stringstream ss;
202  if (lexer_text.find('\n') == std::string::npos) {
203  ss << "error: Could not tokenize this (line " << line;
204  ss << " column " << column << " of " << stream_name << "):\n";
205  ss << line_text << '\n';
206  TEUCHOS_ASSERT(line_text.size() >= lexer_text.size());
207  print_underline(ss, line_text, line_text.size() - lexer_text.size(), line_text.size());
208  } else {
209  ss << "error: Could not tokenize this (ends at line " << line;
210  ss << " column " << column << " of " << stream_name << "):\n";
211  ss << lexer_text << '\n';
212  }
213  throw ParserFail(ss.str());
214  }
215  backtrack_to_last_accept(stream);
216  at_token_indent(stream);
217  reset_lexer_state();
218 }
219 
220 Reader::Reader(ReaderTablesPtr tables_in):
221  tables(tables_in),
222  parser(tables->parser),
223  lexer(tables->lexer),
224  grammar(get_grammar(parser))
225 {
226  TEUCHOS_ASSERT(get_determinism(lexer));
227 }
228 
229 void Reader::update_position(char c) {
230  if (c == '\n') {
231  ++line;
232  column = 1;
233  line_text.clear();
234  } else {
235  ++column;
236  }
237 }
238 
239 void Reader::error_print_line(std::istream& is, std::ostream& os) {
240  std::size_t oldpos = line_text.size();
241  char c;
242  while (is.get(c)) {
243  if (c == '\n' || c == '\r') break;
244  line_text.push_back(c);
245  }
246  if (line_text.empty()) return;
247  os << line_text << '\n';
248  if (oldpos > 0) print_indicator(os, line_text, oldpos - 1);
249 }
250 
251 void Reader::read_stream(any& result, std::istream& stream, std::string const& stream_name_in) {
252  using std::swap;
253  line = 1;
254  column = 1;
255  lexer_state = 0;
256  lexer_text.clear();
257  line_text.clear();
258  lexer_token = -1;
259  parser_state = 0;
260  parser_stack.clear();
261  parser_stack.push_back(parser_state);
262  value_stack.clear();
263  did_accept = false;
264  stream_name = stream_name_in;
265  if (tables->indent_info.is_sensitive) {
266  sensing_indent = true;
267  indent_text.clear();
268  indent_stack.clear();
269  } else {
270  sensing_indent = false;
271  }
272  char c;
273  while (stream.get(c)) {
274  if (!is_symbol(c)) {
275  std::stringstream ss;
276  ss << "error: Unexpected character code " << int(c);
277  ss << " at line " << line << " column " << column;
278  ss << " of " << stream_name << '\n';
279  error_print_line(stream, ss);
280  throw ParserFail(ss.str());
281  }
282  line_text.push_back(c);
283  lexer_text.push_back(c);
284  int lexer_symbol = get_symbol(c);
285  lexer_state = step(lexer, lexer_state, lexer_symbol);
286  if (lexer_state == -1) {
287  at_lexer_end(stream);
288  } else {
289  int token = accepts(lexer, lexer_state);
290  update_position(c);
291  if (token != -1) {
292  lexer_token = token;
293  last_lexer_accept = lexer_text.size();
294  last_lexer_accept_line = line;
295  last_lexer_accept_column = column;
296  last_lexer_accept_line_text = line_text;
297  }
298  }
299  }
300  if (last_lexer_accept < lexer_text.size()) {
301  std::stringstream ss;
302  std::string bad_str = lexer_text.substr(last_lexer_accept, std::string::npos);
303  ss << "error: Could not tokenize \"" << bad_str;
304  ss << "\" at end of " << stream_name << '\n';
305  throw ParserFail(ss.str());
306  }
307  at_lexer_end(stream);
308  lexer_token = get_end_terminal(*grammar);
309  at_token(stream);
310  TEUCHOS_TEST_FOR_EXCEPTION(!did_accept, std::logic_error,
311  "The EOF terminal was accepted but the root nonterminal was not reduced\n"
312  "This indicates a bug in Teuchos::Reader\n");
313  TEUCHOS_ASSERT(value_stack.size() == 1);
314  swap(result, value_stack.back());
315 }
316 
317 void Reader::read_string(any& result, std::string const& string, std::string const& string_name) {
318  std::istringstream stream(string);
319  read_stream(result, stream, string_name);
320 }
321 
322 void Reader::read_file(any& result, std::string const& file_name) {
323  std::ifstream stream(file_name.c_str());
324  TEUCHOS_TEST_FOR_EXCEPTION(!stream.is_open(),
325  ParserFail,
326  "Could not open file " << file_name);
327  read_stream(result, stream, file_name);
328 }
329 
330 void Reader::at_shift(any&, int, std::string&) {
331 }
332 
333 void Reader::at_reduce(any&, int, std::vector<any>&) {
334 }
335 
336 DebugReader::DebugReader(ReaderTablesPtr tables_in, std::ostream& os_in):
337  Reader(tables_in),os(os_in)
338 {
339 }
340 
341 void DebugReader::at_shift(any& result, int token, std::string& text) {
342  std::string& text_escaped = make_any_ref<std::string>(result);
343  for (std::size_t i = 0; i < text.size(); ++i) {
344  char c = text[i];
345  switch (c) {
346  case '\n': text_escaped.append("\\n"); break;
347  case '\t': text_escaped.append("\\t"); break;
348  case '\r': text_escaped.append("\\r"); break;
349  default: text_escaped.push_back(c);
350  }
351  }
352  os << "SHIFT (" << at(grammar->symbol_names, token) << ")[" << text_escaped << "]\n";
353 }
354 
355 void DebugReader::at_reduce(any& result, int prod_i, std::vector<any>& rhs) {
356  os << "REDUCE";
357  std::string& lhs_text = make_any_ref<std::string>(result);
358  const Grammar::Production& prod = at(grammar->productions, prod_i);
359  for (int i = 0; i < Teuchos::size(prod.rhs); ++i) {
360  const std::string& rhs_name = at(grammar->symbol_names, at(prod.rhs, i));
361  const std::string& rhs_text = any_ref_cast<std::string>(at(rhs, i));
362  os << " (" << rhs_name << ")[" << rhs_text << "]";
363  lhs_text.append(rhs_text);
364  }
365  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
366  os << " -> (" << lhs_name << ")[" << lhs_text << "]\n";
367 }
368 
369 } // namespace Teuchos
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
void read_file(any &result, std::string const &file_name)
A convenience method for reading a file.
void read_string(any &result, std::string const &string, std::string const &string_name)
A convenience method for reading a string.
Modified boost::any class, which is a container for a templated value.
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
The main class for users to read text using TeuchosParser.
virtual void at_reduce(any &result, int production, std::vector< any > &rhs)
User-overridable REDUCE (production) method.
virtual void at_shift(any &result, int token, std::string &text)
User-overridable SHIFT (token) method.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
void read_stream(any &result, std::istream &stream, std::string const &stream_name_in)
The main method for reading a stream of text.
Declares Teuchos::Reader.