Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_Language.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_Language.hpp"
11 
12 #include <set>
13 #include <iostream>
14 #include <sstream>
15 #include <cstdlib>
16 #include <cstdarg>
17 
18 #include "Teuchos_vector.hpp"
19 #include "Teuchos_regex.hpp"
20 #include "Teuchos_Parser.hpp"
21 
22 namespace Teuchos {
23 
24 void Language::Token::operator()(std::string const& name_in, std::string const& regex_in) {
25  name = name_in;
26  regex = regex_in;
27 }
28 
29 Language::RHSBuilder::RHSBuilder(Production& prod_in):
30  prod(prod_in) {
31 }
32 
33 Language::RHSBuilder& Language::RHSBuilder::operator,(std::string const& rhs_item) {
34  prod.rhs.push_back(rhs_item);
35  return *this;
36 }
37 
38 Language::RHSBuilder& Language::RHSBuilder::operator>>(std::string const& rhs_item) {
39  prod.rhs.push_back(rhs_item);
40  return *this;
41 }
42 
43 Language::RHSBuilder Language::Production::operator()(std::string const& lhs_in) {
44  lhs = lhs_in;
45  return Language::RHSBuilder(*this);
46 }
47 
48 GrammarPtr make_grammar(Language const& language) {
49  std::map<std::string, int> symbol_map;
50  int nterminals = 0;
51  for (Language::Tokens::const_iterator it = language.tokens.begin();
52  it != language.tokens.end(); ++it) {
53  const Language::Token& token = *it;
54  TEUCHOS_TEST_FOR_EXCEPTION(token.name.empty(), ParserFail,
55  "ERROR: token " << it - language.tokens.begin() << " has an empty name\n");
56  symbol_map[token.name] = nterminals++;
57  }
58  int nsymbols = nterminals;
59  for (Language::Productions::const_iterator it = language.productions.begin();
60  it != language.productions.end(); ++it) {
61  const Language::Production& production = *it;
62  TEUCHOS_TEST_FOR_EXCEPTION(production.lhs.empty(), ParserFail,
63  "ERROR: production " << it - language.productions.begin() << " has an empty LHS name\n");
64  if (symbol_map.count(production.lhs)) continue;
65  symbol_map[production.lhs] = nsymbols++;
66  }
67  RCP<Grammar> out(new Grammar());
68  out->nsymbols = nsymbols;
69  out->nterminals = nterminals;
70  for (Language::Productions::const_iterator it = language.productions.begin();
71  it != language.productions.end(); ++it) {
72  const Language::Production& lang_prod = *it;
73  out->productions.push_back(Grammar::Production());
74  Grammar::Production& gprod = out->productions.back();
75  TEUCHOS_ASSERT(symbol_map.count(lang_prod.lhs));
76  gprod.lhs = symbol_map[lang_prod.lhs];
77  for (Language::RHS::const_iterator it2 = lang_prod.rhs.begin();
78  it2 != lang_prod.rhs.end(); ++it2) {
79  const std::string& lang_symb = *it2;
80  TEUCHOS_TEST_FOR_EXCEPTION(!symbol_map.count(lang_symb), ParserFail,
81  "RHS entry \"" << lang_symb <<
82  "\" is neither a nonterminal (LHS of a production) nor a token!\n");
83  gprod.rhs.push_back(symbol_map[lang_symb]);
84  }
85  }
86  out->symbol_names = make_vector<std::string>(nsymbols);
87  for (std::map<std::string, int>::const_iterator it = symbol_map.begin();
88  it != symbol_map.end(); ++it) {
89  const std::pair<std::string, int>& pair = *it;
90  at(out->symbol_names, pair.second) = pair.first;
91  }
92  add_end_terminal(*out);
93  add_accept_production(*out);
94  return out;
95 }
96 
97 std::ostream& operator<<(std::ostream& os, Language const& lang) {
98  for (Language::Tokens::const_iterator it = lang.tokens.begin();
99  it != lang.tokens.end(); ++it) {
100  const Language::Token& token = *it;
101  os << "token " << token.name << " regex \'" << token.regex << "\'\n";
102  }
103  std::set<std::string> nonterminal_set;
104  std::vector<std::string> nonterminal_list;
105  for (Language::Productions::const_iterator it = lang.productions.begin();
106  it != lang.productions.end(); ++it) {
107  const Language::Production& prod = *it;
108  if (!nonterminal_set.count(prod.lhs)) {
109  nonterminal_set.insert(prod.lhs);
110  nonterminal_list.push_back(prod.lhs);
111  }
112  }
113  for (std::vector<std::string>::const_iterator it = nonterminal_list.begin();
114  it != nonterminal_list.end(); ++it) {
115  const std::string& nonterminal = *it;
116  std::stringstream ss;
117  ss << nonterminal << " ::=";
118  std::string lead = ss.str();
119  os << lead;
120  for (std::string::iterator it2 = lead.begin(); it2 != lead.end(); ++it2) {
121  *it2 = ' ';
122  }
123  bool first = true;
124  for (Language::Productions::const_iterator it2 = lang.productions.begin();
125  it2 != lang.productions.end(); ++it2) {
126  const Language::Production& prod = *it2;
127  if (prod.lhs != nonterminal) continue;
128  if (first) first = false;
129  else os << " |\n" << lead;
130  for (Language::RHS::const_iterator it3 = prod.rhs.begin();
131  it3 != prod.rhs.end(); ++it3) {
132  const std::string& symb = *it3;
133  if (symb == "|") os << " '|'";
134  else os << " " << symb;
135  }
136  }
137  os << "\n";
138  }
139  os << "\n";
140  return os;
141 }
142 
143 void make_lexer(FiniteAutomaton& result, Language const& language) {
144  using std::swap;
145  for (int i = 0; i < Teuchos::size(language.tokens); ++i) {
146  const std::string& name = at(language.tokens, i).name;
147  const std::string& regex = at(language.tokens, i).regex;
148  if (i == 0) {
149  regex::make_dfa(result, name, regex, i);
150  } else {
151  FiniteAutomaton b;
152  regex::make_dfa(b, name, regex, i);
153  unite(result, result, b);
154  }
155  }
156  make_deterministic(result, result);
157  simplify(result, result);
158 }
159 
160 static void make_indent_info(IndentInfo& out, Language const& language) {
161  out.is_sensitive = false;
162  out.indent_token = -1;
163  out.dedent_token = -1;
164  out.newline_token = -1;
165  for (int tok_i = 0; tok_i < Teuchos::size(language.tokens); ++tok_i) {
166  const Language::Token& token = at(language.tokens, tok_i);
167  if (token.name == "INDENT") {
168  TEUCHOS_TEST_FOR_EXCEPTION(out.indent_token != -1, ParserFail,
169  "error: Language has two or more INDENT tokens\n");
170  out.indent_token = tok_i;
171  out.is_sensitive = true;
172  } else if (token.name == "DEDENT") {
173  TEUCHOS_TEST_FOR_EXCEPTION(out.dedent_token != -1, ParserFail,
174  "error: Language has two or more DEDENT tokens\n");
175  out.dedent_token = tok_i;
176  } else if (token.name == "NEWLINE") {
177  TEUCHOS_TEST_FOR_EXCEPTION(out.newline_token != -1, ParserFail,
178  "error: Language has two or more NEWLINE tokens\n");
179  out.newline_token = tok_i;
180  }
181  }
182  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.indent_token == -1,
183  ParserFail,
184  "error: Indentation-sensitive language has no INDENT token\n");
185  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.dedent_token == -1,
186  ParserFail,
187  "error: Indentation-sensitive language has no DEDENT token\n");
188  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.newline_token == -1,
189  ParserFail,
190  "error: Indentation-sensitive language has no NEWLINE token\n");
192  (out.indent_token < out.newline_token ||
193  out.dedent_token < out.newline_token),
194  ParserFail,
195  "error: NEWLINE needs to come before all other indent tokens\n");
196 }
197 
199  RCP<ReaderTables> out(new ReaderTables());
200  make_lexer(out->lexer, language);
201  make_indent_info(out->indent_info, language);
202  GrammarPtr grammar = make_grammar(language);
203  out->parser = make_lalr1_parser(grammar);
204  return out;
205 }
206 
207 }
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
FiniteAutomaton lexer
lexer.
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
The main class for users to define a language using TeuchosParser.
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
Parser and lexer tables specifying how to read a Language.
Tokens tokens
vector of tokens
ReaderTablesPtr make_reader_tables(Language const &language)
constructs ReaderTables for the given Language.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
Declares Teuchos::Language.