10 #include "Teuchos_set.hpp"
12 #include "Teuchos_regex.hpp"
17 #include "Teuchos_Assert.hpp"
19 #include "Teuchos_vector.hpp"
20 #include "Teuchos_string.hpp"
21 #include "Teuchos_chartab.hpp"
23 #include "Teuchos_chartab.hpp"
// Builds the Language that describes the syntax of regular expressions
// themselves. The part visible here registers the grammar productions.
//
// Each statement has the shape
//   prods[PRODUCTION_ID]("lhs") >> "rhs0", "rhs1", ...;
// and read together the productions are:
//   regex        -> union
//   union        -> concat | union "|" concat
//   concat       -> qualified | concat qualified
//   qualified    -> single | qualified "*" | qualified "+" | qualified "?"
//   single       -> char | "." | set | "(" union ")"
//   set          -> positive-set | negative-set
//   positive-set -> "[" set-items "]"
//   negative-set -> "[" "^" set-items "]"
//   set-items    -> set-item | set-items set-item
//   set-item     -> char | range
//   range        -> char "-" char
// So "|" binds loosest, then concatenation, then the postfix qualifiers.
//
// NOTE(review): this listing omits the lines between the signature and the
// first production, so the declaration of `prods` is not shown here.
28 Language make_language() {
// Entry point: a regex is a union expression.
39 prods[PROD_REGEX](
"regex") >>
"union";
// Alternation (left-recursive); the *_DECAY forms pass a single child up.
40 prods[PROD_UNION_DECAY](
"union") >>
"concat";
41 prods[PROD_UNION](
"union") >>
"union",
"|",
"concat";
// Concatenation is juxtaposition of qualified items (left-recursive).
42 prods[PROD_CONCAT_DECAY](
"concat") >>
"qualified";
43 prods[PROD_CONCAT](
"concat") >>
"concat",
"qualified";
// Postfix qualifiers *, + and ? applied to a qualified item.
44 prods[PROD_QUAL_DECAY](
"qualified") >>
"single";
45 prods[PROD_STAR](
"qualified") >>
"qualified",
"*";
46 prods[PROD_PLUS](
"qualified") >>
"qualified",
"+";
47 prods[PROD_MAYBE](
"qualified") >>
"qualified",
"?";
// Atoms: a literal char, the "." wildcard, a bracketed set, or a
// parenthesized sub-expression.
48 prods[PROD_SINGLE_CHAR](
"single") >>
"char";
49 prods[PROD_ANY](
"single") >>
".";
50 prods[PROD_SINGLE_SET](
"single") >>
"set";
51 prods[PROD_PARENS_UNION](
"single") >>
"(",
"union",
")";
// Bracketed sets: [items] or the negated form [^items].
52 prods[PROD_SET_POSITIVE](
"set") >>
"positive-set";
53 prods[PROD_SET_NEGATIVE](
"set") >>
"negative-set";
54 prods[PROD_POSITIVE_SET](
"positive-set") >>
"[",
"set-items",
"]";
55 prods[PROD_NEGATIVE_SET](
"negative-set") >>
"[",
"^",
"set-items",
"]";
// One or more set items, each a single char or a char-char range.
56 prods[PROD_SET_ITEMS_DECAY](
"set-items") >>
"set-item";
57 prods[PROD_SET_ITEMS_ADD](
"set-items") >>
"set-items",
"set-item";
58 prods[PROD_SET_ITEM_CHAR](
"set-item") >>
"char";
59 prods[PROD_SET_ITEM_RANGE](
"set-item") >>
"range";
60 prods[PROD_RANGE](
"range") >>
"char",
"-",
"char";
61 out.tokens.resize(NTOKS);
63 out.tokens[TOK_CHAR](
"char",
"[^\\\\\\.\\[\\]\\(\\)\\|\\-\\^\\*\\+\\?]|\\\\.");
64 out.tokens[TOK_DOT](
".",
"\\.");
65 out.tokens[TOK_LRANGE](
"[",
"\\]");
66 out.tokens[TOK_RRANGE](
"]",
"\\]");
67 out.tokens[TOK_LPAREN](
"(",
"\\(");
68 out.tokens[TOK_RPAREN](
")",
"\\)");
69 out.tokens[TOK_UNION](
"|",
"\\|");
70 out.tokens[TOK_RANGE](
"-",
"\\-");
71 out.tokens[TOK_NEGATE](
"^",
"\\^");
72 out.tokens[TOK_STAR](
"*",
"\\*");
73 out.tokens[TOK_PLUS](
"+",
"\\+");
74 out.tokens[TOK_MAYBE](
"?",
"\\?");
// Hand-assembled lexer for the regex language, built directly from
// automaton primitives instead of from token patterns.
// NOTE(review): the enclosing function signature, the declaration of
// `result` and several closing braces are missing from this listing;
// presumably this is the body of regex::make_lexer - confirm.
//
// Metacharacters, in the order that fixes their token ids further down
// (token = TOK_CHAR + i + 1 for the i-th character of this string).
81 std::string meta_chars_str =
".[]()|-^*+?";
// all_chars = every symbol index in [0, NCHARS).
82 std::set<int> all_chars;
83 for (
int i = 0; i < NCHARS; ++i) all_chars.insert(i);
// nonmeta_chars = all_chars minus the symbol of each metacharacter.
84 std::set<int> nonmeta_chars = all_chars;
85 for (
int i = 0; i < Teuchos::size(meta_chars_str); ++i) {
86 int meta_char = at(meta_chars_str, i);
// The iterator is erased without a check against end(); this assumes every
// metacharacter's symbol is present in the set - TODO confirm.
87 std::set<int>::iterator it = nonmeta_chars.find(get_symbol(meta_char));
88 nonmeta_chars.erase(it);
// A single non-metacharacter, tagged with TOK_CHAR.
90 FiniteAutomaton lex_nonmeta;
91 make_set_nfa(lex_nonmeta, NCHARS, nonmeta_chars, TOK_CHAR);
// An escape: a backslash followed by any one symbol, also tagged TOK_CHAR.
92 FiniteAutomaton lex_slash;
93 make_char_single_nfa(lex_slash,
'\\');
94 FiniteAutomaton lex_any;
95 make_set_nfa(lex_any, NCHARS, all_chars);
96 FiniteAutomaton lex_escaped;
97 concat(lex_escaped, lex_slash, lex_any, TOK_CHAR);
// lex_char combines the plain and the escaped character automata.
98 FiniteAutomaton lex_char;
99 unite(lex_char, lex_nonmeta, lex_escaped);
// One single-character automaton per metacharacter, accumulated into
// lex_metachars.
100 FiniteAutomaton lex_metachars;
101 for (
int i = 0; i < Teuchos::size(meta_chars_str); ++i) {
102 int token = TOK_CHAR + i + 1;
// NOTE(review): listing lines 103 and 107 are missing; the two variants
// below (unite into the accumulator vs. build the accumulator directly)
// presumably sit in alternative branches - confirm against the real file.
104 FiniteAutomaton lex_metachar;
105 make_char_single_nfa(lex_metachar, at(meta_chars_str, i), token);
106 unite(lex_metachars, lex_metachars, lex_metachar);
108 make_char_single_nfa(lex_metachars, at(meta_chars_str, i), token);
// Final lexer: metacharacters united with ordinary characters, then passed
// through make_deterministic and simplify in place.
111 unite(result, lex_metachars, lex_char);
112 make_deterministic(result, result);
113 simplify(result, result);
// Builds the ReaderTables for the regex language only when `ptr` currently
// has no strong owners (strong_count() == 0).
// NOTE(review): the function signature, the declarations of `ptr` and
// `lang`, and the statements that store `newptr` are missing from this
// listing; presumably this is regex::ask_reader_tables - confirm.
118 if (ptr.strong_count() == 0) {
119 RCP<ReaderTables> newptr(
new ReaderTables());
121 GrammarPtr grammar = make_grammar(*lang);
// The lexer comes from regex::make_lexer rather than from the token
// patterns of the language.
123 regex::make_lexer(newptr->lexer);
// Indentation handling is switched off; -1 is stored for both indent and
// dedent token ids.
124 newptr->indent_info.is_sensitive =
false;
125 newptr->indent_info.indent_token = -1;
126 newptr->indent_info.dedent_token = -1;
// Creates the regex Language via make_language() only when `ptr` currently
// has no strong owners.
// NOTE(review): the enclosing signature and the declaration of `ptr` are
// missing from this listing.
134 if (ptr.strong_count() == 0) {
135 ptr.reset(
new Language(make_language()));
// Builds the automaton for one token's regular expression.
//   result - receives the automaton produced by the reader.
//   name   - token name, passed to read_string and used in the error text.
//   regex  - the regular expression text to parse.
//   token  - token id handed to the regex::Reader constructor.
// On failure a ParserFail is thrown whose message holds the original
// error, the token name and regex, and the output of a DebugReader re-run.
// NOTE(review): the declaration of `result_any` and the try/catch lines
// (which introduce `e`) are missing from this listing.
140 void make_dfa(FiniteAutomaton& result, std::string
const& name, std::string
const& regex,
int token) {
142 regex::Reader reader(token);
145 reader.read_string(result_any, regex, name);
// Error path: collect the diagnostic text in a string stream.
147 std::stringstream ss;
148 ss << e.what() <<
'\n';
149 ss <<
"error: couldn't build DFA for token \"" << name <<
"\" regex \"" << regex <<
"\"\n";
150 ss <<
"repeating with DebugReader:\n";
// Re-read the same input with a DebugReader that writes into `ss`.
151 DebugReader debug_reader(regex::ask_reader_tables(), ss);
152 debug_reader.read_string(result_any, regex, name);
153 throw ParserFail(ss.str());
// Success path: swap the automaton held in result_any into `result`.
155 swap(any_ref_cast<FiniteAutomaton>(result_any), result);
// Constructs a regex reader on top of the shared regex reader tables and
// remembers `result_token_in` as result_token, the token id that at_reduce
// passes to the automaton-building calls.
158 regex::Reader::Reader(
int result_token_in):
159 Teuchos::Reader(regex::ask_reader_tables()),
160 result_token(result_token_in) {
// Shift callback. Tokens other than TOK_CHAR are ignored; for TOK_CHAR the
// code branches on the length of the matched text.
// NOTE(review): the bodies of both branches are missing from this listing.
// at_reduce reads the value of a "char" symbol with any_cast<char>, so they
// presumably store a char into `result` - confirm.
163 void regex::Reader::at_shift(any& result,
int token, std::string& text) {
164 if (token != TOK_CHAR)
return;
// Length 1 vs. length 2 presumably means plain character vs. backslash
// escape - confirm against the missing branch bodies.
165 if (Teuchos::size(text) == 1) {
167 }
else if (Teuchos::size(text) == 2) {
// Message for any other text length; the call it belongs to is missing
// from this listing.
172 "BUG: regex char text is \"" << text <<
"\"\n");
// Reduce callback: computes the value of a production's left-hand side in
// `result_any` from the values of its right-hand-side symbols in `rhs`.
// Expression productions yield a FiniteAutomaton; set productions yield a
// std::set<char>.
// NOTE(review): this listing drops several case labels, break statements
// and a few calls; the gaps are flagged individually below.
176 void regex::Reader::at_reduce(any& result_any,
int production, std::vector<any>& rhs) {
178 switch (production) {
// NOTE(review): case label missing (presumably PROD_REGEX). Takes the
// automaton from rhs[0], then runs make_deterministic and simplify on it.
180 swap(result_any, at(rhs, 0));
181 FiniteAutomaton& result = any_ref_cast<FiniteAutomaton>(result_any);
182 make_deterministic(result, result);
183 simplify(result, result);
// Pass-through productions: the value of rhs[0] becomes the result as is.
186 case PROD_UNION_DECAY:
187 case PROD_CONCAT_DECAY:
188 case PROD_QUAL_DECAY:
189 case PROD_SET_ITEMS_DECAY:
190 case PROD_SET_ITEM_RANGE: {
191 swap(result_any, at(rhs, 0));
// NOTE(review): case label and combining call missing. Operands are rhs[0]
// and rhs[2], which matches the three-symbol union production - confirm.
195 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
196 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
197 FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 2));
// NOTE(review): case label missing. Concatenates rhs[0] and rhs[1].
202 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
203 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
204 FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 1));
205 concat(result, a, b, result_token);
// NOTE(review): case label missing. Applies star() to rhs[0].
209 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
210 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
211 star(result, a, result_token);
// NOTE(review): case label missing. Applies plus() to rhs[0].
215 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
216 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
217 plus(result, a, result_token);
// NOTE(review): case label missing. Applies maybe() to rhs[0].
221 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
222 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
223 maybe(result, a, result_token);
// A literal character: automaton built from the char held in rhs[0].
226 case PROD_SINGLE_CHAR: {
227 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
228 char c = any_cast<
char>(at(rhs, 0));
229 make_char_single_nfa(result, c, result_token);
// NOTE(review): case label missing. Builds a range automaton spanning
// symbols 0 .. NCHARS - 1, i.e. every symbol.
233 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
234 make_range_nfa(result, NCHARS, 0, NCHARS - 1, result_token);
// A bracketed set used as an atom: automaton built from the char set.
237 case PROD_SINGLE_SET: {
238 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
239 std::set<char>& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
240 make_char_set_nfa(result, charset, result_token);
// Parentheses only group: the inner expression (rhs[1]) is the result.
243 case PROD_PARENS_UNION: {
244 swap(result_any, at(rhs, 1));
// A positive set is used as is.
247 case PROD_SET_POSITIVE: {
248 swap(result_any, at(rhs, 0));
// A negative set: the result is produced by negate_set from rhs[0].
251 case PROD_SET_NEGATIVE: {
252 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
253 std::set<char>
const& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
254 negate_set(result, charset);
// "[" set-items "]": the items are at rhs[1].
257 case PROD_POSITIVE_SET: {
258 swap(result_any, at(rhs, 1));
// "[" "^" set-items "]": the items are at rhs[2].
261 case PROD_NEGATIVE_SET: {
262 swap(result_any, at(rhs, 2));
// Appending one item to a set: `b` is united into the result.
// NOTE(review): listing line 269, which presumably moves `a` into
// `result`, is missing.
265 case PROD_SET_ITEMS_ADD: {
266 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
267 std::set<char>& a = any_ref_cast<std::set<char> >(at(rhs, 0));
268 std::set<char>
const& b = any_ref_cast<std::set<char> >(at(rhs, 1));
270 unite_with(result, b);
// A set item that is a single character.
// NOTE(review): the statement that adds `c` to `result` is missing.
273 case PROD_SET_ITEM_CHAR: {
274 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
275 char c = any_cast<
char>(at(rhs, 0));
// NOTE(review): case label missing (presumably PROD_RANGE). Iterates over
// every char from `a` to `b` inclusive; the loop body is missing.
280 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
281 char a = any_cast<
char>(at(rhs, 0));
282 char b = any_cast<
char>(at(rhs, 2));
283 for (
char c = a; c <= b; ++c) {
// Message for a production id that no case handles; the call it belongs to
// is missing from this listing.
290 "BUG: unexpected production " << production <<
'\n');
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
Productions productions
vector of productions
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
RCP< const ReaderTables > ReaderTablesPtr
an RCP to a const ReaderTables
RCP< const Language > LanguagePtr
an RCP to a const Language
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
Declares Teuchos::Reader.