1 #include "Teuchos_set.hpp"
3 #include "Teuchos_regex.hpp"
8 #include "Teuchos_Assert.hpp"
10 #include "Teuchos_vector.hpp"
11 #include "Teuchos_string.hpp"
12 #include "Teuchos_chartab.hpp"
14 #include "Teuchos_chartab.hpp"
19 Language make_language() {
30 prods[PROD_REGEX](
"regex") >>
"union";
31 prods[PROD_UNION_DECAY](
"union") >>
"concat";
32 prods[PROD_UNION](
"union") >>
"union",
"|",
"concat";
33 prods[PROD_CONCAT_DECAY](
"concat") >>
"qualified";
34 prods[PROD_CONCAT](
"concat") >>
"concat",
"qualified";
35 prods[PROD_QUAL_DECAY](
"qualified") >>
"single";
36 prods[PROD_STAR](
"qualified") >>
"qualified",
"*";
37 prods[PROD_PLUS](
"qualified") >>
"qualified",
"+";
38 prods[PROD_MAYBE](
"qualified") >>
"qualified",
"?";
39 prods[PROD_SINGLE_CHAR](
"single") >>
"char";
40 prods[PROD_ANY](
"single") >>
".";
41 prods[PROD_SINGLE_SET](
"single") >>
"set";
42 prods[PROD_PARENS_UNION](
"single") >>
"(",
"union",
")";
43 prods[PROD_SET_POSITIVE](
"set") >>
"positive-set";
44 prods[PROD_SET_NEGATIVE](
"set") >>
"negative-set";
45 prods[PROD_POSITIVE_SET](
"positive-set") >>
"[",
"set-items",
"]";
46 prods[PROD_NEGATIVE_SET](
"negative-set") >>
"[",
"^",
"set-items",
"]";
47 prods[PROD_SET_ITEMS_DECAY](
"set-items") >>
"set-item";
48 prods[PROD_SET_ITEMS_ADD](
"set-items") >>
"set-items",
"set-item";
49 prods[PROD_SET_ITEM_CHAR](
"set-item") >>
"char";
50 prods[PROD_SET_ITEM_RANGE](
"set-item") >>
"range";
51 prods[PROD_RANGE](
"range") >>
"char",
"-",
"char";
52 out.tokens.resize(NTOKS);
54 out.tokens[TOK_CHAR](
"char",
"[^\\\\\\.\\[\\]\\(\\)\\|\\-\\^\\*\\+\\?]|\\\\.");
55 out.tokens[TOK_DOT](
".",
"\\.");
56 out.tokens[TOK_LRANGE](
"[",
"\\[");  // pattern for the "[" token must match '[' itself, not duplicate the "]" pattern of TOK_RRANGE
57 out.tokens[TOK_RRANGE](
"]",
"\\]");
58 out.tokens[TOK_LPAREN](
"(",
"\\(");
59 out.tokens[TOK_RPAREN](
")",
"\\)");
60 out.tokens[TOK_UNION](
"|",
"\\|");
61 out.tokens[TOK_RANGE](
"-",
"\\-");
62 out.tokens[TOK_NEGATE](
"^",
"\\^");
63 out.tokens[TOK_STAR](
"*",
"\\*");
64 out.tokens[TOK_PLUS](
"+",
"\\+");
65 out.tokens[TOK_MAYBE](
"?",
"\\?");
72 std::string meta_chars_str =
".[]()|-^*+?";
73 std::set<int> all_chars;
74 for (
int i = 0; i < NCHARS; ++i) all_chars.insert(i);
75 std::set<int> nonmeta_chars = all_chars;
76 for (
int i = 0; i < size(meta_chars_str); ++i) {
77 int meta_char = at(meta_chars_str, i);
78 std::set<int>::iterator it = nonmeta_chars.find(get_symbol(meta_char));
79 nonmeta_chars.erase(it);
81 FiniteAutomaton lex_nonmeta;
82 make_set_nfa(lex_nonmeta, NCHARS, nonmeta_chars, TOK_CHAR);
83 FiniteAutomaton lex_slash;
84 make_char_single_nfa(lex_slash,
'\\');
85 FiniteAutomaton lex_any;
86 make_set_nfa(lex_any, NCHARS, all_chars);
87 FiniteAutomaton lex_escaped;
88 concat(lex_escaped, lex_slash, lex_any, TOK_CHAR);
89 FiniteAutomaton lex_char;
90 unite(lex_char, lex_nonmeta, lex_escaped);
91 FiniteAutomaton lex_metachars;
92 for (
int i = 0; i < size(meta_chars_str); ++i) {
93 int token = TOK_CHAR + i + 1;
95 FiniteAutomaton lex_metachar;
96 make_char_single_nfa(lex_metachar, at(meta_chars_str, i), token);
97 unite(lex_metachars, lex_metachars, lex_metachar);
99 make_char_single_nfa(lex_metachars, at(meta_chars_str, i), token);
102 unite(result, lex_metachars, lex_char);
103 make_deterministic(result, result);
104 simplify(result, result);
109 if (ptr.strong_count() == 0) {
110 RCP<ReaderTables> newptr(
new ReaderTables());
112 GrammarPtr grammar = make_grammar(*lang);
114 regex::make_lexer(newptr->lexer);
115 newptr->indent_info.is_sensitive =
false;
116 newptr->indent_info.indent_token = -1;
117 newptr->indent_info.dedent_token = -1;
125 if (ptr.strong_count() == 0) {
126 ptr.reset(
new Language(make_language()));
131 void make_dfa(FiniteAutomaton& result, std::string
const& name, std::string
const& regex,
int token) {
133 regex::Reader reader(token);
136 reader.read_string(result_any, regex, name);
138 std::stringstream ss;
139 ss << e.what() <<
'\n';
140 ss <<
"error: couldn't build DFA for token \"" << name <<
"\" regex \"" << regex <<
"\"\n";
141 ss <<
"repeating with DebugReader:\n";
142 DebugReader debug_reader(regex::ask_reader_tables(), ss);
143 debug_reader.read_string(result_any, regex, name);
144 throw ParserFail(ss.str());
146 swap(any_ref_cast<FiniteAutomaton>(result_any), result);
149 regex::Reader::Reader(
int result_token_in):
150 Teuchos::Reader(regex::ask_reader_tables()),
151 result_token(result_token_in) {
154 void regex::Reader::at_shift(any& result,
int token, std::string& text) {
155 if (token != TOK_CHAR)
return;
156 if (size(text) == 1) {
158 }
else if (size(text) == 2) {
163 "BUG: regex char text is \"" << text <<
"\"\n");
167 void regex::Reader::at_reduce(any& result_any,
int production, std::vector<any>& rhs) {
169 switch (production) {
171 swap(result_any, at(rhs, 0));
172 FiniteAutomaton& result = any_ref_cast<FiniteAutomaton>(result_any);
173 make_deterministic(result, result);
174 simplify(result, result);
177 case PROD_UNION_DECAY:
178 case PROD_CONCAT_DECAY:
179 case PROD_QUAL_DECAY:
180 case PROD_SET_ITEMS_DECAY:
181 case PROD_SET_ITEM_RANGE: {
182 swap(result_any, at(rhs, 0));
186 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
187 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
188 FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 2));
193 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
194 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
195 FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 1));
196 concat(result, a, b, result_token);
200 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
201 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
202 star(result, a, result_token);
206 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
207 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
208 plus(result, a, result_token);
212 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
213 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
214 maybe(result, a, result_token);
217 case PROD_SINGLE_CHAR: {
218 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
219 char c = any_cast<
char>(at(rhs, 0));
220 make_char_single_nfa(result, c, result_token);
224 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
225 make_range_nfa(result, NCHARS, 0, NCHARS - 1, result_token);
228 case PROD_SINGLE_SET: {
229 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
230 std::set<char>& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
231 make_char_set_nfa(result, charset, result_token);
234 case PROD_PARENS_UNION: {
235 swap(result_any, at(rhs, 1));
238 case PROD_SET_POSITIVE: {
239 swap(result_any, at(rhs, 0));
242 case PROD_SET_NEGATIVE: {
243 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
244 std::set<char>
const& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
245 negate_set(result, charset);
248 case PROD_POSITIVE_SET: {
249 swap(result_any, at(rhs, 1));
252 case PROD_NEGATIVE_SET: {
253 swap(result_any, at(rhs, 2));
256 case PROD_SET_ITEMS_ADD: {
257 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
258 std::set<char>& a = any_ref_cast<std::set<char> >(at(rhs, 0));
259 std::set<char>
const& b = any_ref_cast<std::set<char> >(at(rhs, 1));
261 unite_with(result, b);
264 case PROD_SET_ITEM_CHAR: {
265 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
266 char c = any_cast<
char>(at(rhs, 0));
271 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
272 char a = any_cast<
char>(at(rhs, 0));
273 char b = any_cast<
char>(at(rhs, 2));
274 for (
char c = a; c <= b; ++c) {
281 "BUG: unexpected production " << production <<
'\n');
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
Productions productions
vector of productions
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
RCP< const ReaderTables > ReaderTablesPtr
an RCP to a const ReaderTables
RCP< const Language > LanguagePtr
an RCP to a const Language
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
Declares Teuchos::Reader.