1 #include "Teuchos_FiniteAutomaton.hpp"
10 #include "Teuchos_chartab.hpp"
12 #include "Teuchos_Table.hpp"
16 template struct Table<int>;
18 FiniteAutomaton::FiniteAutomaton(
int nsymbols_init,
bool is_deterministic_init,
20 table(nsymbols_init + (is_deterministic_init ? 0 : 2), nstates_reserve),
21 is_deterministic(is_deterministic_init)
23 reserve(accepted_tokens, nstates_reserve);
26 void FiniteAutomaton::swap(FiniteAutomaton& other) {
28 swap(table, other.table);
29 swap(accepted_tokens, other.accepted_tokens);
30 swap(is_deterministic, other.is_deterministic);
33 int get_nstates(FiniteAutomaton
const& fa) {
34 return get_nrows(fa.table);
37 int get_nsymbols(FiniteAutomaton
const& fa) {
38 return get_ncols(fa.table) - (fa.is_deterministic ? 0 : 2);
41 bool get_determinism(FiniteAutomaton
const& fa) {
42 return fa.is_deterministic;
45 int get_epsilon0(FiniteAutomaton
const& fa) {
47 return get_ncols(fa.table) - 2;
50 int get_epsilon1(FiniteAutomaton
const& fa) {
52 return get_ncols(fa.table) - 1;
55 int add_state(FiniteAutomaton& fa) {
56 int state = get_nstates(fa);
57 resize(fa.table, state + 1, get_ncols(fa.table));
58 for (
int j = 0; j < get_ncols(fa.table); ++j) {
59 at(fa.table, state, j) = -1;
61 fa.accepted_tokens.push_back(-1);
65 void add_transition(FiniteAutomaton& fa,
int from_state,
int at_symbol,
int to_state) {
71 at(fa.table, from_state, at_symbol) = to_state;
74 void add_accept(FiniteAutomaton& fa,
int state,
int token) {
76 at(fa.accepted_tokens, state) = token;
79 void remove_accept(FiniteAutomaton& fa,
int state) {
80 at(fa.accepted_tokens, state) = -1;
83 int step(FiniteAutomaton
const& fa,
int state,
int symbol) {
88 return at(fa.table, state, symbol);
91 int accepts(FiniteAutomaton
const& fa,
int state) {
92 return at(fa.accepted_tokens, state);
95 int get_nsymbols_eps(FiniteAutomaton
const& fa) {
96 return get_ncols(fa.table);
99 void append_states(FiniteAutomaton& fa, FiniteAutomaton
const& other) {
101 bool other_determ = get_determinism(other);
103 int offset = get_nstates(fa);
104 for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
105 int my_state = add_state(fa);
106 int token = accepts(other, other_state);
107 if (0 <= token) add_accept(fa, my_state, token);
109 for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
110 int my_state = other_state + offset;
111 for (
int symbol = 0; symbol < get_nsymbols_eps(other); ++symbol) {
112 int other_next = step(other, other_state, symbol);
113 if (other_next < 0)
continue;
114 int my_next = other_next + offset;
115 add_transition(fa, my_state, symbol, my_next);
120 void make_single_nfa(FiniteAutomaton& result,
int nsymbols,
int symbol,
int token) {
121 make_range_nfa(result, nsymbols, symbol, symbol, token);
124 void make_set_nfa(FiniteAutomaton& result,
int nsymbols, std::set<int>
const& accepted,
int token) {
126 FiniteAutomaton out(nsymbols,
true, 2);
127 int start_state = add_state(out);
128 int accept_state = add_state(out);
129 for (std::set<int>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
131 add_transition(out, start_state, i, accept_state);
133 add_accept(out, accept_state, token);
137 void make_range_nfa(FiniteAutomaton& result,
int nsymbols,
int range_start,
int range_end,
int token) {
142 FiniteAutomaton out(nsymbols,
true, 2);
143 int start_state = add_state(out);
144 int accept_state = add_state(out);
145 for (
int i = range_start; i <= range_end; ++i) {
146 add_transition(out, start_state, i, accept_state);
148 add_accept(out, accept_state, token);
152 void unite(FiniteAutomaton& result, FiniteAutomaton
const& a, FiniteAutomaton
const& b) {
154 int nsymbols = get_nsymbols(a);
155 FiniteAutomaton out(nsymbols,
false, 1 + get_nstates(a) + get_nstates(b));
156 int start_state = add_state(out);
157 int a_offset = get_nstates(out);
158 append_states(out, a);
159 int b_offset = get_nstates(out);
160 append_states(out, b);
161 int epsilon0 = get_epsilon0(out);
162 int epsilon1 = get_epsilon1(out);
163 add_transition(out, start_state, epsilon0, a_offset);
164 add_transition(out, start_state, epsilon1, b_offset);
169 void concat(FiniteAutomaton& result, FiniteAutomaton
const& a, FiniteAutomaton
const& b,
int token) {
170 int nsymbols = get_nsymbols(a);
171 FiniteAutomaton out(nsymbols,
false, get_nstates(a) + get_nstates(b));
172 append_states(out, a);
173 int b_offset = get_nstates(out);
174 append_states(out, b);
175 int epsilon0 = get_epsilon0(out);
176 for (
int i = 0; i < get_nstates(a); ++i) {
177 if (accepts(a, i) != -1) {
178 add_transition(out, i, epsilon0, b_offset);
179 remove_accept(out, i);
182 for (
int i = 0; i < get_nstates(b); ++i) {
183 if (accepts(b, i) != -1) {
184 add_accept(out, i + b_offset, token);
190 void plus(FiniteAutomaton& result, FiniteAutomaton
const& a,
int token) {
192 FiniteAutomaton out(get_nsymbols(a),
false, get_nstates(a) + 1);
193 append_states(out, a);
194 int new_accept_state = add_state(out);
195 add_accept(out, new_accept_state, token);
196 int epsilon0 = get_epsilon0(out);
197 int epsilon1 = get_epsilon1(out);
198 for (
int i = 0; i < get_nstates(a); ++i) {
199 if (accepts(a, i) != -1) {
200 add_transition(out, i, epsilon0, new_accept_state);
203 add_transition(out, i, epsilon1, 0);
204 remove_accept(out, i);
210 void maybe(FiniteAutomaton& result, FiniteAutomaton
const& a,
int token) {
212 FiniteAutomaton out(get_nsymbols(a),
false, get_nstates(a) + 2);
213 int new_start_state = add_state(out);
214 int offset = get_nstates(out);
215 append_states(out, a);
216 int new_accept_state = add_state(out);
217 int epsilon0 = get_epsilon0(out);
218 int epsilon1 = get_epsilon1(out);
219 add_transition(out, new_start_state, epsilon1, offset);
222 int last = new_start_state;
223 for (
int i = 0; i < get_nstates(a); ++i) {
224 if (accepts(a, i) != -1) {
225 add_transition(out, last, epsilon0, i + offset);
226 remove_accept(out, i + offset);
230 add_transition(out, last, epsilon0, new_accept_state);
231 add_accept(out, new_accept_state, token);
235 void star(FiniteAutomaton& result, FiniteAutomaton
const& a,
int token) {
237 plus(result, a, token);
238 maybe(result, result, token);
241 typedef std::set<int> StateSet;
243 static StateSet step(StateSet
const& ss,
int symbol, FiniteAutomaton
const& fa) {
245 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
247 int next_state = step(fa, state, symbol);
248 if (next_state != -1) next_ss.insert(next_state);
253 typedef std::queue<int> StateQueue;
255 static StateSet get_epsilon_closure(StateSet ss, FiniteAutomaton
const& fa) {
257 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
261 int epsilon0 = get_epsilon0(fa);
262 int epsilon1 = get_epsilon1(fa);
264 int state = q.front(); q.pop();
265 for (
int epsilon = epsilon0; epsilon <= epsilon1; ++epsilon) {
266 int next_state = step(fa, state, epsilon);
267 if (next_state == -1)
continue;
268 if (!ss.count(next_state)) {
269 ss.insert(next_state);
277 struct StateSetPtrLess {
278 bool operator()(StateSet* a, StateSet* b)
const {
283 typedef std::map<StateSet*,int,StateSetPtrLess> StateSetPtr2State;
284 typedef RCP<StateSet> StateSetPtr;
285 typedef std::vector<StateSetPtr> StateSetUniqPtrVector;
287 static void add_back(StateSetUniqPtrVector& ssupv, StateSet& ss) {
289 StateSetPtr ptr(
new StateSet());
291 ssupv.push_back(ptr);
295 void make_deterministic(FiniteAutomaton& result, FiniteAutomaton& nfa) {
297 if (get_determinism(nfa)) {
301 StateSetPtr2State ssp2s;
302 StateSetUniqPtrVector ssupv;
303 FiniteAutomaton out(get_nsymbols(nfa),
true, 0);
306 start_ss = get_epsilon_closure(start_ss, nfa);
307 add_back(ssupv, start_ss);
308 ssp2s[ssupv.back().get()] = add_state(out);
310 while (front <
int(ssupv.size())) {
312 StateSet& ss = *at(ssupv, front);
314 for (
int symbol = 0; symbol < get_nsymbols(nfa); ++symbol) {
315 StateSet next_ss = Teuchos::step(ss, symbol, nfa);
316 if (next_ss.empty())
continue;
317 next_ss = get_epsilon_closure(next_ss, nfa);
319 StateSetPtr2State::iterator it = ssp2s.find(&next_ss);
320 if (it == ssp2s.end()) {
321 next_state = add_state(out);
322 add_back(ssupv, next_ss);
323 ssp2s[ssupv.back().get()] = next_state;
325 next_state = it->second;
327 add_transition(out, state, symbol, next_state);
329 int min_accepted = -1;
330 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
332 int nfa_token = accepts(nfa, nfa_state);
333 if (nfa_token == -1)
continue;
334 if (min_accepted == -1 || nfa_token < min_accepted) {
335 min_accepted = nfa_token;
338 if (min_accepted != -1) add_accept(out, state, min_accepted);
343 struct StateRowLess {
344 Table<int>
const& table;
345 std::vector<int>
const& accepted;
346 bool operator()(
int const& a,
int const& b)
const {
347 int aa = at(accepted, a);
348 int ab = at(accepted, b);
349 if (aa != ab)
return aa < ab;
350 for (
int symbol = 0, ncols = get_ncols(table); symbol < ncols; ++symbol) {
351 int na = at(table, a, symbol);
352 int nb = at(table, b, symbol);
353 if (na != nb)
return na < nb;
357 StateRowLess(Table<int>
const& t, std::vector<int>
const& a):
358 table(t),accepted(a) {
362 typedef std::map<int, int, StateRowLess> StateRow2SimpleState;
364 void simplify_once(FiniteAutomaton& result, FiniteAutomaton
const& fa) {
366 StateRow2SimpleState sr2ss(StateRowLess(fa.table, fa.accepted_tokens));
368 for (
int state = 0; state < get_nstates(fa); ++state) {
369 std::pair<StateRow2SimpleState::iterator, bool> res =
370 sr2ss.insert(std::make_pair(state, nsimple));
375 FiniteAutomaton out(get_nsymbols(fa), get_determinism(fa), nsimple);
376 for (
int simple = 0; simple < nsimple; ++simple) {
379 std::vector<bool> did_simple(
size_t(nsimple),
false);
380 for (
int state = 0; state < get_nstates(fa); ++state) {
382 int simple = sr2ss[state];
383 if (at(did_simple, simple))
continue;
384 for (
int symbol = 0; symbol < get_nsymbols_eps(fa); ++symbol) {
385 int next_state = step(fa, state, symbol);
386 if (next_state == -1)
continue;
388 int next_simple = sr2ss[next_state];
389 add_transition(out, simple, symbol, next_simple);
391 int token = accepts(fa, state);
393 add_accept(out, simple, token);
395 at(did_simple, simple) =
true;
400 void simplify(FiniteAutomaton& result, FiniteAutomaton
const& fa) {
402 FiniteAutomaton prev;
403 FiniteAutomaton next = fa;
404 int nstates_next = get_nstates(next);
409 nstates = nstates_next;
410 simplify_once(next, prev);
412 nstates_next = get_nstates(next);
413 }
while (nstates_next < nstates);
417 void make_char_nfa(FiniteAutomaton& result,
bool is_deterministic_init,
int nstates_reserve) {
419 FiniteAutomaton tmp(Teuchos::NCHARS, is_deterministic_init, nstates_reserve);
423 void add_char_transition(FiniteAutomaton& fa,
int from_state,
char at_char,
int to_state) {
424 add_transition(fa, from_state, get_symbol(at_char), to_state);
427 template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
430 template <
typename T>
431 struct IsSymbol<T, true> {
432 static bool eval(T c) {
433 if (c < 0)
return false;
434 return 0 <= Teuchos::chartab[int(c)];
438 template <
typename T>
439 struct IsSymbol<T, false> {
440 static bool eval(T c) {
441 if (c >= TEUCHOS_CHARTAB_SIZE)
return false;
442 return 0 <= Teuchos::chartab[int(c)];
446 bool is_symbol(
char c) {
447 return IsSymbol<char>::eval(c);
450 template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
453 template <
typename T>
454 struct GetSymbol<T, true> {
455 static int eval(T c) {
457 int symbol = Teuchos::chartab[int(c)];
463 template <
typename T>
464 struct GetSymbol<T, false> {
465 static int eval(T c) {
466 int symbol = Teuchos::chartab[int(c)];
472 int get_symbol(
char c) {
473 return GetSymbol<char>::eval(c);
476 char get_char(
int symbol) {
479 return inv_chartab[symbol];
482 void make_char_set_nfa(FiniteAutomaton& result, std::set<char>
const& accepted,
int token) {
483 std::set<int> symbol_set;
484 for (std::set<char>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
486 symbol_set.insert(get_symbol(c));
488 return make_set_nfa(result, Teuchos::NCHARS, symbol_set, token);
491 void make_char_range_nfa(FiniteAutomaton& result,
char range_start,
char range_end,
int token) {
492 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(range_start), get_symbol(range_end), token);
495 void make_char_single_nfa(FiniteAutomaton& result,
char symbol_char,
int token) {
496 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(symbol_char), get_symbol(symbol_char), token);
499 void negate_set(std::set<char>& result, std::set<char>
const& s) {
502 for (
int symbol = 0; symbol < NCHARS; ++symbol) {
503 char c = inv_chartab[symbol];
504 if (!s.count(c)) out.insert(c);
509 std::ostream& operator<<(std::ostream& os, FiniteAutomaton
const& fa) {
510 if (get_determinism(fa)) os <<
"dfa ";
512 os << get_nstates(fa) <<
" states " << get_nsymbols(fa) <<
" symbols\n";
513 for (
int state = 0; state < get_nstates(fa); ++state) {
514 for (
int symbol = 0; symbol < get_nsymbols(fa); ++symbol) {
515 int next_state = step(fa, state, symbol);
516 if (next_state != -1) os <<
"(" << state <<
", " << symbol <<
") -> " << next_state <<
'\n';
518 if (!get_determinism(fa)) {
519 for (
int symbol = get_epsilon0(fa); symbol <= get_epsilon1(fa); ++symbol) {
520 int next_state = step(fa, state, symbol);
521 if (next_state != -1) os <<
"(" << state <<
", eps" << (symbol - get_epsilon0(fa)) <<
") -> " << next_state <<
'\n';
524 int token = accepts(fa, state);
525 if (token != -1) os << state <<
" accepts " << token <<
'\n';
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
Reference-counted pointer class and non-member templated function implementations.