10 #include "Teuchos_FiniteAutomaton.hpp"
19 #include "Teuchos_chartab.hpp"
21 #include "Teuchos_Table.hpp"
25 template struct Table<int>;
27 FiniteAutomaton::FiniteAutomaton(
int nsymbols_init,
bool is_deterministic_init,
29 table(nsymbols_init + (is_deterministic_init ? 0 : 2), nstates_reserve),
30 is_deterministic(is_deterministic_init)
32 reserve(accepted_tokens, nstates_reserve);
35 void FiniteAutomaton::swap(FiniteAutomaton& other) {
37 swap(table, other.table);
38 swap(accepted_tokens, other.accepted_tokens);
39 swap(is_deterministic, other.is_deterministic);
42 int get_nstates(FiniteAutomaton
const& fa) {
43 return get_nrows(fa.table);
46 int get_nsymbols(FiniteAutomaton
const& fa) {
47 return get_ncols(fa.table) - (fa.is_deterministic ? 0 : 2);
50 bool get_determinism(FiniteAutomaton
const& fa) {
51 return fa.is_deterministic;
54 int get_epsilon0(FiniteAutomaton
const& fa) {
56 return get_ncols(fa.table) - 2;
59 int get_epsilon1(FiniteAutomaton
const& fa) {
61 return get_ncols(fa.table) - 1;
64 int add_state(FiniteAutomaton& fa) {
65 int state = get_nstates(fa);
66 resize(fa.table, state + 1, get_ncols(fa.table));
67 for (
int j = 0; j < get_ncols(fa.table); ++j) {
68 at(fa.table, state, j) = -1;
70 fa.accepted_tokens.push_back(-1);
74 void add_transition(FiniteAutomaton& fa,
int from_state,
int at_symbol,
int to_state) {
80 at(fa.table, from_state, at_symbol) = to_state;
83 void add_accept(FiniteAutomaton& fa,
int state,
int token) {
85 at(fa.accepted_tokens, state) = token;
88 void remove_accept(FiniteAutomaton& fa,
int state) {
89 at(fa.accepted_tokens, state) = -1;
92 int step(FiniteAutomaton
const& fa,
int state,
int symbol) {
97 return at(fa.table, state, symbol);
100 int accepts(FiniteAutomaton
const& fa,
int state) {
101 return at(fa.accepted_tokens, state);
104 int get_nsymbols_eps(FiniteAutomaton
const& fa) {
105 return get_ncols(fa.table);
108 void append_states(FiniteAutomaton& fa, FiniteAutomaton
const& other) {
110 bool other_determ = get_determinism(other);
112 int offset = get_nstates(fa);
113 for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
114 int my_state = add_state(fa);
115 int token = accepts(other, other_state);
116 if (0 <= token) add_accept(fa, my_state, token);
118 for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
119 int my_state = other_state + offset;
120 for (
int symbol = 0; symbol < get_nsymbols_eps(other); ++symbol) {
121 int other_next = step(other, other_state, symbol);
122 if (other_next < 0)
continue;
123 int my_next = other_next + offset;
124 add_transition(fa, my_state, symbol, my_next);
129 void make_single_nfa(FiniteAutomaton& result,
int nsymbols,
int symbol,
int token) {
130 make_range_nfa(result, nsymbols, symbol, symbol, token);
133 void make_set_nfa(FiniteAutomaton& result,
int nsymbols, std::set<int>
const& accepted,
int token) {
135 FiniteAutomaton out(nsymbols,
true, 2);
136 int start_state = add_state(out);
137 int accept_state = add_state(out);
138 for (std::set<int>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
140 add_transition(out, start_state, i, accept_state);
142 add_accept(out, accept_state, token);
146 void make_range_nfa(FiniteAutomaton& result,
int nsymbols,
int range_start,
int range_end,
int token) {
151 FiniteAutomaton out(nsymbols,
true, 2);
152 int start_state = add_state(out);
153 int accept_state = add_state(out);
154 for (
int i = range_start; i <= range_end; ++i) {
155 add_transition(out, start_state, i, accept_state);
157 add_accept(out, accept_state, token);
161 void unite(FiniteAutomaton& result, FiniteAutomaton
const& a, FiniteAutomaton
const& b) {
163 int nsymbols = get_nsymbols(a);
164 FiniteAutomaton out(nsymbols,
false, 1 + get_nstates(a) + get_nstates(b));
165 int start_state = add_state(out);
166 int a_offset = get_nstates(out);
167 append_states(out, a);
168 int b_offset = get_nstates(out);
169 append_states(out, b);
170 int epsilon0 = get_epsilon0(out);
171 int epsilon1 = get_epsilon1(out);
172 add_transition(out, start_state, epsilon0, a_offset);
173 add_transition(out, start_state, epsilon1, b_offset);
178 void concat(FiniteAutomaton& result, FiniteAutomaton
const& a, FiniteAutomaton
const& b,
int token) {
179 int nsymbols = get_nsymbols(a);
180 FiniteAutomaton out(nsymbols,
false, get_nstates(a) + get_nstates(b));
181 append_states(out, a);
182 int b_offset = get_nstates(out);
183 append_states(out, b);
184 int epsilon0 = get_epsilon0(out);
185 for (
int i = 0; i < get_nstates(a); ++i) {
186 if (accepts(a, i) != -1) {
187 add_transition(out, i, epsilon0, b_offset);
188 remove_accept(out, i);
191 for (
int i = 0; i < get_nstates(b); ++i) {
192 if (accepts(b, i) != -1) {
193 add_accept(out, i + b_offset, token);
199 void plus(FiniteAutomaton& result, FiniteAutomaton
const& a,
int token) {
201 FiniteAutomaton out(get_nsymbols(a),
false, get_nstates(a) + 1);
202 append_states(out, a);
203 int new_accept_state = add_state(out);
204 add_accept(out, new_accept_state, token);
205 int epsilon0 = get_epsilon0(out);
206 int epsilon1 = get_epsilon1(out);
207 for (
int i = 0; i < get_nstates(a); ++i) {
208 if (accepts(a, i) != -1) {
209 add_transition(out, i, epsilon0, new_accept_state);
212 add_transition(out, i, epsilon1, 0);
213 remove_accept(out, i);
219 void maybe(FiniteAutomaton& result, FiniteAutomaton
const& a,
int token) {
221 FiniteAutomaton out(get_nsymbols(a),
false, get_nstates(a) + 2);
222 int new_start_state = add_state(out);
223 int offset = get_nstates(out);
224 append_states(out, a);
225 int new_accept_state = add_state(out);
226 int epsilon0 = get_epsilon0(out);
227 int epsilon1 = get_epsilon1(out);
228 add_transition(out, new_start_state, epsilon1, offset);
231 int last = new_start_state;
232 for (
int i = 0; i < get_nstates(a); ++i) {
233 if (accepts(a, i) != -1) {
234 add_transition(out, last, epsilon0, i + offset);
235 remove_accept(out, i + offset);
239 add_transition(out, last, epsilon0, new_accept_state);
240 add_accept(out, new_accept_state, token);
244 void star(FiniteAutomaton& result, FiniteAutomaton
const& a,
int token) {
246 plus(result, a, token);
247 maybe(result, result, token);
250 typedef std::set<int> StateSet;
252 static StateSet step(StateSet
const& ss,
int symbol, FiniteAutomaton
const& fa) {
254 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
256 int next_state = step(fa, state, symbol);
257 if (next_state != -1) next_ss.insert(next_state);
262 typedef std::queue<int> StateQueue;
264 static StateSet get_epsilon_closure(StateSet ss, FiniteAutomaton
const& fa) {
266 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
270 int epsilon0 = get_epsilon0(fa);
271 int epsilon1 = get_epsilon1(fa);
273 int state = q.front(); q.pop();
274 for (
int epsilon = epsilon0; epsilon <= epsilon1; ++epsilon) {
275 int next_state = step(fa, state, epsilon);
276 if (next_state == -1)
continue;
277 if (!ss.count(next_state)) {
278 ss.insert(next_state);
286 struct StateSetPtrLess {
287 bool operator()(StateSet* a, StateSet* b)
const {
292 typedef std::map<StateSet*,int,StateSetPtrLess> StateSetPtr2State;
293 typedef RCP<StateSet> StateSetPtr;
294 typedef std::vector<StateSetPtr> StateSetUniqPtrVector;
296 static void add_back(StateSetUniqPtrVector& ssupv, StateSet& ss) {
298 StateSetPtr ptr(
new StateSet());
300 ssupv.push_back(ptr);
304 void make_deterministic(FiniteAutomaton& result, FiniteAutomaton& nfa) {
306 if (get_determinism(nfa)) {
310 StateSetPtr2State ssp2s;
311 StateSetUniqPtrVector ssupv;
312 FiniteAutomaton out(get_nsymbols(nfa),
true, 0);
315 start_ss = get_epsilon_closure(start_ss, nfa);
316 add_back(ssupv, start_ss);
317 ssp2s[ssupv.back().get()] = add_state(out);
319 while (front <
int(ssupv.size())) {
321 StateSet& ss = *at(ssupv, front);
323 for (
int symbol = 0; symbol < get_nsymbols(nfa); ++symbol) {
324 StateSet next_ss = Teuchos::step(ss, symbol, nfa);
325 if (next_ss.empty())
continue;
326 next_ss = get_epsilon_closure(next_ss, nfa);
328 StateSetPtr2State::iterator it = ssp2s.find(&next_ss);
329 if (it == ssp2s.end()) {
330 next_state = add_state(out);
331 add_back(ssupv, next_ss);
332 ssp2s[ssupv.back().get()] = next_state;
334 next_state = it->second;
336 add_transition(out, state, symbol, next_state);
338 int min_accepted = -1;
339 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
341 int nfa_token = accepts(nfa, nfa_state);
342 if (nfa_token == -1)
continue;
343 if (min_accepted == -1 || nfa_token < min_accepted) {
344 min_accepted = nfa_token;
347 if (min_accepted != -1) add_accept(out, state, min_accepted);
352 struct StateRowLess {
353 Table<int>
const& table;
354 std::vector<int>
const& accepted;
355 bool operator()(
int const& a,
int const& b)
const {
356 int aa = at(accepted, a);
357 int ab = at(accepted, b);
358 if (aa != ab)
return aa < ab;
359 for (
int symbol = 0, ncols = get_ncols(table); symbol < ncols; ++symbol) {
360 int na = at(table, a, symbol);
361 int nb = at(table, b, symbol);
362 if (na != nb)
return na < nb;
366 StateRowLess(Table<int>
const& t, std::vector<int>
const& a):
367 table(t),accepted(a) {
371 typedef std::map<int, int, StateRowLess> StateRow2SimpleState;
373 void simplify_once(FiniteAutomaton& result, FiniteAutomaton
const& fa) {
375 StateRow2SimpleState sr2ss(StateRowLess(fa.table, fa.accepted_tokens));
377 for (
int state = 0; state < get_nstates(fa); ++state) {
378 std::pair<StateRow2SimpleState::iterator, bool> res =
379 sr2ss.insert(std::make_pair(state, nsimple));
384 FiniteAutomaton out(get_nsymbols(fa), get_determinism(fa), nsimple);
385 for (
int simple = 0; simple < nsimple; ++simple) {
388 std::vector<bool> did_simple(
size_t(nsimple),
false);
389 for (
int state = 0; state < get_nstates(fa); ++state) {
391 int simple = sr2ss[state];
392 if (at(did_simple, simple))
continue;
393 for (
int symbol = 0; symbol < get_nsymbols_eps(fa); ++symbol) {
394 int next_state = step(fa, state, symbol);
395 if (next_state == -1)
continue;
397 int next_simple = sr2ss[next_state];
398 add_transition(out, simple, symbol, next_simple);
400 int token = accepts(fa, state);
402 add_accept(out, simple, token);
404 at(did_simple, simple) =
true;
409 void simplify(FiniteAutomaton& result, FiniteAutomaton
const& fa) {
411 FiniteAutomaton prev;
412 FiniteAutomaton next = fa;
413 int nstates_next = get_nstates(next);
418 nstates = nstates_next;
419 simplify_once(next, prev);
421 nstates_next = get_nstates(next);
422 }
while (nstates_next < nstates);
426 void add_char_transition(FiniteAutomaton& fa,
int from_state,
char at_char,
int to_state) {
427 add_transition(fa, from_state, get_symbol(at_char), to_state);
430 template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
433 template <
typename T>
434 struct IsSymbol<T, true> {
435 static bool eval(T c) {
436 if (c < 0)
return false;
437 return 0 <= Teuchos::chartab[int(c)];
441 template <
typename T>
442 struct IsSymbol<T, false> {
443 static bool eval(T c) {
444 if (c >= TEUCHOS_CHARTAB_SIZE)
return false;
445 return 0 <= Teuchos::chartab[int(c)];
449 bool is_symbol(
char c) {
450 return IsSymbol<char>::eval(c);
453 template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
456 template <
typename T>
457 struct GetSymbol<T, true> {
458 static int eval(T c) {
460 int symbol = Teuchos::chartab[int(c)];
466 template <
typename T>
467 struct GetSymbol<T, false> {
468 static int eval(T c) {
469 int symbol = Teuchos::chartab[int(c)];
475 int get_symbol(
char c) {
476 return GetSymbol<char>::eval(c);
479 char get_char(
int symbol) {
482 return inv_chartab[symbol];
485 void make_char_set_nfa(FiniteAutomaton& result, std::set<char>
const& accepted,
int token) {
486 std::set<int> symbol_set;
487 for (std::set<char>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
489 symbol_set.insert(get_symbol(c));
491 return make_set_nfa(result, Teuchos::NCHARS, symbol_set, token);
494 void make_char_range_nfa(FiniteAutomaton& result,
char range_start,
char range_end,
int token) {
495 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(range_start), get_symbol(range_end), token);
498 void make_char_single_nfa(FiniteAutomaton& result,
char symbol_char,
int token) {
499 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(symbol_char), get_symbol(symbol_char), token);
502 void negate_set(std::set<char>& result, std::set<char>
const& s) {
505 for (
int symbol = 0; symbol < NCHARS; ++symbol) {
506 char c = inv_chartab[symbol];
507 if (!s.count(c)) out.insert(c);
512 std::ostream& operator<<(std::ostream& os, FiniteAutomaton
const& fa) {
513 if (get_determinism(fa)) os <<
"dfa ";
515 os << get_nstates(fa) <<
" states " << get_nsymbols(fa) <<
" symbols\n";
516 for (
int state = 0; state < get_nstates(fa); ++state) {
517 for (
int symbol = 0; symbol < get_nsymbols(fa); ++symbol) {
518 int next_state = step(fa, state, symbol);
519 if (next_state != -1) os <<
"(" << state <<
", " << symbol <<
") -> " << next_state <<
'\n';
521 if (!get_determinism(fa)) {
522 for (
int symbol = get_epsilon0(fa); symbol <= get_epsilon1(fa); ++symbol) {
523 int next_state = step(fa, state, symbol);
524 if (next_state != -1) os <<
"(" << state <<
", eps" << (symbol - get_epsilon0(fa)) <<
") -> " << next_state <<
'\n';
527 int token = accepts(fa, state);
528 if (token != -1) os << state <<
" accepts " << token <<
'\n';
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
Reference-counted pointer class and non-member templated function implementations.