1 #include "Teuchos_FiniteAutomaton.hpp" 
   10 #include "Teuchos_chartab.hpp" 
   12 #include "Teuchos_Table.hpp" 
   16 template struct Table<int>;
 
   18 FiniteAutomaton::FiniteAutomaton(
int nsymbols_init, 
bool is_deterministic_init,
 
   20   table(nsymbols_init + (is_deterministic_init ? 0 : 2), nstates_reserve),
 
   21   is_deterministic(is_deterministic_init)
 
   23   reserve(accepted_tokens, nstates_reserve);
 
   26 void FiniteAutomaton::swap(FiniteAutomaton& other) {
 
   28   swap(table, other.table);
 
   29   swap(accepted_tokens, other.accepted_tokens);
 
   30   swap(is_deterministic, other.is_deterministic);
 
   33 int get_nstates(FiniteAutomaton 
const& fa) {
 
   34   return get_nrows(fa.table);
 
   37 int get_nsymbols(FiniteAutomaton 
const& fa) {
 
   38   return get_ncols(fa.table) - (fa.is_deterministic ? 0 : 2);
 
   41 bool get_determinism(FiniteAutomaton 
const& fa) {
 
   42   return fa.is_deterministic;
 
   45 int get_epsilon0(FiniteAutomaton 
const& fa) {
 
   47   return get_ncols(fa.table) - 2;
 
   50 int get_epsilon1(FiniteAutomaton 
const& fa) {
 
   52   return get_ncols(fa.table) - 1;
 
   55 int add_state(FiniteAutomaton& fa) {
 
   56   int state = get_nstates(fa);
 
   57   resize(fa.table, state + 1, get_ncols(fa.table));
 
   58   for (
int j = 0; j < get_ncols(fa.table); ++j) {
 
   59     at(fa.table, state, j) = -1;
 
   61   fa.accepted_tokens.push_back(-1);
 
   65 void add_transition(FiniteAutomaton& fa, 
int from_state, 
int at_symbol, 
int to_state) {
 
   71   at(fa.table, from_state, at_symbol) = to_state;
 
   74 void add_accept(FiniteAutomaton& fa, 
int state, 
int token) {
 
   76   at(fa.accepted_tokens, state) = token;
 
   79 void remove_accept(FiniteAutomaton& fa, 
int state) {
 
   80   at(fa.accepted_tokens, state) = -1;
 
   83 int step(FiniteAutomaton 
const& fa, 
int state, 
int symbol) {
 
   88   return at(fa.table, state, symbol);
 
   91 int accepts(FiniteAutomaton 
const& fa, 
int state) {
 
   92   return at(fa.accepted_tokens, state);
 
   95 int get_nsymbols_eps(FiniteAutomaton 
const& fa) {
 
   96   return get_ncols(fa.table);
 
   99 void append_states(FiniteAutomaton& fa, FiniteAutomaton 
const& other) {
 
  101   bool other_determ = get_determinism(other);
 
  103   int offset = get_nstates(fa);
 
  104   for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
 
  105     int my_state = add_state(fa);
 
  106     int token = accepts(other, other_state);
 
  107     if (0 <= token) add_accept(fa, my_state, token);
 
  109   for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
 
  110     int my_state = other_state + offset;
 
  111     for (
int symbol = 0; symbol < get_nsymbols_eps(other); ++symbol) {
 
  112       int other_next = step(other, other_state, symbol);
 
  113       if (other_next < 0) 
continue;
 
  114       int my_next = other_next + offset;
 
  115       add_transition(fa, my_state, symbol, my_next);
 
  120 void make_single_nfa(FiniteAutomaton& result, 
int nsymbols, 
int symbol, 
int token) {
 
  121   make_range_nfa(result, nsymbols, symbol, symbol, token);
 
  124 void make_set_nfa(FiniteAutomaton& result, 
int nsymbols, std::set<int> 
const& accepted, 
int token) {
 
  126   FiniteAutomaton out(nsymbols, 
true, 2);
 
  127   int start_state = add_state(out);
 
  128   int accept_state = add_state(out);
 
  129   for (std::set<int>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
 
  131     add_transition(out, start_state, i, accept_state);
 
  133   add_accept(out, accept_state, token);
 
  137 void make_range_nfa(FiniteAutomaton& result, 
int nsymbols, 
int range_start, 
int range_end, 
int token) {
 
  142   FiniteAutomaton out(nsymbols, 
true, 2);
 
  143   int start_state = add_state(out);
 
  144   int accept_state = add_state(out);
 
  145   for (
int i = range_start; i <= range_end; ++i) {
 
  146     add_transition(out, start_state, i, accept_state);
 
  148   add_accept(out, accept_state, token);
 
  152 void unite(FiniteAutomaton& result, FiniteAutomaton 
const& a, FiniteAutomaton 
const& b) {
 
  154   int nsymbols = get_nsymbols(a);
 
  155   FiniteAutomaton out(nsymbols, 
false, 1 + get_nstates(a) + get_nstates(b));
 
  156   int start_state = add_state(out);
 
  157   int a_offset = get_nstates(out);
 
  158   append_states(out, a);
 
  159   int b_offset = get_nstates(out);
 
  160   append_states(out, b);
 
  161   int epsilon0 = get_epsilon0(out);
 
  162   int epsilon1 = get_epsilon1(out);
 
  163   add_transition(out, start_state, epsilon0, a_offset);
 
  164   add_transition(out, start_state, epsilon1, b_offset);
 
  169 void concat(FiniteAutomaton& result, FiniteAutomaton 
const& a, FiniteAutomaton 
const& b, 
int token) {
 
  170   int nsymbols = get_nsymbols(a);
 
  171   FiniteAutomaton out(nsymbols, 
false, get_nstates(a) + get_nstates(b));
 
  172   append_states(out, a);
 
  173   int b_offset = get_nstates(out);
 
  174   append_states(out, b);
 
  175   int epsilon0 = get_epsilon0(out);
 
  176   for (
int i = 0; i < get_nstates(a); ++i) {
 
  177     if (accepts(a, i) != -1) {
 
  178       add_transition(out, i, epsilon0, b_offset);
 
  179       remove_accept(out, i);
 
  182   for (
int i = 0; i < get_nstates(b); ++i) {
 
  183     if (accepts(b, i) != -1) {
 
  184       add_accept(out, i + b_offset, token);
 
  190 void plus(FiniteAutomaton& result, FiniteAutomaton 
const& a, 
int token) {
 
  192   FiniteAutomaton out(get_nsymbols(a), 
false, get_nstates(a) + 1);
 
  193   append_states(out, a);
 
  194   int new_accept_state = add_state(out);
 
  195   add_accept(out, new_accept_state, token);
 
  196   int epsilon0 = get_epsilon0(out);
 
  197   int epsilon1 = get_epsilon1(out);
 
  198   for (
int i = 0; i < get_nstates(a); ++i) {
 
  199     if (accepts(a, i) != -1) {
 
  200       add_transition(out, i, epsilon0, new_accept_state);
 
  203       add_transition(out, i, epsilon1, 0);
 
  204       remove_accept(out, i);
 
  210 void maybe(FiniteAutomaton& result, FiniteAutomaton 
const& a, 
int token) {
 
  212   FiniteAutomaton out(get_nsymbols(a), 
false, get_nstates(a) + 2);
 
  213   int new_start_state = add_state(out);
 
  214   int offset = get_nstates(out);
 
  215   append_states(out, a);
 
  216   int new_accept_state = add_state(out);
 
  217   int epsilon0 = get_epsilon0(out);
 
  218   int epsilon1 = get_epsilon1(out);
 
  219   add_transition(out, new_start_state, epsilon1, offset);
 
  222   int last = new_start_state;
 
  223   for (
int i = 0; i < get_nstates(a); ++i) {
 
  224     if (accepts(a, i) != -1) {
 
  225       add_transition(out, last, epsilon0, i + offset);
 
  226       remove_accept(out, i + offset);
 
  230   add_transition(out, last, epsilon0, new_accept_state);
 
  231   add_accept(out, new_accept_state, token);
 
  235 void star(FiniteAutomaton& result, FiniteAutomaton 
const& a, 
int token) {
 
  237   plus(result, a, token);
 
  238   maybe(result, result, token);
 
  241 typedef std::set<int> StateSet;
 
  243 static StateSet step(StateSet 
const& ss, 
int symbol, FiniteAutomaton 
const& fa) {
 
  245   for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
 
  247     int next_state = step(fa, state, symbol);
 
  248     if (next_state != -1) next_ss.insert(next_state);
 
  253 typedef std::queue<int> StateQueue;
 
  255 static StateSet get_epsilon_closure(StateSet ss, FiniteAutomaton 
const& fa) {
 
  257   for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
 
  261   int epsilon0 = get_epsilon0(fa);
 
  262   int epsilon1 = get_epsilon1(fa);
 
  264     int state = q.front(); q.pop();
 
  265     for (
int epsilon = epsilon0; epsilon <= epsilon1; ++epsilon) {
 
  266       int next_state = step(fa, state, epsilon);
 
  267       if (next_state == -1) 
continue;
 
  268       if (!ss.count(next_state)) {
 
  269         ss.insert(next_state);
 
  277 struct StateSetPtrLess {
 
  278   bool operator()(StateSet* a, StateSet* b)
 const {
 
  283 typedef std::map<StateSet*,int,StateSetPtrLess> StateSetPtr2State;
 
  284 typedef RCP<StateSet> StateSetPtr;
 
  285 typedef std::vector<StateSetPtr> StateSetUniqPtrVector;
 
  287 static void add_back(StateSetUniqPtrVector& ssupv, StateSet& ss) {
 
  289   StateSetPtr ptr(
new StateSet());
 
  291   ssupv.push_back(ptr);
 
  295 void make_deterministic(FiniteAutomaton& result, FiniteAutomaton& nfa) {
 
  297   if (get_determinism(nfa)) {
 
  301   StateSetPtr2State ssp2s;
 
  302   StateSetUniqPtrVector ssupv;
 
  303   FiniteAutomaton out(get_nsymbols(nfa), 
true, 0);
 
  306   start_ss = get_epsilon_closure(start_ss, nfa);
 
  307   add_back(ssupv, start_ss);
 
  308   ssp2s[ssupv.back().get()] = add_state(out);
 
  310   while (front < 
int(ssupv.size())) {
 
  312     StateSet& ss = *at(ssupv, front);
 
  314     for (
int symbol = 0; symbol < get_nsymbols(nfa); ++symbol) {
 
  315       StateSet next_ss = Teuchos::step(ss, symbol, nfa);
 
  316       if (next_ss.empty()) 
continue;
 
  317       next_ss = get_epsilon_closure(next_ss, nfa);
 
  319       StateSetPtr2State::iterator it = ssp2s.find(&next_ss);
 
  320       if (it == ssp2s.end()) {
 
  321         next_state = add_state(out);
 
  322         add_back(ssupv, next_ss);
 
  323         ssp2s[ssupv.back().get()] = next_state;
 
  325         next_state = it->second;
 
  327       add_transition(out, state, symbol, next_state);
 
  329     int min_accepted = -1;
 
  330     for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
 
  332       int nfa_token = accepts(nfa, nfa_state);
 
  333       if (nfa_token == -1) 
continue;
 
  334       if (min_accepted == -1 || nfa_token < min_accepted) {
 
  335         min_accepted = nfa_token;
 
  338     if (min_accepted != -1) add_accept(out, state, min_accepted);
 
  343 struct StateRowLess {
 
  344   Table<int> 
const& table;
 
  345   std::vector<int> 
const& accepted;
 
  346   bool operator()(
int const& a, 
int const& b)
 const {
 
  347     int aa = at(accepted, a);
 
  348     int ab = at(accepted, b);
 
  349     if (aa != ab) 
return aa < ab;
 
  350     for (
int symbol = 0, ncols = get_ncols(table); symbol < ncols; ++symbol) {
 
  351       int na = at(table, a, symbol);
 
  352       int nb = at(table, b, symbol);
 
  353       if (na != nb) 
return na < nb;
 
  357   StateRowLess(Table<int> 
const& t, std::vector<int> 
const& a):
 
  358     table(t),accepted(a) {
 
  362 typedef std::map<int, int, StateRowLess> StateRow2SimpleState;
 
  364 void simplify_once(FiniteAutomaton& result, FiniteAutomaton 
const& fa) {
 
  366   StateRow2SimpleState sr2ss(StateRowLess(fa.table, fa.accepted_tokens));
 
  368   for (
int state = 0; state < get_nstates(fa); ++state) {
 
  369     std::pair<StateRow2SimpleState::iterator, bool> res =
 
  370       sr2ss.insert(std::make_pair(state, nsimple));
 
  375   FiniteAutomaton out(get_nsymbols(fa), get_determinism(fa), nsimple);
 
  376   for (
int simple = 0; simple < nsimple; ++simple) {
 
  379   std::vector<bool> did_simple(
size_t(nsimple), 
false);
 
  380   for (
int state = 0; state < get_nstates(fa); ++state) {
 
  382     int simple = sr2ss[state];
 
  383     if (at(did_simple, simple)) 
continue;
 
  384     for (
int symbol = 0; symbol < get_nsymbols_eps(fa); ++symbol) {
 
  385       int next_state = step(fa, state, symbol);
 
  386       if (next_state == -1) 
continue;
 
  388       int next_simple = sr2ss[next_state];
 
  389       add_transition(out, simple, symbol, next_simple);
 
  391     int token = accepts(fa, state);
 
  393       add_accept(out, simple, token);
 
  395     at(did_simple, simple) = 
true;
 
  400 void simplify(FiniteAutomaton& result, FiniteAutomaton 
const& fa) {
 
  402   FiniteAutomaton prev;
 
  403   FiniteAutomaton next = fa;
 
  404   int nstates_next = get_nstates(next);
 
  409     nstates = nstates_next;
 
  410     simplify_once(next, prev);
 
  412     nstates_next = get_nstates(next);
 
  413   } 
while (nstates_next < nstates);
 
  417 void make_char_nfa(FiniteAutomaton& result, 
bool is_deterministic_init, 
int nstates_reserve) {
 
  419   FiniteAutomaton tmp(Teuchos::NCHARS, is_deterministic_init, nstates_reserve);
 
  423 void add_char_transition(FiniteAutomaton& fa, 
int from_state, 
char at_char, 
int to_state) {
 
  424   add_transition(fa, from_state, get_symbol(at_char), to_state);
 
  427 template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
 
  430 template <
typename T>
 
  431 struct IsSymbol<T, true> {
 
  432   static bool eval(T c) {
 
  433     if (c < 0) 
return false;
 
  434     return 0 <= Teuchos::chartab[int(c)];
 
  438 template <
typename T>
 
  439 struct IsSymbol<T, false> {
 
  440   static bool eval(T c) {
 
  441     if (c >= TEUCHOS_CHARTAB_SIZE) 
return false;
 
  442     return 0 <= Teuchos::chartab[int(c)];
 
  446 bool is_symbol(
char c) {
 
  447   return IsSymbol<char>::eval(c);
 
  450 template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
 
  453 template <
typename T>
 
  454 struct GetSymbol<T, true> {
 
  455   static int eval(T c) {
 
  457     int symbol = Teuchos::chartab[int(c)];
 
  463 template <
typename T>
 
  464 struct GetSymbol<T, false> {
 
  465   static int eval(T c) {
 
  466     int symbol = Teuchos::chartab[int(c)];
 
  472 int get_symbol(
char c) {
 
  473   return GetSymbol<char>::eval(c);
 
  476 char get_char(
int symbol) {
 
  479   return inv_chartab[symbol];
 
  482 void make_char_set_nfa(FiniteAutomaton& result, std::set<char> 
const& accepted, 
int token) {
 
  483   std::set<int> symbol_set;
 
  484   for (std::set<char>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
 
  486     symbol_set.insert(get_symbol(c));
 
  488   return make_set_nfa(result, Teuchos::NCHARS, symbol_set, token);
 
  491 void make_char_range_nfa(FiniteAutomaton& result, 
char range_start, 
char range_end, 
int token) {
 
  492   return make_range_nfa(result, Teuchos::NCHARS, get_symbol(range_start), get_symbol(range_end), token);
 
  495 void make_char_single_nfa(FiniteAutomaton& result, 
char symbol_char, 
int token) {
 
  496   return make_range_nfa(result, Teuchos::NCHARS, get_symbol(symbol_char), get_symbol(symbol_char), token);
 
  499 void negate_set(std::set<char>& result, std::set<char> 
const& s) {
 
  502   for (
int symbol = 0; symbol < NCHARS; ++symbol) {
 
  503     char c = inv_chartab[symbol];
 
  504     if (!s.count(c)) out.insert(c);
 
  509 std::ostream& operator<<(std::ostream& os, FiniteAutomaton 
const& fa) {
 
  510   if (get_determinism(fa)) os << 
"dfa ";
 
  512   os << get_nstates(fa) << 
" states " << get_nsymbols(fa) << 
" symbols\n";
 
  513   for (
int state = 0; state < get_nstates(fa); ++state) {
 
  514     for (
int symbol = 0; symbol < get_nsymbols(fa); ++symbol) {
 
  515       int next_state = step(fa, state, symbol);
 
  516       if (next_state != -1) os << 
"(" << state << 
", " << symbol << 
") -> " << next_state << 
'\n';
 
  518     if (!get_determinism(fa)) {
 
  519       for (
int symbol = get_epsilon0(fa); symbol <= get_epsilon1(fa); ++symbol) {
 
  520         int next_state = step(fa, state, symbol);
 
  521         if (next_state != -1) os << 
"(" << state << 
", eps" << (symbol - get_epsilon0(fa)) << 
") -> " << next_state << 
'\n';
 
  524     int token = accepts(fa, state);
 
  525     if (token != -1) os << state << 
" accepts " << token << 
'\n';
 
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails. 
 
Reference-counted pointer class and non-member templated function implementations.