1 #include "Teuchos_set.hpp" 
    3 #include "Teuchos_regex.hpp" 
    8 #include "Teuchos_Assert.hpp" 
   10 #include "Teuchos_vector.hpp" 
   11 #include "Teuchos_string.hpp" 
   12 #include "Teuchos_chartab.hpp" 
   14 #include "Teuchos_chartab.hpp" 
   19 Language make_language() {
 
   30   prods[PROD_REGEX](
"regex") >> 
"union";
 
   31   prods[PROD_UNION_DECAY](
"union") >> 
"concat";
 
   32   prods[PROD_UNION](
"union") >> 
"union", 
"|", 
"concat"; 
 
   33   prods[PROD_CONCAT_DECAY](
"concat") >> 
"qualified";
 
   34   prods[PROD_CONCAT](
"concat") >> 
"concat", 
"qualified"; 
 
   35   prods[PROD_QUAL_DECAY](
"qualified") >> 
"single";
 
   36   prods[PROD_STAR](
"qualified") >> 
"qualified", 
"*";
 
   37   prods[PROD_PLUS](
"qualified") >> 
"qualified", 
"+";
 
   38   prods[PROD_MAYBE](
"qualified") >> 
"qualified", 
"?";
 
   39   prods[PROD_SINGLE_CHAR](
"single") >> 
"char";
 
   40   prods[PROD_ANY](
"single") >> 
"."; 
 
   41   prods[PROD_SINGLE_SET](
"single") >> 
"set";
 
   42   prods[PROD_PARENS_UNION](
"single") >> 
"(", 
"union", 
")";
 
   43   prods[PROD_SET_POSITIVE](
"set") >> 
"positive-set";
 
   44   prods[PROD_SET_NEGATIVE](
"set") >> 
"negative-set";
 
   45   prods[PROD_POSITIVE_SET](
"positive-set") >> 
"[", 
"set-items", 
"]";
 
   46   prods[PROD_NEGATIVE_SET](
"negative-set") >> 
"[", 
"^", 
"set-items", 
"]";
 
   47   prods[PROD_SET_ITEMS_DECAY](
"set-items") >> 
"set-item";
 
   48   prods[PROD_SET_ITEMS_ADD](
"set-items") >> 
"set-items", 
"set-item";
 
   49   prods[PROD_SET_ITEM_CHAR](
"set-item") >> 
"char";
 
   50   prods[PROD_SET_ITEM_RANGE](
"set-item") >> 
"range";
 
   51   prods[PROD_RANGE](
"range") >> 
"char", 
"-", 
"char";
 
   52   out.tokens.resize(NTOKS);
 
   54   out.tokens[TOK_CHAR](
"char", 
"[^\\\\\\.\\[\\]\\(\\)\\|\\-\\^\\*\\+\\?]|\\\\.");
 
   55   out.tokens[TOK_DOT](
".", 
"\\.");
 
   56   out.tokens[TOK_LRANGE](
"[", 
"\\["); // escaped '[' literal: the token that opens a positive or negative set
 
   57   out.tokens[TOK_RRANGE](
"]", 
"\\]");
 
   58   out.tokens[TOK_LPAREN](
"(", 
"\\(");
 
   59   out.tokens[TOK_RPAREN](
")", 
"\\)");
 
   60   out.tokens[TOK_UNION](
"|", 
"\\|");
 
   61   out.tokens[TOK_RANGE](
"-", 
"\\-");
 
   62   out.tokens[TOK_NEGATE](
"^", 
"\\^");
 
   63   out.tokens[TOK_STAR](
"*", 
"\\*");
 
   64   out.tokens[TOK_PLUS](
"+", 
"\\+");
 
   65   out.tokens[TOK_MAYBE](
"?", 
"\\?");
 
   72   std::string meta_chars_str = 
".[]()|-^*+?";
 
   73   std::set<int> all_chars;
 
   74   for (
int i = 0; i < NCHARS; ++i) all_chars.insert(i);
 
   75   std::set<int> nonmeta_chars = all_chars;
 
   76   for (
int i = 0; i < size(meta_chars_str); ++i) {
 
   77     int meta_char = at(meta_chars_str, i);
 
   78     std::set<int>::iterator it = nonmeta_chars.find(get_symbol(meta_char));
 
   79     nonmeta_chars.erase(it);
 
   81   FiniteAutomaton lex_nonmeta;
 
   82   make_set_nfa(lex_nonmeta, NCHARS, nonmeta_chars, TOK_CHAR);
 
   83   FiniteAutomaton lex_slash;
 
   84   make_char_single_nfa(lex_slash, 
'\\');
 
   85   FiniteAutomaton lex_any;
 
   86   make_set_nfa(lex_any, NCHARS, all_chars);
 
   87   FiniteAutomaton lex_escaped;
 
   88   concat(lex_escaped, lex_slash, lex_any, TOK_CHAR);
 
   89   FiniteAutomaton lex_char;
 
   90   unite(lex_char, lex_nonmeta, lex_escaped);
 
   91   FiniteAutomaton lex_metachars;
 
   92   for (
int i = 0; i < size(meta_chars_str); ++i) {
 
   93     int token = TOK_CHAR + i + 1;
 
   95       FiniteAutomaton lex_metachar;
 
   96       make_char_single_nfa(lex_metachar, at(meta_chars_str, i), token);
 
   97       unite(lex_metachars, lex_metachars, lex_metachar);
 
   99       make_char_single_nfa(lex_metachars, at(meta_chars_str, i), token);
 
  102   unite(result, lex_metachars, lex_char);
 
  103   make_deterministic(result, result);
 
  104   simplify(result, result);
 
  109   if (ptr.strong_count() == 0) {
 
  110     RCP<ReaderTables> newptr(
new ReaderTables());
 
  112     GrammarPtr grammar = make_grammar(*lang);
 
  114     regex::make_lexer(newptr->lexer);
 
  115     newptr->indent_info.is_sensitive = 
false;
 
  116     newptr->indent_info.indent_token = -1;
 
  117     newptr->indent_info.dedent_token = -1;
 
  125   if (ptr.strong_count() == 0) {
 
  126     ptr.reset(
new Language(make_language()));
 
  131 void make_dfa(FiniteAutomaton& result, std::string 
const& name, std::string 
const& regex, 
int token) {
 
  133   regex::Reader reader(token);
 
  136     reader.read_string(result_any, regex, name);
 
  138     std::stringstream ss;
 
  139     ss << e.what() << 
'\n';
 
  140     ss << 
"error: couldn't build DFA for token \"" << name << 
"\" regex \"" << regex << 
"\"\n";
 
  141     ss << 
"repeating with DebugReader:\n";
 
  142     DebugReader debug_reader(regex::ask_reader_tables(), ss);
 
  143     debug_reader.read_string(result_any, regex, name);
 
  144     throw ParserFail(ss.str());
 
  146   swap(any_ref_cast<FiniteAutomaton>(result_any), result);
 
  149 regex::Reader::Reader(
int result_token_in):
 
  150   Teuchos::Reader(regex::ask_reader_tables()),
 
  151   result_token(result_token_in) {
 
  154 void regex::Reader::at_shift(any& result, 
int token, std::string& text) {
 
  155   if (token != TOK_CHAR) 
return;
 
  156   if (size(text) == 1) {
 
  158   } 
else if (size(text) == 2) {
 
  163         "BUG: regex char text is \"" << text << 
"\"\n");
 
  167 void regex::Reader::at_reduce(any& result_any, 
int production, std::vector<any>& rhs) {
 
  169   switch (production) {
 
  171       swap(result_any, at(rhs, 0));
 
  172       FiniteAutomaton& result = any_ref_cast<FiniteAutomaton>(result_any);
 
  173       make_deterministic(result, result);
 
  174       simplify(result, result);
 
  177     case PROD_UNION_DECAY:
 
  178     case PROD_CONCAT_DECAY:
 
  179     case PROD_QUAL_DECAY:
 
  180     case PROD_SET_ITEMS_DECAY:
 
  181     case PROD_SET_ITEM_RANGE: {
 
  182       swap(result_any, at(rhs, 0));
 
  186       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  187       FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  188       FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 2));
 
  193       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  194       FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  195       FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 1));
 
  196       concat(result, a, b, result_token);
 
  200       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  201       FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  202       star(result, a, result_token);
 
  206       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  207       FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  208       plus(result, a, result_token);
 
  212       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  213       FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  214       maybe(result, a, result_token);
 
  217     case PROD_SINGLE_CHAR: {
 
  218       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  219       char c = any_cast<
char>(at(rhs, 0));
 
  220       make_char_single_nfa(result, c, result_token);
 
  224       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  225       make_range_nfa(result, NCHARS, 0, NCHARS - 1, result_token);
 
  228     case PROD_SINGLE_SET: {
 
  229       FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  230       std::set<char>& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
 
  231       make_char_set_nfa(result, charset, result_token);
 
  234     case PROD_PARENS_UNION: {
 
  235       swap(result_any, at(rhs, 1));
 
  238     case PROD_SET_POSITIVE: {
 
  239       swap(result_any, at(rhs, 0));
 
  242     case PROD_SET_NEGATIVE: {
 
  243       std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  244       std::set<char> 
const& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
 
  245       negate_set(result, charset);
 
  248     case PROD_POSITIVE_SET: {
 
  249       swap(result_any, at(rhs, 1));
 
  252     case PROD_NEGATIVE_SET: {
 
  253       swap(result_any, at(rhs, 2));
 
  256     case PROD_SET_ITEMS_ADD: {
 
  257       std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  258       std::set<char>& a = any_ref_cast<std::set<char> >(at(rhs, 0));
 
  259       std::set<char> 
const& b = any_ref_cast<std::set<char> >(at(rhs, 1));
 
  261       unite_with(result, b);
 
  264     case PROD_SET_ITEM_CHAR: {
 
  265       std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  266       char c = any_cast<
char>(at(rhs, 0));
 
  271       std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  272       char a = any_cast<
char>(at(rhs, 0));
 
  273       char b = any_cast<
char>(at(rhs, 2));
 
  274       for (
char c = a; c <= b; ++c) {
 
  281       "BUG: unexpected production " << production << 
'\n');
 
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar. 
 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging. 
 
Tries to create LALR(1) parser tables for a given grammar. 
 
Productions productions
vector of productions 
 
Declares Teuchos::Parser, ParserFail and make_lalr1_parser. 
 
void make_lexer(FiniteAutomaton &result, Language const &language)
Construct a lexer for the Language tokens. 
 
RCP< const ReaderTables > ReaderTablesPtr
an RCP to a const ReaderTables 
 
RCP< const Language > LanguagePtr
an RCP to a const Language 
 
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails. 
 
Declares Teuchos::Reader.