// run<ExecSpace>: benchmark sweep over the number of cells.
//
// Prints one header row, then for every cell count i in
// [cell_begin, cell_end] (inclusive, advancing by cell_step) calls each of the
// eleven time_* routines below and prints their double results as one row.
// ExecSpace is forwarded as the last template argument of every time_* call;
// nbasis, npoint, ntrial and check are forwarded unchanged to every call.
//
// NOTE(review): this listing has gaps (the embedded numbering skips 46-47 and
// 74-76), so the function's braces and the declarations of `ndim` and
// `fad_dim` are not visible here -- confirm against the full file.
// NOTE(review): no guard on cell_step is visible in this fragment; with
// cell_step <= 0 and cell_begin <= cell_end the loop below would never reach
// its exit condition.
43 template <
typename ExecSpace>
 
   44 void run(
const int cell_begin, 
const int cell_end, 
const int cell_step,
 
   45          const int nbasis, 
const int npoint, 
const int ntrial, 
const bool check)
 
  // Header row: "ncell" followed by eleven 12-character-wide column labels,
  // listed in the same order as the eleven values printed per row further down.
   48   printf(
"ncell %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s\n", 
"flat sfad", 
"flat slfad", 
"flat dfad", 
"dfad sc", 
"analytic", 
"const", 
"team", 
"hier sfad", 
"hier slfad", 
"hier dfad", 
"h dfad sc");
 
  // Sweep the cell count; the upper bound is inclusive (i <= cell_end).
   49   for(
int i=cell_begin; i<=cell_end; i+=cell_step) {
 
    // Every call below receives the same argument list
    // (i, nbasis, npoint, ndim, ntrial, check); only the routine and its
    // Fad-type template argument differ.
    // Presumably each returned double is an elapsed time -- units are not
    // visible in this fragment; verify against the time_* definitions.

    // "flat" variants, one per Fad type (SFadType, SLFadType, DFadType).
   50     double sfad_flat = time_fad_flat<SFadType,fad_dim,ExecSpace>(
 
   51       i,nbasis,npoint,ndim,ntrial,
check);
 
   52     double slfad_flat = time_fad_flat<SLFadType,fad_dim,ExecSpace>(
 
   53       i,nbasis,npoint,ndim,ntrial,
check);
 
   54     double dfad_flat = time_fad_flat<DFadType,fad_dim,ExecSpace>(
 
   55       i,nbasis,npoint,ndim,ntrial,
check);
 
    // DFadType through time_fad_scratch (column "dfad sc").
   56     double dfad_scratch = time_fad_scratch<DFadType,fad_dim,ExecSpace>(
 
   57       i,nbasis,npoint,ndim,ntrial,
check);
 
    // Analytic variants: these take no Fad type, only fad_dim and ExecSpace.
   58     double analytic = time_analytic_flat<fad_dim,ExecSpace>(
 
   59       i,nbasis,npoint,ndim,ntrial,
check);
 
   60     double analytic_const = time_analytic_const<fad_dim,ExecSpace>(
 
   61       i,nbasis,npoint,ndim,ntrial,
check);
 
   62     double analytic_team = time_analytic_team<fad_dim,ExecSpace>(
 
   63       i,nbasis,npoint,ndim,ntrial,
check);
 
    // Hierarchical-team variants. SFadType and SLFadType share
    // time_fad_hierarchical_team; the DFad cases use dedicated
    // time_dfad_hierarchical_team* routines.
   64     double sfad_hierarchical = time_fad_hierarchical_team<SFadType,fad_dim,ExecSpace>(
 
   65       i,nbasis,npoint,ndim,ntrial,
check);
 
   66     double slfad_hierarchical = time_fad_hierarchical_team<SLFadType,fad_dim,ExecSpace>(
 
   67       i,nbasis,npoint,ndim,ntrial,
check);
 
   68     double dfad_hierarchical = time_dfad_hierarchical_team<fad_dim,ExecSpace>(
 
   69       i,nbasis,npoint,ndim,ntrial,
check);
 
   70     double dfad_hierarchical_scratch =
 
   71       time_dfad_hierarchical_team_scratch<fad_dim,ExecSpace>(
 
   72       i,nbasis,npoint,ndim,ntrial,
check);
 
    // Result row: the cell count (%5d, same width as the "ncell" label)
    // followed by the eleven results in header order, each as %12.3e.
   73     printf(
"%5d %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e\n",i,sfad_flat,slfad_flat,dfad_flat,dfad_scratch,analytic,analytic_const,analytic_team,sfad_hierarchical,slfad_hierarchical,dfad_hierarchical,dfad_hierarchical_scratch);
 
// Program entry point (fragment). Visible steps, in order:
//   1. Kokkos::initialize(argc, argv)
//   2. register a doc string and command-line options on `clp`
//   3. switch on the result of clp.parse(argc, argv)
//   4. Kokkos::print_configuration(std::cout, true)
//   5. run<ExecSpace>(...) for each execution space whose KOKKOS_ENABLE_*
//      macro is defined
//
// NOTE(review): many original lines are missing from this listing (the
// embedded numbering has gaps). The declarations of `clp`, `serial`,
// `openmp`, `threads`, `cuda`, `cell_end`, `cell_step`, `nbasis`, `npoint`,
// `ntrial` and `check`, the switch cases, the matching #endif lines, and any
// finalize / error handling are not visible here -- confirm against the full
// file before relying on these notes.
   77 int main(
int argc, 
char* argv[]) {
 
   78   Kokkos::initialize(argc,argv);
 
    // Help text for the option parser.
    // Presumably `clp` is a Teuchos::CommandLineProcessor; its declaration is
    // not visible in this fragment -- TODO confirm.
   85     clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for simple Kokkos kernel");
 
    // One boolean on/off option pair per execution space, each registered only
    // when the corresponding KOKKOS_ENABLE_* macro is defined.
   86 #ifdef KOKKOS_ENABLE_SERIAL 
   88     clp.
setOption(
"serial", 
"no-serial", &serial, 
"Whether to run Serial");
 
   90 #ifdef KOKKOS_ENABLE_OPENMP 
   92     clp.
setOption(
"openmp", 
"no-openmp", &openmp, 
"Whether to run OpenMP");
 
   94 #ifdef KOKKOS_ENABLE_THREADS 
   96     clp.
setOption(
"threads", 
"no-threads", &threads, 
"Whether to run Threads");
 
   98 #ifdef KOKKOS_ENABLE_CUDA 
  100     clp.
setOption(
"cuda", 
"no-cuda", &cuda, 
"Whether to run CUDA");
 
    // Boolean option (default false) for printing the Kokkos device
    // configuration.
  102     bool print_config = 
false;
 
  103     clp.
setOption(
"print-config", 
"no-print-config", &print_config,
 
  104                   "Whether to print Kokkos device configuration");
 
    // Sweep range passed to run(): --begin (default 100), --end, --step.
    // Defaults for the end and step values are declared on lines not shown here.
  105     int cell_begin = 100;
 
  106     clp.
setOption(
"begin", &cell_begin, 
"Starting number of cells");
 
  108     clp.
setOption(
"end", &cell_end, 
"Ending number of cells");
 
  110     clp.
setOption(
"step", &cell_step, 
"Cell increment");
 
    // Problem-size and repetition options, forwarded unchanged to run().
  112     clp.
setOption(
"basis", &nbasis, 
"Number of basis functions");
 
  114     clp.
setOption(
"point", &npoint, 
"Number of integration points");
 
  116     clp.
setOption(
"trial", &ntrial, 
"Number of trials");
 
    // Boolean option pair controlling the `check` flag passed to run().
  118     clp.
setOption(
"check", 
"no-check", &check,
 
  119                   "Check correctness of results");
 
    // Parse the command line; the handling of each parse result (the switch
    // cases) is on lines not visible in this fragment.
  122     switch (clp.
parse(argc, argv)) {
 
    // NOTE(review): presumably guarded by `if (print_config)` on a line not
    // shown here -- confirm.
  133       Kokkos::print_configuration(std::cout, 
true);
 
    // One run<>() call per compiled-in execution space, all with identical
    // arguments.
    // NOTE(review): presumably each call is additionally gated on the matching
    // runtime flag (serial / openmp / threads / cuda); those conditions are
    // not visible in this fragment -- confirm.
  135 #ifdef KOKKOS_ENABLE_SERIAL 
  137       using Kokkos::Serial;
 
  138       run<Serial>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial, 
check);
 
  142 #ifdef KOKKOS_ENABLE_OPENMP 
  144       using Kokkos::OpenMP;
 
  145       run<OpenMP>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial, 
check);
 
  149 #ifdef KOKKOS_ENABLE_THREADS 
  151       using Kokkos::Threads;
 
  152       run<Threads>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial, 
check);
 
  156 #ifdef KOKKOS_ENABLE_CUDA 
  159       run<Cuda>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial, 
check);
 
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
 
void run(const int cell_begin, const int cell_end, const int cell_step, const int nbasis, const int npoint, const int ntrial, const bool check)
 
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
 
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
 
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const 
 
void setDocString(const char doc_string[])