// Benchmark driver for a single execution space, selected by the ExecSpace
// template parameter.
//
// Prints a header row with eleven column labels, then, for every cell count
// i from cell_begin through cell_end (inclusive) in increments of cell_step,
// calls eleven time_* routines and prints their returned doubles on one row.
//
// Parameters:
//   cell_begin, cell_end, cell_step - bounds and stride of the cell-count sweep
//   nbasis, npoint, ntrial, check   - forwarded unchanged to every time_* call
//
// NOTE(review): this listing omits some original lines (the embedded line
// numbers jump from 45 to 48 and stop at 73), so the function's opening and
// closing braces and the definition of `ndim` are not visible here. `fad_dim`,
// SFadType, SLFadType and DFadType are likewise declared outside this view.
// NOTE(review): the loop below only advances through `i+=cell_step`; a zero
// or negative cell_step with cell_begin <= cell_end would never satisfy the
// exit condition. Confirm that callers pass a positive step.
43 template <
typename ExecSpace>
44 void run(
const int cell_begin,
const int cell_end,
const int cell_step,
45 const int nbasis,
const int npoint,
const int ntrial,
const bool check)
// Header row: "ncell" followed by eleven right-aligned, 12-character labels,
// listed in the same order as the values printed for each row further down.
48 printf(
"ncell %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s\n",
"flat sfad",
"flat slfad",
"flat dfad",
"dfad sc",
"analytic",
"const",
"team",
"hier sfad",
"hier slfad",
"hier dfad",
"h dfad sc");
// Sweep over cell counts; the upper bound cell_end is inclusive.
49 for(
int i=cell_begin; i<=cell_end; i+=cell_step) {
// time_fad_flat instantiated for each of the three Fad types
// (columns "flat sfad", "flat slfad", "flat dfad").
50 double sfad_flat = time_fad_flat<SFadType,fad_dim,ExecSpace>(
51 i,nbasis,npoint,ndim,ntrial,
check);
52 double slfad_flat = time_fad_flat<SLFadType,fad_dim,ExecSpace>(
53 i,nbasis,npoint,ndim,ntrial,
check);
54 double dfad_flat = time_fad_flat<DFadType,fad_dim,ExecSpace>(
55 i,nbasis,npoint,ndim,ntrial,
check);
// DFadType through time_fad_scratch (column "dfad sc").
56 double dfad_scratch = time_fad_scratch<DFadType,fad_dim,ExecSpace>(
57 i,nbasis,npoint,ndim,ntrial,
check);
// The three time_analytic_* routines (columns "analytic", "const", "team");
// these take no Fad type template argument.
58 double analytic = time_analytic_flat<fad_dim,ExecSpace>(
59 i,nbasis,npoint,ndim,ntrial,
check);
60 double analytic_const = time_analytic_const<fad_dim,ExecSpace>(
61 i,nbasis,npoint,ndim,ntrial,
check);
62 double analytic_team = time_analytic_team<fad_dim,ExecSpace>(
63 i,nbasis,npoint,ndim,ntrial,
check);
// Hierarchical-team routines (columns "hier sfad", "hier slfad", "hier dfad",
// "h dfad sc"). The two DFad entries call dedicated time_dfad_* functions
// rather than time_fad_hierarchical_team<DFadType,...>.
64 double sfad_hierarchical = time_fad_hierarchical_team<SFadType,fad_dim,ExecSpace>(
65 i,nbasis,npoint,ndim,ntrial,
check);
66 double slfad_hierarchical = time_fad_hierarchical_team<SLFadType,fad_dim,ExecSpace>(
67 i,nbasis,npoint,ndim,ntrial,
check);
68 double dfad_hierarchical = time_dfad_hierarchical_team<fad_dim,ExecSpace>(
69 i,nbasis,npoint,ndim,ntrial,
check);
70 double dfad_hierarchical_scratch =
71 time_dfad_hierarchical_team_scratch<fad_dim,ExecSpace>(
72 i,nbasis,npoint,ndim,ntrial,
check);
// One result row: the cell count (%5d) followed by the eleven values in
// scientific notation (%12.3e). Presumably these are elapsed times; the
// unit is not visible in this listing.
73 printf(
"%5d %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e\n",i,sfad_flat,slfad_flat,dfad_flat,dfad_scratch,analytic,analytic_const,analytic_team,sfad_hierarchical,slfad_hierarchical,dfad_hierarchical,dfad_hierarchical_scratch);
// Program entry point: initializes Kokkos, registers command-line options on
// `clp`, parses the command line, and calls run<> for the Kokkos backends
// that are compiled in.
//
// NOTE(review): this listing omits many original lines (the embedded line
// numbers have gaps, e.g. 78 -> 85 and 122 -> 133). The declaration of `clp`,
// the declarations of most option variables (serial, openmp, threads, cuda,
// cell_end, cell_step, nbasis, npoint, ntrial, check), the `case` labels of
// the switch, any conditions guarding the calls below, the `#endif` lines and
// the end of the function are not visible here.
77 int main(
int argc,
char* argv[]) {
// Kokkos receives the raw command-line arguments.
78 Kokkos::initialize(argc,argv);
85 clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for simple Kokkos kernel");
// One boolean on/off option pair per backend, each registered only when that
// backend's KOKKOS_ENABLE_* macro is defined.
86 #ifdef KOKKOS_ENABLE_SERIAL
88 clp.
setOption(
"serial",
"no-serial", &serial,
"Whether to run Serial");
90 #ifdef KOKKOS_ENABLE_OPENMP
92 clp.
setOption(
"openmp",
"no-openmp", &openmp,
"Whether to run OpenMP");
94 #ifdef KOKKOS_ENABLE_THREADS
96 clp.
setOption(
"threads",
"no-threads", &threads,
"Whether to run Threads");
98 #ifdef KOKKOS_ENABLE_CUDA
100 clp.
setOption(
"cuda",
"no-cuda", &cuda,
"Whether to run CUDA");
// print_config is off unless set on the command line.
102 bool print_config =
false;
103 clp.
setOption(
"print-config",
"no-print-config", &print_config,
104 "Whether to print Kokkos device configuration");
// Problem-size options. cell_begin defaults to 100; the defaults of the other
// variables are set on lines not shown in this listing.
105 int cell_begin = 100;
106 clp.
setOption(
"begin", &cell_begin,
"Starting number of cells");
108 clp.
setOption(
"end", &cell_end,
"Ending number of cells");
110 clp.
setOption(
"step", &cell_step,
"Cell increment");
112 clp.
setOption(
"basis", &nbasis,
"Number of basis functions");
114 clp.
setOption(
"point", &npoint,
"Number of integration points");
116 clp.
setOption(
"trial", &ntrial,
"Number of trials");
118 clp.
setOption(
"check",
"no-check", &check,
119 "Check correctness of results");
// Dispatch on the result of parsing; the case labels are not visible here.
122 switch (clp.
parse(argc, argv)) {
// NOTE(review): presumably guarded by print_config - the condition is on a
// line this listing omits; confirm against the full file.
133 Kokkos::print_configuration(std::cout,
true);
// Run the benchmark once per compiled-in backend, passing the same
// parsed options to each run<> instantiation.
// NOTE(review): presumably each call is also gated on its runtime flag
// (serial/openmp/threads/cuda) - those guards are not visible here.
135 #ifdef KOKKOS_ENABLE_SERIAL
137 using Kokkos::Serial;
138 run<Serial>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
142 #ifdef KOKKOS_ENABLE_OPENMP
144 using Kokkos::OpenMP;
145 run<OpenMP>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
149 #ifdef KOKKOS_ENABLE_THREADS
151 using Kokkos::Threads;
152 run<Threads>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
// NOTE(review): no using-declaration for Cuda is visible before this call,
// unlike the other backends; it is presumably on an omitted line.
156 #ifdef KOKKOS_ENABLE_CUDA
159 run<Cuda>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
// The remaining lines are declaration signatures only (no bodies). They appear
// to be cross-reference entries emitted by the documentation tool for symbols
// named in the listing above, not part of the program text itself.
// NOTE(review): the `check` entry is a function template, whereas the listing
// above uses `check` as a bool value - confirm which symbol is intended.
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
void run(const int cell_begin, const int cell_end, const int cell_step, const int nbasis, const int npoint, const int ntrial, const bool check)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])