23 template <
typename ExecSpace>
24 void run(
const int cell_begin,
const int cell_end,
const int cell_step,
25 const int nbasis,
const int npoint,
const int ntrial,
const bool check)
28 printf(
"ncell %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s\n",
"flat sfad",
"flat slfad",
"flat dfad",
"dfad sc",
"analytic",
"const",
"team",
"hier sfad",
"hier slfad",
"hier dfad",
"h dfad sc");
29 for(
int i=cell_begin;
i<=cell_end;
i+=cell_step) {
30 double sfad_flat = time_fad_flat<SFadType,fad_dim,ExecSpace>(
31 i,nbasis,npoint,ndim,ntrial,
check);
32 double slfad_flat = time_fad_flat<SLFadType,fad_dim,ExecSpace>(
33 i,nbasis,npoint,ndim,ntrial,
check);
34 double dfad_flat = time_fad_flat<DFadType,fad_dim,ExecSpace>(
35 i,nbasis,npoint,ndim,ntrial,
check);
36 double dfad_scratch = time_fad_scratch<DFadType,fad_dim,ExecSpace>(
37 i,nbasis,npoint,ndim,ntrial,
check);
38 double analytic = time_analytic_flat<fad_dim,ExecSpace>(
39 i,nbasis,npoint,ndim,ntrial,
check);
40 double analytic_const = time_analytic_const<fad_dim,ExecSpace>(
41 i,nbasis,npoint,ndim,ntrial,
check);
42 double analytic_team = time_analytic_team<fad_dim,ExecSpace>(
43 i,nbasis,npoint,ndim,ntrial,
check);
44 double sfad_hierarchical = time_fad_hierarchical_team<SFadType,fad_dim,ExecSpace>(
45 i,nbasis,npoint,ndim,ntrial,
check);
46 double slfad_hierarchical = time_fad_hierarchical_team<SLFadType,fad_dim,ExecSpace>(
47 i,nbasis,npoint,ndim,ntrial,
check);
48 double dfad_hierarchical = time_dfad_hierarchical_team<fad_dim,ExecSpace>(
49 i,nbasis,npoint,ndim,ntrial,
check);
50 double dfad_hierarchical_scratch =
51 time_dfad_hierarchical_team_scratch<fad_dim,ExecSpace>(
52 i,nbasis,npoint,ndim,ntrial,
check);
53 printf(
"%5d %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e %12.3e\n",i,sfad_flat,slfad_flat,dfad_flat,dfad_scratch,analytic,analytic_const,analytic_team,sfad_hierarchical,slfad_hierarchical,dfad_hierarchical,dfad_hierarchical_scratch);
57 int main(
int argc,
char* argv[]) {
58 Kokkos::initialize(argc,argv);
65 clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for simple Kokkos kernel");
66 #ifdef KOKKOS_ENABLE_SERIAL
68 clp.
setOption(
"serial",
"no-serial", &serial,
"Whether to run Serial");
70 #ifdef KOKKOS_ENABLE_OPENMP
72 clp.
setOption(
"openmp",
"no-openmp", &openmp,
"Whether to run OpenMP");
74 #ifdef KOKKOS_ENABLE_THREADS
76 clp.
setOption(
"threads",
"no-threads", &threads,
"Whether to run Threads");
78 #ifdef KOKKOS_ENABLE_CUDA
80 clp.
setOption(
"cuda",
"no-cuda", &cuda,
"Whether to run CUDA");
82 bool print_config =
false;
83 clp.
setOption(
"print-config",
"no-print-config", &print_config,
84 "Whether to print Kokkos device configuration");
86 clp.
setOption(
"begin", &cell_begin,
"Starting number of cells");
88 clp.
setOption(
"end", &cell_end,
"Ending number of cells");
90 clp.
setOption(
"step", &cell_step,
"Cell increment");
92 clp.
setOption(
"basis", &nbasis,
"Number of basis functions");
94 clp.
setOption(
"point", &npoint,
"Number of integration points");
96 clp.
setOption(
"trial", &ntrial,
"Number of trials");
98 clp.
setOption(
"check",
"no-check", &check,
99 "Check correctness of results");
102 switch (clp.
parse(argc, argv)) {
113 Kokkos::print_configuration(std::cout,
true);
115 #ifdef KOKKOS_ENABLE_SERIAL
117 using Kokkos::Serial;
118 run<Serial>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
122 #ifdef KOKKOS_ENABLE_OPENMP
124 using Kokkos::OpenMP;
125 run<OpenMP>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
129 #ifdef KOKKOS_ENABLE_THREADS
131 using Kokkos::Threads;
132 run<Threads>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
136 #ifdef KOKKOS_ENABLE_CUDA
139 run<Cuda>(cell_begin, cell_end, cell_step, nbasis, npoint, ntrial,
check);
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
void run(const int cell_begin, const int cell_end, const int cell_step, const int nbasis, const int npoint, const int ntrial, const bool check)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])