46 #include "Kokkos_Core.hpp"
// Display names for the selectable "SG Mat-Vec Algorithm" choices.
// The table is indexed by an SG_Alg value (sg_alg_names[sg_alg]) when the
// "... MXV Efficiency" column header of the results table is printed.
// NOTE(review): entry order presumably has to match sg_alg_values[] —
// confirm against that array's definition.
59 const char *
sg_alg_names[] = {
"Original Matrix-Free",
"Product CRS" };
62 run_test(
const size_t num_cpu,
const size_t num_core_per_cpu,
63 const size_t num_threads_per_core,
64 const size_t p,
const size_t d,
const size_t nGrid,
const size_t nIter,
65 const bool symmetric,
SG_Alg sg_alg,
66 const std::vector<double>& perf1 = std::vector<double>())
69 typedef Kokkos::Threads Device;
70 const size_t team_count = num_cpu * num_core_per_cpu;
71 const size_t threads_per_team = num_threads_per_core;
72 Kokkos::InitArguments init_args;
73 init_args.num_threads = team_count*threads_per_team;
74 Kokkos::initialize( init_args );
76 std::vector<int> var_degree( d , p );
78 std::vector<double> perf;
81 unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
84 unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
85 var_degree , nGrid , nIter ,
true , symmetric );
91 speed_up = perf1[1] / perf[1];
93 speed_up = perf[1] / perf[1];
94 double efficiency = speed_up / team_count;
96 std::cout << team_count <<
" , "
103 << 100.0 * efficiency <<
" , "
117 CLP.
setOption(
"p", &p,
"Polynomial order");
119 CLP.
setOption(
"d", &d,
"Stochastic dimension");
121 CLP.
setOption(
"n", &nGrid,
"Number of spatial grid points in each dimension");
123 CLP.
setOption(
"niter", &nIter,
"Number of iterations");
124 int n_thread_per_core = 1;
125 CLP.
setOption(
"nthread", &n_thread_per_core,
"Number of threads per core to use");
126 int n_hyperthreads = 2;
127 CLP.
setOption(
"nht", &n_hyperthreads,
"Number of hyperthreads per core available");
130 "SG Mat-Vec Algorithm");
131 bool symmetric =
true;
132 CLP.
setOption(
"symmetric",
"asymmetric", &symmetric,
"Use symmetric PDF");
133 CLP.
parse( argc, argv );
136 const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
137 const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
138 const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
139 if (static_cast<size_t>(n_thread_per_core) > core_capacity )
140 n_thread_per_core = core_capacity;
143 std::cout << std::endl
146 <<
"\"#Variable\" , "
147 <<
"\"PolyDegree\" , "
151 <<
"\"" <<
sg_alg_names[sg_alg] <<
" MXV Efficiency\" , "
155 const std::vector<double> perf1 =
156 run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
159 for (
size_t n=2;
n<=num_cpu; ++
n) {
160 const std::vector<double> perf =
161 run_test(
n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
165 for (
size_t n=2;
n<=num_core_per_cpu; ++
n) {
166 const std::vector<double> perf =
167 run_test(num_cpu,
n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
171 const std::vector<double> perf =
172 run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
173 nIter, symmetric, sg_alg, perf1);
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
const SG_Alg sg_alg_values[]
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
int main(int argc, char **argv)
const char * sg_alg_names[]