14 #include "Kokkos_Core.hpp"
/// Display names of the selectable SG mat-vec algorithms.
///
/// The table is indexed with an SG_Alg value (`sg_alg_names[sg_alg]`) when the
/// CSV header column for the efficiency results is printed, so the order of
/// the entries must stay in step with the SG_Alg enumerators.
const char *
sg_alg_names[] = {
  "Original Matrix-Free",
  "Product CRS" };
// run_test: initializes Kokkos with a thread count derived from the requested
// cpu/core/thread layout, invokes an SG matrix-vector performance kernel, and
// streams a comma-separated result row (including an efficiency percentage)
// to std::cout.
//
// Parameters (as used by the visible statements):
//   num_cpu, num_core_per_cpu - multiplied together to form team_count.
//   num_threads_per_core      - used as threads_per_team.
//   p, d                      - var_degree is built as d entries, each equal to p.
//   nGrid, nIter, symmetric   - forwarded unchanged to the kernel test routines.
//   sg_alg                    - presumably selects which kernel runs; the
//                               selecting branch is not visible here (TODO confirm).
//   perf1                     - timings of a reference run; perf1[1] is the
//                               numerator of the speed-up. Defaults to an empty
//                               vector.
//
// NOTE(review): this listing omits lines (return type, braces, the conditions
// guarding the two kernel calls and the two speed_up assignments, the return
// statement). The comments below describe only what is visible.
30 run_test(
const size_t num_cpu,
const size_t num_core_per_cpu,
31 const size_t num_threads_per_core,
32 const size_t p,
const size_t d,
const size_t nGrid,
const size_t nIter,
33 const bool symmetric,
SG_Alg sg_alg,
34 const std::vector<double>& perf1 = std::vector<double>())
// All kernels below are instantiated for the Kokkos::Threads device.
37 typedef Kokkos::Threads Device;
// Total Kokkos threads requested = (num_cpu * num_core_per_cpu) * num_threads_per_core.
38 const size_t team_count = num_cpu * num_core_per_cpu;
39 const size_t threads_per_team = num_threads_per_core;
40 Kokkos::InitializationSettings init_args;
41 init_args.set_num_threads(team_count*threads_per_team);
42 Kokkos::initialize( init_args );
// One polynomial degree entry per stochastic dimension: d copies of p.
44 std::vector<int> var_degree( d , p );
// Receives the kernel timings; the assignments to it are not visible in this listing.
46 std::vector<double> perf;
// Kernel variant using Stokhos::CrsProductTensor.
49 unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
// Kernel variant using the original matrix-free multiply (Stokhos::DefaultMultiply).
// NOTE(review): the meaning of the literal `true` argument is not visible here.
52 unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
53 var_degree , nGrid , nIter ,
true , symmetric );
// Speed-up of this run relative to the reference timings in perf1.
// NOTE(review): what entry [1] of the perf vectors holds is defined by the
// kernel routines, not shown here.
59 speed_up = perf1[1] / perf[1];
// Self-ratio: evaluates to 1 for a finite, non-zero perf[1]. Presumably the
// path taken when no reference perf1 was supplied — the guarding condition is
// not visible; confirm.
61 speed_up = perf[1] / perf[1];
// Efficiency = speed-up divided by the number of teams (cpus * cores per cpu).
62 double efficiency = speed_up / team_count;
// Result row: fields separated by " , "; efficiency is reported as a percentage.
64 std::cout << team_count <<
" , "
71 << 100.0 * efficiency <<
" , "
// Fragment of the driver: registers command-line options, queries the
// hardware topology through Kokkos::hwloc, prints a CSV header, and calls
// run_test for a series of cpu/core/thread configurations.
// NOTE(review): the enclosing function header and the declarations of CLP, p,
// d, nGrid, nIter and sg_alg are not visible in this listing.

// --- Command-line options -------------------------------------------------
85 CLP.
setOption(
"p", &p,
"Polynomial order");
87 CLP.
setOption(
"d", &d,
"Stochastic dimension");
89 CLP.
setOption(
"n", &nGrid,
"Number of spatial grid points in each dimension");
91 CLP.
setOption(
"niter", &nIter,
"Number of iterations");
// Threads per core requested by the user; defaults to 1 and is clamped below.
92 int n_thread_per_core = 1;
93 CLP.
setOption(
"nthread", &n_thread_per_core,
"Number of threads per core to use");
// Default for the "nht" option is 2.
94 int n_hyperthreads = 2;
95 CLP.
setOption(
"nht", &n_hyperthreads,
"Number of hyperthreads per core available");
98 "SG Mat-Vec Algorithm");
// Boolean option pair: "symmetric" / "asymmetric"; defaults to symmetric.
99 bool symmetric =
true;
100 CLP.
setOption(
"symmetric",
"asymmetric", &symmetric,
"Use symmetric PDF");
// NOTE(review): the value returned by parse() is not captured in the visible code.
101 CLP.
parse( argc, argv );
// --- Hardware topology as reported by Kokkos::hwloc --------------------------
104 const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
105 const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
106 const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
// Clamp the requested threads-per-core to what the hardware reports as available.
107 if (static_cast<size_t>(n_thread_per_core) > core_capacity )
108 n_thread_per_core = core_capacity;
// --- CSV header: quoted column titles; the efficiency column is labelled with
// the name of the selected algorithm.
111 std::cout << std::endl
114 <<
"\"#Variable\" , "
115 <<
"\"PolyDegree\" , "
119 <<
"\"" <<
sg_alg_names[sg_alg] <<
" MXV Efficiency\" , "
// Reference run: 1 cpu, 1 core, 1 thread. No perf1 argument is passed, so
// run_test receives its default (empty) vector.
123 const std::vector<double> perf1 =
124 run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
// Sweep over the number of cpus (2..num_cpu), one core per cpu and one thread
// per core, reporting against the reference run.
127 for (
size_t n=2;
n<=num_cpu; ++
n) {
128 const std::vector<double> perf =
129 run_test(
n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
// Sweep over cores per cpu (2..num_core_per_cpu) using all cpus, one thread per core.
133 for (
size_t n=2;
n<=num_core_per_cpu; ++
n) {
134 const std::vector<double> perf =
135 run_test(num_cpu,
n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
// Final configuration: all cpus, all cores, and the (clamped) requested
// number of threads per core.
139 const std::vector<double> perf =
140 run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
141 nIter, symmetric, sg_alg, perf1);
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
const SG_Alg sg_alg_values[]
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
int main(int argc, char **argv)
const char * sg_alg_names[]