13 #include "Kokkos_Core.hpp"
18 #ifdef KOKKOS_ENABLE_CUDA
19 #include "cuda_runtime_api.h"
22 template <
typename Scalar,
typename Ordinal,
typename Device>
35 const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
36 const size_t num_cores_per_socket =
37 Kokkos::hwloc::get_available_cores_per_numa();
46 "This test performance of mean-based UQ::PCE multiply routines.\n");
48 CLP.
setOption(
"n", &nGrid,
"Number of mesh points in the each direction");
50 CLP.
setOption(
"ni", &nIter,
"Number of multiply iterations");
52 CLP.
setOption(
"order", &order,
"Polynomial order");
54 CLP.
setOption(
"dmin", &dim_min,
"Starting stochastic dimension");
56 CLP.
setOption(
"dmax", &dim_max,
"Stopping stochastic dimension");
57 int numa = num_sockets;
58 CLP.
setOption(
"numa", &numa,
"Number of numa nodes");
59 int cores = num_cores_per_socket;
60 CLP.
setOption(
"cores", &cores,
"Cores per numa node");
61 #ifdef KOKKOS_ENABLE_THREADS
63 CLP.
setOption(
"threads", &threads,
"Number of threads for Threads device");
65 #ifdef KOKKOS_ENABLE_OPENMP
67 CLP.
setOption(
"openmp", &openmp,
"Number of threads for OpenMP device");
69 #ifdef KOKKOS_ENABLE_CUDA
71 CLP.
setOption(
"cuda",
"no-cuda", &cuda,
"Enable Cuda device");
73 CLP.
setOption(
"device", &device_id,
"CUDA device ID");
75 CLP.
parse( argc, argv );
80 #ifdef KOKKOS_ENABLE_THREADS
82 typedef Kokkos::Threads Device;
84 Kokkos::InitializationSettings init_args;
85 init_args.set_num_threads(threads);
86 Kokkos::initialize( init_args );
88 std::cout << std::endl
89 <<
"Threads performance with " << threads
90 <<
" threads, " << numa <<
" numas, " << cores
91 <<
" cores/numa:" << std::endl;
93 performance_test_driver<Scalar,Ordinal,Device>(
94 nGrid, nIter, order, dim_min, dim_max);
100 #ifdef KOKKOS_ENABLE_OPENMP
102 typedef Kokkos::OpenMP Device;
104 Kokkos::InitializationSettings init_args;
105 init_args.set_num_threads(openmp);
106 Kokkos::initialize( init_args );
108 std::cout << std::endl
109 <<
"OpenMP performance with " << openmp
110 <<
" threads, " << numa <<
" numas, " << cores
111 <<
" cores/numa:" << std::endl;
113 performance_test_driver<Scalar,Ordinal,Device>(
114 nGrid, nIter, order, dim_min, dim_max);
120 #ifdef KOKKOS_ENABLE_CUDA
122 typedef Kokkos::Cuda Device;
124 Kokkos::InitializationSettings init_args;
125 init_args.set_device_id(device_id);
126 Kokkos::initialize( init_args );
128 cudaDeviceProp deviceProp;
129 cudaGetDeviceProperties(&deviceProp, device_id);
130 std::cout << std::endl
131 <<
"CUDA performance for device " << device_id <<
" ("
132 << deviceProp.name <<
"):"
135 performance_test_driver<Scalar,Ordinal,Device>(
136 nGrid, nIter, order, dim_min, dim_max);
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setDocString(const char doc_string[])