16 #include "Kokkos_Core.hpp"
21 #ifdef KOKKOS_ENABLE_CUDA
22 #include "cuda_runtime_api.h"
25 template <
typename Storage>
26 void mainHost(
int nGrid,
int nIter, KokkosSparse::DeviceConfig dev_config);
27 template <
typename Storage>
28 void mainCuda(
int nGrid,
int nIter, KokkosSparse::DeviceConfig dev_config);
36 const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
37 const size_t num_cores_per_socket =
38 Kokkos::hwloc::get_available_cores_per_numa();
39 const size_t num_threads_per_core =
40 Kokkos::hwloc::get_available_threads_per_core();
45 "This test performance of MP::Vector multiply routines.\n");
47 CLP.
setOption(
"n", &nGrid,
"Number of mesh points in the each direction");
49 CLP.
setOption(
"ni", &nIter,
"Number of multiply iterations");
50 int num_cores = num_cores_per_socket * num_sockets;
52 "Number of CPU cores to use (defaults to all)");
54 CLP.
setOption(
"hyperthreads", &num_hyper_threads,
55 "Number of hyper threads per core to use (defaults to all)");
56 int threads_per_vector = 1;
57 CLP.
setOption(
"threads_per_vector", &threads_per_vector,
58 "Number of threads to use within each vector");
59 #ifdef KOKKOS_ENABLE_THREADS
61 CLP.
setOption(
"threads",
"no-threads", &threads,
"Enable Threads device");
63 #ifdef KOKKOS_ENABLE_OPENMP
65 CLP.
setOption(
"openmp",
"no-openmp", &openmp,
"Enable OpenMP device");
67 #ifdef KOKKOS_ENABLE_CUDA
69 CLP.
setOption(
"cuda",
"no-cuda", &cuda,
"Enable Cuda device");
70 int cuda_threads_per_vector = 16;
71 CLP.
setOption(
"cuda_threads_per_vector", &cuda_threads_per_vector,
72 "Number of Cuda threads to use within each vector");
73 int cuda_block_size = 0;
74 CLP.
setOption(
"cuda_block_size", &cuda_block_size,
75 "Cuda block size (0 implies the default choice)");
76 int num_cuda_blocks = 0;
77 CLP.
setOption(
"num_cuda_blocks", &num_cuda_blocks,
78 "Number of Cuda blocks (0 implies the default choice)");
80 CLP.
setOption(
"device", &device_id,
"CUDA device ID");
82 CLP.
parse( argc, argv );
87 #ifdef KOKKOS_ENABLE_THREADS
89 typedef Kokkos::Threads Device;
92 Kokkos::InitializationSettings init_args;
93 init_args.set_num_threads(num_cores*num_hyper_threads);
94 Kokkos::initialize( init_args );
96 std::cout << std::endl
97 <<
"Threads performance with " << num_cores*num_hyper_threads
98 <<
" threads:" << std::endl;
100 KokkosSparse::DeviceConfig dev_config(num_cores,
102 num_hyper_threads / threads_per_vector);
104 mainHost<Storage>(nGrid, nIter, dev_config);
110 #ifdef KOKKOS_ENABLE_OPENMP
112 typedef Kokkos::OpenMP Device;
115 Kokkos::InitializationSettings init_args;
116 init_args.set_num_threads(num_cores*num_hyper_threads);
117 Kokkos::initialize( init_args );
119 std::cout << std::endl
120 <<
"OpenMP performance with " << num_cores*num_hyper_threads
121 <<
" threads:" << std::endl;
123 KokkosSparse::DeviceConfig dev_config(num_cores,
125 num_hyper_threads / threads_per_vector);
127 mainHost<Storage>(nGrid, nIter, dev_config);
133 #ifdef KOKKOS_ENABLE_CUDA
135 typedef Kokkos::Cuda Device;
138 Kokkos::InitializationSettings init_args;
139 init_args.set_device_id(device_id);
140 Kokkos::initialize( init_args );
142 cudaDeviceProp deviceProp;
143 cudaGetDeviceProperties(&deviceProp, device_id);
144 std::cout << std::endl
145 <<
"CUDA performance for device " << device_id <<
" ("
146 << deviceProp.name <<
"):"
149 KokkosSparse::DeviceConfig dev_config(
151 cuda_threads_per_vector,
152 cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);
154 mainCuda<Storage>(nGrid,nIter,dev_config);
Stokhos::StandardStorage< int, double > Storage
void mainHost(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Statically allocated storage class.
void mainCuda(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void setDocString(const char doc_string[])