16 #include "Kokkos_Core.hpp"
21 #ifdef KOKKOS_ENABLE_CUDA
22 #include "cuda_runtime_api.h"
26 #include <sys/types.h>
// Entry point of the performance driver.
//
// As far as this listing shows, main():
//   1. queries the hardware topology through Kokkos::hwloc,
//   2. registers command-line options on `CLP` and parses argc/argv,
//   3. builds and runs a shell command that attaches a profiler to this process,
//   4. initializes Kokkos, and
//   5. calls performance_test_driver<Device>() for the Threads, OpenMP and
//      Cuda devices, each inside its KOKKOS_ENABLE_* preprocessor section.
//
// NOTE(review): this listing has gaps (the embedded listing numbers jump).
// Braces, #endif lines, the declarations of nGrid, nGridBegin, nGridEnd,
// nGridStep, nIter, print, check, threads, openmp, cuda, device_id and vtune,
// and any run-time guards around the sections below are not visible here.
// Confirm against the complete file before relying on these comments.
29 int main(
int argc,
char *argv[])
// Hardware topology as reported by Kokkos::hwloc; used below as the defaults
// for the core and hyper-thread counts.
35 const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
36 const size_t num_cores_per_socket =
37 Kokkos::hwloc::get_available_cores_per_numa();
38 const size_t num_threads_per_core =
39 Kokkos::hwloc::get_available_threads_per_core();
// Tail of a call whose head is not visible in this listing -- presumably the
// documentation string handed to CLP (TODO confirm).
// NOTE(review): "This test performance of" looks like a typo for
// "This tests the performance of".
44 "This test performance of MP::Vector FEM assembly.\n");
// --- Mesh-size options: a single size (n) or a range (n-begin/n-end/n-step).
46 CLP.
setOption(
"n", &nGrid,
"Number of mesh points in each direction. Set to zero to use a range");
48 CLP.
setOption(
"n-begin", &nGridBegin,
"Beginning number of mesh points in each direction.");
50 CLP.
setOption(
"n-end", &nGridEnd,
"Ending number of mesh points in each direction.");
52 CLP.
setOption(
"n-step", &nGridStep,
"Increment in number of mesh points in each direction.");
// --- Iteration count for the assembly.
54 CLP.
setOption(
"ni", &nIter,
"Number of assembly iterations");
// --- Boolean toggles: each takes a "true" name and a "false" name.
56 CLP.
setOption(
"print",
"no-print", &print,
"Print debugging output");
58 CLP.
setOption(
"check",
"no-check", &check,
"Check correctness");
// Basis selection: defaults to false, i.e. the "linear" spelling of the option.
59 bool quadratic =
false;
60 CLP.
setOption(
"quadratic",
"linear", &quadratic,
"Use quadratic basis functions");
// Default core count = cores per NUMA region * number of NUMA regions.
// NOTE(review): the size_t product is narrowed to int here.
61 int num_cores = num_cores_per_socket * num_sockets;
// Tail of a setOption call whose head is not visible in this listing --
// presumably the option bound to num_cores (TODO confirm).
63 "Number of CPU cores to use (defaults to all)");
// Default hyper-thread count = threads per core reported by hwloc.
64 int num_hyper_threads = num_threads_per_core;
65 CLP.
setOption(
"hyperthreads", &num_hyper_threads,
66 "Number of hyper threads per core to use (defaults to all)");
// --- Per-device enable flags; each is registered only when the matching
// KOKKOS_ENABLE_* macro is defined.
67 #ifdef KOKKOS_ENABLE_THREADS
69 CLP.
setOption(
"threads",
"no-threads", &threads,
"Enable Threads device");
71 #ifdef KOKKOS_ENABLE_OPENMP
73 CLP.
setOption(
"openmp",
"no-openmp", &openmp,
"Enable OpenMP device");
75 #ifdef KOKKOS_ENABLE_CUDA
77 CLP.
setOption(
"cuda",
"no-cuda", &cuda,
"Enable Cuda device");
79 CLP.
setOption(
"device", &device_id,
"CUDA device ID.");
// --- Profiler attachment toggle.
82 CLP.
setOption(
"vtune",
"no-vtune", &vtune,
"connect to vtune");
// Parse the command line into the variables registered above.
83 CLP.
parse( argc, argv );
// Build a shell command that targets this process by PID and writes its
// results to output_dir. The trailing " &" asks the shell to run the command
// in the background. The command is echoed to stdout and then executed.
// NOTE(review): presumably guarded by the `vtune` flag -- the guard is not
// visible in this listing. The value of vtune_loc and the start of the stream
// insertion into `cmd` are also not visible.
92 std::stringstream cmd;
93 pid_t my_os_pid=getpid();
94 const std::string vtune_loc =
96 const std::string output_dir =
"./vtune/vtune.0";
98 <<
" -collect hotspots -result-dir " << output_dir
99 <<
" -target-pid " << my_os_pid <<
" &";
100 std::cout << cmd.str() << std::endl;
// NOTE(review): the result of system() is discarded on this line, so a failed
// launch is not detected here.
101 system(cmd.str().c_str());
// Start Kokkos, forwarding the original command-line arguments.
105 Kokkos::initialize(argc,argv);
// --- Threads device run: print a banner with the total thread count
// (num_cores * num_hyper_threads), then run the driver.
// NOTE(review): presumably guarded by the `threads` flag -- not visible here.
106 #ifdef KOKKOS_ENABLE_THREADS
108 typedef Kokkos::Threads Device;
110 std::cout << std::endl
111 <<
"Threads performance with " << num_cores*num_hyper_threads
112 <<
" threads:" << std::endl;
114 performance_test_driver<Device>(
115 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// --- OpenMP device run: same banner and driver call as the Threads section.
// NOTE(review): presumably guarded by the `openmp` flag -- not visible here.
119 #ifdef KOKKOS_ENABLE_OPENMP
121 typedef Kokkos::OpenMP Device;
123 std::cout << std::endl
124 <<
"OpenMP performance with " << num_cores*num_hyper_threads
125 <<
" threads:" << std::endl;
127 performance_test_driver<Device>(
128 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// --- Cuda device run: look up the properties of device_id so the banner can
// include the device name, then run the driver.
// NOTE(review): presumably guarded by the `cuda` flag -- not visible here.
132 #ifdef KOKKOS_ENABLE_CUDA
134 typedef Kokkos::Cuda Device;
136 cudaDeviceProp deviceProp;
// NOTE(review): the return code of cudaGetDeviceProperties is discarded on this
// line; if the call failed, deviceProp.name below would be read without having
// been filled in.
137 cudaGetDeviceProperties(&deviceProp, device_id);
// NOTE(review): the banner text repeats the word "performance".
138 std::cout << std::endl
139 <<
"CUDA performance performance with device " << device_id
141 << deviceProp.name <<
"):"
144 performance_test_driver<Device>(
145 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])