16 #include "Kokkos_Core.hpp"
21 #ifdef KOKKOS_ENABLE_CUDA
22 #include "cuda_runtime_api.h"
26 #include <sys/types.h>
// Program entry point for the FEM assembly performance test.
//
// From the statements visible here, main:
//   1. registers command-line options on CLP and parses argc/argv,
//   2. builds and runs a shell command that attaches a VTune collector to
//      this process,
//   3. initializes Kokkos,
//   4. calls performance_test_driver<Device> inside each KOKKOS_ENABLE_*
//      preprocessor section (Threads, OpenMP, Cuda).
//
// NOTE(review): this listing is discontinuous -- the declarations of CLP,
// nGrid, nGridBegin, nGridEnd, nGridStep, nIter, print, check, threads,
// openmp, cuda, device_id and vtune, the matching #endif lines, and any
// runtime guards (e.g. on vtune/threads/openmp/cuda) are not visible.
// The comments below describe only what is shown.
//
// Parameters:
//   argc, argv  forwarded to CLP.parse() and to Kokkos::initialize().
29 int main(
int argc,
char *argv[])
// NOTE(review): the description text reads "This test performance of";
// it presumably should read "This tests the performance of" -- confirm
// before changing, since this string is printed to the user.
38 "This test performance of MP::Vector FEM assembly.\n");
// --- Mesh-size options -------------------------------------------------
// "n" stores into nGrid; per its help text, zero selects the
// n-begin / n-end / n-step range instead.
40 CLP.
setOption(
"n", &nGrid,
"Number of mesh points in each direction. Set to zero to use a range");
42 CLP.
setOption(
"n-begin", &nGridBegin,
"Beginning number of mesh points in each direction.");
44 CLP.
setOption(
"n-end", &nGridEnd,
"Ending number of mesh points in each direction.");
46 CLP.
setOption(
"n-step", &nGridStep,
"Increment in number of mesh points in each direction.");
// --- Run-control options -----------------------------------------------
48 CLP.
setOption(
"ni", &nIter,
"Number of assembly iterations");
// Boolean options take a pair of names: the first sets the flag to true,
// the second sets it to false.
50 CLP.
setOption(
"print",
"no-print", &print,
"Print debugging output");
52 CLP.
setOption(
"check",
"no-check", &check,
"Check correctness");
// Linear basis functions are the default; "--quadratic" switches it on.
53 bool quadratic =
false;
54 CLP.
setOption(
"quadratic",
"linear", &quadratic,
"Use quadratic basis functions");
// --- Core / hyperthread options ----------------------------------------
// NOTE(review): the start of the option call that owns the next help
// string is not visible in this listing.
57 "Number of CPU cores to use (defaults to all)");
// -1 is the initial value; the help text says the default is "all".
58 int num_hyper_threads = -1;
59 CLP.
setOption(
"hyperthreads", &num_hyper_threads,
60 "Number of hyper threads per core to use (defaults to all)");
// --- Backend enable flags (each registered only when the corresponding
// Kokkos backend macro is defined) --------------------------------------
61 #ifdef KOKKOS_ENABLE_THREADS
63 CLP.
setOption(
"threads",
"no-threads", &threads,
"Enable Threads device");
65 #ifdef KOKKOS_ENABLE_OPENMP
67 CLP.
setOption(
"openmp",
"no-openmp", &openmp,
"Enable OpenMP device");
69 #ifdef KOKKOS_ENABLE_CUDA
71 CLP.
setOption(
"cuda",
"no-cuda", &cuda,
"Enable Cuda device");
// device_id is later passed to cudaGetDeviceProperties and printed in the
// CUDA banner.
73 CLP.
setOption(
"device", &device_id,
"CUDA device ID.");
// --- Profiler attachment flag ------------------------------------------
76 CLP.
setOption(
"vtune",
"no-vtune", &vtune,
"connect to vtune");
// Parse the command line into the variables registered above.
// NOTE(review): the return value of parse() is not used in the visible
// code -- confirm whether parse errors are handled elsewhere.
77 CLP.
parse( argc, argv );
// --- VTune attachment --------------------------------------------------
// Builds a shell command in `cmd` that targets this process's PID and
// writes results to output_dir; the trailing " &" backgrounds it in the
// shell.
// NOTE(review): presumably this section is guarded by the `vtune` flag;
// the guard is not visible here.
86 std::stringstream cmd;
87 pid_t my_os_pid=getpid();
// NOTE(review): the value assigned to vtune_loc is not visible in this
// listing.
88 const std::string vtune_loc =
90 const std::string output_dir =
"./vtune/vtune.0";
92 <<
" -collect hotspots -result-dir " << output_dir
93 <<
" -target-pid " << my_os_pid <<
" &";
// Echo the command before running it.
94 std::cout << cmd.str() << std::endl;
// NOTE(review): the return value of system() is ignored, so a failure to
// launch the collector goes unreported.
95 system(cmd.str().c_str());
// Kokkos receives the same argc/argv that CLP parsed.
99 Kokkos::initialize(argc,argv);
// --- Threads backend run -----------------------------------------------
100 #ifdef KOKKOS_ENABLE_THREADS
102 typedef Kokkos::Threads Device;
// Banner reports the concurrency of a default-constructed Threads instance.
104 std::cout << std::endl
105 <<
"Threads performance with " << Kokkos::Threads().concurrency()
106 <<
" threads:" << std::endl;
108 performance_test_driver<Device>(
109 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// --- OpenMP backend run ------------------------------------------------
113 #ifdef KOKKOS_ENABLE_OPENMP
115 typedef Kokkos::OpenMP Device;
// Banner reports the concurrency of a default-constructed OpenMP instance.
117 std::cout << std::endl
118 <<
"OpenMP performance with " << Kokkos::OpenMP().concurrency()
119 <<
" threads:" << std::endl;
121 performance_test_driver<Device>(
122 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// --- CUDA backend run --------------------------------------------------
127 #ifdef KOKKOS_ENABLE_CUDA
129 typedef Kokkos::Cuda Device;
// Query the selected device so its name can be shown in the banner.
// NOTE(review): the cudaError_t returned by cudaGetDeviceProperties is not
// checked; on failure deviceProp.name would be read uninitialized.
131 cudaDeviceProp deviceProp;
132 cudaGetDeviceProperties(&deviceProp, device_id);
// NOTE(review): the banner text contains a duplicated word
// ("performance performance"); it is a runtime string and is left as-is.
133 std::cout << std::endl
134 <<
"CUDA performance performance with device " << device_id
136 << deviceProp.name <<
"):"
139 performance_test_driver<Device>(
140 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])