48 #include "Kokkos_Core.hpp"
53 #ifdef KOKKOS_ENABLE_CUDA
54 #include "cuda_runtime_api.h"
58 #include <sys/types.h>
61 int main(
int argc,
char *argv[])
70 "This test performance of MP::Vector FEM assembly.\n");
72 CLP.
setOption(
"n", &nGrid,
"Number of mesh points in each direction. Set to zero to use a range");
74 CLP.
setOption(
"n-begin", &nGridBegin,
"Beginning number of mesh points in each direction.");
76 CLP.
setOption(
"n-end", &nGridEnd,
"Ending number of mesh points in each direction.");
78 CLP.
setOption(
"n-step", &nGridStep,
"Increment in number of mesh points in each direction.");
80 CLP.
setOption(
"ni", &nIter,
"Number of assembly iterations");
82 CLP.
setOption(
"print",
"no-print", &print,
"Print debugging output");
84 CLP.
setOption(
"check",
"no-check", &check,
"Check correctness");
85 bool quadratic =
false;
86 CLP.
setOption(
"quadratic",
"linear", &quadratic,
"Use quadratic basis functions");
89 "Number of CPU cores to use (defaults to all)");
90 int num_hyper_threads = -1;
91 CLP.
setOption(
"hyperthreads", &num_hyper_threads,
92 "Number of hyper threads per core to use (defaults to all)");
93 #ifdef KOKKOS_ENABLE_THREADS
95 CLP.
setOption(
"threads",
"no-threads", &threads,
"Enable Threads device");
97 #ifdef KOKKOS_ENABLE_OPENMP
99 CLP.
setOption(
"openmp",
"no-openmp", &openmp,
"Enable OpenMP device");
101 #ifdef KOKKOS_ENABLE_CUDA
103 CLP.
setOption(
"cuda",
"no-cuda", &cuda,
"Enable Cuda device");
105 CLP.
setOption(
"device", &device_id,
"CUDA device ID.");
108 CLP.
setOption(
"vtune",
"no-vtune", &vtune,
"connect to vtune");
109 CLP.
parse( argc, argv );
118 std::stringstream cmd;
119 pid_t my_os_pid=getpid();
120 const std::string vtune_loc =
122 const std::string output_dir =
"./vtune/vtune.0";
124 <<
" -collect hotspots -result-dir " << output_dir
125 <<
" -target-pid " << my_os_pid <<
" &";
126 std::cout << cmd.str() << std::endl;
127 system(cmd.str().c_str());
131 Kokkos::initialize(argc,argv);
132 #ifdef KOKKOS_ENABLE_THREADS
134 typedef Kokkos::Threads Device;
136 std::cout << std::endl
137 <<
"Threads performance with " << Kokkos::Threads::concurrency()
138 <<
" threads:" << std::endl;
140 performance_test_driver<Device>(
141 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
145 #ifdef KOKKOS_ENABLE_OPENMP
147 typedef Kokkos::OpenMP Device;
149 std::cout << std::endl
150 <<
"OpenMP performance with " << Kokkos::OpenMP::concurrency()
151 <<
" threads:" << std::endl;
153 performance_test_driver<Device>(
154 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
159 #ifdef KOKKOS_ENABLE_CUDA
161 typedef Kokkos::Cuda Device;
163 cudaDeviceProp deviceProp;
164 cudaGetDeviceProperties(&deviceProp, device_id);
165 std::cout << std::endl
166 <<
"CUDA performance performance with device " << device_id
168 << deviceProp.name <<
"):"
171 performance_test_driver<Device>(
172 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, check);
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
void setDocString(const char doc_string[])