48 #include "Kokkos_Core.hpp" 
   53 #ifdef KOKKOS_ENABLE_CUDA 
   54 #include "cuda_runtime_api.h" 
   58 #include <sys/types.h> 
   61 int main(
int argc, 
char *argv[])
 
   67     const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
 
   68     const size_t num_cores_per_socket =
 
   69       Kokkos::hwloc::get_available_cores_per_numa();
 
   70     const size_t num_threads_per_core =
 
   71       Kokkos::hwloc::get_available_threads_per_core();
 
   76       "This test performance of MP::Vector FEM assembly.\n");
 
   78     CLP.
setOption(
"n", &nGrid, 
"Number of mesh points in each direction.  Set to zero to use a range");
 
   80     CLP.
setOption(
"n-begin", &nGridBegin, 
"Beginning number of mesh points in each direction.");
 
   82     CLP.
setOption(
"n-end", &nGridEnd, 
"Ending number of mesh points in each direction.");
 
   84     CLP.
setOption(
"n-step", &nGridStep, 
"Increment in number of mesh points in each direction.");
 
   86     CLP.
setOption(
"ni", &nIter, 
"Number of assembly iterations");
 
   88     CLP.
setOption(
"print", 
"no-print", &print, 
"Print debugging output");
 
   90     CLP.
setOption(
"check", 
"no-check", &check, 
"Check correctness");
 
   91     bool quadratic = 
false;
 
   92     CLP.
setOption(
"quadratic", 
"linear", &quadratic, 
"Use quadratic basis functions");
 
   93     int num_cores = num_cores_per_socket * num_sockets;
 
   95                   "Number of CPU cores to use (defaults to all)");
 
   96     int num_hyper_threads = num_threads_per_core;
 
   97     CLP.
setOption(
"hyperthreads", &num_hyper_threads,
 
   98                   "Number of hyper threads per core to use (defaults to all)");
 
   99 #ifdef KOKKOS_ENABLE_THREADS 
  101     CLP.
setOption(
"threads", 
"no-threads", &threads, 
"Enable Threads device");
 
  103 #ifdef KOKKOS_ENABLE_OPENMP 
  105     CLP.
setOption(
"openmp", 
"no-openmp", &openmp, 
"Enable OpenMP device");
 
  107 #ifdef KOKKOS_ENABLE_CUDA 
  109     CLP.
setOption(
"cuda", 
"no-cuda", &cuda, 
"Enable Cuda device");
 
  111     CLP.
setOption(
"device", &device_id, 
"CUDA device ID.");
 
  114     CLP.
setOption(
"vtune", 
"no-vtune", &vtune, 
"connect to vtune");
 
  115     CLP.
parse( argc, argv );
 
  124       std::stringstream cmd;
 
  125       pid_t my_os_pid=getpid();
 
  126       const std::string vtune_loc =
 
  128       const std::string output_dir = 
"./vtune/vtune.0";
 
  130           << 
" -collect hotspots -result-dir " << output_dir
 
  131           << 
" -target-pid " << my_os_pid << 
" &";
 
  132       std::cout << cmd.str() << std::endl;
 
  133       system(cmd.str().c_str());
 
  137     Kokkos::initialize(argc,argv);
 
  138 #ifdef KOKKOS_ENABLE_THREADS 
  140       typedef Kokkos::Threads Device;
 
  142       std::cout << std::endl
 
  143                 << 
"Threads performance with " << num_cores*num_hyper_threads
 
  144                 << 
" threads:" << std::endl;
 
  146       performance_test_driver<Device>(
 
  147         print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, 
check);
 
  151 #ifdef KOKKOS_ENABLE_OPENMP 
  153       typedef Kokkos::OpenMP Device;
 
  155       std::cout << std::endl
 
  156                 << 
"OpenMP performance with " << num_cores*num_hyper_threads
 
  157                 << 
" threads:" << std::endl;
 
  159       performance_test_driver<Device>(
 
  160         print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, 
check);
 
  164 #ifdef KOKKOS_ENABLE_CUDA 
  166       typedef Kokkos::Cuda Device;
 
  168       cudaDeviceProp deviceProp;
 
  169       cudaGetDeviceProperties(&deviceProp, device_id);
 
  170       std::cout << std::endl
 
  171                 << 
"CUDA performance performance with device " << device_id
 
  173                 << deviceProp.name << 
"):" 
  176       performance_test_driver<Device>(
 
  177         print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic, 
check);
 
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
 
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
 
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
 
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const 
 
void setDocString(const char doc_string[])