Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KokkosSpMM/TestSpMM.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
#include <iostream>

// Devices
#include "Kokkos_Core.hpp"

// Utilities
#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_StandardCatchMacros.hpp"

#ifdef KOKKOS_ENABLE_CUDA
#include "cuda_runtime_api.h"
#endif
21 
/// Forward declaration of the benchmark driver that main() instantiates once
/// per enabled Kokkos device. The definition is not part of this listing.
///
/// Parameter meanings below follow the command-line help text in main():
///
/// @tparam Scalar   Value type (main() uses double).
/// @tparam Ordinal  Index type (main() uses int).
/// @tparam Device   Kokkos device to run on (Kokkos::Threads or Kokkos::Cuda).
///
/// @param nGrid          Number of mesh points in each direction.
/// @param nIter          Number of multiply iterations.
/// @param ensemble_min   Starting ensemble size.
/// @param ensemble_max   Stopping ensemble size.
/// @param ensemble_step  Ensemble size increment.
template <class Scalar, class Ordinal, class Device>
void performance_test_driver(
  const Ordinal nGrid,
  const Ordinal nIter,
  const Ordinal ensemble_min,
  const Ordinal ensemble_max,
  const Ordinal ensemble_step );
28 
29 int main(int argc, char *argv[])
30 {
31  bool success = true;
32  bool verbose = false;
33  try {
34 
35 #ifdef KOKKOS_ENABLE_THREADS
36  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
37  const size_t num_cores_per_socket =
38  Kokkos::hwloc::get_available_cores_per_numa();
39  const size_t num_threads_per_core =
40  Kokkos::hwloc::get_available_threads_per_core();
41 #endif
42 
43  // Setup command line options
45  CLP.setDocString(
46  "This test performance of MP::Vector multiply routines.\n");
47  int nGrid = 32;
48  CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
49  int nIter = 10;
50  CLP.setOption("ni", &nIter, "Number of multiply iterations");
51  int ensemble_min = 4;
52  CLP.setOption("emin", &ensemble_min, "Staring ensemble size");
53  int ensemble_max = 24;
54  CLP.setOption("emax", &ensemble_max, "Stoping ensemble size");
55  int ensemble_step = 4;
56  CLP.setOption("estep", &ensemble_step, "Ensemble increment");
57 #ifdef KOKKOS_ENABLE_THREADS
58  bool threads = true;
59  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
60  int num_cores = num_cores_per_socket * num_sockets;
61  CLP.setOption("cores", &num_cores,
62  "Number of CPU cores to use (defaults to all)");
63  int num_hyper_threads = num_threads_per_core;
64  CLP.setOption("hyperthreads", &num_hyper_threads,
65  "Number of hyper threads per core to use (defaults to all)");
66 #endif
67 #ifdef KOKKOS_ENABLE_CUDA
68  bool cuda = true;
69  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
70  int device_id = 0;
71  CLP.setOption("device", &device_id, "CUDA device ID");
72 #endif
73  CLP.parse( argc, argv );
74 
75  typedef int Ordinal;
76  typedef double Scalar;
77 
78 #ifdef KOKKOS_ENABLE_THREADS
79  if (threads) {
80  typedef Kokkos::Threads Device;
81 
82  Kokkos::InitializationSettings init_args;
83  init_args.set_num_threads(num_cores*num_hyper_threads);
84  Kokkos::initialize( init_args );
85 
86  std::cout << std::endl
87  << "Threads performance with " << num_cores*num_hyper_threads
88  << " threads:" << std::endl;
89 
90  performance_test_driver<Scalar,Ordinal,Device>(
91  nGrid, nIter, ensemble_min, ensemble_max, ensemble_step);
92 
93  Kokkos::finalize();
94  }
95 #endif
96 
97 #ifdef KOKKOS_ENABLE_CUDA
98  if (cuda) {
99  typedef Kokkos::Cuda Device;
100 
101  Kokkos::InitializationSettings init_args;
102  init_args.set_device_id(device_id);
103  Kokkos::initialize( init_args );
104 
105  cudaDeviceProp deviceProp;
106  cudaGetDeviceProperties(&deviceProp, device_id);
107  std::cout << std::endl
108  << "CUDA performance for device " << device_id << " ("
109  << deviceProp.name << "):"
110  << std::endl;
111 
112  performance_test_driver<Scalar,Ordinal,Device>(
113  nGrid, nIter, ensemble_min, ensemble_max, ensemble_step);
114 
115  Kokkos::finalize();
116  }
117 #endif
118 
119  }
120  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
121 
122  if (success)
123  return 0;
124  return -1;
125 }
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setDocString(const char doc_string[])