Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMv.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // Tests
13 #include "TestSpMv.hpp"
14 
15 // Devices
16 #include "Kokkos_Core.hpp"
17 
18 // Utilities
21 #ifdef KOKKOS_ENABLE_CUDA
22 #include "cuda_runtime_api.h"
23 #endif
24 
// Host-side benchmark driver, templated on the storage type. Only the
// declaration is visible in this listing; the definition lives elsewhere.
// main() calls it from the Threads and OpenMP branches.
//   nGrid      - value of the "n" command-line option (mesh points per direction)
//   nIter      - value of the "ni" command-line option (multiply iterations)
//   dev_config - KokkosSparse::DeviceConfig assembled by main() for the backend
25 template <typename Storage>
26 void mainHost(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config);
// CUDA counterpart of mainHost with the same parameter list; main() calls it
// from the KOKKOS_ENABLE_CUDA branch. Definition not visible in this listing.
27 template <typename Storage>
28 void mainCuda(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config);
29 
30 int main(int argc, char *argv[])
31 {
32  bool success = true;
33  bool verbose = false;
34  try {
35 
36  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
37  const size_t num_cores_per_socket =
38  Kokkos::hwloc::get_available_cores_per_numa();
39  const size_t num_threads_per_core =
40  Kokkos::hwloc::get_available_threads_per_core();
41 
42  // Setup command line options
44  CLP.setDocString(
45  "This test performance of MP::Vector multiply routines.\n");
46  int nGrid = 32;
47  CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
48  int nIter = 10;
49  CLP.setOption("ni", &nIter, "Number of multiply iterations");
50  int num_cores = num_cores_per_socket * num_sockets;
51  CLP.setOption("cores", &num_cores,
52  "Number of CPU cores to use (defaults to all)");
53  int num_hyper_threads = num_threads_per_core;
54  CLP.setOption("hyperthreads", &num_hyper_threads,
55  "Number of hyper threads per core to use (defaults to all)");
56  int threads_per_vector = 1;
57  CLP.setOption("threads_per_vector", &threads_per_vector,
58  "Number of threads to use within each vector");
59 #ifdef KOKKOS_ENABLE_THREADS
60  bool threads = true;
61  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
62 #endif
63 #ifdef KOKKOS_ENABLE_OPENMP
64  bool openmp = true;
65  CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
66 #endif
67 #ifdef KOKKOS_ENABLE_CUDA
68  bool cuda = true;
69  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
70  int cuda_threads_per_vector = 16;
71  CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector,
72  "Number of Cuda threads to use within each vector");
73  int cuda_block_size = 0;
74  CLP.setOption("cuda_block_size", &cuda_block_size,
75  "Cuda block size (0 implies the default choice)");
76  int num_cuda_blocks = 0;
77  CLP.setOption("num_cuda_blocks", &num_cuda_blocks,
78  "Number of Cuda blocks (0 implies the default choice)");
79  int device_id = 0;
80  CLP.setOption("device", &device_id, "CUDA device ID");
81 #endif
82  CLP.parse( argc, argv );
83 
84  typedef int Ordinal;
85  typedef double Scalar;
86 
87 #ifdef KOKKOS_ENABLE_THREADS
88  if (threads) {
89  typedef Kokkos::Threads Device;
91 
92  Kokkos::InitializationSettings init_args;
93  init_args.set_num_threads(num_cores*num_hyper_threads);
94  Kokkos::initialize( init_args );
95 
96  std::cout << std::endl
97  << "Threads performance with " << num_cores*num_hyper_threads
98  << " threads:" << std::endl;
99 
100  KokkosSparse::DeviceConfig dev_config(num_cores,
101  threads_per_vector,
102  num_hyper_threads / threads_per_vector);
103 
104  mainHost<Storage>(nGrid, nIter, dev_config);
105 
106  Kokkos::finalize();
107  }
108 #endif
109 
110 #ifdef KOKKOS_ENABLE_OPENMP
111  if (openmp) {
112  typedef Kokkos::OpenMP Device;
114 
115  Kokkos::InitializationSettings init_args;
116  init_args.set_num_threads(num_cores*num_hyper_threads);
117  Kokkos::initialize( init_args );
118 
119  std::cout << std::endl
120  << "OpenMP performance with " << num_cores*num_hyper_threads
121  << " threads:" << std::endl;
122 
123  KokkosSparse::DeviceConfig dev_config(num_cores,
124  threads_per_vector,
125  num_hyper_threads / threads_per_vector);
126 
127  mainHost<Storage>(nGrid, nIter, dev_config);
128 
129  Kokkos::finalize();
130  }
131 #endif
132 
133 #ifdef KOKKOS_ENABLE_CUDA
134  if (cuda) {
135  typedef Kokkos::Cuda Device;
137 
138  Kokkos::InitializationSettings init_args;
139  init_args.set_device_id(device_id);
140  Kokkos::initialize( init_args );
141 
142  cudaDeviceProp deviceProp;
143  cudaGetDeviceProperties(&deviceProp, device_id);
144  std::cout << std::endl
145  << "CUDA performance for device " << device_id << " ("
146  << deviceProp.name << "):"
147  << std::endl;
148 
149  KokkosSparse::DeviceConfig dev_config(
150  num_cuda_blocks,
151  cuda_threads_per_vector,
152  cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);
153 
154  mainCuda<Storage>(nGrid,nIter,dev_config);
155 
156  Kokkos::finalize();
157  }
158 #endif
159 
160  }
161  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
162 
163  if (success)
164  return 0;
165  return -1;
166 }
Stokhos::StandardStorage< int, double > Storage
void mainHost(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Statically allocated storage class.
void mainCuda(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void setDocString(const char doc_string[])