Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KokkosSpMM/TestSpMM.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // Devices
13 #include "Kokkos_Core.hpp"
14 
15 // Utilities
18 #ifdef KOKKOS_ENABLE_CUDA
19 #include "cuda_runtime_api.h"
20 #endif
21 
/// Forward declaration of the benchmark driver that main() instantiates once
/// per enabled Kokkos device; the definition is not part of this listing.
///
/// main() forwards the values of its command line options unchanged:
/// \param nGrid          value of option "n"
/// \param nIter          value of option "ni"
/// \param ensemble_min   value of option "emin"
/// \param ensemble_max   value of option "emax"
/// \param ensemble_step  value of option "estep"
template <class Scalar, class Ordinal, class Device>
void
performance_test_driver(const Ordinal nGrid,
                        const Ordinal nIter,
                        const Ordinal ensemble_min,
                        const Ordinal ensemble_max,
                        const Ordinal ensemble_step);
28 
29 int main(int argc, char *argv[])
30 {
31  bool success = true;
32  bool verbose = false;
33  try {
34 
35 #ifdef KOKKOS_ENABLE_THREADS
36  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
37  const size_t num_cores_per_socket =
38  Kokkos::hwloc::get_available_cores_per_numa();
39  const size_t num_threads_per_core =
40  Kokkos::hwloc::get_available_threads_per_core();
41 #endif
42 
43  // Setup command line options
45  CLP.setDocString(
46  "This test performance of MP::Vector multiply routines.\n");
47  int nGrid = 32;
48  CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
49  int nIter = 10;
50  CLP.setOption("ni", &nIter, "Number of multiply iterations");
51  int ensemble_min = 4;
52  CLP.setOption("emin", &ensemble_min, "Staring ensemble size");
53  int ensemble_max = 24;
54  CLP.setOption("emax", &ensemble_max, "Stoping ensemble size");
55  int ensemble_step = 4;
56  CLP.setOption("estep", &ensemble_step, "Ensemble increment");
57 #ifdef KOKKOS_ENABLE_THREADS
58  bool threads = true;
59  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
60  int num_cores = num_cores_per_socket * num_sockets;
61  CLP.setOption("cores", &num_cores,
62  "Number of CPU cores to use (defaults to all)");
63  int num_hyper_threads = num_threads_per_core;
64  CLP.setOption("hyperthreads", &num_hyper_threads,
65  "Number of hyper threads per core to use (defaults to all)");
66 #endif
67 #ifdef KOKKOS_ENABLE_CUDA
68  bool cuda = true;
69  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
70  int device_id = 0;
71  CLP.setOption("device", &device_id, "CUDA device ID");
72 #endif
73  CLP.parse( argc, argv );
74 
75 #ifdef KOKKOS_ENABLE_THREADS
76  if (threads) {
77  typedef double Scalar;
78  typedef int Ordinal;
79  typedef Kokkos::Threads Device;
80 
81  Kokkos::InitializationSettings init_args;
82  init_args.set_num_threads(num_cores*num_hyper_threads);
83  Kokkos::initialize( init_args );
84 
85  std::cout << std::endl
86  << "Threads performance with " << num_cores*num_hyper_threads
87  << " threads:" << std::endl;
88 
89  performance_test_driver<Scalar,Ordinal,Device>(
90  nGrid, nIter, ensemble_min, ensemble_max, ensemble_step);
91 
92  Kokkos::finalize();
93  }
94 #endif
95 
96 #ifdef KOKKOS_ENABLE_CUDA
97  if (cuda) {
98  typedef double Scalar;
99  typedef int Ordinal;
100  typedef Kokkos::Cuda Device;
101 
102  Kokkos::InitializationSettings init_args;
103  init_args.set_device_id(device_id);
104  Kokkos::initialize( init_args );
105 
106  cudaDeviceProp deviceProp;
107  cudaGetDeviceProperties(&deviceProp, device_id);
108  std::cout << std::endl
109  << "CUDA performance for device " << device_id << " ("
110  << deviceProp.name << "):"
111  << std::endl;
112 
113  performance_test_driver<Scalar,Ordinal,Device>(
114  nGrid, nIter, ensemble_min, ensemble_max, ensemble_step);
115 
116  Kokkos::finalize();
117  }
118 #endif
119 
120  }
121  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
122 
123  if (success)
124  return 0;
125  return -1;
126 }
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setDocString(const char doc_string[])