Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestMeanMultiply.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
#include <iostream>

// Devices
#include "Kokkos_Core.hpp"

// Utilities
#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_StandardCatchMacros.hpp"

#ifdef KOKKOS_ENABLE_CUDA
#include "cuda_runtime_api.h"
#endif
21 
/// Forward declaration of the performance test driver, templated on the
/// scalar type, the ordinal (index) type and the Kokkos device.
///
/// main() below calls it once per selected device and passes, in order, the
/// values of its "n", "ni", "order", "dmin" and "dmax" command-line options.
/// The definition is not part of this listing.
template <typename Scalar, typename Ordinal, typename Device>
void performance_test_driver(const Ordinal nGrid,
                             const Ordinal nIter,
                             const Ordinal order,
                             const Ordinal min_var,
                             const Ordinal max_var);
28 
29 int main(int argc, char *argv[])
30 {
31  bool success = true;
32  bool verbose = false;
33  try {
34 
35  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
36  const size_t num_cores_per_socket =
37  Kokkos::hwloc::get_available_cores_per_numa();
38  // const size_t num_threads_per_core =
39  // Kokkos::hwloc::get_available_threads_per_core();
40  // const size_t num_threads =
41  // num_sockets * num_cores_per_socket * num_threads_per_core;
42 
43  // Setup command line options
45  CLP.setDocString(
46  "This test performance of mean-based UQ::PCE multiply routines.\n");
47  int nGrid = 32;
48  CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
49  int nIter = 10;
50  CLP.setOption("ni", &nIter, "Number of multiply iterations");
51  int order = 3;
52  CLP.setOption("order", &order, "Polynomial order");
53  int dim_min = 1;
54  CLP.setOption("dmin", &dim_min, "Starting stochastic dimension");
55  int dim_max = 12;
56  CLP.setOption("dmax", &dim_max, "Stopping stochastic dimension");
57  int numa = num_sockets;
58  CLP.setOption("numa", &numa, "Number of numa nodes");
59  int cores = num_cores_per_socket;
60  CLP.setOption("cores", &cores, "Cores per numa node");
61 #ifdef KOKKOS_ENABLE_THREADS
62  int threads = 0;
63  CLP.setOption("threads", &threads, "Number of threads for Threads device");
64 #endif
65 #ifdef KOKKOS_ENABLE_OPENMP
66  int openmp = 0;
67  CLP.setOption("openmp", &openmp, "Number of threads for OpenMP device");
68 #endif
69 #ifdef KOKKOS_ENABLE_CUDA
70  bool cuda = false;
71  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
72  int device_id = 0;
73  CLP.setOption("device", &device_id, "CUDA device ID");
74 #endif
75  CLP.parse( argc, argv );
76 
77  typedef int Ordinal;
78  typedef double Scalar;
79 
80 #ifdef KOKKOS_ENABLE_THREADS
81  if (threads > 0) {
82  typedef Kokkos::Threads Device;
83 
84  Kokkos::InitializationSettings init_args;
85  init_args.set_num_threads(threads);
86  Kokkos::initialize( init_args );
87 
88  std::cout << std::endl
89  << "Threads performance with " << threads
90  << " threads, " << numa << " numas, " << cores
91  << " cores/numa:" << std::endl;
92 
93  performance_test_driver<Scalar,Ordinal,Device>(
94  nGrid, nIter, order, dim_min, dim_max);
95 
96  Kokkos::finalize();
97  }
98 #endif
99 
100 #ifdef KOKKOS_ENABLE_OPENMP
101  if (openmp > 0) {
102  typedef Kokkos::OpenMP Device;
103 
104  Kokkos::InitializationSettings init_args;
105  init_args.set_num_threads(openmp);
106  Kokkos::initialize( init_args );
107 
108  std::cout << std::endl
109  << "OpenMP performance with " << openmp
110  << " threads, " << numa << " numas, " << cores
111  << " cores/numa:" << std::endl;
112 
113  performance_test_driver<Scalar,Ordinal,Device>(
114  nGrid, nIter, order, dim_min, dim_max);
115 
116  Kokkos::finalize();
117  }
118 #endif
119 
120 #ifdef KOKKOS_ENABLE_CUDA
121  if (cuda) {
122  typedef Kokkos::Cuda Device;
123 
124  Kokkos::InitializationSettings init_args;
125  init_args.set_device_id(device_id);
126  Kokkos::initialize( init_args );
127 
128  cudaDeviceProp deviceProp;
129  cudaGetDeviceProperties(&deviceProp, device_id);
130  std::cout << std::endl
131  << "CUDA performance for device " << device_id << " ("
132  << deviceProp.name << "):"
133  << std::endl;
134 
135  performance_test_driver<Scalar,Ordinal,Device>(
136  nGrid, nIter, order, dim_min, dim_max);
137 
138  Kokkos::finalize();
139  }
140 #endif
141 
142  }
143  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
144 
145  if (success)
146  return 0;
147  return -1;
148 }
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setDocString(const char doc_string[])