Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMv.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include <iostream>
43 
44 // Tests
45 #include "TestSpMv.hpp"
46 
47 // Devices
48 #include "Kokkos_Core.hpp"
49 
50 // Utilities
53 #ifdef KOKKOS_ENABLE_CUDA
54 #include "cuda_runtime_api.h"
55 #endif
56 
// Forward declarations of the per-backend benchmark drivers. Both are
// templated on a Storage type and take the mesh size (nGrid), the number of
// multiply iterations (nIter) and a KokkosSparse::DeviceConfig by value.
// main() below calls them as mainHost<Storage>(...) for the Threads/OpenMP
// branches and mainCuda<Storage>(...) for the CUDA branch.
// NOTE(review): the definitions are not visible in this listing; presumably
// they live in per-backend translation units -- confirm.
57 template <typename Storage>
58 void mainHost(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config);
59 template <typename Storage>
60 void mainCuda(int nGrid, int nIter, KokkosSparse::DeviceConfig dev_config);
61 
// Entry point of the SpMv performance driver.
//
// Registers the command-line options on CLP, parses argc/argv, and then, for
// each Kokkos backend that is both compiled in (KOKKOS_ENABLE_THREADS,
// KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_CUDA) and enabled by its boolean option,
// initializes Kokkos, prints a banner, builds a KokkosSparse::DeviceConfig,
// calls mainHost<Storage> / mainCuda<Storage>, and finalizes Kokkos.
//
// Returns 0 if `success` is still true after the catch macro, -1 otherwise.
//
// NOTE(review): this listing uses `CLP` and `Storage` without a visible
// declaration (the embedded numbering skips 75, 122, 145 and 168); the
// declarations were presumably lost when the listing was extracted -- confirm
// against the original file before editing.
62 int main(int argc, char *argv[])
63 {
64  bool success = true;
 // `verbose` is never modified in the visible code; it is only forwarded to
 // the catch macro at the end of the function.
65  bool verbose = false;
66  try {
67 
 // Hardware topology queried through Kokkos::hwloc; used below only as the
 // default values of the "cores" and "hyperthreads" options.
68  const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
69  const size_t num_cores_per_socket =
70  Kokkos::hwloc::get_available_cores_per_numa();
71  const size_t num_threads_per_core =
72  Kokkos::hwloc::get_available_threads_per_core();
73 
74  // Setup command line options
76  CLP.setDocString(
77  "This test performance of MP::Vector multiply routines.\n");
78  int nGrid = 32;
79  CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
80  int nIter = 10;
81  CLP.setOption("ni", &nIter, "Number of multiply iterations");
 // size_t values are implicitly narrowed to int here and for
 // num_hyper_threads below.
82  int num_cores = num_cores_per_socket * num_sockets;
83  CLP.setOption("cores", &num_cores,
84  "Number of CPU cores to use (defaults to all)");
85  int num_hyper_threads = num_threads_per_core;
86  CLP.setOption("hyperthreads", &num_hyper_threads,
87  "Number of hyper threads per core to use (defaults to all)");
88  int threads_per_vector = 1;
89  CLP.setOption("threads_per_vector", &threads_per_vector,
90  "Number of threads to use within each vector");
 // Each backend gets an on/off switch (default on) only when that backend is
 // compiled in, so the corresponding variable exists only under its #ifdef.
91 #ifdef KOKKOS_ENABLE_THREADS
92  bool threads = true;
93  CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
94 #endif
95 #ifdef KOKKOS_ENABLE_OPENMP
96  bool openmp = true;
97  CLP.setOption("openmp", "no-openmp", &openmp, "Enable OpenMP device");
98 #endif
99 #ifdef KOKKOS_ENABLE_CUDA
100  bool cuda = true;
101  CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
102  int cuda_threads_per_vector = 16;
103  CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector,
104  "Number of Cuda threads to use within each vector");
105  int cuda_block_size = 0;
106  CLP.setOption("cuda_block_size", &cuda_block_size,
107  "Cuda block size (0 implies the default choice)");
108  int num_cuda_blocks = 0;
109  CLP.setOption("num_cuda_blocks", &num_cuda_blocks,
110  "Number of Cuda blocks (0 implies the default choice)");
111  int device_id = 0;
112  CLP.setOption("device", &device_id, "CUDA device ID");
113 #endif
 // NOTE(review): the value returned by parse() is discarded, so the run
 // continues regardless of what the parser reports -- confirm this is
 // intended (e.g. for a help request or an unrecognized option).
114  CLP.parse( argc, argv );
115 
 // Ordinal and Scalar are not referenced in the visible lines; presumably
 // they feed the Storage typedefs missing from this listing -- confirm.
116  typedef int Ordinal;
117  typedef double Scalar;
118 
 // ---- Kokkos::Threads backend ----
119 #ifdef KOKKOS_ENABLE_THREADS
120  if (threads) {
121  typedef Kokkos::Threads Device;
123 
 // Total thread count handed to Kokkos is cores * hyperthreads per core.
124  Kokkos::InitArguments init_args;
125  init_args.num_threads = num_cores*num_hyper_threads;
126  Kokkos::initialize( init_args );
127 
128  std::cout << std::endl
129  << "Threads performance with " << num_cores*num_hyper_threads
130  << " threads:" << std::endl;
131 
 // DeviceConfig arguments: num_cores, threads_per_vector, and the integer
 // quotient num_hyper_threads / threads_per_vector.
 // NOTE(review): unlike the CUDA branch, this division has no guard, so
 // passing --threads_per_vector=0 divides by zero.
132  KokkosSparse::DeviceConfig dev_config(num_cores,
133  threads_per_vector,
134  num_hyper_threads / threads_per_vector);
135 
136  mainHost<Storage>(nGrid, nIter, dev_config);
137 
 // NOTE(review): finalize() is inside the try block, so it is skipped if
 // mainHost throws.
138  Kokkos::finalize();
139  }
140 #endif
141 
 // ---- Kokkos::OpenMP backend (same sequence as the Threads branch) ----
142 #ifdef KOKKOS_ENABLE_OPENMP
143  if (openmp) {
144  typedef Kokkos::OpenMP Device;
146 
147  Kokkos::InitArguments init_args;
148  init_args.num_threads = num_cores*num_hyper_threads;
149  Kokkos::initialize( init_args );
150 
151  std::cout << std::endl
152  << "OpenMP performance with " << num_cores*num_hyper_threads
153  << " threads:" << std::endl;
154 
 // NOTE(review): same unguarded division by threads_per_vector as in the
 // Threads branch.
155  KokkosSparse::DeviceConfig dev_config(num_cores,
156  threads_per_vector,
157  num_hyper_threads / threads_per_vector);
158 
159  mainHost<Storage>(nGrid, nIter, dev_config);
160 
161  Kokkos::finalize();
162  }
163 #endif
164 
 // ---- Kokkos::Cuda backend ----
165 #ifdef KOKKOS_ENABLE_CUDA
166  if (cuda) {
167  typedef Kokkos::Cuda Device;
169 
 // Only the device id is set on the init arguments for this backend.
170  Kokkos::InitArguments init_args;
171  init_args.device_id = device_id;
172  Kokkos::initialize( init_args );
173 
 // The device properties are fetched only to print the device name.
 // NOTE(review): the status returned by cudaGetDeviceProperties is not
 // checked before deviceProp.name is printed.
174  cudaDeviceProp deviceProp;
175  cudaGetDeviceProperties(&deviceProp, device_id);
176  std::cout << std::endl
177  << "CUDA performance for device " << device_id << " ("
178  << deviceProp.name << "):"
179  << std::endl;
180 
 // Third argument is cuda_block_size / cuda_threads_per_vector, or 0 when
 // cuda_threads_per_vector is 0 (this avoids the division by zero).
181  KokkosSparse::DeviceConfig dev_config(
182  num_cuda_blocks,
183  cuda_threads_per_vector,
184  cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);
185 
186  mainCuda<Storage>(nGrid,nIter,dev_config);
187 
188  Kokkos::finalize();
189  }
190 #endif
191 
192  }
 // The macro supplies the catch clauses for the try block above; presumably
 // it reports to std::cerr and sets `success` to false on an exception --
 // confirm against the Teuchos documentation.
193  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
194 
 // Process exit status: 0 on success, -1 otherwise.
195  if (success)
196  return 0;
197  return -1;
198 }
Stokhos::StandardStorage< int, double > Storage
void mainHost(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Statically allocated storage class.
void mainCuda(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void setDocString(const char doc_string[])