Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMv.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // MP::Vector and Matrix
14 #include "KokkosSparse_CrsMatrix.hpp"
15 #include "KokkosSparse_spmv.hpp"
17 
18 // Compile-time loops
19 #include "Sacado_mpl_range_c.hpp"
20 #include "Sacado_mpl_for_each.hpp"
21 #include "Sacado_mpl_integral_c.hpp"
22 
23 // Utilities
24 #include "Kokkos_Timer.hpp"
25 
// Flatten a 3-D grid coordinate (i,j,k) on an N x N x N box into a single
// index.  k varies fastest, then j, then i:  index = k + N*(j + N*i).
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Offset contributed by the slowest-varying coordinate.
  const auto plane_offset = N * i ;
  // Offset of the (i,j) line within the whole box.
  const auto line_offset = N * ( j + plane_offset ) ;
  return k + line_offset ;
}
35 
36 inline
37 size_t generate_fem_graph( size_t N ,
38  std::vector< std::vector<size_t> > & graph )
39 {
40  graph.resize( N * N * N , std::vector<size_t>() );
41 
42  size_t total = 0 ;
43 
44  for ( int i = 0 ; i < (int) N ; ++i ) {
45  for ( int j = 0 ; j < (int) N ; ++j ) {
46  for ( int k = 0 ; k < (int) N ; ++k ) {
47 
48  const size_t row = map_fem_graph_coord((int)N,i,j,k);
49 
50  graph[row].reserve(27);
51 
52  for ( int ii = -1 ; ii < 2 ; ++ii ) {
53  for ( int jj = -1 ; jj < 2 ; ++jj ) {
54  for ( int kk = -1 ; kk < 2 ; ++kk ) {
55  if ( 0 <= i + ii && i + ii < (int) N &&
56  0 <= j + jj && j + jj < (int) N &&
57  0 <= k + kk && k + kk < (int) N ) {
58  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
59 
60  graph[row].push_back(col);
61  }
62  }}}
63  total += graph[row].size();
64  }}}
65 
66  return total ;
67 }
68 
69 template <typename StorageType, typename MultiplyTag>
70 std::vector<double>
71 test_mpvector_spmv(const int ensemble_length,
72  const int nGrid,
73  const int iterCount,
74  KokkosSparse::DeviceConfig dev_config,
75  MultiplyTag tag)
76 {
77  typedef StorageType storage_type;
78  typedef typename storage_type::value_type value_type;
81  typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
82  typedef Sacado::MP::Vector<StorageType> VectorType;
83  typedef Kokkos::LayoutRight Layout;
84  typedef Kokkos::View< VectorType*, Layout, execution_space > vector_type;
85  typedef KokkosSparse::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type;
86  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
87  typedef typename matrix_type::values_type matrix_values_type;
88 
89  //------------------------------
90  // Generate graph for "FEM" box structure:
91 
92  std::vector< std::vector<size_t> > fem_graph;
93  const size_t fem_length = nGrid * nGrid * nGrid;
94  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
95 
96  //------------------------------
97  // Generate input multivector:
98 
99  vector_type x =
100  vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length, ensemble_length);
101  vector_type y =
102  vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length, ensemble_length);
103 
104  //------------------------------
105 
106  matrix_graph_type matrix_graph =
107  Kokkos::create_staticcrsgraph<matrix_graph_type>(
108  std::string("test crs graph"), fem_graph);
109  matrix_values_type matrix_values =
110  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length, ensemble_length);
111  matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph);
112  matrix.dev_config = dev_config;
113 
114  //------------------------------
115  // Fill:
116 
117  {
118  // The VectorType may be dynamic (with allocated memory)
119  // so cannot pass a VectorType value to the device.
120  // Get an array-of-intrinsic View and fill that view.
121  typename vector_type::array_type xx( x );
122  typename vector_type::array_type yy( y );
123  typename matrix_values_type::array_type mm( matrix_values );
124 
125  Kokkos::deep_copy( xx , value_type(1.0) );
126  Kokkos::deep_copy( yy , value_type(1.0) );
127  Kokkos::deep_copy( mm , value_type(1.0) );
128  }
129 
130  //------------------------------
131 
132  // One iteration to warm up
133  Stokhos::multiply( matrix, x, y, tag );
134 
135  execution_space().fence();
136  Kokkos::Timer clock ;
137  for (int iter = 0; iter < iterCount; ++iter) {
138  Stokhos::multiply( matrix, x, y, tag );
139  }
140  execution_space().fence();
141 
142  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
143  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
144 
145  std::vector<double> perf(5);
146  perf[0] = fem_length;
147  perf[1] = ensemble_length;
148  perf[2] = graph_length;
149  perf[3] = seconds_per_iter;
150  perf[4] = flops / seconds_per_iter;
151  return perf;
152 }
153 
154 template <typename ScalarType, typename OrdinalType, typename Device>
155 std::vector<double>
156 test_scalar_spmv(const int ensemble_length,
157  const int nGrid,
158  const int iterCount,
159  KokkosSparse::DeviceConfig dev_config)
160 {
161  typedef ScalarType value_type;
162  typedef OrdinalType ordinal_type;
163  typedef Device execution_space;
164  typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
165  typedef Kokkos::View< value_type*, execution_space > vector_type;
166  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, device_type > matrix_type;
167  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
168  typedef typename matrix_type::values_type matrix_values_type;
169 
170  //------------------------------
171  // Generate graph for "FEM" box structure:
172 
173  std::vector< std::vector<size_t> > fem_graph;
174  const size_t fem_length = nGrid * nGrid * nGrid;
175  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
176 
177  //------------------------------
178  // Generate input multivector:
179 
180  std::vector<vector_type> x(ensemble_length);
181  std::vector<vector_type> y(ensemble_length);
182  for (int e=0; e<ensemble_length; ++e) {
183  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
184  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
185 
186  Kokkos::deep_copy( x[e] , value_type(1.0) );
187  Kokkos::deep_copy( y[e] , value_type(0.0) );
188  }
189 
190  //------------------------------
191 
192  std::vector<matrix_type> matrix(ensemble_length);
193  for (int e=0; e<ensemble_length; ++e) {
194  matrix_graph_type matrix_graph =
195  Kokkos::create_staticcrsgraph<matrix_graph_type>(
196  std::string("test crs graph"), fem_graph);
197  matrix_values_type matrix_values =
198  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
199  matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph);
200 
201  Kokkos::deep_copy( matrix[e].values , value_type(1.0) );
202  }
203 
204  //------------------------------
205 
206  // One iteration to warm up
207  for (int iter = 0; iter < iterCount; ++iter) {
208  for (int e=0; e<ensemble_length; ++e) {
209  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
210  }
211  }
212 
213  execution_space().fence();
214  Kokkos::Timer clock ;
215  for (int iter = 0; iter < iterCount; ++iter) {
216  for (int e=0; e<ensemble_length; ++e) {
217  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
218  }
219  }
220  execution_space().fence();
221 
222  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
223  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
224 
225  std::vector<double> perf(5);
226  perf[0] = fem_length;
227  perf[1] = ensemble_length;
228  perf[2] = graph_length;
229  perf[3] = seconds_per_iter;
230  perf[4] = flops / seconds_per_iter;
231  return perf;
232 }
233 
234 template <class Storage>
235 struct PerformanceDriverOp {
236  typedef typename Storage::value_type Scalar;
237  typedef typename Storage::ordinal_type Ordinal;
239  const int nGrid, nIter;
240  KokkosSparse::DeviceConfig dev_config;
241 
242  PerformanceDriverOp(const int nGrid_, const int nIter_,
243  KokkosSparse::DeviceConfig dev_config_) :
244  nGrid(nGrid_), nIter(nIter_), dev_config(dev_config_) {}
245 
246  template <typename ArgT>
247  void operator() (ArgT arg) const {
248  const int ensemble = ArgT::value;
249  typedef typename Storage::template apply_N<ensemble> NewStorageApply;
250  typedef typename NewStorageApply::type storage_type;
251 
252  const std::vector<double> perf_scalar =
253  test_scalar_spmv<Scalar,Ordinal,Device>(
254  ensemble, nGrid, nIter, dev_config );
255 
256  const std::vector<double> perf_mpvector =
257  test_mpvector_spmv<storage_type>(
259 
260  std::cout << nGrid << " , "
261  << perf_scalar[0] << " , "
262  << perf_scalar[2] << " , "
263  << perf_scalar[1] << " , "
264  << perf_scalar[3] << " , "
265  << perf_scalar[4] / perf_scalar[4] << " , "
266  << perf_scalar[4] << " , "
267  << perf_mpvector[4]/ perf_scalar[4] << " , "
268  << perf_mpvector[4] << " , "
269  << std::endl;
270  }
271 };
272 
273 template <class Storage, int entry_min, int entry_max, int entry_step>
274 void performance_test_driver( const int nGrid,
275  const int nIter,
276  KokkosSparse::DeviceConfig dev_config)
277 {
278  std::cout.precision(8);
279  std::cout << std::endl
280  << "\"Grid Size\" , "
281  << "\"FEM Size\" , "
282  << "\"FEM Graph Size\" , "
283  << "\"Ensemble Size\" , "
284  << "\"Scalar SpMv Time\" , "
285  << "\"Scalar SpMv Speedup\" , "
286  << "\"Scalar SpMv GFLOPS\" , "
287  << "\"MPVector SpMv Speedup\" , "
288  << "\"MPVector SpMv GFLOPS\" , "
289  << std::endl;
290 
291  // Loop over [entry_min, entry_max] vector entries per thread
292  typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
293  PerformanceDriverOp<Storage> op(nGrid, nIter, dev_config);
294  Sacado::mpl::for_each_no_kokkos<Range> f(op);
295 }
Stokhos::StandardStorage< int, double > storage_type
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Definition: TestEpetra.cpp:45
Stokhos_MV_Multiply_Op< Stokhos::DefaultMultiply > DefaultMultiply
Kokkos::DefaultExecutionSpace execution_space
std::vector< double > test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:156
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestEpetra.cpp:35
void operator()(ArgT arg) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< double > test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config, MultiplyTag tag)
Definition: TestSpMv.hpp:71
KokkosSparse::DeviceConfig dev_config
Definition: TestSpMv.hpp:240
Storage::execution_space Device
Definition: TestSpMv.hpp:238
Storage::value_type Scalar
Definition: TestSpMv.hpp:236
Storage::ordinal_type Ordinal
Definition: TestSpMv.hpp:237
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Kokkos::Example::FENL::DeviceConfig dev_config
PerformanceDriverOp(const int nGrid_, const int nIter_, KokkosSparse::DeviceConfig dev_config_)
Definition: TestSpMv.hpp:242
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(KokkosKernels::Experimental::Controls, const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)