Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMv.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // MP::Vector and Matrix
45 #include "KokkosSparse_CrsMatrix.hpp"
46 #include "KokkosSparse_spmv.hpp"
48 
49 // Compile-time loops
50 #include "Sacado_mpl_range_c.hpp"
51 #include "Sacado_mpl_for_each.hpp"
52 #include "Sacado_mpl_integral_c.hpp"
53 
54 // Utilities
55 #include "impl/Kokkos_Timer.hpp"
56 
// Linearise the grid coordinate (i, j, k) of an N x N x N box into a single
// index.  k varies fastest, then j, then i.
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Horner form of k + N*j + N*N*i
  const IntType plane_offset = N * i + j ;
  const IntType line_offset  = N * plane_offset ;
  return line_offset + k ;
}
66 
// Build the sparsity graph of an N x N x N box of nodes in which every node is
// coupled to itself and to each neighbour whose coordinates differ by at most
// one in every direction (up to 27 entries per row).
//
//   N      number of nodes along each edge of the box
//   graph  output; replaced by N*N*N rows, row r holding the column indices
//          coupled to node r in ascending order
//
// Returns the total number of entries stored over all rows.
inline
size_t generate_fem_graph( size_t N ,
                           std::vector< std::vector<size_t> > & graph )
{
  // assign() rather than resize(): rows already present in `graph` are
  // replaced, so calling this again on the same container neither appends
  // duplicate columns nor inflates the returned count.
  graph.assign( N * N * N , std::vector<size_t>() );

  size_t total = 0 ;

  // All index arithmetic is done in size_t; the stencil is clamped to the box
  // up front instead of testing signed offsets against the bounds.
  for ( size_t i = 0 ; i < N ; ++i ) {
    const size_t i_begin = ( i == 0 ) ? 0 : i - 1 ;
    const size_t i_end   = ( i + 1 < N ) ? i + 2 : N ;

    for ( size_t j = 0 ; j < N ; ++j ) {
      const size_t j_begin = ( j == 0 ) ? 0 : j - 1 ;
      const size_t j_end   = ( j + 1 < N ) ? j + 2 : N ;

      for ( size_t k = 0 ; k < N ; ++k ) {
        const size_t k_begin = ( k == 0 ) ? 0 : k - 1 ;
        const size_t k_end   = ( k + 1 < N ) ? k + 2 : N ;

        // Same linearisation as map_fem_graph_coord: k fastest, then j, then i.
        const size_t row = k + N * ( j + N * i );

        std::vector<size_t> & columns = graph[row];
        columns.reserve(27);

        for ( size_t ni = i_begin ; ni < i_end ; ++ni ) {
          for ( size_t nj = j_begin ; nj < j_end ; ++nj ) {
            for ( size_t nk = k_begin ; nk < k_end ; ++nk ) {
              columns.push_back( nk + N * ( nj + N * ni ) );
            }
          }
        }

        total += columns.size();
      }
    }
  }

  return total ;
}
99 
100 template <typename StorageType, typename MultiplyTag>
101 std::vector<double>
102 test_mpvector_spmv(const int ensemble_length,
103  const int nGrid,
104  const int iterCount,
105  KokkosSparse::DeviceConfig dev_config,
106  MultiplyTag tag)
107 {
108  typedef StorageType storage_type;
109  typedef typename storage_type::value_type value_type;
110  typedef typename storage_type::ordinal_type ordinal_type;
112  typedef Sacado::MP::Vector<StorageType> VectorType;
113  typedef Kokkos::LayoutRight Layout;
114  typedef Kokkos::View< VectorType*, Layout, execution_space > vector_type;
115  typedef KokkosSparse::CrsMatrix< VectorType, ordinal_type, execution_space > matrix_type;
116  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
117  typedef typename matrix_type::values_type matrix_values_type;
118 
119  //------------------------------
120  // Generate graph for "FEM" box structure:
121 
122  std::vector< std::vector<size_t> > fem_graph;
123  const size_t fem_length = nGrid * nGrid * nGrid;
124  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
125 
126  //------------------------------
127  // Generate input multivector:
128 
129  vector_type x =
130  vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length, ensemble_length);
131  vector_type y =
132  vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length, ensemble_length);
133 
134  //------------------------------
135 
136  matrix_graph_type matrix_graph =
137  Kokkos::create_staticcrsgraph<matrix_graph_type>(
138  std::string("test crs graph"), fem_graph);
139  matrix_values_type matrix_values =
140  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length, ensemble_length);
141  matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph);
142  matrix.dev_config = dev_config;
143 
144  //------------------------------
145  // Fill:
146 
147  {
148  // The VectorType may be dynamic (with allocated memory)
149  // so cannot pass a VectorType value to the device.
150  // Get an array-of-intrinsic View and fill that view.
151  typename vector_type::array_type xx( x );
152  typename vector_type::array_type yy( y );
153  typename matrix_values_type::array_type mm( matrix_values );
154 
155  Kokkos::deep_copy( xx , value_type(1.0) );
156  Kokkos::deep_copy( yy , value_type(1.0) );
157  Kokkos::deep_copy( mm , value_type(1.0) );
158  }
159 
160  //------------------------------
161 
162  // One iteration to warm up
163  Stokhos::multiply( matrix, x, y, tag );
164 
165  execution_space::fence();
166  Kokkos::Impl::Timer clock ;
167  for (int iter = 0; iter < iterCount; ++iter) {
168  Stokhos::multiply( matrix, x, y, tag );
169  }
170  execution_space::fence();
171 
172  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
173  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
174 
175  std::vector<double> perf(5);
176  perf[0] = fem_length;
177  perf[1] = ensemble_length;
178  perf[2] = graph_length;
179  perf[3] = seconds_per_iter;
180  perf[4] = flops / seconds_per_iter;
181  return perf;
182 }
183 
184 template <typename ScalarType, typename OrdinalType, typename Device>
185 std::vector<double>
186 test_scalar_spmv(const int ensemble_length,
187  const int nGrid,
188  const int iterCount,
189  KokkosSparse::DeviceConfig dev_config)
190 {
191  typedef ScalarType value_type;
192  typedef OrdinalType ordinal_type;
193  typedef Device execution_space;
194  typedef Kokkos::View< value_type*, execution_space > vector_type;
195  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
196  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
197  typedef typename matrix_type::values_type matrix_values_type;
198 
199  //------------------------------
200  // Generate graph for "FEM" box structure:
201 
202  std::vector< std::vector<size_t> > fem_graph;
203  const size_t fem_length = nGrid * nGrid * nGrid;
204  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
205 
206  //------------------------------
207  // Generate input multivector:
208 
209  std::vector<vector_type> x(ensemble_length);
210  std::vector<vector_type> y(ensemble_length);
211  for (int e=0; e<ensemble_length; ++e) {
212  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
213  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
214 
215  Kokkos::deep_copy( x[e] , value_type(1.0) );
216  Kokkos::deep_copy( y[e] , value_type(0.0) );
217  }
218 
219  //------------------------------
220 
221  std::vector<matrix_type> matrix(ensemble_length);
222  for (int e=0; e<ensemble_length; ++e) {
223  matrix_graph_type matrix_graph =
224  Kokkos::create_staticcrsgraph<matrix_graph_type>(
225  std::string("test crs graph"), fem_graph);
226  matrix_values_type matrix_values =
227  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
228  matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph);
229 
230  Kokkos::deep_copy( matrix[e].values , value_type(1.0) );
231  }
232 
233  //------------------------------
234 
235  // One iteration to warm up
236  for (int iter = 0; iter < iterCount; ++iter) {
237  for (int e=0; e<ensemble_length; ++e) {
238  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
239  }
240  }
241 
242  execution_space::fence();
243  Kokkos::Impl::Timer clock ;
244  for (int iter = 0; iter < iterCount; ++iter) {
245  for (int e=0; e<ensemble_length; ++e) {
246  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
247  }
248  }
249  execution_space::fence();
250 
251  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
252  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
253 
254  std::vector<double> perf(5);
255  perf[0] = fem_length;
256  perf[1] = ensemble_length;
257  perf[2] = graph_length;
258  perf[3] = seconds_per_iter;
259  perf[4] = flops / seconds_per_iter;
260  return perf;
261 }
262 
263 template <class Storage>
264 struct PerformanceDriverOp {
265  typedef typename Storage::value_type Scalar;
266  typedef typename Storage::ordinal_type Ordinal;
268  const int nGrid, nIter;
269  KokkosSparse::DeviceConfig dev_config;
270 
271  PerformanceDriverOp(const int nGrid_, const int nIter_,
272  KokkosSparse::DeviceConfig dev_config_) :
273  nGrid(nGrid_), nIter(nIter_), dev_config(dev_config_) {}
274 
275  template <typename ArgT>
276  void operator() (ArgT arg) const {
277  const int ensemble = ArgT::value;
278  typedef typename Storage::template apply_N<ensemble> NewStorageApply;
279  typedef typename NewStorageApply::type storage_type;
280 
281  const std::vector<double> perf_scalar =
282  test_scalar_spmv<Scalar,Ordinal,Device>(
283  ensemble, nGrid, nIter, dev_config );
284 
285  const std::vector<double> perf_mpvector =
286  test_mpvector_spmv<storage_type>(
288 
289  std::cout << nGrid << " , "
290  << perf_scalar[0] << " , "
291  << perf_scalar[2] << " , "
292  << perf_scalar[1] << " , "
293  << perf_scalar[3] << " , "
294  << perf_scalar[4] / perf_scalar[4] << " , "
295  << perf_scalar[4] << " , "
296  << perf_mpvector[4]/ perf_scalar[4] << " , "
297  << perf_mpvector[4] << " , "
298  << std::endl;
299  }
300 };
301 
302 template <class Storage, int entry_min, int entry_max, int entry_step>
303 void performance_test_driver( const int nGrid,
304  const int nIter,
305  KokkosSparse::DeviceConfig dev_config)
306 {
307  std::cout.precision(8);
308  std::cout << std::endl
309  << "\"Grid Size\" , "
310  << "\"FEM Size\" , "
311  << "\"FEM Graph Size\" , "
312  << "\"Ensemble Size\" , "
313  << "\"Scalar SpMv Time\" , "
314  << "\"Scalar SpMv Speedup\" , "
315  << "\"Scalar SpMv GFLOPS\" , "
316  << "\"MPVector SpMv Speedup\" , "
317  << "\"MPVector SpMv GFLOPS\" , "
318  << std::endl;
319 
320  // Loop over [entry_min, entry_max] vector entries per thread
321  typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
322  PerformanceDriverOp<Storage> op(nGrid, nIter, dev_config);
323  Sacado::mpl::for_each_no_kokkos<Range> f(op);
324 }
Stokhos::StandardStorage< int, double > storage_type
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Definition: TestEpetra.cpp:77
Stokhos_MV_Multiply_Op< Stokhos::DefaultMultiply > DefaultMultiply
Kokkos::DefaultExecutionSpace execution_space
std::vector< double > test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:186
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestEpetra.cpp:67
void operator()(ArgT arg) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< double > test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config, MultiplyTag tag)
Definition: TestSpMv.hpp:102
KokkosSparse::DeviceConfig dev_config
Definition: TestSpMv.hpp:269
Storage::execution_space Device
Definition: TestSpMv.hpp:267
Storage::value_type Scalar
Definition: TestSpMv.hpp:265
Storage::ordinal_type Ordinal
Definition: TestSpMv.hpp:266
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Kokkos::Example::FENL::DeviceConfig dev_config
PerformanceDriverOp(const int nGrid_, const int nIter_, KokkosSparse::DeviceConfig dev_config_)
Definition: TestSpMv.hpp:271
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)