Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMv.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // MP::Vector and Matrix
45 #include "KokkosSparse_CrsMatrix.hpp"
46 #include "KokkosSparse_spmv.hpp"
48 
49 // Compile-time loops
50 #include "Sacado_mpl_range_c.hpp"
51 #include "Sacado_mpl_for_each.hpp"
52 #include "Sacado_mpl_integral_c.hpp"
53 
54 // Utilities
55 #include "impl/Kokkos_Timer.hpp"
56 
/// Flatten a 3-D grid coordinate (i,j,k) on an N x N x N box into a single
/// node index, with k varying fastest and i slowest.
///
/// \param N  number of nodes along each axis
/// \param i  slowest-varying coordinate
/// \param j  middle coordinate
/// \param k  fastest-varying coordinate
/// \return   k + N * ( j + N * i )
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Horner-style evaluation of i*N^2 + j*N + k.
  return ( i * N + j ) * N + k ;
}
66 
/// Build the node adjacency graph of an N x N x N structured grid.
///
/// Node (i,j,k) is numbered row = k + N * ( j + N * i ), the same numbering
/// map_fem_graph_coord produces.  Each row lists the node itself plus every
/// in-bounds node whose i, j and k each differ by at most one, so a row holds
/// at most 27 columns, stored in ascending order.
///
/// \param N      number of nodes along each axis
/// \param graph  output; any previous content is discarded and replaced by
///               N*N*N rows of column indices
/// \return       total number of entries summed over all rows
inline
size_t generate_fem_graph( size_t N ,
                           std::vector< std::vector<size_t> > & graph )
{
  // assign() rather than resize(): rows already present in a caller-supplied
  // graph must not leak into the result or inflate the returned total.
  graph.assign( N * N * N , std::vector<size_t>() );

  size_t total = 0 ;

  // All index arithmetic stays in size_t; the neighbor window is clamped to
  // the box instead of testing signed offsets, so nothing is narrowed to int.
  for ( size_t i = 0 ; i < N ; ++i ) {
    const size_t i_first = ( i == 0 ) ? 0 : i - 1 ;
    const size_t i_last  = ( i + 1 < N ) ? i + 1 : i ;

    for ( size_t j = 0 ; j < N ; ++j ) {
      const size_t j_first = ( j == 0 ) ? 0 : j - 1 ;
      const size_t j_last  = ( j + 1 < N ) ? j + 1 : j ;

      for ( size_t k = 0 ; k < N ; ++k ) {
        const size_t k_first = ( k == 0 ) ? 0 : k - 1 ;
        const size_t k_last  = ( k + 1 < N ) ? k + 1 : k ;

        std::vector<size_t> & columns = graph[ k + N * ( j + N * i ) ];
        columns.reserve(27);

        for ( size_t ni = i_first ; ni <= i_last ; ++ni ) {
          for ( size_t nj = j_first ; nj <= j_last ; ++nj ) {
            for ( size_t nk = k_first ; nk <= k_last ; ++nk ) {
              columns.push_back( nk + N * ( nj + N * ni ) );
            }
          }
        }

        total += columns.size();
      }
    }
  }

  return total ;
}
99 
100 template <typename StorageType, typename MultiplyTag>
101 std::vector<double>
102 test_mpvector_spmv(const int ensemble_length,
103  const int nGrid,
104  const int iterCount,
105  KokkosSparse::DeviceConfig dev_config,
106  MultiplyTag tag)
107 {
108  typedef StorageType storage_type;
109  typedef typename storage_type::value_type value_type;
110  typedef typename storage_type::ordinal_type ordinal_type;
112  typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
113  typedef Sacado::MP::Vector<StorageType> VectorType;
114  typedef Kokkos::LayoutRight Layout;
115  typedef Kokkos::View< VectorType*, Layout, execution_space > vector_type;
116  typedef KokkosSparse::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type;
117  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
118  typedef typename matrix_type::values_type matrix_values_type;
119 
120  //------------------------------
121  // Generate graph for "FEM" box structure:
122 
123  std::vector< std::vector<size_t> > fem_graph;
124  const size_t fem_length = nGrid * nGrid * nGrid;
125  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
126 
127  //------------------------------
128  // Generate input multivector:
129 
130  vector_type x =
131  vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length, ensemble_length);
132  vector_type y =
133  vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length, ensemble_length);
134 
135  //------------------------------
136 
137  matrix_graph_type matrix_graph =
138  Kokkos::create_staticcrsgraph<matrix_graph_type>(
139  std::string("test crs graph"), fem_graph);
140  matrix_values_type matrix_values =
141  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length, ensemble_length);
142  matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph);
143  matrix.dev_config = dev_config;
144 
145  //------------------------------
146  // Fill:
147 
148  {
149  // The VectorType may be dynamic (with allocated memory)
150  // so cannot pass a VectorType value to the device.
151  // Get an array-of-intrinsic View and fill that view.
152  typename vector_type::array_type xx( x );
153  typename vector_type::array_type yy( y );
154  typename matrix_values_type::array_type mm( matrix_values );
155 
156  Kokkos::deep_copy( xx , value_type(1.0) );
157  Kokkos::deep_copy( yy , value_type(1.0) );
158  Kokkos::deep_copy( mm , value_type(1.0) );
159  }
160 
161  //------------------------------
162 
163  // One iteration to warm up
164  Stokhos::multiply( matrix, x, y, tag );
165 
166  execution_space().fence();
167  Kokkos::Impl::Timer clock ;
168  for (int iter = 0; iter < iterCount; ++iter) {
169  Stokhos::multiply( matrix, x, y, tag );
170  }
171  execution_space().fence();
172 
173  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
174  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
175 
176  std::vector<double> perf(5);
177  perf[0] = fem_length;
178  perf[1] = ensemble_length;
179  perf[2] = graph_length;
180  perf[3] = seconds_per_iter;
181  perf[4] = flops / seconds_per_iter;
182  return perf;
183 }
184 
185 template <typename ScalarType, typename OrdinalType, typename Device>
186 std::vector<double>
187 test_scalar_spmv(const int ensemble_length,
188  const int nGrid,
189  const int iterCount,
190  KokkosSparse::DeviceConfig dev_config)
191 {
192  typedef ScalarType value_type;
193  typedef OrdinalType ordinal_type;
194  typedef Device execution_space;
195  typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
196  typedef Kokkos::View< value_type*, execution_space > vector_type;
197  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, device_type > matrix_type;
198  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
199  typedef typename matrix_type::values_type matrix_values_type;
200 
201  //------------------------------
202  // Generate graph for "FEM" box structure:
203 
204  std::vector< std::vector<size_t> > fem_graph;
205  const size_t fem_length = nGrid * nGrid * nGrid;
206  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
207 
208  //------------------------------
209  // Generate input multivector:
210 
211  std::vector<vector_type> x(ensemble_length);
212  std::vector<vector_type> y(ensemble_length);
213  for (int e=0; e<ensemble_length; ++e) {
214  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
215  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
216 
217  Kokkos::deep_copy( x[e] , value_type(1.0) );
218  Kokkos::deep_copy( y[e] , value_type(0.0) );
219  }
220 
221  //------------------------------
222 
223  std::vector<matrix_type> matrix(ensemble_length);
224  for (int e=0; e<ensemble_length; ++e) {
225  matrix_graph_type matrix_graph =
226  Kokkos::create_staticcrsgraph<matrix_graph_type>(
227  std::string("test crs graph"), fem_graph);
228  matrix_values_type matrix_values =
229  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
230  matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph);
231 
232  Kokkos::deep_copy( matrix[e].values , value_type(1.0) );
233  }
234 
235  //------------------------------
236 
237  // One iteration to warm up
238  for (int iter = 0; iter < iterCount; ++iter) {
239  for (int e=0; e<ensemble_length; ++e) {
240  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
241  }
242  }
243 
244  execution_space().fence();
245  Kokkos::Impl::Timer clock ;
246  for (int iter = 0; iter < iterCount; ++iter) {
247  for (int e=0; e<ensemble_length; ++e) {
248  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
249  }
250  }
251  execution_space().fence();
252 
253  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
254  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
255 
256  std::vector<double> perf(5);
257  perf[0] = fem_length;
258  perf[1] = ensemble_length;
259  perf[2] = graph_length;
260  perf[3] = seconds_per_iter;
261  perf[4] = flops / seconds_per_iter;
262  return perf;
263 }
264 
265 template <class Storage>
266 struct PerformanceDriverOp {
267  typedef typename Storage::value_type Scalar;
268  typedef typename Storage::ordinal_type Ordinal;
270  const int nGrid, nIter;
271  KokkosSparse::DeviceConfig dev_config;
272 
273  PerformanceDriverOp(const int nGrid_, const int nIter_,
274  KokkosSparse::DeviceConfig dev_config_) :
275  nGrid(nGrid_), nIter(nIter_), dev_config(dev_config_) {}
276 
277  template <typename ArgT>
278  void operator() (ArgT arg) const {
279  const int ensemble = ArgT::value;
280  typedef typename Storage::template apply_N<ensemble> NewStorageApply;
281  typedef typename NewStorageApply::type storage_type;
282 
283  const std::vector<double> perf_scalar =
284  test_scalar_spmv<Scalar,Ordinal,Device>(
285  ensemble, nGrid, nIter, dev_config );
286 
287  const std::vector<double> perf_mpvector =
288  test_mpvector_spmv<storage_type>(
290 
291  std::cout << nGrid << " , "
292  << perf_scalar[0] << " , "
293  << perf_scalar[2] << " , "
294  << perf_scalar[1] << " , "
295  << perf_scalar[3] << " , "
296  << perf_scalar[4] / perf_scalar[4] << " , "
297  << perf_scalar[4] << " , "
298  << perf_mpvector[4]/ perf_scalar[4] << " , "
299  << perf_mpvector[4] << " , "
300  << std::endl;
301  }
302 };
303 
304 template <class Storage, int entry_min, int entry_max, int entry_step>
305 void performance_test_driver( const int nGrid,
306  const int nIter,
307  KokkosSparse::DeviceConfig dev_config)
308 {
309  std::cout.precision(8);
310  std::cout << std::endl
311  << "\"Grid Size\" , "
312  << "\"FEM Size\" , "
313  << "\"FEM Graph Size\" , "
314  << "\"Ensemble Size\" , "
315  << "\"Scalar SpMv Time\" , "
316  << "\"Scalar SpMv Speedup\" , "
317  << "\"Scalar SpMv GFLOPS\" , "
318  << "\"MPVector SpMv Speedup\" , "
319  << "\"MPVector SpMv GFLOPS\" , "
320  << std::endl;
321 
322  // Loop over [entry_min, entry_max] vector entries per thread
323  typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
324  PerformanceDriverOp<Storage> op(nGrid, nIter, dev_config);
325  Sacado::mpl::for_each_no_kokkos<Range> f(op);
326 }
Stokhos::StandardStorage< int, double > storage_type
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Definition: TestEpetra.cpp:77
Stokhos_MV_Multiply_Op< Stokhos::DefaultMultiply > DefaultMultiply
Kokkos::DefaultExecutionSpace execution_space
std::vector< double > test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:187
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestEpetra.cpp:67
void operator()(ArgT arg) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< double > test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config, MultiplyTag tag)
Definition: TestSpMv.hpp:102
KokkosSparse::DeviceConfig dev_config
Definition: TestSpMv.hpp:271
Storage::execution_space Device
Definition: TestSpMv.hpp:269
Storage::value_type Scalar
Definition: TestSpMv.hpp:267
Storage::ordinal_type Ordinal
Definition: TestSpMv.hpp:268
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Kokkos::Example::FENL::DeviceConfig dev_config
PerformanceDriverOp(const int nGrid_, const int nIter_, KokkosSparse::DeviceConfig dev_config_)
Definition: TestSpMv.hpp:273
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)