Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMM.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // Kokkos CrsMatrix
44 #include "KokkosSparse_CrsMatrix.hpp"
45 #include "KokkosSparse_spmv.hpp"
46 
47 
48 // Utilities
49 #include "impl/Kokkos_Timer.hpp"
50 
51 template< typename IntType >
52 inline
53 IntType map_fem_graph_coord( const IntType & N ,
54  const IntType & i ,
55  const IntType & j ,
56  const IntType & k )
57 {
58  return k + N * ( j + N * i );
59 }
60 
61 inline
62 size_t generate_fem_graph( size_t N ,
63  std::vector< std::vector<size_t> > & graph )
64 {
65  graph.resize( N * N * N , std::vector<size_t>() );
66 
67  size_t total = 0 ;
68 
69  for ( int i = 0 ; i < (int) N ; ++i ) {
70  for ( int j = 0 ; j < (int) N ; ++j ) {
71  for ( int k = 0 ; k < (int) N ; ++k ) {
72 
73  const size_t row = map_fem_graph_coord((int)N,i,j,k);
74 
75  graph[row].reserve(27);
76 
77  for ( int ii = -1 ; ii < 2 ; ++ii ) {
78  for ( int jj = -1 ; jj < 2 ; ++jj ) {
79  for ( int kk = -1 ; kk < 2 ; ++kk ) {
80  if ( 0 <= i + ii && i + ii < (int) N &&
81  0 <= j + jj && j + jj < (int) N &&
82  0 <= k + kk && k + kk < (int) N ) {
83  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
84 
85  graph[row].push_back(col);
86  }
87  }}}
88  total += graph[row].size();
89  }}}
90 
91  return total ;
92 }
93 
94 template <typename ScalarType, typename OrdinalType, typename Device>
95 void
96 test_spmm(const OrdinalType ensemble_length,
97  const OrdinalType nGrid,
98  const OrdinalType iterCount,
99  std::vector<double>& scalar_perf,
100  std::vector<double>& block_left_perf,
101  std::vector<double>& block_right_perf)
102 {
103  typedef ScalarType value_type;
104  typedef OrdinalType ordinal_type;
105  typedef Device execution_space;
106  typedef Kokkos::View< value_type*, execution_space > vector_type;
107  typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
108  //typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > right_multivec_type;
109  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
110  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
111  typedef typename matrix_type::values_type matrix_values_type;
112 
113  //------------------------------
114  // Generate graph for "FEM" box structure:
115 
116  std::vector< std::vector<size_t> > fem_graph;
117  const size_t fem_length = nGrid * nGrid * nGrid;
118  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
119 
120  //------------------------------
121  // Generate input vectors:
122 
123  std::vector<vector_type> x(ensemble_length);
124  std::vector<vector_type> y(ensemble_length);
125  for (ordinal_type e=0; e<ensemble_length; ++e) {
126  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
127  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
128 
129  Kokkos::deep_copy( x[e] , value_type(1.0) );
130  Kokkos::deep_copy( y[e] , value_type(0.0) );
131  }
132  left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing("xl"), fem_length, ensemble_length);
133  left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing("yl"), fem_length, ensemble_length);
134  // right_multivec_type xr(Kokkos::ViewAllocateWithoutInitializing("xr"), fem_length, ensemble_length);
135  // right_multivec_type yr(Kokkos::ViewAllocateWithoutInitializing("yr"), fem_length, ensemble_length);
136  Kokkos::deep_copy(xl, value_type(1.0));
137  //Kokkos::deep_copy(xr, value_type(1.0));
138  Kokkos::deep_copy(yl, value_type(0.0));
139  //Kokkos::deep_copy(yr, value_type(0.0));
140 
141  //------------------------------
142  // Generate matrix
143 
144  matrix_graph_type matrix_graph =
145  Kokkos::create_staticcrsgraph<matrix_graph_type>(
146  std::string("test crs graph"), fem_graph);
147  matrix_values_type matrix_values =
148  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
149  matrix_type matrix("matrix", fem_length, matrix_values, matrix_graph);
150  Kokkos::deep_copy( matrix_values , value_type(1.0) );
151 
152  //------------------------------
153  // Scalar multiply
154 
155  {
156  // warm up
157  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
158  for (ordinal_type e=0; e<ensemble_length; ++e) {
159  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
160  }
161  }
162 
163  execution_space().fence();
164  Kokkos::Impl::Timer clock ;
165  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
166  for (ordinal_type e=0; e<ensemble_length; ++e) {
167  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
168  }
169  }
170  execution_space().fence();
171 
172  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
173  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
174 
175  scalar_perf.resize(5);
176  scalar_perf[0] = fem_length;
177  scalar_perf[1] = ensemble_length;
178  scalar_perf[2] = graph_length;
179  scalar_perf[3] = seconds_per_iter;
180  scalar_perf[4] = flops / seconds_per_iter;
181  }
182 
183  //------------------------------
184  // Block-left multiply
185 
186  {
187  // warm up
188  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
189  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
190  }
191 
192  execution_space().fence();
193  Kokkos::Impl::Timer clock ;
194  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
195  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
196  }
197  execution_space().fence();
198 
199  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
200  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
201 
202  block_left_perf.resize(5);
203  block_left_perf[0] = fem_length;
204  block_left_perf[1] = ensemble_length;
205  block_left_perf[2] = graph_length;
206  block_left_perf[3] = seconds_per_iter;
207  block_left_perf[4] = flops / seconds_per_iter;
208  }
209 
210 #if 0
211  //------------------------------
212  // Block-right multiply
213 
214  {
215  // warm up
216  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
217  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
218  }
219 
220  execution_space().fence();
221  Kokkos::Impl::Timer clock ;
222  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
223  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
224  }
225  execution_space().fence();
226 
227  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
228  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
229 
230  block_right_perf.resize(5);
231  block_right_perf[0] = fem_length;
232  block_right_perf[1] = ensemble_length;
233  block_right_perf[2] = graph_length;
234  block_right_perf[3] = seconds_per_iter;
235  block_right_perf[4] = flops / seconds_per_iter;
236  }
237 #endif
238 
239 }
240 
241 template <typename Scalar, typename Ordinal, typename Device>
243  const Ordinal nIter,
244  const Ordinal ensemble_min,
245  const Ordinal ensemble_max,
246  const Ordinal ensemble_step )
247 {
248  std::cout.precision(8);
249  std::cout << std::endl
250  << "\"Grid Size\" , "
251  << "\"FEM Size\" , "
252  << "\"FEM Graph Size\" , "
253  << "\"Ensemble Size\" , "
254  << "\"Scalar SpMM Time\" , "
255  << "\"Scalar SpMM Speedup\" , "
256  << "\"Scalar SpMM GFLOPS\" , "
257  << "\"Block-Left SpMM Speedup\" , "
258  << "\"Block-Left SpMM GFLOPS\" , "
259  //<< "\"Block_Right SpMM Speedup\" , "
260  //<< "\"Block_Right SpMM GFLOPS\" , "
261  << std::endl;
262 
263  std::vector<double> perf_scalar, perf_block_left, perf_block_right;
264  for (Ordinal e=ensemble_min; e<=ensemble_max; e+=ensemble_step) {
265 
266  test_spmm<Scalar,Ordinal,Device>(
267  e, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right );
268 
269  std::cout << nGrid << " , "
270  << perf_scalar[0] << " , "
271  << perf_scalar[2] << " , "
272  << perf_scalar[1] << " , "
273  << perf_scalar[3] << " , "
274  << perf_scalar[4] / perf_scalar[4] << " , "
275  << perf_scalar[4] << " , "
276  << perf_block_left[4]/ perf_scalar[4] << " , "
277  << perf_block_left[4] << " , "
278  //<< perf_block_right[4]/ perf_scalar[4] << " , "
279  //<< perf_block_right[4] << " , "
280  << std::endl;
281 
282  }
283 }
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Definition: TestEpetra.cpp:77
void test_spmm(const OrdinalType ensemble_length, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf)
Definition: TestSpMM.hpp:96
Kokkos::DefaultExecutionSpace execution_space
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestEpetra.cpp:67
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)