Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestSpMM.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // Kokkos CrsMatrix
13 #include "KokkosSparse_CrsMatrix.hpp"
14 #include "KokkosSparse_spmv.hpp"
15 
16 
17 // Utilities
18 #include "Kokkos_Timer.hpp"
19 
/// Flatten the lattice coordinate (i, j, k) of an N x N x N grid into a
/// single linear index.  k is the fastest-varying coordinate and i the
/// slowest, i.e. the result equals k + N * ( j + N * i ).
///
/// \param N  number of lattice nodes along each axis
/// \param i  slowest-varying coordinate
/// \param j  middle coordinate
/// \param k  fastest-varying coordinate
/// \return   linear index of node (i, j, k)
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Horner-style evaluation of i*N^2 + j*N + k
  return ( i * N + j ) * N + k ;
}
29 
/// Build the adjacency lists of a 27-point stencil on an N x N x N lattice.
///
/// Row r of \p graph belongs to lattice node (i, j, k) with
/// r = k + N * ( j + N * i ).  The row receives the linear index of every
/// node whose coordinates differ from (i, j, k) by at most one along each
/// axis (the node itself included) and that lies inside the lattice.  Columns
/// are stored in ascending order.
///
/// \param N      number of lattice nodes along each axis
/// \param graph  output adjacency lists; previous content is discarded and
///               the container ends up with N*N*N rows
/// \return       total number of column entries stored over all rows
inline
size_t generate_fem_graph( size_t N ,
                           std::vector< std::vector<size_t> > & graph )
{
  // assign() rather than resize(): rows already present in a reused
  // container must not keep their old columns, otherwise the new entries
  // would be appended to stale ones and the returned total would be wrong.
  graph.assign( N * N * N , std::vector<size_t>() );

  size_t total = 0 ;

  // All index arithmetic stays in size_t; the neighbour range along each
  // axis is clamped to the lattice up front, so no signed offsets are needed.
  for ( size_t i = 0 ; i < N ; ++i ) {
    const size_t i_begin = ( i == 0 ) ? 0 : i - 1 ;
    const size_t i_end   = ( i + 1 < N ) ? i + 2 : N ;

    for ( size_t j = 0 ; j < N ; ++j ) {
      const size_t j_begin = ( j == 0 ) ? 0 : j - 1 ;
      const size_t j_end   = ( j + 1 < N ) ? j + 2 : N ;

      for ( size_t k = 0 ; k < N ; ++k ) {
        const size_t k_begin = ( k == 0 ) ? 0 : k - 1 ;
        const size_t k_end   = ( k + 1 < N ) ? k + 2 : N ;

        std::vector<size_t> & row = graph[ k + N * ( j + N * i ) ];

        // An interior node has 3*3*3 neighbours (itself included)
        row.reserve(27);

        for ( size_t ni = i_begin ; ni < i_end ; ++ni ) {
          for ( size_t nj = j_begin ; nj < j_end ; ++nj ) {
            for ( size_t nk = k_begin ; nk < k_end ; ++nk ) {
              row.push_back( nk + N * ( nj + N * ni ) );
            }
          }
        }

        total += row.size();
      }
    }
  }

  return total ;
}
62 
63 template <typename ScalarType, typename OrdinalType, typename Device>
64 void
65 test_spmm(const OrdinalType ensemble_length,
66  const OrdinalType nGrid,
67  const OrdinalType iterCount,
68  std::vector<double>& scalar_perf,
69  std::vector<double>& block_left_perf,
70  std::vector<double>& block_right_perf)
71 {
72  typedef ScalarType value_type;
73  typedef OrdinalType ordinal_type;
74  typedef Device execution_space;
75  typedef Kokkos::View< value_type*, execution_space > vector_type;
76  typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
77  //typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > right_multivec_type;
78  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
79  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
80  typedef typename matrix_type::values_type matrix_values_type;
81 
82  //------------------------------
83  // Generate graph for "FEM" box structure:
84 
85  std::vector< std::vector<size_t> > fem_graph;
86  const size_t fem_length = nGrid * nGrid * nGrid;
87  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
88 
89  //------------------------------
90  // Generate input vectors:
91 
92  std::vector<vector_type> x(ensemble_length);
93  std::vector<vector_type> y(ensemble_length);
94  for (ordinal_type e=0; e<ensemble_length; ++e) {
95  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
96  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
97 
98  Kokkos::deep_copy( x[e] , value_type(1.0) );
99  Kokkos::deep_copy( y[e] , value_type(0.0) );
100  }
101  left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing("xl"), fem_length, ensemble_length);
102  left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing("yl"), fem_length, ensemble_length);
103  // right_multivec_type xr(Kokkos::ViewAllocateWithoutInitializing("xr"), fem_length, ensemble_length);
104  // right_multivec_type yr(Kokkos::ViewAllocateWithoutInitializing("yr"), fem_length, ensemble_length);
105  Kokkos::deep_copy(xl, value_type(1.0));
106  //Kokkos::deep_copy(xr, value_type(1.0));
107  Kokkos::deep_copy(yl, value_type(0.0));
108  //Kokkos::deep_copy(yr, value_type(0.0));
109 
110  //------------------------------
111  // Generate matrix
112 
113  matrix_graph_type matrix_graph =
114  Kokkos::create_staticcrsgraph<matrix_graph_type>(
115  std::string("test crs graph"), fem_graph);
116  matrix_values_type matrix_values =
117  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
118  matrix_type matrix("matrix", fem_length, matrix_values, matrix_graph);
119  Kokkos::deep_copy( matrix_values , value_type(1.0) );
120 
121  //------------------------------
122  // Scalar multiply
123 
124  {
125  // warm up
126  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
127  for (ordinal_type e=0; e<ensemble_length; ++e) {
128  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
129  }
130  }
131 
132  execution_space().fence();
133  Kokkos::Timer clock ;
134  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
135  for (ordinal_type e=0; e<ensemble_length; ++e) {
136  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
137  }
138  }
139  execution_space().fence();
140 
141  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
142  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
143 
144  scalar_perf.resize(5);
145  scalar_perf[0] = fem_length;
146  scalar_perf[1] = ensemble_length;
147  scalar_perf[2] = graph_length;
148  scalar_perf[3] = seconds_per_iter;
149  scalar_perf[4] = flops / seconds_per_iter;
150  }
151 
152  //------------------------------
153  // Block-left multiply
154 
155  {
156  // warm up
157  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
158  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
159  }
160 
161  execution_space().fence();
162  Kokkos::Timer clock ;
163  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
164  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
165  }
166  execution_space().fence();
167 
168  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
169  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
170 
171  block_left_perf.resize(5);
172  block_left_perf[0] = fem_length;
173  block_left_perf[1] = ensemble_length;
174  block_left_perf[2] = graph_length;
175  block_left_perf[3] = seconds_per_iter;
176  block_left_perf[4] = flops / seconds_per_iter;
177  }
178 
179 #if 0
180  //------------------------------
181  // Block-right multiply
182 
183  {
184  // warm up
185  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
186  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
187  }
188 
189  execution_space().fence();
190  Kokkos::Timer clock ;
191  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
192  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
193  }
194  execution_space().fence();
195 
196  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
197  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
198 
199  block_right_perf.resize(5);
200  block_right_perf[0] = fem_length;
201  block_right_perf[1] = ensemble_length;
202  block_right_perf[2] = graph_length;
203  block_right_perf[3] = seconds_per_iter;
204  block_right_perf[4] = flops / seconds_per_iter;
205  }
206 #endif
207 
208 }
209 
210 template <typename Scalar, typename Ordinal, typename Device>
212  const Ordinal nIter,
213  const Ordinal ensemble_min,
214  const Ordinal ensemble_max,
215  const Ordinal ensemble_step )
216 {
217  std::cout.precision(8);
218  std::cout << std::endl
219  << "\"Grid Size\" , "
220  << "\"FEM Size\" , "
221  << "\"FEM Graph Size\" , "
222  << "\"Ensemble Size\" , "
223  << "\"Scalar SpMM Time\" , "
224  << "\"Scalar SpMM Speedup\" , "
225  << "\"Scalar SpMM GFLOPS\" , "
226  << "\"Block-Left SpMM Speedup\" , "
227  << "\"Block-Left SpMM GFLOPS\" , "
228  //<< "\"Block_Right SpMM Speedup\" , "
229  //<< "\"Block_Right SpMM GFLOPS\" , "
230  << std::endl;
231 
232  std::vector<double> perf_scalar, perf_block_left, perf_block_right;
233  for (Ordinal e=ensemble_min; e<=ensemble_max; e+=ensemble_step) {
234 
235  test_spmm<Scalar,Ordinal,Device>(
236  e, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right );
237 
238  std::cout << nGrid << " , "
239  << perf_scalar[0] << " , "
240  << perf_scalar[2] << " , "
241  << perf_scalar[1] << " , "
242  << perf_scalar[3] << " , "
243  << perf_scalar[4] / perf_scalar[4] << " , "
244  << perf_scalar[4] << " , "
245  << perf_block_left[4]/ perf_scalar[4] << " , "
246  << perf_block_left[4] << " , "
247  //<< perf_block_right[4]/ perf_scalar[4] << " , "
248  //<< perf_block_right[4] << " , "
249  << std::endl;
250 
251  }
252 }
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Definition: TestEpetra.cpp:45
void test_spmm(const OrdinalType ensemble_length, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf)
Definition: TestSpMM.hpp:65
Kokkos::DefaultExecutionSpace execution_space
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestEpetra.cpp:35
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(KokkosKernels::Experimental::Controls, const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)