Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestMeanMultiply.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include <iostream>
43 
44 // PCE scalar type
46 
47 // Kokkos CrsMatrix
48 #include "KokkosSparse_CrsMatrix.hpp"
49 #include "KokkosSparse_spmv.hpp"
52 
53 // Stokhos
57 
58 // Utilities
59 #include "impl/Kokkos_Timer.hpp"
60 
61 template< typename IntType >
62 inline
63 IntType map_fem_graph_coord( const IntType & N ,
64  const IntType & i ,
65  const IntType & j ,
66  const IntType & k )
67 {
68  return k + N * ( j + N * i );
69 }
70 
71 inline
72 size_t generate_fem_graph( size_t N ,
73  std::vector< std::vector<size_t> > & graph )
74 {
75  graph.resize( N * N * N , std::vector<size_t>() );
76 
77  size_t total = 0 ;
78 
79  for ( int i = 0 ; i < (int) N ; ++i ) {
80  for ( int j = 0 ; j < (int) N ; ++j ) {
81  for ( int k = 0 ; k < (int) N ; ++k ) {
82 
83  const size_t row = map_fem_graph_coord((int)N,i,j,k);
84 
85  graph[row].reserve(27);
86 
87  for ( int ii = -1 ; ii < 2 ; ++ii ) {
88  for ( int jj = -1 ; jj < 2 ; ++jj ) {
89  for ( int kk = -1 ; kk < 2 ; ++kk ) {
90  if ( 0 <= i + ii && i + ii < (int) N &&
91  0 <= j + jj && j + jj < (int) N &&
92  0 <= k + kk && k + kk < (int) N ) {
93  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
94 
95  graph[row].push_back(col);
96  }
97  }}}
98  total += graph[row].size();
99  }}}
100 
101  return total ;
102 }
103 
104 template <typename ScalarType, typename OrdinalType, typename Device>
105 void
106 test_mean_multiply(const OrdinalType order,
107  const OrdinalType dim,
108  const OrdinalType nGrid,
109  const OrdinalType iterCount,
110  std::vector<double>& scalar_perf,
111  std::vector<double>& block_left_perf,
112  std::vector<double>& block_right_perf,
113  std::vector<double>& pce_perf,
114  std::vector<double>& block_pce_perf)
115 {
116  typedef ScalarType value_type;
117  typedef OrdinalType ordinal_type;
118  typedef Device execution_space;
119 
122 
123  typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space > scalar_vector_type;
124  typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > scalar_left_multi_vector_type;
125  typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > scalar_right_multi_vector_type;
126  typedef Kokkos::View< pce_type*, Kokkos::LayoutLeft, execution_space > pce_vector_type;
127  typedef Kokkos::View< pce_type**, Kokkos::LayoutLeft, execution_space > pce_multi_vector_type;
128 
129  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > scalar_matrix_type;
130  typedef KokkosSparse::CrsMatrix< pce_type, ordinal_type, execution_space > pce_matrix_type;
131  typedef typename scalar_matrix_type::StaticCrsGraphType matrix_graph_type;
132  typedef typename scalar_matrix_type::values_type scalar_matrix_values_type;
133  typedef typename pce_matrix_type::values_type pce_matrix_values_type;
134 
135  typedef Stokhos::OneDOrthogPolyBasis<ordinal_type,value_type> abstract_basis_type;
140  typedef typename pce_type::cijk_type kokkos_cijk_type;
141 
142  using Teuchos::rcp;
143  using Teuchos::RCP;
144  using Teuchos::Array;
145 
146  // Number of columns for PCE multi-vector apply
147  const ordinal_type num_pce_col = 5;
148 
149  // Create Stochastic Galerkin basis and expansion
150  Array< RCP<const abstract_basis_type> > bases(dim);
151  for (ordinal_type i=0; i<dim; ++i) {
152  bases[i] = Teuchos::rcp(new basis_type(order, true));
153  }
154  RCP<const product_basis_type> basis = rcp(new product_basis_type(bases));
155  RCP<cijk_type> cijk = basis->computeTripleProductTensor();
156  kokkos_cijk_type kokkos_cijk =
157  Stokhos::create_product_tensor<execution_space>(*basis, *cijk);
158  Kokkos::setGlobalCijkTensor(kokkos_cijk);
159 
160  //------------------------------
161  // Generate graph for "FEM" box structure:
162 
163  std::vector< std::vector<size_t> > fem_graph;
164  const size_t fem_length = nGrid * nGrid * nGrid;
165  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
166 
167  //------------------------------
168  // Generate input vectors:
169 
170  ordinal_type pce_size = basis->size();
171  scalar_left_multi_vector_type xl(Kokkos::ViewAllocateWithoutInitializing("scalar left x"), fem_length, pce_size);
172  scalar_left_multi_vector_type yl(Kokkos::ViewAllocateWithoutInitializing("scalar right y"), fem_length, pce_size);
173  scalar_right_multi_vector_type xr(Kokkos::ViewAllocateWithoutInitializing("scalar right x"), fem_length, pce_size);
174  scalar_right_multi_vector_type yr(Kokkos::ViewAllocateWithoutInitializing("scalar right y"), fem_length, pce_size);
175  std::vector<scalar_vector_type> x_col(pce_size), y_col(pce_size);
176  for (ordinal_type i=0; i<pce_size; ++i) {
177  x_col[i] = scalar_vector_type (Kokkos::ViewAllocateWithoutInitializing("scalar x col"), fem_length);
178  y_col[i] = scalar_vector_type(Kokkos::ViewAllocateWithoutInitializing("scalar y col"), fem_length);
179  Kokkos::deep_copy( x_col[i] , value_type(1.0) );
180  Kokkos::deep_copy( y_col[i] , value_type(0.0) );
181  }
182  pce_vector_type x_pce =
183  Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce x"),
184  kokkos_cijk, fem_length, pce_size);
185  pce_vector_type y_pce =
186  Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce y"),
187  kokkos_cijk, fem_length, pce_size);
188  pce_multi_vector_type x_multi_pce =
189  Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce multi x"),
190  kokkos_cijk, fem_length,
191  num_pce_col, pce_size);
192  pce_multi_vector_type y_multi_pce =
193  Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce multi y"),
194  kokkos_cijk, fem_length,
195  num_pce_col, pce_size);
196 
197  Kokkos::deep_copy( xl , value_type(1.0) );
198  Kokkos::deep_copy( yl , value_type(0.0) );
199  Kokkos::deep_copy( xr , value_type(1.0) );
200  Kokkos::deep_copy( yr , value_type(0.0) );
201  Kokkos::deep_copy( x_pce , value_type(1.0) );
202  Kokkos::deep_copy( y_pce , value_type(0.0) );
203  Kokkos::deep_copy( x_multi_pce , value_type(1.0) );
204  Kokkos::deep_copy( y_multi_pce , value_type(0.0) );
205 
206  //------------------------------
207  // Generate matrix
208 
209  matrix_graph_type matrix_graph =
210  Kokkos::create_staticcrsgraph<matrix_graph_type>(
211  std::string("test crs graph"), fem_graph);
212  scalar_matrix_values_type scalar_matrix_values =
213  scalar_matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("scalar matrix"), graph_length);
214  pce_matrix_values_type pce_matrix_values =
215  Kokkos::make_view<pce_matrix_values_type>(Kokkos::ViewAllocateWithoutInitializing("pce matrix"), kokkos_cijk, graph_length, 1);
216  scalar_matrix_type scalar_matrix("scalar matrix", fem_length,
217  scalar_matrix_values, matrix_graph);
218  pce_matrix_type pce_matrix("pce matrix", fem_length,
219  pce_matrix_values, matrix_graph);
220 
221  Kokkos::deep_copy( scalar_matrix_values , value_type(1.0) );
222  Kokkos::deep_copy( pce_matrix_values , value_type(1.0) );
223 
224  //------------------------------
225  // Scalar multiply
226 
227  {
228  // warm up
229  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
230  for (ordinal_type col=0; col<pce_size; ++col) {
231  // scalar_vector_type xc =
232  // Kokkos::subview(x, Kokkos::ALL(), col);
233  // scalar_vector_type yc =
234  // Kokkos::subview(y, Kokkos::ALL(), col);
235  // Kokkos::MV_Multiply( yc, scalar_matrix, xc );
236  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
237  }
238  }
239 
240  execution_space::fence();
241  Kokkos::Impl::Timer clock ;
242  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
243  for (ordinal_type col=0; col<pce_size; ++col) {
244  // scalar_vector_type xc =
245  // Kokkos::subview(x, Kokkos::ALL(), col);
246  // scalar_vector_type yc =
247  // Kokkos::subview(y, Kokkos::ALL(), col);
248  // Kokkos::MV_Multiply( yc, scalar_matrix, xc );
249  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
250  }
251  }
252  execution_space::fence();
253 
254  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
255  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
256 
257  scalar_perf.resize(5);
258  scalar_perf[0] = fem_length;
259  scalar_perf[1] = pce_size;
260  scalar_perf[2] = graph_length;
261  scalar_perf[3] = seconds_per_iter;
262  scalar_perf[4] = flops / seconds_per_iter;
263  }
264 
265  //------------------------------
266  // Block-left multiply
267 
268  {
269  // warm up
270  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
271  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xl , value_type(0.0) ,yl);
272  }
273 
274  execution_space::fence();
275  Kokkos::Impl::Timer clock ;
276  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
277  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xl , value_type(0.0) ,yl);
278  }
279  execution_space::fence();
280 
281  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
282  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
283 
284  block_left_perf.resize(5);
285  block_left_perf[0] = fem_length;
286  block_left_perf[1] = pce_size;
287  block_left_perf[2] = graph_length;
288  block_left_perf[3] = seconds_per_iter;
289  block_left_perf[4] = flops / seconds_per_iter;
290  }
291 
292  //------------------------------
293  // Block-right multiply
294 
295  {
296  // warm up
297  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
298  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xr , value_type(0.0) ,yr);
299  }
300 
301  execution_space::fence();
302  Kokkos::Impl::Timer clock ;
303  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
304  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xr , value_type(0.0) ,yr);
305  }
306  execution_space::fence();
307 
308  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
309  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
310 
311  block_right_perf.resize(5);
312  block_right_perf[0] = fem_length;
313  block_right_perf[1] = pce_size;
314  block_right_perf[2] = graph_length;
315  block_right_perf[3] = seconds_per_iter;
316  block_right_perf[4] = flops / seconds_per_iter;
317  }
318 
319  //------------------------------
320  // PCE multiply
321 
322  {
323  // warm up
324  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
325  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
326  }
327 
328  execution_space::fence();
329  Kokkos::Impl::Timer clock ;
330  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
331  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
332  }
333  execution_space::fence();
334 
335  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
336  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
337 
338  pce_perf.resize(5);
339  pce_perf[0] = fem_length;
340  pce_perf[1] = pce_size;
341  pce_perf[2] = graph_length;
342  pce_perf[3] = seconds_per_iter;
343  pce_perf[4] = flops / seconds_per_iter;
344  }
345 
346  //------------------------------
347  // PCE multi-vector multiply
348 
349  {
350  // warm up
351  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
352  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
353  }
354 
355  execution_space::fence();
356  Kokkos::Impl::Timer clock ;
357  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
358  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
359  }
360  execution_space::fence();
361 
362  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
363  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size * num_pce_col;
364 
365  block_pce_perf.resize(5);
366  block_pce_perf[0] = fem_length;
367  block_pce_perf[1] = pce_size;
368  block_pce_perf[2] = graph_length;
369  block_pce_perf[3] = seconds_per_iter;
370  block_pce_perf[4] = flops / seconds_per_iter;
371  }
372 
373 }
374 
375 template <typename Scalar, typename Ordinal, typename Device>
377  const Ordinal nIter,
378  const Ordinal order,
379  const Ordinal min_var,
380  const Ordinal max_var )
381 {
382  std::cout.precision(8);
383  std::cout << std::endl
384  << "\"Grid Size\" , "
385  << "\"FEM Size\" , "
386  << "\"FEM Graph Size\" , "
387  << "\"Dimension\" , "
388  << "\"Order\" , "
389  << "\"PCE Size\" , "
390  << "\"Scalar SpMM Time\" , "
391  << "\"Scalar SpMM Speedup\" , "
392  << "\"Scalar SpMM GFLOPS\" , "
393  << "\"Block-Left SpMM Speedup\" , "
394  << "\"Block-Left SpMM GFLOPS\" , "
395  << "\"Block-Right SpMM Speedup\" , "
396  << "\"Block-Right SpMM GFLOPS\" , "
397  << "\"PCE SpMM Speedup\" , "
398  << "\"PCE SpMM GFLOPS\" , "
399  << "\"Block PCE SpMM Speedup\" , "
400  << "\"Block PCE SpMM GFLOPS\" , "
401  << std::endl;
402 
403  std::vector<double> perf_scalar, perf_block_left, perf_block_right,
404  perf_pce, perf_block_pce;
405  for (Ordinal dim=min_var; dim<=max_var; ++dim) {
406 
407  test_mean_multiply<Scalar,Ordinal,Device>(
408  order, dim, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right,
409  perf_pce, perf_block_pce );
410 
411  std::cout << nGrid << " , "
412  << perf_scalar[0] << " , "
413  << perf_scalar[2] << " , "
414  << dim << " , "
415  << order << " , "
416  << perf_scalar[1] << " , "
417  << perf_scalar[3] << " , "
418  << perf_scalar[4] / perf_scalar[4] << " , "
419  << perf_scalar[4] << " , "
420  << perf_block_left[4]/ perf_scalar[4] << " , "
421  << perf_block_left[4] << " , "
422  << perf_block_right[4]/ perf_scalar[4] << " , "
423  << perf_block_right[4] << " , "
424  << perf_pce[4]/ perf_scalar[4] << " , "
425  << perf_pce[4] << " , "
426  << perf_block_pce[4]/ perf_scalar[4] << " , "
427  << perf_block_pce[4] << " , "
428  << std::endl;
429 
430  }
431 }
432 
433 #define INST_PERF_DRIVER(SCALAR, ORDINAL, DEVICE) \
434  template void performance_test_driver< SCALAR, ORDINAL, DEVICE >( \
435  const ORDINAL nGrid, const ORDINAL nIter, const ORDINAL order, \
436  const ORDINAL min_var, const ORDINAL max_var);
Stokhos::StandardStorage< int, double > storage_type
Sacado::ETPCE::OrthogPoly< double, Stokhos::StandardStorage< int, double > > pce_type
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Definition: TestEpetra.cpp:77
Kokkos::DefaultExecutionSpace execution_space
void test_mean_multiply(const OrdinalType order, const OrdinalType dim, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf, std::vector< double > &pce_perf, std::vector< double > &block_pce_perf)
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestEpetra.cpp:67
Stokhos::LegendreBasis< int, double > basis_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void setGlobalCijkTensor(const cijk_type &cijk)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< view_type >::value, typename CijkType< view_type >::type >::type cijk(const view_type &view)
Legendre polynomial basis.
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
A comparison functor implementing a strict weak ordering based lexographic ordering.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)