Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestStochastic.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <utility>
11 #include <cmath>
12 #include <iostream>
13 
14 #include "Kokkos_Core.hpp"
15 #include "Kokkos_Timer.hpp"
16 
17 #include "Stokhos_Update.hpp"
18 #include "Stokhos_CrsMatrix.hpp"
30 
31 #if defined(HAVE_MPI) && 0
32 #include "Epetra_MpiComm.h"
33 #else
34 #include "Epetra_SerialComm.h"
35 #endif
37 #include "Stokhos_JacobiBasis.hpp"
43 
44 #ifdef HAVE_STOKHOS_KOKKOSLINALG
45 #include "KokkosSparse_CrsMatrix.hpp"
46 #include "KokkosSparse_spmv.hpp"
47 #include "KokkosBlas1_update.hpp"
48 #endif
49 
50 namespace unit_test {
51 
52 template< typename IntType >
53 inline
54 IntType map_fem_graph_coord( const IntType & N ,
55  const IntType & i ,
56  const IntType & j ,
57  const IntType & k )
58 {
59  return k + N * ( j + N * i );
60 }
61 
62 inline
63 size_t generate_fem_graph( size_t N ,
64  std::vector< std::vector<size_t> > & graph )
65 {
66  graph.resize( N * N * N , std::vector<size_t>() );
67 
68  size_t total = 0 ;
69 
70  for ( int i = 0 ; i < (int) N ; ++i ) {
71  for ( int j = 0 ; j < (int) N ; ++j ) {
72  for ( int k = 0 ; k < (int) N ; ++k ) {
73 
74  const size_t row = map_fem_graph_coord((int)N,i,j,k);
75 
76  graph[row].reserve(27);
77 
78  for ( int ii = -1 ; ii < 2 ; ++ii ) {
79  for ( int jj = -1 ; jj < 2 ; ++jj ) {
80  for ( int kk = -1 ; kk < 2 ; ++kk ) {
81  if ( 0 <= i + ii && i + ii < (int) N &&
82  0 <= j + jj && j + jj < (int) N &&
83  0 <= k + kk && k + kk < (int) N ) {
84  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
85 
86  graph[row].push_back(col);
87  }
88  }}}
89  total += graph[row].size();
90  }}}
91 
92  return total ;
93 }
94 
95 template <typename Scalar> struct ScalarTolerances {};
96 
97 template <> struct ScalarTolerances<float> {
98  typedef float scalar_type;
99  static scalar_type sparse_cijk_tol() { return 1e-5; }
100 };
101 
102 template <> struct ScalarTolerances<double> {
103  typedef double scalar_type;
104  static scalar_type sparse_cijk_tol() { return 1e-12; }
105 };
106 
107 template< typename ScalarType , typename TensorType, class Device >
108 std::vector<double>
110  const std::vector<int> & var_degree ,
111  const int nGrid ,
112  const int iterCount ,
113  const bool symmetric )
114 {
115  typedef ScalarType value_type ;
116  typedef Kokkos::View< value_type** ,
117  Kokkos::LayoutLeft ,
118  Device > block_vector_type ;
119 
121 
123  typedef typename matrix_type::graph_type graph_type ;
124 
125  typedef ScalarType value_type ;
126  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
129  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
131 
132  using Teuchos::rcp;
133  using Teuchos::RCP;
134  using Teuchos::Array;
135 
136  // Create Stochastic Galerkin basis and expansion
137  const size_t num_KL = var_degree.size();
138  Array< RCP<const abstract_basis_type> > bases(num_KL);
139  for (size_t i=0; i<num_KL; i++) {
140  if (symmetric)
141  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
142  else
143  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
144  }
145  RCP<const product_basis_type> basis =
146  rcp(new product_basis_type(
148  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
149 
150  //------------------------------
151  // Generate graph for "FEM" box structure:
152 
153  std::vector< std::vector<size_t> > graph ;
154 
155  const size_t outer_length = nGrid * nGrid * nGrid ;
156  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
157 
158  //------------------------------
159  // Generate CRS block-tensor matrix:
160 
161  matrix_type matrix ;
162 
163  matrix.block =
164  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
165  *Cijk );
166  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
167 
168  const size_t inner_length = matrix.block.dimension();
169  const size_t inner_length_aligned = matrix.block.aligned_dimension();
170 
171  matrix.values =
172  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
173 
174  block_vector_type x =
175  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
176  block_vector_type y =
177  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
178 
179  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
180 
181  //------------------------------
182  // Generate input multivector:
183 
184  Kokkos::deep_copy( x , ScalarType(1.0) );
185  block_vector_type x0 =
186  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"),
187  inner_length_aligned , outer_length );
188  Kokkos::deep_copy( x0 , ScalarType(1.0) );
189 
190  //------------------------------
191 
192  Device().fence();
193  Kokkos::Timer clock ;
194  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
195  Kokkos::deep_copy( x, x0 ); // akin to import
196  Stokhos::multiply( matrix , x , y );
197  }
198  Device().fence();
199 
200  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
201  const double flops_per_block = matrix.block.tensor().num_flops();
202  const double flops = 1.0e-9*graph_length*flops_per_block;
203 
204  std::vector<double> perf(6) ;
205 
206  perf[0] = outer_length * inner_length ;
207  perf[1] = seconds_per_iter ;
208  perf[2] = flops / seconds_per_iter;
209  perf[3] = matrix.block.tensor().entry_count();
210  perf[4] = inner_length ;
211  perf[5] = flops_per_block;
212 
213  return perf ;
214 }
215 
216 template< typename ScalarType , class Device >
217 std::vector<double>
219  const std::vector<int> & var_degree ,
220  const int nGrid ,
221  const int iterCount ,
222  const bool symmetric )
223 {
224  typedef ScalarType value_type ;
225  typedef Kokkos::View< value_type**,
226  Kokkos::LayoutLeft ,
227  Device > block_vector_type ;
228 
229  //------------------------------
230 
232  value_type , Device > matrix_type ;
233 
234  typedef typename matrix_type::graph_type graph_type ;
235 
236  typedef ScalarType value_type ;
237  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
240  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
242 
243  using Teuchos::rcp;
244  using Teuchos::RCP;
245  using Teuchos::Array;
246 
247  // Create Stochastic Galerkin basis and expansion
248  const size_t num_KL = var_degree.size();
249  Array< RCP<const abstract_basis_type> > bases(num_KL);
250  for (size_t i=0; i<num_KL; i++) {
251  if (symmetric)
252  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
253  else
254  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
255  }
256  RCP<const product_basis_type> basis =
257  rcp(new product_basis_type(
259  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
260 
261  //------------------------------
262  // Generate FEM graph:
263 
264  std::vector< std::vector<size_t> > fem_graph ;
265 
266  const size_t fem_length = nGrid * nGrid * nGrid ;
267  const size_t fem_graph_length = unit_test::generate_fem_graph( nGrid , fem_graph );
268 
269  const size_t stoch_length = basis->size();
270 
271  //------------------------------
272 
273  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), stoch_length , fem_length );
274  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), stoch_length , fem_length );
275 
276  Kokkos::deep_copy( x , ScalarType(1.0) );
277 
278  //------------------------------
279  // Generate CRS matrix of blocks with symmetric diagonal storage
280 
281  matrix_type matrix ;
282 
283  matrix.block = Stokhos::SymmetricDiagonalSpec< Device >( stoch_length );
284  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
285  std::string("test product tensor graph") , fem_graph );
286  matrix.values = block_vector_type(
287  Kokkos::ViewAllocateWithoutInitializing("matrix"), matrix.block.matrix_size() , fem_graph_length );
288 
289  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
290 
291  //------------------------------
292 
293  Device().fence();
294  Kokkos::Timer clock ;
295  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
296  Stokhos::multiply( matrix , x , y );
297  }
298  Device().fence();
299 
300  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
301  const double flops_per_block = 2.0*stoch_length*stoch_length;
302  const double flops = 1e-9*fem_graph_length*flops_per_block;
303 
304  std::vector<double> perf(6);
305  perf[0] = fem_length * stoch_length ;
306  perf[1] = seconds_per_iter;
307  perf[2] = flops / seconds_per_iter;
308  perf[3] = Cijk->num_entries();
309  perf[4] = stoch_length;
310  perf[5] = flops_per_block;
311  return perf;
312 }
313 
314 //----------------------------------------------------------------------------
315 // Flatten to a plain CRS matrix
316 //
317 // Outer DOF == fem
318 // Inner DOF == stochastic
319 
320 template< typename ScalarType , class Device >
321 std::vector<double>
323  const std::vector<int> & var_degree ,
324  const int nGrid ,
325  const int iterCount ,
326  const bool symmetric )
327 {
328  typedef ScalarType value_type ;
329  typedef Kokkos::View< value_type* , Device > vector_type ;
330 
331  //------------------------------
332 
333  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
334  typedef typename matrix_type::values_type matrix_values_type;
335  typedef typename matrix_type::graph_type matrix_graph_type;
336 
337  typedef ScalarType value_type ;
338  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
341  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
343 
344  using Teuchos::rcp;
345  using Teuchos::RCP;
346  using Teuchos::Array;
347 
348  // Create Stochastic Galerkin basis and expansion
349  const size_t num_KL = var_degree.size();
350  Array< RCP<const abstract_basis_type> > bases(num_KL);
351  for (size_t i=0; i<num_KL; i++) {
352  if (symmetric)
353  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
354  else
355  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
356  }
357  RCP<const product_basis_type> basis =
358  rcp(new product_basis_type(
360  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
361 
362  //------------------------------
363  // Generate FEM graph:
364 
365  std::vector< std::vector<size_t> > fem_graph ;
366 
367  const size_t fem_length = nGrid * nGrid * nGrid ;
368 
369  unit_test::generate_fem_graph( nGrid , fem_graph );
370 
371  //------------------------------
372  // Stochastic graph
373 
374  const size_t stoch_length = basis->size();
375  std::vector< std::vector< int > > stoch_graph( stoch_length );
376 #if defined(HAVE_MPI) && 0
377  Epetra_MpiComm comm(MPI_COMM_WORLD);
378 #else
379  Epetra_SerialComm comm;
380 #endif
382  *basis, *Cijk, comm);
383  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
384  int len = cijk_graph->NumGlobalIndices(i);
385  stoch_graph[i].resize(len);
386  int len2;
387  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
388  }
389 
390  //------------------------------
391  // Generate flattened graph with FEM outer and stochastic inner
392 
393  const size_t flat_length = fem_length * stoch_length ;
394 
395  std::vector< std::vector<size_t> > flat_graph( flat_length );
396 
397  for ( size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
398 
399  const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
400 
401  for ( size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
402 
403  const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
404  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
405  const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
406 
407  flat_graph[iFlatRow].resize( iFlatRowNZ );
408 
409  size_t iFlatEntry = 0 ;
410 
411  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
412 
413  const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
414 
415  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
416 
417  const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
418  const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
419 
420  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
421 
422  ++iFlatEntry ;
423  }
424  }
425  }
426  }
427 
428  //------------------------------
429 
430  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
431  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
432 
433  Kokkos::deep_copy( x , ScalarType(1.0) );
434 
435  //------------------------------
436 
437  matrix_type matrix ;
438 
439  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
440  std::string("testing") , flat_graph );
441 
442  const size_t flat_graph_length = matrix.graph.entries.extent(0);
443 
444  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
445 
446  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
447 
448  //Kokkos::write_matrix_market(matrix, "flat_commuted.mm");
449 
450  //------------------------------
451 
452  Device().fence();
453  Kokkos::Timer clock ;
454  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
455  Stokhos::multiply( matrix , x , y );
456  }
457  Device().fence();
458 
459  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
460  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
461 
462  std::vector<double> perf(4);
463  perf[0] = flat_length ;
464  perf[1] = seconds_per_iter;
465  perf[2] = flops;
466  perf[3] = flat_graph_length ;
467  return perf;
468 }
469 
470 //----------------------------------------------------------------------------
471 //----------------------------------------------------------------------------
472 // Flatten to a plain CRS matrix
473 //
474 // Outer DOF == stochastic
475 // Inner DOF == fem
476 
477 template< typename ScalarType , class Device >
478 std::vector<double>
480  const std::vector<int> & var_degree ,
481  const int nGrid ,
482  const int iterCount ,
483  const bool symmetric )
484 {
485  typedef ScalarType value_type ;
486  typedef Kokkos::View< value_type* , Device > vector_type ;
487 
488  //------------------------------
489 
490  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
491  typedef typename matrix_type::values_type matrix_values_type;
492  typedef typename matrix_type::graph_type matrix_graph_type;
493 
494  typedef ScalarType value_type ;
495  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
498  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
500 
501  using Teuchos::rcp;
502  using Teuchos::RCP;
503  using Teuchos::Array;
504 
505  // Create Stochastic Galerkin basis and expansion
506  const size_t num_KL = var_degree.size();
507  Array< RCP<const abstract_basis_type> > bases(num_KL);
508  for (size_t i=0; i<num_KL; i++) {
509  if (symmetric)
510  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
511  else
512  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
513  }
514  RCP<const product_basis_type> basis =
515  rcp(new product_basis_type(
517  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
518 
519  //------------------------------
520  // Generate FEM graph:
521 
522  std::vector< std::vector<size_t> > fem_graph ;
523 
524  const size_t fem_length = nGrid * nGrid * nGrid ;
525 
526  unit_test::generate_fem_graph( nGrid , fem_graph );
527 
528  //------------------------------
529  // Stochastic graph
530 
531  const size_t stoch_length = basis->size();
532  std::vector< std::vector< int > > stoch_graph( stoch_length );
533 #if defined(HAVE_MPI) && 0
534  Epetra_MpiComm comm(MPI_COMM_WORLD);
535 #else
536  Epetra_SerialComm comm;
537 #endif
539  *basis, *Cijk, comm);
540  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
541  int len = cijk_graph->NumGlobalIndices(i);
542  stoch_graph[i].resize(len);
543  int len2;
544  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
545  }
546 
547  //------------------------------
548  // Generate flattened graph with stochastic outer and FEM inner
549 
550  const size_t flat_length = fem_length * stoch_length ;
551 
552  std::vector< std::vector<size_t> > flat_graph( flat_length );
553 
554  for ( size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
555 
556  const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
557 
558  for ( size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
559 
560  const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
561  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
562  const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
563 
564  flat_graph[iFlatRow].resize( iFlatRowNZ );
565 
566  size_t iFlatEntry = 0 ;
567 
568  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
569 
570  const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
571 
572  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
573 
574  const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
575  const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
576 
577  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
578  ++iFlatEntry ;
579  }
580  }
581  }
582  }
583 
584  //------------------------------
585 
586  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
587  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
588 
589  Kokkos::deep_copy( x , ScalarType(1.0) );
590 
591  //------------------------------
592 
593  matrix_type matrix ;
594 
595  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , flat_graph );
596 
597  const size_t flat_graph_length = matrix.graph.entries.extent(0);
598 
599  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
600 
601  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
602 
603  //Kokkos::write_matrix_market(matrix, "flat_original.mm");
604 
605  //------------------------------
606 
607  Device().fence();
608  Kokkos::Timer clock ;
609  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
610  Stokhos::multiply( matrix , x , y );
611  }
612  Device().fence();
613 
614  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
615  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
616 
617  std::vector<double> perf(4);
618  perf[0] = flat_length ;
619  perf[1] = seconds_per_iter;
620  perf[2] = flops;
621  perf[3] = flat_graph_length ;
622  return perf;
623 }
624 
625 template< typename ScalarType , class Device >
626 std::vector<double>
628  const std::vector<int> & var_degree ,
629  const int nGrid ,
630  const int iterCount ,
631  const bool symmetric )
632 {
633  typedef ScalarType value_type ;
634  typedef Kokkos::View< value_type** ,
635  Kokkos::LayoutLeft ,
636  Device > block_vector_type ;
637 
640 
642  typedef typename matrix_type::graph_type graph_type ;
643 
644  typedef ScalarType value_type ;
645  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
648  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
650 
651  using Teuchos::rcp;
652  using Teuchos::RCP;
653  using Teuchos::Array;
654 
655  // Create Stochastic Galerkin basis and expansion
656  const size_t num_KL = var_degree.size();
657  Array< RCP<const abstract_basis_type> > bases(num_KL);
658  for (size_t i=0; i<num_KL; i++) {
659  if (symmetric)
660  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
661  else
662  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
663  }
664  RCP<const product_basis_type> basis =
665  rcp(new product_basis_type(
667  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
668 
669  //------------------------------
670  // Generate graph for "FEM" box structure:
671 
672  std::vector< std::vector<size_t> > graph ;
673 
674  const size_t outer_length = nGrid * nGrid * nGrid ;
675  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
676 
677  //------------------------------
678  // Generate CRS block-tensor matrix:
679 
680  matrix_type matrix ;
681 
682  Teuchos::ParameterList params;
683  params.set("Tile Size", 128);
684  params.set("Max Tiles", 10000);
685  matrix.block =
686  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
687  params);
688  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
689 
690  const size_t inner_length = matrix.block.dimension();
691  const size_t inner_length_aligned = matrix.block.aligned_dimension();
692 
693  matrix.values =
694  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
695 
696  block_vector_type x =
697  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
698  block_vector_type y =
699  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
700 
701  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
702 
703  //------------------------------
704  // Generate input multivector:
705 
706  Kokkos::deep_copy( x , ScalarType(1.0) );
707 
708  //------------------------------
709 
710  Device().fence();
711  Kokkos::Timer clock ;
712  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
713  Stokhos::multiply( matrix , x , y );
714  }
715  Device().fence();
716 
717  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
718  const double flops_per_block = matrix.block.tensor().num_flops();
719  const double flops = 1.0e-9*graph_length*flops_per_block;
720 
721  // std::cout << "tensor: flops = " << flops
722  // << " time = " << seconds_per_iter << std::endl;
723 
724  std::vector<double> perf(6) ;
725 
726  perf[0] = outer_length * inner_length ;
727  perf[1] = seconds_per_iter ;
728  perf[2] = flops / seconds_per_iter;
729  perf[3] = matrix.block.tensor().entry_count();
730  perf[4] = inner_length ;
731  perf[5] = flops_per_block;
732 
733  return perf ;
734 }
735 
736 template< typename ScalarType , class Device >
737 std::vector<double>
739  const std::vector<int> & var_degree ,
740  const int nGrid ,
741  const int iterCount ,
742  const bool symmetric )
743 {
744  typedef ScalarType value_type ;
745  typedef Kokkos::View< value_type** ,
746  Kokkos::LayoutLeft ,
747  Device > block_vector_type ;
748 
751 
753  typedef typename matrix_type::graph_type graph_type ;
754 
755  typedef ScalarType value_type ;
756  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
759  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
761 
762  using Teuchos::rcp;
763  using Teuchos::RCP;
764  using Teuchos::Array;
765 
766  // Create Stochastic Galerkin basis and expansion
767  const size_t num_KL = var_degree.size();
768  Array< RCP<const abstract_basis_type> > bases(num_KL);
769  for (size_t i=0; i<num_KL; i++) {
770  if (symmetric)
771  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
772  else
773  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
774  }
775  RCP<const product_basis_type> basis =
776  rcp(new product_basis_type(
778  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
779 
780  //------------------------------
781  // Generate graph for "FEM" box structure:
782 
783  std::vector< std::vector<size_t> > graph ;
784 
785  const size_t outer_length = nGrid * nGrid * nGrid ;
786  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
787 
788  //------------------------------
789  // Generate CRS block-tensor matrix:
790 
791  matrix_type matrix ;
792 
793  Teuchos::ParameterList params;
794  params.set("Tile Size", 128);
795  matrix.block =
796  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
797  params);
798  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
799 
800  const size_t inner_length = matrix.block.dimension();
801  const size_t inner_length_aligned = matrix.block.aligned_dimension();
802 
803  matrix.values =
804  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
805 
806  block_vector_type x =
807  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
808  block_vector_type y =
809  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
810 
811  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
812 
813  //------------------------------
814  // Generate input multivector:
815 
816  Kokkos::deep_copy( x , ScalarType(1.0) );
817 
818  //------------------------------
819 
820  Device().fence();
821  Kokkos::Timer clock ;
822  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
823  Stokhos::multiply( matrix , x , y );
824  }
825  Device().fence();
826 
827  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
828  const double flops_per_block = matrix.block.tensor().num_flops();
829  const double flops = 1.0e-9*graph_length*flops_per_block;
830 
831  // std::cout << "tensor: flops = " << flops
832  // << " time = " << seconds_per_iter << std::endl;
833 
834  std::vector<double> perf(6) ;
835 
836  perf[0] = outer_length * inner_length ;
837  perf[1] = seconds_per_iter ;
838  perf[2] = flops / seconds_per_iter;
839  perf[3] = matrix.block.tensor().entry_count();
840  perf[4] = inner_length ;
841  perf[5] = flops_per_block;
842 
843  return perf ;
844 }
845 
846 template< typename ScalarType , class Device >
847 std::vector<double>
849  const std::vector<int> & var_degree ,
850  const int nGrid ,
851  const int iterCount ,
852  const bool symmetric )
853 {
854  typedef ScalarType value_type ;
855  typedef Kokkos::View< value_type** ,
856  Kokkos::LayoutLeft ,
857  Device > block_vector_type ;
858 
861 
863  typedef typename matrix_type::graph_type graph_type ;
864 
865  typedef ScalarType value_type ;
866  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
869  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
871 
872  using Teuchos::rcp;
873  using Teuchos::RCP;
874  using Teuchos::Array;
875 
876  // Create Stochastic Galerkin basis and expansion
877  const size_t num_KL = var_degree.size();
878  Array< RCP<const abstract_basis_type> > bases(num_KL);
879  for (size_t i=0; i<num_KL; i++) {
880  if (symmetric)
881  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
882  else
883  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
884  }
885  RCP<const product_basis_type> basis =
886  rcp(new product_basis_type(
888  RCP<Cijk_type> Cijk =
890 
891  //------------------------------
892  // Generate graph for "FEM" box structure:
893 
894  std::vector< std::vector<size_t> > graph ;
895 
896  const size_t outer_length = nGrid * nGrid * nGrid ;
897  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
898 
899  //------------------------------
900  // Generate CRS block-tensor matrix:
901 
902  matrix_type matrix ;
903 
904  matrix.block =
905  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
906  *Cijk );
907  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
908 
909  const size_t inner_length = matrix.block.dimension();
910 
911  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length , graph_length );
912 
913  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length , outer_length );
914  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length , outer_length );
915 
916  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
917 
918  //------------------------------
919  // Generate input multivector:
920 
921  Kokkos::deep_copy( x , ScalarType(1.0) );
922 
923  //------------------------------
924 
925  Device().fence();
926  Kokkos::Timer clock ;
927  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
928  Stokhos::multiply( matrix , x , y );
929  }
930  Device().fence();
931 
932  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
933  const double flops_per_block = matrix.block.tensor().num_flops();
934  const double flops = 1.0e-9*graph_length*flops_per_block;
935 
936  // std::cout << "tensor: flops = " << flops
937  // << " time = " << seconds_per_iter << std::endl;
938 
939  std::vector<double> perf(6) ;
940 
941  perf[0] = outer_length * inner_length ;
942  perf[1] = seconds_per_iter ;
943  perf[2] = flops / seconds_per_iter;
944  perf[3] = matrix.block.tensor().num_non_zeros();
945  perf[4] = inner_length ;
946  perf[5] = flops_per_block;
947 
948  return perf ;
949 }
950 
951 template< typename ScalarType , class Device >
952 std::vector<double>
954  const std::vector<int> & var_degree ,
955  const int nGrid ,
956  const int iterCount ,
957  const bool symmetric )
958 {
959  typedef ScalarType value_type ;
960  typedef Kokkos::View< value_type** ,
961  Kokkos::LayoutLeft ,
962  Device > block_vector_type ;
963 
966 
968  typedef typename matrix_type::graph_type graph_type ;
969 
970  typedef ScalarType value_type ;
971  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
974  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
976 
977  using Teuchos::rcp;
978  using Teuchos::RCP;
979  using Teuchos::Array;
980 
981  // Create Stochastic Galerkin basis and expansion
982  const size_t num_KL = var_degree.size();
983  Array< RCP<const abstract_basis_type> > bases(num_KL);
984  for (size_t i=0; i<num_KL; i++) {
985  if (symmetric)
986  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
987  else
988  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
989  }
990  RCP<const product_basis_type> basis =
991  rcp(new product_basis_type(
993  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
994 
995  //------------------------------
996  // Generate graph for "FEM" box structure:
997 
998  std::vector< std::vector<size_t> > graph ;
999 
1000  const size_t outer_length = nGrid * nGrid * nGrid ;
1001  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
1002 
1003  //------------------------------
1004  // Generate CRS block-tensor matrix:
1005 
1006  matrix_type matrix ;
1007 
1008  Teuchos::ParameterList params;
1009  params.set("Symmetric", symmetric);
1010  matrix.block =
1011  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1012  *Cijk,
1013  params );
1014  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
1015 
1016  const size_t inner_length = matrix.block.tensor().dimension();
1017  const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1018 
1019  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
1020 
1021  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
1022  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
1023 
1024  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
1025 
1026  //------------------------------
1027  // Generate input multivector:
1028 
1029  Kokkos::deep_copy( x , ScalarType(1.0) );
1030 
1031  //------------------------------
1032 
1033  Device().fence();
1034  Kokkos::Timer clock ;
1035  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1036  Stokhos::multiply( matrix , x , y );
1037  }
1038  Device().fence();
1039 
1040  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1041  const double flops_per_block = matrix.block.tensor().num_flops();
1042  const double flops = 1.0e-9*graph_length*flops_per_block;
1043 
1044  // std::cout << "tensor: flops = " << flops
1045  // << " time = " << seconds_per_iter << std::endl;
1046 
1047  std::vector<double> perf(6) ;
1048 
1049  perf[0] = outer_length * inner_length ;
1050  perf[1] = seconds_per_iter ;
1051  perf[2] = flops / seconds_per_iter;
1052  perf[3] = matrix.block.tensor().num_non_zeros();
1053  perf[4] = inner_length ;
1054  perf[5] = flops_per_block;
1055 
1056  return perf ;
1057 }
1058 
1059 template< typename ScalarType , class Device , class SparseMatOps >
1060 std::vector<double>
1062  const std::vector<int> & var_degree ,
1063  const int nGrid ,
1064  const int iterCount ,
1065  const bool test_block ,
1066  const bool symmetric )
1067 {
1068  typedef ScalarType value_type ;
1069  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1072  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1074 
1075  using Teuchos::rcp;
1076  using Teuchos::RCP;
1077  using Teuchos::Array;
1078 
1079  // Create Stochastic Galerkin basis and expansion
1080  const size_t num_KL = var_degree.size();
1081  Array< RCP<const abstract_basis_type> > bases(num_KL);
1082  for (size_t i=0; i<num_KL; i++) {
1083  if (symmetric)
1084  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1085  else
1086  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1087  }
1088  RCP<const product_basis_type> basis =
1089  rcp(new product_basis_type(
1091  const size_t outer_length = basis->size();
1092  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1093 
1094  //------------------------------
1095 
1096  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1097  typedef typename matrix_type::values_type matrix_values_type;
1098  typedef typename matrix_type::graph_type matrix_graph_type;
1099 
1100  //------------------------------
1101  // Generate FEM graph:
1102 
1103  std::vector< std::vector<size_t> > fem_graph ;
1104 
1105  const size_t inner_length = nGrid * nGrid * nGrid ;
1106  const size_t graph_length =
1107  unit_test::generate_fem_graph( nGrid , fem_graph );
1108 
1109  //------------------------------
1110 
1111  typedef Kokkos::View<value_type*,Device> vec_type ;
1112 
1113  std::vector<matrix_type> matrix( outer_length ) ;
1114  std::vector<vec_type> x( outer_length ) ;
1115  std::vector<vec_type> y( outer_length ) ;
1116  std::vector<vec_type> tmp( outer_length ) ;
1117 
1118  for (size_t block=0; block<outer_length; ++block) {
1119  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , fem_graph );
1120 
1121  matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1122 
1123  x[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length );
1124  y[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length );
1125  tmp[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("tmp"), inner_length );
1126 
1127  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1128 
1129  Kokkos::deep_copy( x[block] , ScalarType(1.0) );
1130  Kokkos::deep_copy( y[block] , ScalarType(1.0) );
1131  }
1132 
1133  Device().fence();
1134  SparseMatOps smo;
1135  Kokkos::Timer clock ;
1136  int n_apply = 0;
1137  int n_add = 0;
1138  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1139 
1140  // Original matrix-free multiply algorithm using a block apply
1141  n_apply = 0;
1142  n_add = 0;
1143  typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1144  typename Cijk_type::k_iterator k_end = Cijk->k_end();
1145  for (typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1146  int nj = Cijk->num_j(k_it);
1147  if (nj > 0) {
1148  int k = index(k_it);
1149  typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1150  typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1151  std::vector<vec_type> xx(nj), yy(nj);
1152  int jdx = 0;
1153  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1154  ++j_it) {
1155  int j = index(j_it);
1156  xx[jdx] = x[j];
1157  yy[jdx] = tmp[j];
1158  jdx++;
1159  }
1160  Stokhos::multiply( matrix[k] , xx , yy, smo );
1161  n_apply += nj;
1162  jdx = 0;
1163  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1164  ++j_it) {
1165  typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1166  typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1167  for (typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1168  ++i_it) {
1169  int i = index(i_it);
1170  value_type c = value(i_it);
1171  Stokhos::update( value_type(1.0) , y[i] , c , yy[jdx] );
1172  ++n_add;
1173  }
1174  jdx++;
1175  }
1176  }
1177  }
1178 
1179  }
1180  Device().fence();
1181 
1182  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1183  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1184  static_cast<double>(n_add)*inner_length);
1185 
1186  // std::cout << "mat-free: flops = " << flops
1187  // << " time = " << seconds_per_iter << std::endl;
1188 
1189  std::vector<double> perf(4);
1190  perf[0] = outer_length * inner_length;
1191  perf[1] = seconds_per_iter ;
1192  perf[2] = flops/seconds_per_iter;
1193  perf[3] = flops;
1194 
1195  return perf;
1196 }
1197 
1198 template< typename ScalarType , class Device , class SparseMatOps >
1199 std::vector<double>
1201  const std::vector<int> & var_degree ,
1202  const int nGrid ,
1203  const int iterCount ,
1204  const bool test_block ,
1205  const bool symmetric )
1206 {
1207  typedef ScalarType value_type ;
1208  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1211  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1213 
1214  using Teuchos::rcp;
1215  using Teuchos::RCP;
1216  using Teuchos::Array;
1217 
1218  // Create Stochastic Galerkin basis and expansion
1219  const size_t num_KL = var_degree.size();
1220  Array< RCP<const abstract_basis_type> > bases(num_KL);
1221  for (size_t i=0; i<num_KL; i++) {
1222  if (symmetric)
1223  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1224  else
1225  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1226  }
1227  RCP<const product_basis_type> basis =
1228  rcp(new product_basis_type(
1230  const size_t outer_length = basis->size();
1231  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1232 
1233  //------------------------------
1234 
1235  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1236  typedef typename matrix_type::values_type matrix_values_type;
1237  typedef typename matrix_type::graph_type matrix_graph_type;
1238 
1239  //------------------------------
1240  // Generate FEM graph:
1241 
1242  std::vector< std::vector<size_t> > fem_graph ;
1243 
1244  const size_t inner_length = nGrid * nGrid * nGrid ;
1245  const size_t graph_length =
1246  unit_test::generate_fem_graph( nGrid , fem_graph );
1247 
1248  //------------------------------
1249 
1250  typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged> vec_type ;
1251  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1252 
1253  std::vector<matrix_type> matrix( outer_length ) ;
1254  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1255  inner_length, outer_length ) ;
1256  multi_vec_type y("y", inner_length, outer_length ) ;
1257  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1258  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1259 
1260  Kokkos::deep_copy( x , ScalarType(1.0) );
1261 
1262  for (size_t block=0; block<outer_length; ++block) {
1263  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1264  std::string("testing") , fem_graph );
1265 
1266  matrix[block].values = matrix_values_type( "matrix" , graph_length );
1267 
1268  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1269  }
1270 
1271  Device().fence();
1272  SparseMatOps smo;
1273  Kokkos::Timer clock ;
1274  int n_apply = 0;
1275  int n_add = 0;
1276  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1277 
1278  // Original matrix-free multiply algorithm using a block apply
1279  typedef typename Cijk_type::k_iterator k_iterator;
1280  typedef typename Cijk_type::kj_iterator kj_iterator;
1281  typedef typename Cijk_type::kji_iterator kji_iterator;
1282  n_apply = 0;
1283  n_add = 0;
1284  k_iterator k_begin = Cijk->k_begin();
1285  k_iterator k_end = Cijk->k_end();
1286  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1287  unsigned nj = Cijk->num_j(k_it);
1288  if (nj > 0) {
1289  int k = index(k_it);
1290  kj_iterator j_begin = Cijk->j_begin(k_it);
1291  kj_iterator j_end = Cijk->j_end(k_it);
1292  std::vector<int> j_indices(nj);
1293  unsigned jdx = 0;
1294  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1295  int j = index(j_it);
1296  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1297  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1298  Kokkos::deep_copy(tt, xx);
1299  }
1300  multi_vec_type tmp_x_view =
1301  Kokkos::subview( tmp_x, Kokkos::ALL(),
1302  std::make_pair(0u,nj));
1303  multi_vec_type tmp_y_view =
1304  Kokkos::subview( tmp_y, Kokkos::ALL(),
1305  std::make_pair(0u,nj));
1306  Stokhos::multiply( matrix[k] , tmp_x_view , tmp_y_view, smo );
1307  n_apply += nj;
1308  jdx = 0;
1309  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1310  vec_type tmp_y_view =
1311  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1312  kji_iterator i_begin = Cijk->i_begin(j_it);
1313  kji_iterator i_end = Cijk->i_end(j_it);
1314  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1315  int i = index(i_it);
1316  value_type c = value(i_it);
1317  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1318  Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1319  ++n_add;
1320  }
1321  }
1322  }
1323  }
1324 
1325  }
1326  Device().fence();
1327 
1328  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1329  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1330  static_cast<double>(n_add)*inner_length);
1331 
1332  // std::cout << "mat-free: flops = " << flops
1333  // << " time = " << seconds_per_iter << std::endl;
1334 
1335  std::vector<double> perf(4);
1336  perf[0] = outer_length * inner_length;
1337  perf[1] = seconds_per_iter ;
1338  perf[2] = flops/seconds_per_iter;
1339  perf[3] = flops;
1340 
1341  return perf;
1342 }
1343 
1344 #ifdef HAVE_STOKHOS_KOKKOSLINALG
1345 template< typename ScalarType , class Device >
1346 std::vector<double>
1347 test_original_matrix_free_kokkos(
1348  const std::vector<int> & var_degree ,
1349  const int nGrid ,
1350  const int iterCount ,
1351  const bool test_block ,
1352  const bool symmetric )
1353 {
1354  typedef ScalarType value_type ;
1355  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1358  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1360 
1361  using Teuchos::rcp;
1362  using Teuchos::RCP;
1363  using Teuchos::Array;
1364 
1365  // Create Stochastic Galerkin basis and expansion
1366  const size_t num_KL = var_degree.size();
1367  Array< RCP<const abstract_basis_type> > bases(num_KL);
1368  for (size_t i=0; i<num_KL; i++) {
1369  if (symmetric)
1370  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1371  else
1372  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1373  }
1374  RCP<const product_basis_type> basis =
1375  rcp(new product_basis_type(
1376  bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1377  const size_t outer_length = basis->size();
1378  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1379 
1380  //------------------------------
1381 
1382  typedef int ordinal_type;
1383  typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1384  typedef typename matrix_type::values_type matrix_values_type;
1385  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1386 
1387  //------------------------------
1388  // Generate FEM graph:
1389 
1390  std::vector< std::vector<size_t> > fem_graph ;
1391 
1392  const size_t inner_length = nGrid * nGrid * nGrid ;
1393  const size_t graph_length =
1394  unit_test::generate_fem_graph( nGrid , fem_graph );
1395 
1396  //------------------------------
1397 
1398  typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged> vec_type ;
1399  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1400 
1401  std::vector<matrix_type> matrix( outer_length ) ;
1402  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1403  inner_length, outer_length ) ;
1404  multi_vec_type y( "y", inner_length, outer_length ) ;
1405  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1406  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1407 
1408  Kokkos::deep_copy( x , ScalarType(1.0) );
1409 
1410  for (size_t block=0; block<outer_length; ++block) {
1411  matrix_graph_type matrix_graph =
1412  Kokkos::create_staticcrsgraph<matrix_graph_type>(
1413  std::string("test crs graph") , fem_graph );
1414 
1415  matrix_values_type matrix_values = matrix_values_type(
1416  Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1417  Kokkos::deep_copy(matrix_values , ScalarType(1.0) );
1418  matrix[block] = matrix_type("matrix", outer_length, matrix_values,
1419  matrix_graph);
1420  }
1421 
1422  Device().fence();
1423  Kokkos::Timer clock ;
1424  int n_apply = 0;
1425  int n_add = 0;
1426  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1427 
1428  // Original matrix-free multiply algorithm using a block apply
1429  typedef typename Cijk_type::k_iterator k_iterator;
1430  typedef typename Cijk_type::kj_iterator kj_iterator;
1431  typedef typename Cijk_type::kji_iterator kji_iterator;
1432  n_apply = 0;
1433  n_add = 0;
1434  k_iterator k_begin = Cijk->k_begin();
1435  k_iterator k_end = Cijk->k_end();
1436  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1437  unsigned nj = Cijk->num_j(k_it);
1438  if (nj > 0) {
1439  int k = index(k_it);
1440  kj_iterator j_begin = Cijk->j_begin(k_it);
1441  kj_iterator j_end = Cijk->j_end(k_it);
1442  unsigned jdx = 0;
1443  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1444  int j = index(j_it);
1445  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1446  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1447  Kokkos::deep_copy(tt, xx);
1448  }
1449  multi_vec_type tmp_x_view =
1450  Kokkos::subview( tmp_x, Kokkos::ALL(),
1451  std::make_pair(0u,nj));
1452  multi_vec_type tmp_y_view =
1453  Kokkos::subview( tmp_y, Kokkos::ALL(),
1454  std::make_pair(0u,nj));
1455  KokkosSparse::spmv( "N" , value_type(1.0) , matrix[k] , tmp_x_view , value_type(0.0) , tmp_y_view );
1456  n_apply += nj;
1457  jdx = 0;
1458  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1459  vec_type tmp_y_view =
1460  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1461  kji_iterator i_begin = Cijk->i_begin(j_it);
1462  kji_iterator i_end = Cijk->i_end(j_it);
1463  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1464  int i = index(i_it);
1465  value_type c = value(i_it);
1466  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1467  //Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1468  KokkosBlas::update(value_type(1.0) , y_view, c, tmp_y_view, value_type(0.0), y_view);
1469  ++n_add;
1470  }
1471  }
1472  }
1473  }
1474 
1475  }
1476  Device().fence();
1477 
1478  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1479  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1480  static_cast<double>(n_add)*inner_length);
1481 
1482  // std::cout << "mat-free: flops = " << flops
1483  // << " time = " << seconds_per_iter << std::endl;
1484 
1485  std::vector<double> perf(4);
1486  perf[0] = outer_length * inner_length;
1487  perf[1] = seconds_per_iter ;
1488  perf[2] = flops/seconds_per_iter;
1489  perf[3] = flops;
1490 
1491  return perf;
1492 }
1493 #endif
1494 
1495 template< class Scalar, class Device >
1496 void performance_test_driver_all( const int pdeg ,
1497  const int minvar ,
1498  const int maxvar ,
1499  const int nGrid ,
1500  const int nIter ,
1501  const bool test_block ,
1502  const bool symmetric )
1503 {
1504  //------------------------------
1505  // Generate FEM graph:
1506 
1507  std::vector< std::vector<size_t> > fem_graph ;
1508 
1509  const size_t fem_nonzeros =
1510  unit_test::generate_fem_graph( nGrid , fem_graph );
1511 
1512  //------------------------------
1513 
1514  std::cout.precision(8);
1515 
1516  //------------------------------
1517 
1518  std::cout << std::endl << "\"FEM NNZ = " << fem_nonzeros << "\"" << std::endl;
1519 
1520  std::cout << std::endl
1521  << "\"#nGrid\" , "
1522  << "\"#Variable\" , "
1523  << "\"PolyDegree\" , "
1524  << "\"#Bases\" , "
1525  << "\"#TensorEntry\" , "
1526  << "\"VectorSize\" , "
1527  << "\"Original-Flat MXV-Time\" , "
1528  << "\"Original-Flat MXV-Speedup\" , "
1529  << "\"Original-Flat MXV-GFLOPS\" , "
1530  << "\"Commuted-Flat MXV-Speedup\" , "
1531  << "\"Commuted-Flat MXV-GFLOPS\" , "
1532  << "\"Block-Diagonal MXV-Speedup\" , "
1533  << "\"Block-Diagonal MXV-GFLOPS\" , "
1534  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1535  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1536  << std::endl ;
1537 
1538  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1539 
1540  std::vector<int> var_degree( nvar , pdeg );
1541 
1542  //------------------------------
1543 
1544  const std::vector<double> perf_flat_original =
1545  test_product_flat_original_matrix<Scalar,Device>(
1546  var_degree , nGrid , nIter , symmetric );
1547 
1548  const std::vector<double> perf_flat_commuted =
1549  test_product_flat_commuted_matrix<Scalar,Device>(
1550  var_degree , nGrid , nIter , symmetric );
1551 
1552  const std::vector<double> perf_matrix =
1553  test_product_tensor_diagonal_matrix<Scalar,Device>(
1554  var_degree , nGrid , nIter , symmetric );
1555 
1556  const std::vector<double> perf_crs_tensor =
1557  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1558  var_degree , nGrid , nIter , symmetric );
1559 
1560  if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1561  perf_flat_commuted[3] != perf_flat_original[3] ) {
1562  std::cout << "ERROR: Original and commuted matrix sizes do not match"
1563  << std::endl
1564  << " original size = " << perf_flat_original[0]
1565  << " , nonzero = " << perf_flat_original[3]
1566  << std::endl
1567  << " commuted size = " << perf_flat_commuted[0]
1568  << " , nonzero = " << perf_flat_commuted[3]
1569  << std::endl ;
1570  }
1571 
1572  std::cout << nGrid << " , "
1573  << nvar << " , "
1574  << pdeg << " , "
1575  << perf_crs_tensor[4] << " , "
1576  << perf_crs_tensor[3] << " , "
1577  << perf_flat_original[0] << " , "
1578  << perf_flat_original[1] << " , "
1579  << perf_flat_original[1] / perf_flat_original[1] << " , "
1580  << perf_flat_original[2] << " , "
1581  << perf_flat_original[1] / perf_flat_commuted[1] << " , "
1582  << perf_flat_commuted[2] << " , "
1583  << perf_flat_original[1] / perf_matrix[1] << " , "
1584  << perf_matrix[2] << " , "
1585  << perf_flat_original[1] / perf_crs_tensor[1] << " , "
1586  << perf_crs_tensor[2] << " , "
1587  << std::endl ;
1588  }
1589 
1590  //------------------------------
1591 }
1592 
1593 template< class Scalar, class Device , class SparseMatOps >
1594 void performance_test_driver_poly( const int pdeg ,
1595  const int minvar ,
1596  const int maxvar ,
1597  const int nGrid ,
1598  const int nIter ,
1599  const bool test_block ,
1600  const bool symmetric )
1601 {
1602  std::cout.precision(8);
1603 
1604  //------------------------------
1605 
1606  std::vector< std::vector<size_t> > fem_graph ;
1607  const size_t graph_length =
1608  unit_test::generate_fem_graph( nGrid , fem_graph );
1609  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1610 
1611  std::cout << std::endl
1612  << "\"#nGrid\" , "
1613  << "\"#Variable\" , "
1614  << "\"PolyDegree\" , "
1615  << "\"#Bases\" , "
1616  << "\"#TensorEntry\" , "
1617  << "\"VectorSize\" , "
1618  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1619  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1620  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1621  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1622  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1623  // << "\"Block-Coo-Tensor MXV-Speedup\" , "
1624  // << "\"Block-Coo-Tensor MXV-GFLOPS\" , "
1625  // << "\"Block-Tiled Crs-Tensor MXV-Speedup\" , "
1626  // << "\"Block-Tiled Crs-3-Tensor MXV-GFLOPS\" , "
1627  << std::endl ;
1628 
1629  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1630  std::vector<int> var_degree( nvar , pdeg );
1631 
1632  const std::vector<double> perf_crs_tensor =
1633  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1634  var_degree , nGrid , nIter , symmetric );
1635 
1636  // const bool Pack = std::is_same<Device,Kokkos::Cuda>::value;
1637  // const std::vector<double> perf_coo_tensor =
1638  // test_product_tensor_matrix<Scalar,Stokhos::CooProductTensor<Scalar,Device,Pack>,Device>(
1639  // var_degree , nGrid , nIter , symmetric );
1640 
1641  // const std::vector<double> perf_tiled_crs_tensor =
1642  // test_simple_tiled_product_tensor_matrix<Scalar,Device>(
1643  // var_degree , nGrid , nIter , symmetric );
1644 
1645  std::vector<double> perf_original_mat_free_block;
1646 #if defined(HAVE_STOKHOS_KOKKOSLINALG)
1647 #if defined( KOKKOS_ENABLE_CUDA )
1648  enum { is_cuda = std::is_same<Device,Kokkos::Cuda>::value };
1649 #else
1650  enum { is_cuda = false };
1651 #endif
1652  if ( is_cuda )
1653  perf_original_mat_free_block =
1654  test_original_matrix_free_kokkos<Scalar,Device>(
1655  var_degree , nGrid , nIter , test_block , symmetric );
1656  else
1657  perf_original_mat_free_block =
1658  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1659  var_degree , nGrid , nIter , test_block , symmetric );
1660 #else
1661  perf_original_mat_free_block =
1662  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1663  var_degree , nGrid , nIter , test_block , symmetric );
1664 #endif
1665 
1666  std::cout << nGrid << " , "
1667  << nvar << " , "
1668  << pdeg << " , "
1669  << perf_crs_tensor[4] << " , "
1670  << perf_crs_tensor[3] << " , "
1671  << perf_original_mat_free_block[0] << " , "
1672  << perf_original_mat_free_block[1] << " , "
1673  << perf_original_mat_free_block[1] /
1674  perf_original_mat_free_block[1] << " , "
1675  << perf_original_mat_free_block[2] << " , "
1676  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1677  << perf_crs_tensor[2] << " , "
1678  // << perf_original_mat_free_block[1] / perf_coo_tensor[1] << " , "
1679  // << perf_coo_tensor[2] << " , "
1680  // << perf_original_mat_free_block[1] / perf_tiled_crs_tensor[1] << " , "
1681  // << perf_tiled_crs_tensor[2]
1682  << std::endl ;
1683  }
1684 
1685  //------------------------------
1686 }
1687 
1688 template< class Scalar, class Device , class SparseMatOps >
1689 void performance_test_driver_poly_deg( const int nvar ,
1690  const int minp ,
1691  const int maxp ,
1692  const int nGrid ,
1693  const int nIter ,
1694  const bool test_block ,
1695  const bool symmetric )
1696 {
1697  bool do_flat_sparse =
1698  std::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1699 
1700  std::cout.precision(8);
1701 
1702  //------------------------------
1703 
1704  std::vector< std::vector<size_t> > fem_graph ;
1705  const size_t graph_length =
1706  unit_test::generate_fem_graph( nGrid , fem_graph );
1707  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1708 
1709  std::cout << std::endl
1710  << "\"#nGrid\" , "
1711  << "\"#Variable\" , "
1712  << "\"PolyDegree\" , "
1713  << "\"#Bases\" , "
1714  << "\"#TensorEntry\" , "
1715  << "\"VectorSize\" , "
1716  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1717  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1718  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1719  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1720  << "\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1721  if (do_flat_sparse)
1722  std::cout << "\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1723  << "\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1724  << "\"Lexicographic FLOPS / Crs FLOPS\" , ";
1725  std::cout << std::endl ;
1726 
1727  for ( int p = minp ; p <= maxp ; ++p ) {
1728  std::vector<int> var_degree( nvar , p );
1729 
1730  const std::vector<double> perf_crs_tensor =
1731  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1732  var_degree , nGrid , nIter , symmetric );
1733 
1734  std::vector<double> perf_lexo_sparse_3_tensor;
1735  if (do_flat_sparse) {
1736  perf_lexo_sparse_3_tensor =
1737  test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1738  }
1739 
1740  const std::vector<double> perf_original_mat_free_block =
1741  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1742  var_degree , nGrid , nIter , test_block , symmetric );
1743 
1744  std::cout << nGrid << " , "
1745  << nvar << " , "
1746  << p << " , "
1747  << perf_crs_tensor[4] << " , "
1748  << perf_crs_tensor[3] << " , "
1749  << perf_original_mat_free_block[0] << " , "
1750  << perf_original_mat_free_block[1] << " , "
1751  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1752  << perf_original_mat_free_block[2] << " , "
1753  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1754  << perf_crs_tensor[2] << " , ";
1755  if (do_flat_sparse) {
1756  std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] << " , "
1757  << perf_lexo_sparse_3_tensor[2] << " , "
1758  << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1759  }
1760 
1761 
1762  std::cout << std::endl ;
1763  }
1764 
1765  //------------------------------
1766 }
1767 
1768 template< class Scalar, class Device , class SparseMatOps >
1769 void performance_test_driver_linear( const int minvar ,
1770  const int maxvar ,
1771  const int varinc ,
1772  const int nGrid ,
1773  const int nIter ,
1774  const bool test_block ,
1775  const bool symmetric )
1776 {
1777  std::cout.precision(8);
1778 
1779  //------------------------------
1780 
1781  std::vector< std::vector<size_t> > fem_graph ;
1782  const size_t graph_length =
1783  unit_test::generate_fem_graph( nGrid , fem_graph );
1784  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1785 
1786  std::cout << std::endl
1787  << "\"#nGrid\" , "
1788  << "\"#Variable\" , "
1789  << "\"PolyDegree\" , "
1790  << "\"#Bases\" , "
1791  << "\"#TensorEntry\" , "
1792  << "\"VectorSize\" , "
1793  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1794  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1795  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1796  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1797  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1798  << "\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1799  << "\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
1800  << std::endl ;
1801 
1802  for ( int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1803  std::vector<int> var_degree( nvar , 1 );
1804 
1805  const std::vector<double> perf_crs_tensor =
1806  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1807  var_degree , nGrid , nIter , symmetric );
1808 
1809  const std::vector<double> perf_linear_sparse_3_tensor =
1810  test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1811 
1812  const std::vector<double> perf_original_mat_free_block =
1813  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1814  var_degree , nGrid , nIter , test_block , symmetric );
1815 
1816  std::cout << nGrid << " , "
1817  << nvar << " , "
1818  << 1 << " , "
1819  << perf_crs_tensor[4] << " , "
1820  << perf_crs_tensor[3] << " , "
1821  << perf_original_mat_free_block[0] << " , "
1822  << perf_original_mat_free_block[1] << " , "
1823  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1824  << perf_original_mat_free_block[2] << " , "
1825  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1826  << perf_crs_tensor[2] << " , "
1827  << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] << " , "
1828  << perf_linear_sparse_3_tensor[2] << " , "
1829  << std::endl ;
1830  }
1831 
1832  //------------------------------
1833 }
1834 
1835 template< class Scalar, class Device >
1837 
1838 //----------------------------------------------------------------------------
1839 
1840 }
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
int NumGlobalIndices(long long Row) const
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Jacobi polynomial basis.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP...> >::value >::type update(const typename Kokkos::View< XD, XP...>::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP...> &x, const typename Kokkos::View< YD, YP...>::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP...> &y, const typename Kokkos::View< ZD, ZP...>::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP...> &z)
CRS matrix of dense blocks.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(KokkosKernels::Experimental::Controls, const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
size_type size() const
A comparison functor implementing a strict weak ordering based lexographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
int ExtractGlobalRowCopy(int_type Row, int LenOfIndices, int &NumIndices, int_type *Indices) const
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)