Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestStochastic.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <utility>
42 #include <cmath>
43 #include <iostream>
44 
45 #include "Kokkos_Core.hpp"
46 #include "impl/Kokkos_Timer.hpp"
47 
48 #include "Stokhos_Update.hpp"
49 #include "Stokhos_CrsMatrix.hpp"
61 
62 #if defined(HAVE_MPI) && 0
63 #include "Epetra_MpiComm.h"
64 #else
65 #include "Epetra_SerialComm.h"
66 #endif
68 #include "Stokhos_JacobiBasis.hpp"
74 
75 #ifdef HAVE_STOKHOS_KOKKOSLINALG
76 #include "KokkosSparse_CrsMatrix.hpp"
77 #include "KokkosSparse_spmv.hpp"
78 #include "KokkosBlas1_update.hpp"
79 #endif
80 
81 namespace unit_test {
82 
83 template< typename IntType >
84 inline
85 IntType map_fem_graph_coord( const IntType & N ,
86  const IntType & i ,
87  const IntType & j ,
88  const IntType & k )
89 {
90  return k + N * ( j + N * i );
91 }
92 
93 inline
94 size_t generate_fem_graph( size_t N ,
95  std::vector< std::vector<size_t> > & graph )
96 {
97  graph.resize( N * N * N , std::vector<size_t>() );
98 
99  size_t total = 0 ;
100 
101  for ( int i = 0 ; i < (int) N ; ++i ) {
102  for ( int j = 0 ; j < (int) N ; ++j ) {
103  for ( int k = 0 ; k < (int) N ; ++k ) {
104 
105  const size_t row = map_fem_graph_coord((int)N,i,j,k);
106 
107  graph[row].reserve(27);
108 
109  for ( int ii = -1 ; ii < 2 ; ++ii ) {
110  for ( int jj = -1 ; jj < 2 ; ++jj ) {
111  for ( int kk = -1 ; kk < 2 ; ++kk ) {
112  if ( 0 <= i + ii && i + ii < (int) N &&
113  0 <= j + jj && j + jj < (int) N &&
114  0 <= k + kk && k + kk < (int) N ) {
115  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
116 
117  graph[row].push_back(col);
118  }
119  }}}
120  total += graph[row].size();
121  }}}
122 
123  return total ;
124 }
125 
126 template <typename Scalar> struct ScalarTolerances {};
127 
128 template <> struct ScalarTolerances<float> {
129  typedef float scalar_type;
130  static scalar_type sparse_cijk_tol() { return 1e-5; }
131 };
132 
133 template <> struct ScalarTolerances<double> {
134  typedef double scalar_type;
135  static scalar_type sparse_cijk_tol() { return 1e-12; }
136 };
137 
138 template< typename ScalarType , typename TensorType, class Device >
139 std::vector<double>
141  const std::vector<int> & var_degree ,
142  const int nGrid ,
143  const int iterCount ,
144  const bool symmetric )
145 {
146  typedef ScalarType value_type ;
147  typedef Kokkos::View< value_type** ,
148  Kokkos::LayoutLeft ,
149  Device > block_vector_type ;
150 
152 
154  typedef typename matrix_type::graph_type graph_type ;
155 
156  typedef ScalarType value_type ;
157  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
160  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
162 
163  using Teuchos::rcp;
164  using Teuchos::RCP;
165  using Teuchos::Array;
166 
167  // Create Stochastic Galerkin basis and expansion
168  const size_t num_KL = var_degree.size();
169  Array< RCP<const abstract_basis_type> > bases(num_KL);
170  for (size_t i=0; i<num_KL; i++) {
171  if (symmetric)
172  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
173  else
174  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
175  }
176  RCP<const product_basis_type> basis =
177  rcp(new product_basis_type(
179  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
180 
181  //------------------------------
182  // Generate graph for "FEM" box structure:
183 
184  std::vector< std::vector<size_t> > graph ;
185 
186  const size_t outer_length = nGrid * nGrid * nGrid ;
187  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
188 
189  //------------------------------
190  // Generate CRS block-tensor matrix:
191 
192  matrix_type matrix ;
193 
194  matrix.block =
195  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
196  *Cijk );
197  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
198 
199  const size_t inner_length = matrix.block.dimension();
200  const size_t inner_length_aligned = matrix.block.aligned_dimension();
201 
202  matrix.values =
203  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
204 
205  block_vector_type x =
206  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
207  block_vector_type y =
208  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
209 
210  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
211 
212  //------------------------------
213  // Generate input multivector:
214 
215  Kokkos::deep_copy( x , ScalarType(1.0) );
216  block_vector_type x0 =
217  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"),
218  inner_length_aligned , outer_length );
219  Kokkos::deep_copy( x0 , ScalarType(1.0) );
220 
221  //------------------------------
222 
223  Device::fence();
224  Kokkos::Impl::Timer clock ;
225  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
226  Kokkos::deep_copy( x, x0 ); // akin to import
227  Stokhos::multiply( matrix , x , y );
228  }
229  Device::fence();
230 
231  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
232  const double flops_per_block = matrix.block.tensor().num_flops();
233  const double flops = 1.0e-9*graph_length*flops_per_block;
234 
235  std::vector<double> perf(6) ;
236 
237  perf[0] = outer_length * inner_length ;
238  perf[1] = seconds_per_iter ;
239  perf[2] = flops / seconds_per_iter;
240  perf[3] = matrix.block.tensor().entry_count();
241  perf[4] = inner_length ;
242  perf[5] = flops_per_block;
243 
244  return perf ;
245 }
246 
247 template< typename ScalarType , class Device >
248 std::vector<double>
250  const std::vector<int> & var_degree ,
251  const int nGrid ,
252  const int iterCount ,
253  const bool symmetric )
254 {
255  typedef ScalarType value_type ;
256  typedef Kokkos::View< value_type**,
257  Kokkos::LayoutLeft ,
258  Device > block_vector_type ;
259 
260  //------------------------------
261 
263  value_type , Device > matrix_type ;
264 
265  typedef typename matrix_type::graph_type graph_type ;
266 
267  typedef ScalarType value_type ;
268  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
271  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
273 
274  using Teuchos::rcp;
275  using Teuchos::RCP;
276  using Teuchos::Array;
277 
278  // Create Stochastic Galerkin basis and expansion
279  const size_t num_KL = var_degree.size();
280  Array< RCP<const abstract_basis_type> > bases(num_KL);
281  for (size_t i=0; i<num_KL; i++) {
282  if (symmetric)
283  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
284  else
285  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
286  }
287  RCP<const product_basis_type> basis =
288  rcp(new product_basis_type(
290  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
291 
292  //------------------------------
293  // Generate FEM graph:
294 
295  std::vector< std::vector<size_t> > fem_graph ;
296 
297  const size_t fem_length = nGrid * nGrid * nGrid ;
298  const size_t fem_graph_length = unit_test::generate_fem_graph( nGrid , fem_graph );
299 
300  const size_t stoch_length = basis->size();
301 
302  //------------------------------
303 
304  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), stoch_length , fem_length );
305  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), stoch_length , fem_length );
306 
307  Kokkos::deep_copy( x , ScalarType(1.0) );
308 
309  //------------------------------
310  // Generate CRS matrix of blocks with symmetric diagonal storage
311 
312  matrix_type matrix ;
313 
314  matrix.block = Stokhos::SymmetricDiagonalSpec< Device >( stoch_length );
315  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
316  std::string("test product tensor graph") , fem_graph );
317  matrix.values = block_vector_type(
318  Kokkos::ViewAllocateWithoutInitializing("matrix"), matrix.block.matrix_size() , fem_graph_length );
319 
320  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
321 
322  //------------------------------
323 
324  Device::fence();
325  Kokkos::Impl::Timer clock ;
326  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
327  Stokhos::multiply( matrix , x , y );
328  }
329  Device::fence();
330 
331  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
332  const double flops_per_block = 2.0*stoch_length*stoch_length;
333  const double flops = 1e-9*fem_graph_length*flops_per_block;
334 
335  std::vector<double> perf(6);
336  perf[0] = fem_length * stoch_length ;
337  perf[1] = seconds_per_iter;
338  perf[2] = flops / seconds_per_iter;
339  perf[3] = Cijk->num_entries();
340  perf[4] = stoch_length;
341  perf[5] = flops_per_block;
342  return perf;
343 }
344 
345 //----------------------------------------------------------------------------
346 // Flatten to a plain CRS matrix
347 //
348 // Outer DOF == fem
349 // Inner DOF == stochastic
350 
351 template< typename ScalarType , class Device >
352 std::vector<double>
354  const std::vector<int> & var_degree ,
355  const int nGrid ,
356  const int iterCount ,
357  const bool symmetric )
358 {
359  typedef ScalarType value_type ;
360  typedef Kokkos::View< value_type* , Device > vector_type ;
361 
362  //------------------------------
363 
364  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
365  typedef typename matrix_type::values_type matrix_values_type;
366  typedef typename matrix_type::graph_type matrix_graph_type;
367 
368  typedef ScalarType value_type ;
369  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
372  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
374 
375  using Teuchos::rcp;
376  using Teuchos::RCP;
377  using Teuchos::Array;
378 
379  // Create Stochastic Galerkin basis and expansion
380  const size_t num_KL = var_degree.size();
381  Array< RCP<const abstract_basis_type> > bases(num_KL);
382  for (size_t i=0; i<num_KL; i++) {
383  if (symmetric)
384  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
385  else
386  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
387  }
388  RCP<const product_basis_type> basis =
389  rcp(new product_basis_type(
391  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
392 
393  //------------------------------
394  // Generate FEM graph:
395 
396  std::vector< std::vector<size_t> > fem_graph ;
397 
398  const size_t fem_length = nGrid * nGrid * nGrid ;
399 
400  unit_test::generate_fem_graph( nGrid , fem_graph );
401 
402  //------------------------------
403  // Stochastic graph
404 
405  const size_t stoch_length = basis->size();
406  std::vector< std::vector< int > > stoch_graph( stoch_length );
407 #if defined(HAVE_MPI) && 0
408  Epetra_MpiComm comm(MPI_COMM_WORLD);
409 #else
410  Epetra_SerialComm comm;
411 #endif
413  *basis, *Cijk, comm);
414  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
415  int len = cijk_graph->NumGlobalIndices(i);
416  stoch_graph[i].resize(len);
417  int len2;
418  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
419  }
420 
421  //------------------------------
422  // Generate flattened graph with FEM outer and stochastic inner
423 
424  const size_t flat_length = fem_length * stoch_length ;
425 
426  std::vector< std::vector<size_t> > flat_graph( flat_length );
427 
428  for ( size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
429 
430  const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
431 
432  for ( size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
433 
434  const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
435  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
436  const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
437 
438  flat_graph[iFlatRow].resize( iFlatRowNZ );
439 
440  size_t iFlatEntry = 0 ;
441 
442  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
443 
444  const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
445 
446  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
447 
448  const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
449  const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
450 
451  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
452 
453  ++iFlatEntry ;
454  }
455  }
456  }
457  }
458 
459  //------------------------------
460 
461  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
462  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
463 
464  Kokkos::deep_copy( x , ScalarType(1.0) );
465 
466  //------------------------------
467 
468  matrix_type matrix ;
469 
470  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
471  std::string("testing") , flat_graph );
472 
473  const size_t flat_graph_length = matrix.graph.entries.extent(0);
474 
475  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
476 
477  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
478 
479  //Kokkos::write_matrix_market(matrix, "flat_commuted.mm");
480 
481  //------------------------------
482 
483  Device::fence();
484  Kokkos::Impl::Timer clock ;
485  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
486  Stokhos::multiply( matrix , x , y );
487  }
488  Device::fence();
489 
490  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
491  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
492 
493  std::vector<double> perf(4);
494  perf[0] = flat_length ;
495  perf[1] = seconds_per_iter;
496  perf[2] = flops;
497  perf[3] = flat_graph_length ;
498  return perf;
499 }
500 
501 //----------------------------------------------------------------------------
502 //----------------------------------------------------------------------------
503 // Flatten to a plain CRS matrix
504 //
505 // Outer DOF == stochastic
506 // Inner DOF == fem
507 
508 template< typename ScalarType , class Device >
509 std::vector<double>
511  const std::vector<int> & var_degree ,
512  const int nGrid ,
513  const int iterCount ,
514  const bool symmetric )
515 {
516  typedef ScalarType value_type ;
517  typedef Kokkos::View< value_type* , Device > vector_type ;
518 
519  //------------------------------
520 
521  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
522  typedef typename matrix_type::values_type matrix_values_type;
523  typedef typename matrix_type::graph_type matrix_graph_type;
524 
525  typedef ScalarType value_type ;
526  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
529  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
531 
532  using Teuchos::rcp;
533  using Teuchos::RCP;
534  using Teuchos::Array;
535 
536  // Create Stochastic Galerkin basis and expansion
537  const size_t num_KL = var_degree.size();
538  Array< RCP<const abstract_basis_type> > bases(num_KL);
539  for (size_t i=0; i<num_KL; i++) {
540  if (symmetric)
541  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
542  else
543  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
544  }
545  RCP<const product_basis_type> basis =
546  rcp(new product_basis_type(
548  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
549 
550  //------------------------------
551  // Generate FEM graph:
552 
553  std::vector< std::vector<size_t> > fem_graph ;
554 
555  const size_t fem_length = nGrid * nGrid * nGrid ;
556 
557  unit_test::generate_fem_graph( nGrid , fem_graph );
558 
559  //------------------------------
560  // Stochastic graph
561 
562  const size_t stoch_length = basis->size();
563  std::vector< std::vector< int > > stoch_graph( stoch_length );
564 #if defined(HAVE_MPI) && 0
565  Epetra_MpiComm comm(MPI_COMM_WORLD);
566 #else
567  Epetra_SerialComm comm;
568 #endif
570  *basis, *Cijk, comm);
571  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
572  int len = cijk_graph->NumGlobalIndices(i);
573  stoch_graph[i].resize(len);
574  int len2;
575  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
576  }
577 
578  //------------------------------
579  // Generate flattened graph with stochastic outer and FEM inner
580 
581  const size_t flat_length = fem_length * stoch_length ;
582 
583  std::vector< std::vector<size_t> > flat_graph( flat_length );
584 
585  for ( size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
586 
587  const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
588 
589  for ( size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
590 
591  const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
592  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
593  const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
594 
595  flat_graph[iFlatRow].resize( iFlatRowNZ );
596 
597  size_t iFlatEntry = 0 ;
598 
599  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
600 
601  const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
602 
603  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
604 
605  const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
606  const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
607 
608  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
609  ++iFlatEntry ;
610  }
611  }
612  }
613  }
614 
615  //------------------------------
616 
617  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
618  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
619 
620  Kokkos::deep_copy( x , ScalarType(1.0) );
621 
622  //------------------------------
623 
624  matrix_type matrix ;
625 
626  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , flat_graph );
627 
628  const size_t flat_graph_length = matrix.graph.entries.extent(0);
629 
630  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
631 
632  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
633 
634  //Kokkos::write_matrix_market(matrix, "flat_original.mm");
635 
636  //------------------------------
637 
638  Device::fence();
639  Kokkos::Impl::Timer clock ;
640  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
641  Stokhos::multiply( matrix , x , y );
642  }
643  Device::fence();
644 
645  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
646  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
647 
648  std::vector<double> perf(4);
649  perf[0] = flat_length ;
650  perf[1] = seconds_per_iter;
651  perf[2] = flops;
652  perf[3] = flat_graph_length ;
653  return perf;
654 }
655 
656 template< typename ScalarType , class Device >
657 std::vector<double>
659  const std::vector<int> & var_degree ,
660  const int nGrid ,
661  const int iterCount ,
662  const bool symmetric )
663 {
664  typedef ScalarType value_type ;
665  typedef Kokkos::View< value_type** ,
666  Kokkos::LayoutLeft ,
667  Device > block_vector_type ;
668 
671 
673  typedef typename matrix_type::graph_type graph_type ;
674 
675  typedef ScalarType value_type ;
676  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
679  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
681 
682  using Teuchos::rcp;
683  using Teuchos::RCP;
684  using Teuchos::Array;
685 
686  // Create Stochastic Galerkin basis and expansion
687  const size_t num_KL = var_degree.size();
688  Array< RCP<const abstract_basis_type> > bases(num_KL);
689  for (size_t i=0; i<num_KL; i++) {
690  if (symmetric)
691  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
692  else
693  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
694  }
695  RCP<const product_basis_type> basis =
696  rcp(new product_basis_type(
698  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
699 
700  //------------------------------
701  // Generate graph for "FEM" box structure:
702 
703  std::vector< std::vector<size_t> > graph ;
704 
705  const size_t outer_length = nGrid * nGrid * nGrid ;
706  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
707 
708  //------------------------------
709  // Generate CRS block-tensor matrix:
710 
711  matrix_type matrix ;
712 
713  Teuchos::ParameterList params;
714  params.set("Tile Size", 128);
715  params.set("Max Tiles", 10000);
716  matrix.block =
717  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
718  params);
719  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
720 
721  const size_t inner_length = matrix.block.dimension();
722  const size_t inner_length_aligned = matrix.block.aligned_dimension();
723 
724  matrix.values =
725  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
726 
727  block_vector_type x =
728  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
729  block_vector_type y =
730  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
731 
732  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
733 
734  //------------------------------
735  // Generate input multivector:
736 
737  Kokkos::deep_copy( x , ScalarType(1.0) );
738 
739  //------------------------------
740 
741  Device::fence();
742  Kokkos::Impl::Timer clock ;
743  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
744  Stokhos::multiply( matrix , x , y );
745  }
746  Device::fence();
747 
748  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
749  const double flops_per_block = matrix.block.tensor().num_flops();
750  const double flops = 1.0e-9*graph_length*flops_per_block;
751 
752  // std::cout << "tensor: flops = " << flops
753  // << " time = " << seconds_per_iter << std::endl;
754 
755  std::vector<double> perf(6) ;
756 
757  perf[0] = outer_length * inner_length ;
758  perf[1] = seconds_per_iter ;
759  perf[2] = flops / seconds_per_iter;
760  perf[3] = matrix.block.tensor().entry_count();
761  perf[4] = inner_length ;
762  perf[5] = flops_per_block;
763 
764  return perf ;
765 }
766 
767 template< typename ScalarType , class Device >
768 std::vector<double>
770  const std::vector<int> & var_degree ,
771  const int nGrid ,
772  const int iterCount ,
773  const bool symmetric )
774 {
775  typedef ScalarType value_type ;
776  typedef Kokkos::View< value_type** ,
777  Kokkos::LayoutLeft ,
778  Device > block_vector_type ;
779 
782 
784  typedef typename matrix_type::graph_type graph_type ;
785 
786  typedef ScalarType value_type ;
787  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
790  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
792 
793  using Teuchos::rcp;
794  using Teuchos::RCP;
795  using Teuchos::Array;
796 
797  // Create Stochastic Galerkin basis and expansion
798  const size_t num_KL = var_degree.size();
799  Array< RCP<const abstract_basis_type> > bases(num_KL);
800  for (size_t i=0; i<num_KL; i++) {
801  if (symmetric)
802  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
803  else
804  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
805  }
806  RCP<const product_basis_type> basis =
807  rcp(new product_basis_type(
809  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
810 
811  //------------------------------
812  // Generate graph for "FEM" box structure:
813 
814  std::vector< std::vector<size_t> > graph ;
815 
816  const size_t outer_length = nGrid * nGrid * nGrid ;
817  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
818 
819  //------------------------------
820  // Generate CRS block-tensor matrix:
821 
822  matrix_type matrix ;
823 
824  Teuchos::ParameterList params;
825  params.set("Tile Size", 128);
826  matrix.block =
827  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
828  params);
829  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
830 
831  const size_t inner_length = matrix.block.dimension();
832  const size_t inner_length_aligned = matrix.block.aligned_dimension();
833 
834  matrix.values =
835  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
836 
837  block_vector_type x =
838  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
839  block_vector_type y =
840  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
841 
842  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
843 
844  //------------------------------
845  // Generate input multivector:
846 
847  Kokkos::deep_copy( x , ScalarType(1.0) );
848 
849  //------------------------------
850 
851  Device::fence();
852  Kokkos::Impl::Timer clock ;
853  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
854  Stokhos::multiply( matrix , x , y );
855  }
856  Device::fence();
857 
858  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
859  const double flops_per_block = matrix.block.tensor().num_flops();
860  const double flops = 1.0e-9*graph_length*flops_per_block;
861 
862  // std::cout << "tensor: flops = " << flops
863  // << " time = " << seconds_per_iter << std::endl;
864 
865  std::vector<double> perf(6) ;
866 
867  perf[0] = outer_length * inner_length ;
868  perf[1] = seconds_per_iter ;
869  perf[2] = flops / seconds_per_iter;
870  perf[3] = matrix.block.tensor().entry_count();
871  perf[4] = inner_length ;
872  perf[5] = flops_per_block;
873 
874  return perf ;
875 }
876 
877 template< typename ScalarType , class Device >
878 std::vector<double>
880  const std::vector<int> & var_degree ,
881  const int nGrid ,
882  const int iterCount ,
883  const bool symmetric )
884 {
885  typedef ScalarType value_type ;
886  typedef Kokkos::View< value_type** ,
887  Kokkos::LayoutLeft ,
888  Device > block_vector_type ;
889 
892 
894  typedef typename matrix_type::graph_type graph_type ;
895 
896  typedef ScalarType value_type ;
897  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
900  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
902 
903  using Teuchos::rcp;
904  using Teuchos::RCP;
905  using Teuchos::Array;
906 
907  // Create Stochastic Galerkin basis and expansion
908  const size_t num_KL = var_degree.size();
909  Array< RCP<const abstract_basis_type> > bases(num_KL);
910  for (size_t i=0; i<num_KL; i++) {
911  if (symmetric)
912  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
913  else
914  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
915  }
916  RCP<const product_basis_type> basis =
917  rcp(new product_basis_type(
919  RCP<Cijk_type> Cijk =
921 
922  //------------------------------
923  // Generate graph for "FEM" box structure:
924 
925  std::vector< std::vector<size_t> > graph ;
926 
927  const size_t outer_length = nGrid * nGrid * nGrid ;
928  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
929 
930  //------------------------------
931  // Generate CRS block-tensor matrix:
932 
933  matrix_type matrix ;
934 
935  matrix.block =
936  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
937  *Cijk );
938  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
939 
940  const size_t inner_length = matrix.block.dimension();
941 
942  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length , graph_length );
943 
944  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length , outer_length );
945  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length , outer_length );
946 
947  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
948 
949  //------------------------------
950  // Generate input multivector:
951 
952  Kokkos::deep_copy( x , ScalarType(1.0) );
953 
954  //------------------------------
955 
956  Device::fence();
957  Kokkos::Impl::Timer clock ;
958  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
959  Stokhos::multiply( matrix , x , y );
960  }
961  Device::fence();
962 
963  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
964  const double flops_per_block = matrix.block.tensor().num_flops();
965  const double flops = 1.0e-9*graph_length*flops_per_block;
966 
967  // std::cout << "tensor: flops = " << flops
968  // << " time = " << seconds_per_iter << std::endl;
969 
970  std::vector<double> perf(6) ;
971 
972  perf[0] = outer_length * inner_length ;
973  perf[1] = seconds_per_iter ;
974  perf[2] = flops / seconds_per_iter;
975  perf[3] = matrix.block.tensor().num_non_zeros();
976  perf[4] = inner_length ;
977  perf[5] = flops_per_block;
978 
979  return perf ;
980 }
981 
982 template< typename ScalarType , class Device >
983 std::vector<double>
985  const std::vector<int> & var_degree ,
986  const int nGrid ,
987  const int iterCount ,
988  const bool symmetric )
989 {
990  typedef ScalarType value_type ;
991  typedef Kokkos::View< value_type** ,
992  Kokkos::LayoutLeft ,
993  Device > block_vector_type ;
994 
997 
999  typedef typename matrix_type::graph_type graph_type ;
1000 
1001  typedef ScalarType value_type ;
1002  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1005  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1007 
1008  using Teuchos::rcp;
1009  using Teuchos::RCP;
1010  using Teuchos::Array;
1011 
1012  // Create Stochastic Galerkin basis and expansion
1013  const size_t num_KL = var_degree.size();
1014  Array< RCP<const abstract_basis_type> > bases(num_KL);
1015  for (size_t i=0; i<num_KL; i++) {
1016  if (symmetric)
1017  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1018  else
1019  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1020  }
1021  RCP<const product_basis_type> basis =
1022  rcp(new product_basis_type(
1024  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1025 
1026  //------------------------------
1027  // Generate graph for "FEM" box structure:
1028 
1029  std::vector< std::vector<size_t> > graph ;
1030 
1031  const size_t outer_length = nGrid * nGrid * nGrid ;
1032  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
1033 
1034  //------------------------------
1035  // Generate CRS block-tensor matrix:
1036 
1037  matrix_type matrix ;
1038 
1039  Teuchos::ParameterList params;
1040  params.set("Symmetric", symmetric);
1041  matrix.block =
1042  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1043  *Cijk,
1044  params );
1045  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
1046 
1047  const size_t inner_length = matrix.block.tensor().dimension();
1048  const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1049 
1050  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
1051 
1052  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
1053  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
1054 
1055  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
1056 
1057  //------------------------------
1058  // Generate input multivector:
1059 
1060  Kokkos::deep_copy( x , ScalarType(1.0) );
1061 
1062  //------------------------------
1063 
1064  Device::fence();
1065  Kokkos::Impl::Timer clock ;
1066  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1067  Stokhos::multiply( matrix , x , y );
1068  }
1069  Device::fence();
1070 
1071  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1072  const double flops_per_block = matrix.block.tensor().num_flops();
1073  const double flops = 1.0e-9*graph_length*flops_per_block;
1074 
1075  // std::cout << "tensor: flops = " << flops
1076  // << " time = " << seconds_per_iter << std::endl;
1077 
1078  std::vector<double> perf(6) ;
1079 
1080  perf[0] = outer_length * inner_length ;
1081  perf[1] = seconds_per_iter ;
1082  perf[2] = flops / seconds_per_iter;
1083  perf[3] = matrix.block.tensor().num_non_zeros();
1084  perf[4] = inner_length ;
1085  perf[5] = flops_per_block;
1086 
1087  return perf ;
1088 }
1089 
1090 template< typename ScalarType , class Device , class SparseMatOps >
1091 std::vector<double>
1093  const std::vector<int> & var_degree ,
1094  const int nGrid ,
1095  const int iterCount ,
1096  const bool test_block ,
1097  const bool symmetric )
1098 {
1099  typedef ScalarType value_type ;
1100  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1103  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1105 
1106  using Teuchos::rcp;
1107  using Teuchos::RCP;
1108  using Teuchos::Array;
1109 
1110  // Create Stochastic Galerkin basis and expansion
1111  const size_t num_KL = var_degree.size();
1112  Array< RCP<const abstract_basis_type> > bases(num_KL);
1113  for (size_t i=0; i<num_KL; i++) {
1114  if (symmetric)
1115  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1116  else
1117  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1118  }
1119  RCP<const product_basis_type> basis =
1120  rcp(new product_basis_type(
1122  const size_t outer_length = basis->size();
1123  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1124 
1125  //------------------------------
1126 
1127  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1128  typedef typename matrix_type::values_type matrix_values_type;
1129  typedef typename matrix_type::graph_type matrix_graph_type;
1130 
1131  //------------------------------
1132  // Generate FEM graph:
1133 
1134  std::vector< std::vector<size_t> > fem_graph ;
1135 
1136  const size_t inner_length = nGrid * nGrid * nGrid ;
1137  const size_t graph_length =
1138  unit_test::generate_fem_graph( nGrid , fem_graph );
1139 
1140  //------------------------------
1141 
1142  typedef Kokkos::View<value_type*,Device> vec_type ;
1143 
1144  std::vector<matrix_type> matrix( outer_length ) ;
1145  std::vector<vec_type> x( outer_length ) ;
1146  std::vector<vec_type> y( outer_length ) ;
1147  std::vector<vec_type> tmp( outer_length ) ;
1148 
1149  for (size_t block=0; block<outer_length; ++block) {
1150  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , fem_graph );
1151 
1152  matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1153 
1154  x[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length );
1155  y[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length );
1156  tmp[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("tmp"), inner_length );
1157 
1158  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1159 
1160  Kokkos::deep_copy( x[block] , ScalarType(1.0) );
1161  Kokkos::deep_copy( y[block] , ScalarType(1.0) );
1162  }
1163 
1164  Device::fence();
1165  SparseMatOps smo;
1166  Kokkos::Impl::Timer clock ;
1167  int n_apply = 0;
1168  int n_add = 0;
1169  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1170 
1171  // Original matrix-free multiply algorithm using a block apply
1172  n_apply = 0;
1173  n_add = 0;
1174  typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1175  typename Cijk_type::k_iterator k_end = Cijk->k_end();
1176  for (typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1177  int nj = Cijk->num_j(k_it);
1178  if (nj > 0) {
1179  int k = index(k_it);
1180  typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1181  typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1182  std::vector<vec_type> xx(nj), yy(nj);
1183  int jdx = 0;
1184  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1185  ++j_it) {
1186  int j = index(j_it);
1187  xx[jdx] = x[j];
1188  yy[jdx] = tmp[j];
1189  jdx++;
1190  }
1191  Stokhos::multiply( matrix[k] , xx , yy, smo );
1192  n_apply += nj;
1193  jdx = 0;
1194  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1195  ++j_it) {
1196  typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1197  typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1198  for (typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1199  ++i_it) {
1200  int i = index(i_it);
1201  value_type c = value(i_it);
1202  Stokhos::update( value_type(1.0) , y[i] , c , yy[jdx] );
1203  ++n_add;
1204  }
1205  jdx++;
1206  }
1207  }
1208  }
1209 
1210  }
1211  Device::fence();
1212 
1213  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1214  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1215  static_cast<double>(n_add)*inner_length);
1216 
1217  // std::cout << "mat-free: flops = " << flops
1218  // << " time = " << seconds_per_iter << std::endl;
1219 
1220  std::vector<double> perf(4);
1221  perf[0] = outer_length * inner_length;
1222  perf[1] = seconds_per_iter ;
1223  perf[2] = flops/seconds_per_iter;
1224  perf[3] = flops;
1225 
1226  return perf;
1227 }
1228 
1229 template< typename ScalarType , class Device , class SparseMatOps >
1230 std::vector<double>
1232  const std::vector<int> & var_degree ,
1233  const int nGrid ,
1234  const int iterCount ,
1235  const bool test_block ,
1236  const bool symmetric )
1237 {
1238  typedef ScalarType value_type ;
1239  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1242  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1244 
1245  using Teuchos::rcp;
1246  using Teuchos::RCP;
1247  using Teuchos::Array;
1248 
1249  // Create Stochastic Galerkin basis and expansion
1250  const size_t num_KL = var_degree.size();
1251  Array< RCP<const abstract_basis_type> > bases(num_KL);
1252  for (size_t i=0; i<num_KL; i++) {
1253  if (symmetric)
1254  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1255  else
1256  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1257  }
1258  RCP<const product_basis_type> basis =
1259  rcp(new product_basis_type(
1261  const size_t outer_length = basis->size();
1262  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1263 
1264  //------------------------------
1265 
1266  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1267  typedef typename matrix_type::values_type matrix_values_type;
1268  typedef typename matrix_type::graph_type matrix_graph_type;
1269 
1270  //------------------------------
1271  // Generate FEM graph:
1272 
1273  std::vector< std::vector<size_t> > fem_graph ;
1274 
1275  const size_t inner_length = nGrid * nGrid * nGrid ;
1276  const size_t graph_length =
1277  unit_test::generate_fem_graph( nGrid , fem_graph );
1278 
1279  //------------------------------
1280 
1281  typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged> vec_type ;
1282  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1283 
1284  std::vector<matrix_type> matrix( outer_length ) ;
1285  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1286  inner_length, outer_length ) ;
1287  multi_vec_type y("y", inner_length, outer_length ) ;
1288  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1289  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1290 
1291  Kokkos::deep_copy( x , ScalarType(1.0) );
1292 
1293  for (size_t block=0; block<outer_length; ++block) {
1294  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1295  std::string("testing") , fem_graph );
1296 
1297  matrix[block].values = matrix_values_type( "matrix" , graph_length );
1298 
1299  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1300  }
1301 
1302  Device::fence();
1303  SparseMatOps smo;
1304  Kokkos::Impl::Timer clock ;
1305  int n_apply = 0;
1306  int n_add = 0;
1307  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1308 
1309  // Original matrix-free multiply algorithm using a block apply
1310  typedef typename Cijk_type::k_iterator k_iterator;
1311  typedef typename Cijk_type::kj_iterator kj_iterator;
1312  typedef typename Cijk_type::kji_iterator kji_iterator;
1313  n_apply = 0;
1314  n_add = 0;
1315  k_iterator k_begin = Cijk->k_begin();
1316  k_iterator k_end = Cijk->k_end();
1317  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1318  unsigned nj = Cijk->num_j(k_it);
1319  if (nj > 0) {
1320  int k = index(k_it);
1321  kj_iterator j_begin = Cijk->j_begin(k_it);
1322  kj_iterator j_end = Cijk->j_end(k_it);
1323  std::vector<int> j_indices(nj);
1324  unsigned jdx = 0;
1325  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1326  int j = index(j_it);
1327  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1328  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1329  Kokkos::deep_copy(tt, xx);
1330  }
1331  multi_vec_type tmp_x_view =
1332  Kokkos::subview( tmp_x, Kokkos::ALL(),
1333  std::make_pair(0u,nj));
1334  multi_vec_type tmp_y_view =
1335  Kokkos::subview( tmp_y, Kokkos::ALL(),
1336  std::make_pair(0u,nj));
1337  Stokhos::multiply( matrix[k] , tmp_x_view , tmp_y_view, smo );
1338  n_apply += nj;
1339  jdx = 0;
1340  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1341  vec_type tmp_y_view =
1342  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1343  kji_iterator i_begin = Cijk->i_begin(j_it);
1344  kji_iterator i_end = Cijk->i_end(j_it);
1345  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1346  int i = index(i_it);
1347  value_type c = value(i_it);
1348  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1349  Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1350  ++n_add;
1351  }
1352  }
1353  }
1354  }
1355 
1356  }
1357  Device::fence();
1358 
1359  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1360  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1361  static_cast<double>(n_add)*inner_length);
1362 
1363  // std::cout << "mat-free: flops = " << flops
1364  // << " time = " << seconds_per_iter << std::endl;
1365 
1366  std::vector<double> perf(4);
1367  perf[0] = outer_length * inner_length;
1368  perf[1] = seconds_per_iter ;
1369  perf[2] = flops/seconds_per_iter;
1370  perf[3] = flops;
1371 
1372  return perf;
1373 }
1374 
1375 #ifdef HAVE_STOKHOS_KOKKOSLINALG
1376 template< typename ScalarType , class Device >
1377 std::vector<double>
1378 test_original_matrix_free_kokkos(
1379  const std::vector<int> & var_degree ,
1380  const int nGrid ,
1381  const int iterCount ,
1382  const bool test_block ,
1383  const bool symmetric )
1384 {
1385  typedef ScalarType value_type ;
1386  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1389  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1391 
1392  using Teuchos::rcp;
1393  using Teuchos::RCP;
1394  using Teuchos::Array;
1395 
1396  // Create Stochastic Galerkin basis and expansion
1397  const size_t num_KL = var_degree.size();
1398  Array< RCP<const abstract_basis_type> > bases(num_KL);
1399  for (size_t i=0; i<num_KL; i++) {
1400  if (symmetric)
1401  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1402  else
1403  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1404  }
1405  RCP<const product_basis_type> basis =
1406  rcp(new product_basis_type(
1407  bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1408  const size_t outer_length = basis->size();
1409  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1410 
1411  //------------------------------
1412 
1413  typedef int ordinal_type;
1414  typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1415  typedef typename matrix_type::values_type matrix_values_type;
1416  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1417 
1418  //------------------------------
1419  // Generate FEM graph:
1420 
1421  std::vector< std::vector<size_t> > fem_graph ;
1422 
1423  const size_t inner_length = nGrid * nGrid * nGrid ;
1424  const size_t graph_length =
1425  unit_test::generate_fem_graph( nGrid , fem_graph );
1426 
1427  //------------------------------
1428 
1429  typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged> vec_type ;
1430  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1431 
1432  std::vector<matrix_type> matrix( outer_length ) ;
1433  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1434  inner_length, outer_length ) ;
1435  multi_vec_type y( "y", inner_length, outer_length ) ;
1436  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1437  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1438 
1439  Kokkos::deep_copy( x , ScalarType(1.0) );
1440 
1441  for (size_t block=0; block<outer_length; ++block) {
1442  matrix_graph_type matrix_graph =
1443  Kokkos::create_staticcrsgraph<matrix_graph_type>(
1444  std::string("test crs graph") , fem_graph );
1445 
1446  matrix_values_type matrix_values = matrix_values_type(
1447  Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1448  Kokkos::deep_copy(matrix_values , ScalarType(1.0) );
1449  matrix[block] = matrix_type("matrix", outer_length, matrix_values,
1450  matrix_graph);
1451  }
1452 
1453  Device::fence();
1454  Kokkos::Impl::Timer clock ;
1455  int n_apply = 0;
1456  int n_add = 0;
1457  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1458 
1459  // Original matrix-free multiply algorithm using a block apply
1460  typedef typename Cijk_type::k_iterator k_iterator;
1461  typedef typename Cijk_type::kj_iterator kj_iterator;
1462  typedef typename Cijk_type::kji_iterator kji_iterator;
1463  n_apply = 0;
1464  n_add = 0;
1465  k_iterator k_begin = Cijk->k_begin();
1466  k_iterator k_end = Cijk->k_end();
1467  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1468  unsigned nj = Cijk->num_j(k_it);
1469  if (nj > 0) {
1470  int k = index(k_it);
1471  kj_iterator j_begin = Cijk->j_begin(k_it);
1472  kj_iterator j_end = Cijk->j_end(k_it);
1473  unsigned jdx = 0;
1474  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1475  int j = index(j_it);
1476  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1477  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1478  Kokkos::deep_copy(tt, xx);
1479  }
1480  multi_vec_type tmp_x_view =
1481  Kokkos::subview( tmp_x, Kokkos::ALL(),
1482  std::make_pair(0u,nj));
1483  multi_vec_type tmp_y_view =
1484  Kokkos::subview( tmp_y, Kokkos::ALL(),
1485  std::make_pair(0u,nj));
1486  KokkosSparse::spmv( "N" , value_type(1.0) , matrix[k] , tmp_x_view , value_type(0.0) , tmp_y_view );
1487  n_apply += nj;
1488  jdx = 0;
1489  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1490  vec_type tmp_y_view =
1491  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1492  kji_iterator i_begin = Cijk->i_begin(j_it);
1493  kji_iterator i_end = Cijk->i_end(j_it);
1494  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1495  int i = index(i_it);
1496  value_type c = value(i_it);
1497  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1498  //Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1499  KokkosBlas::update(value_type(1.0) , y_view, c, tmp_y_view, value_type(0.0), y_view);
1500  ++n_add;
1501  }
1502  }
1503  }
1504  }
1505 
1506  }
1507  Device::fence();
1508 
1509  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1510  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1511  static_cast<double>(n_add)*inner_length);
1512 
1513  // std::cout << "mat-free: flops = " << flops
1514  // << " time = " << seconds_per_iter << std::endl;
1515 
1516  std::vector<double> perf(4);
1517  perf[0] = outer_length * inner_length;
1518  perf[1] = seconds_per_iter ;
1519  perf[2] = flops/seconds_per_iter;
1520  perf[3] = flops;
1521 
1522  return perf;
1523 }
1524 #endif
1525 
1526 template< class Scalar, class Device >
1527 void performance_test_driver_all( const int pdeg ,
1528  const int minvar ,
1529  const int maxvar ,
1530  const int nGrid ,
1531  const int nIter ,
1532  const bool test_block ,
1533  const bool symmetric )
1534 {
1535  //------------------------------
1536  // Generate FEM graph:
1537 
1538  std::vector< std::vector<size_t> > fem_graph ;
1539 
1540  const size_t fem_nonzeros =
1541  unit_test::generate_fem_graph( nGrid , fem_graph );
1542 
1543  //------------------------------
1544 
1545  std::cout.precision(8);
1546 
1547  //------------------------------
1548 
1549  std::cout << std::endl << "\"FEM NNZ = " << fem_nonzeros << "\"" << std::endl;
1550 
1551  std::cout << std::endl
1552  << "\"#nGrid\" , "
1553  << "\"#Variable\" , "
1554  << "\"PolyDegree\" , "
1555  << "\"#Bases\" , "
1556  << "\"#TensorEntry\" , "
1557  << "\"VectorSize\" , "
1558  << "\"Original-Flat MXV-Time\" , "
1559  << "\"Original-Flat MXV-Speedup\" , "
1560  << "\"Original-Flat MXV-GFLOPS\" , "
1561  << "\"Commuted-Flat MXV-Speedup\" , "
1562  << "\"Commuted-Flat MXV-GFLOPS\" , "
1563  << "\"Block-Diagonal MXV-Speedup\" , "
1564  << "\"Block-Diagonal MXV-GFLOPS\" , "
1565  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1566  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1567  << std::endl ;
1568 
1569  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1570 
1571  std::vector<int> var_degree( nvar , pdeg );
1572 
1573  //------------------------------
1574 
1575  const std::vector<double> perf_flat_original =
1576  test_product_flat_original_matrix<Scalar,Device>(
1577  var_degree , nGrid , nIter , symmetric );
1578 
1579  const std::vector<double> perf_flat_commuted =
1580  test_product_flat_commuted_matrix<Scalar,Device>(
1581  var_degree , nGrid , nIter , symmetric );
1582 
1583  const std::vector<double> perf_matrix =
1584  test_product_tensor_diagonal_matrix<Scalar,Device>(
1585  var_degree , nGrid , nIter , symmetric );
1586 
1587  const std::vector<double> perf_crs_tensor =
1588  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1589  var_degree , nGrid , nIter , symmetric );
1590 
1591  if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1592  perf_flat_commuted[3] != perf_flat_original[3] ) {
1593  std::cout << "ERROR: Original and commuted matrix sizes do not match"
1594  << std::endl
1595  << " original size = " << perf_flat_original[0]
1596  << " , nonzero = " << perf_flat_original[3]
1597  << std::endl
1598  << " commuted size = " << perf_flat_commuted[0]
1599  << " , nonzero = " << perf_flat_commuted[3]
1600  << std::endl ;
1601  }
1602 
1603  std::cout << nGrid << " , "
1604  << nvar << " , "
1605  << pdeg << " , "
1606  << perf_crs_tensor[4] << " , "
1607  << perf_crs_tensor[3] << " , "
1608  << perf_flat_original[0] << " , "
1609  << perf_flat_original[1] << " , "
1610  << perf_flat_original[1] / perf_flat_original[1] << " , "
1611  << perf_flat_original[2] << " , "
1612  << perf_flat_original[1] / perf_flat_commuted[1] << " , "
1613  << perf_flat_commuted[2] << " , "
1614  << perf_flat_original[1] / perf_matrix[1] << " , "
1615  << perf_matrix[2] << " , "
1616  << perf_flat_original[1] / perf_crs_tensor[1] << " , "
1617  << perf_crs_tensor[2] << " , "
1618  << std::endl ;
1619  }
1620 
1621  //------------------------------
1622 }
1623 
1624 template< class Scalar, class Device , class SparseMatOps >
1625 void performance_test_driver_poly( const int pdeg ,
1626  const int minvar ,
1627  const int maxvar ,
1628  const int nGrid ,
1629  const int nIter ,
1630  const bool test_block ,
1631  const bool symmetric )
1632 {
1633  std::cout.precision(8);
1634 
1635  //------------------------------
1636 
1637  std::vector< std::vector<size_t> > fem_graph ;
1638  const size_t graph_length =
1639  unit_test::generate_fem_graph( nGrid , fem_graph );
1640  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1641 
1642  std::cout << std::endl
1643  << "\"#nGrid\" , "
1644  << "\"#Variable\" , "
1645  << "\"PolyDegree\" , "
1646  << "\"#Bases\" , "
1647  << "\"#TensorEntry\" , "
1648  << "\"VectorSize\" , "
1649  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1650  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1651  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1652  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1653  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1654  // << "\"Block-Coo-Tensor MXV-Speedup\" , "
1655  // << "\"Block-Coo-Tensor MXV-GFLOPS\" , "
1656  // << "\"Block-Tiled Crs-Tensor MXV-Speedup\" , "
1657  // << "\"Block-Tiled Crs-3-Tensor MXV-GFLOPS\" , "
1658  << std::endl ;
1659 
1660  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1661  std::vector<int> var_degree( nvar , pdeg );
1662 
1663  const std::vector<double> perf_crs_tensor =
1664  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1665  var_degree , nGrid , nIter , symmetric );
1666 
1667  // const bool Pack = Kokkos::Impl::is_same<Device,Kokkos::Cuda>::value;
1668  // const std::vector<double> perf_coo_tensor =
1669  // test_product_tensor_matrix<Scalar,Stokhos::CooProductTensor<Scalar,Device,Pack>,Device>(
1670  // var_degree , nGrid , nIter , symmetric );
1671 
1672  // const std::vector<double> perf_tiled_crs_tensor =
1673  // test_simple_tiled_product_tensor_matrix<Scalar,Device>(
1674  // var_degree , nGrid , nIter , symmetric );
1675 
1676  std::vector<double> perf_original_mat_free_block;
1677 #if defined(HAVE_STOKHOS_KOKKOSLINALG)
1678 #if defined( KOKKOS_ENABLE_CUDA )
1679  enum { is_cuda = Kokkos::Impl::is_same<Device,Kokkos::Cuda>::value };
1680 #else
1681  enum { is_cuda = false };
1682 #endif
1683  if ( is_cuda )
1684  perf_original_mat_free_block =
1685  test_original_matrix_free_kokkos<Scalar,Device>(
1686  var_degree , nGrid , nIter , test_block , symmetric );
1687  else
1688  perf_original_mat_free_block =
1689  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1690  var_degree , nGrid , nIter , test_block , symmetric );
1691 #else
1692  perf_original_mat_free_block =
1693  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1694  var_degree , nGrid , nIter , test_block , symmetric );
1695 #endif
1696 
1697  std::cout << nGrid << " , "
1698  << nvar << " , "
1699  << pdeg << " , "
1700  << perf_crs_tensor[4] << " , "
1701  << perf_crs_tensor[3] << " , "
1702  << perf_original_mat_free_block[0] << " , "
1703  << perf_original_mat_free_block[1] << " , "
1704  << perf_original_mat_free_block[1] /
1705  perf_original_mat_free_block[1] << " , "
1706  << perf_original_mat_free_block[2] << " , "
1707  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1708  << perf_crs_tensor[2] << " , "
1709  // << perf_original_mat_free_block[1] / perf_coo_tensor[1] << " , "
1710  // << perf_coo_tensor[2] << " , "
1711  // << perf_original_mat_free_block[1] / perf_tiled_crs_tensor[1] << " , "
1712  // << perf_tiled_crs_tensor[2]
1713  << std::endl ;
1714  }
1715 
1716  //------------------------------
1717 }
1718 
1719 template< class Scalar, class Device , class SparseMatOps >
1720 void performance_test_driver_poly_deg( const int nvar ,
1721  const int minp ,
1722  const int maxp ,
1723  const int nGrid ,
1724  const int nIter ,
1725  const bool test_block ,
1726  const bool symmetric )
1727 {
1728  bool do_flat_sparse =
1729  Kokkos::Impl::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1730 
1731  std::cout.precision(8);
1732 
1733  //------------------------------
1734 
1735  std::vector< std::vector<size_t> > fem_graph ;
1736  const size_t graph_length =
1737  unit_test::generate_fem_graph( nGrid , fem_graph );
1738  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1739 
1740  std::cout << std::endl
1741  << "\"#nGrid\" , "
1742  << "\"#Variable\" , "
1743  << "\"PolyDegree\" , "
1744  << "\"#Bases\" , "
1745  << "\"#TensorEntry\" , "
1746  << "\"VectorSize\" , "
1747  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1748  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1749  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1750  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1751  << "\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1752  if (do_flat_sparse)
1753  std::cout << "\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1754  << "\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1755  << "\"Lexicographic FLOPS / Crs FLOPS\" , ";
1756  std::cout << std::endl ;
1757 
1758  for ( int p = minp ; p <= maxp ; ++p ) {
1759  std::vector<int> var_degree( nvar , p );
1760 
1761  const std::vector<double> perf_crs_tensor =
1762  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1763  var_degree , nGrid , nIter , symmetric );
1764 
1765  std::vector<double> perf_lexo_sparse_3_tensor;
1766  if (do_flat_sparse) {
1767  perf_lexo_sparse_3_tensor =
1768  test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1769  }
1770 
1771  const std::vector<double> perf_original_mat_free_block =
1772  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1773  var_degree , nGrid , nIter , test_block , symmetric );
1774 
1775  std::cout << nGrid << " , "
1776  << nvar << " , "
1777  << p << " , "
1778  << perf_crs_tensor[4] << " , "
1779  << perf_crs_tensor[3] << " , "
1780  << perf_original_mat_free_block[0] << " , "
1781  << perf_original_mat_free_block[1] << " , "
1782  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1783  << perf_original_mat_free_block[2] << " , "
1784  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1785  << perf_crs_tensor[2] << " , ";
1786  if (do_flat_sparse) {
1787  std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] << " , "
1788  << perf_lexo_sparse_3_tensor[2] << " , "
1789  << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1790  }
1791 
1792 
1793  std::cout << std::endl ;
1794  }
1795 
1796  //------------------------------
1797 }
1798 
1799 template< class Scalar, class Device , class SparseMatOps >
1800 void performance_test_driver_linear( const int minvar ,
1801  const int maxvar ,
1802  const int varinc ,
1803  const int nGrid ,
1804  const int nIter ,
1805  const bool test_block ,
1806  const bool symmetric )
1807 {
1808  std::cout.precision(8);
1809 
1810  //------------------------------
1811 
1812  std::vector< std::vector<size_t> > fem_graph ;
1813  const size_t graph_length =
1814  unit_test::generate_fem_graph( nGrid , fem_graph );
1815  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1816 
1817  std::cout << std::endl
1818  << "\"#nGrid\" , "
1819  << "\"#Variable\" , "
1820  << "\"PolyDegree\" , "
1821  << "\"#Bases\" , "
1822  << "\"#TensorEntry\" , "
1823  << "\"VectorSize\" , "
1824  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1825  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1826  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1827  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1828  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1829  << "\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1830  << "\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
1831  << std::endl ;
1832 
1833  for ( int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1834  std::vector<int> var_degree( nvar , 1 );
1835 
1836  const std::vector<double> perf_crs_tensor =
1837  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1838  var_degree , nGrid , nIter , symmetric );
1839 
1840  const std::vector<double> perf_linear_sparse_3_tensor =
1841  test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1842 
1843  const std::vector<double> perf_original_mat_free_block =
1844  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1845  var_degree , nGrid , nIter , test_block , symmetric );
1846 
1847  std::cout << nGrid << " , "
1848  << nvar << " , "
1849  << 1 << " , "
1850  << perf_crs_tensor[4] << " , "
1851  << perf_crs_tensor[3] << " , "
1852  << perf_original_mat_free_block[0] << " , "
1853  << perf_original_mat_free_block[1] << " , "
1854  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1855  << perf_original_mat_free_block[2] << " , "
1856  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1857  << perf_crs_tensor[2] << " , "
1858  << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] << " , "
1859  << perf_linear_sparse_3_tensor[2] << " , "
1860  << std::endl ;
1861  }
1862 
1863  //------------------------------
1864 }
1865 
1866 template< class Scalar, class Device >
1868 
1869 //----------------------------------------------------------------------------
1870 
1871 }
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
int NumGlobalIndices(long long Row) const
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Jacobi polynomial basis.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP...> >::value >::type update(const typename Kokkos::View< XD, XP...>::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP...> &x, const typename Kokkos::View< YD, YP...>::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP...> &y, const typename Kokkos::View< ZD, ZP...>::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP...> &z)
CRS matrix of dense blocks.
size_type size() const
A comparison functor implementing a strict weak ordering based lexographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
int ExtractGlobalRowCopy(int_type Row, int LenOfIndices, int &NumIndices, int_type *Indices) const
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)