14 #include "Kokkos_Core.hpp"
15 #include "Kokkos_Timer.hpp"
31 #if defined(HAVE_MPI) && 0
44 #ifdef HAVE_STOKHOS_KOKKOSLINALG
45 #include "KokkosSparse_CrsMatrix.hpp"
46 #include "KokkosSparse_spmv.hpp"
47 #include "KokkosBlas1_update.hpp"
// Linearizes a 3-D coordinate (i, j, k) on a grid with N points per side:
// k varies fastest, then j, then i.
// NOTE(review): the function name and signature are not visible in this
// excerpt.
52 template<
typename IntType >
59 return k + N * ( j + N * i );
// Fills `graph` with one adjacency row per point of an N x N x N grid.
// For every point (i, j, k) the row collects a column for each of the 27
// offsets (ii, jj, kk) in {-1, 0, 1}^3 whose neighbour lies inside the grid,
// and `total` accumulates the size of every row.
// NOTE(review): the function name and the computation of `row` and `col` are
// not visible in this excerpt.
64 std::vector< std::vector<size_t> > & graph )
66 graph.resize( N * N * N , std::vector<size_t>() );
70 for (
int i = 0 ; i < (int) N ; ++i ) {
71 for (
int j = 0 ;
j < (int) N ; ++
j ) {
72 for (
int k = 0 ; k < (int) N ; ++k ) {
// 27 = 3*3*3, the largest number of neighbours (including the point itself)
// the offset loops below can produce.
76 graph[row].reserve(27);
78 for (
int ii = -1 ; ii < 2 ; ++ii ) {
79 for (
int jj = -1 ; jj < 2 ; ++jj ) {
80 for (
int kk = -1 ; kk < 2 ; ++kk ) {
// Keep the neighbour only if all three shifted coordinates stay in [0, N).
81 if ( 0 <= i + ii && i + ii < (
int) N &&
82 0 <=
j + jj &&
j + jj < (
int) N &&
83 0 <= k + kk && k + kk < (
int) N ) {
86 graph[row].push_back(col);
89 total += graph[row].size();
// Benchmark fragment for a block-CRS matrix whose blocks are a stochastic
// product tensor of type TensorType (the function name and several lines are
// not visible in this excerpt).
// Visible steps: one basis per entry of var_degree, a product basis and its
// triple-product tensor Cijk, a static CRS graph built from `graph`,
// uninitialized views for the matrix values, x and y, and a timed loop of
// iterCount iterations.
// Result layout (perf): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter with flops pre-scaled by 1e-9,
// [3] tensor entry_count(), [4] inner_length, [5] flops per block.
107 template<
typename ScalarType ,
typename TensorType,
class Device >
110 const std::vector<int> & var_degree ,
112 const int iterCount ,
113 const bool symmetric )
116 typedef Kokkos::View< value_type** ,
118 Device > block_vector_type ;
123 typedef typename matrix_type::graph_type graph_type ;
// One stochastic variable per entry of var_degree.
137 const size_t num_KL = var_degree.
size();
138 Array< RCP<const abstract_basis_type> > bases(num_KL);
139 for (
size_t i=0; i<num_KL; i++) {
145 RCP<const product_basis_type> basis =
146 rcp(
new product_basis_type(
148 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
153 std::vector< std::vector<size_t> > graph ;
155 const size_t outer_length = nGrid * nGrid * nGrid ;
164 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
166 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
// The views below are sized with the aligned dimension, while the reported
// sizes in perf use the unaligned inner_length.
168 const size_t inner_length = matrix.block.dimension();
169 const size_t inner_length_aligned = matrix.block.aligned_dimension();
172 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
174 block_vector_type x =
175 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
176 block_vector_type y =
177 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
// NOTE(review): x0 is allocated with the same label "x" as the view above.
185 block_vector_type x0 =
186 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"),
187 inner_length_aligned , outer_length );
193 Kokkos::Timer clock ;
194 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time of one iteration of the timed loop.
200 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
201 const double flops_per_block = matrix.block.tensor().num_flops();
202 const double flops = 1.0e-9*graph_length*flops_per_block;
204 std::vector<double> perf(6) ;
206 perf[0] = outer_length * inner_length ;
207 perf[1] = seconds_per_iter ;
208 perf[2] = flops / seconds_per_iter;
209 perf[3] = matrix.block.tensor().entry_count();
210 perf[4] = inner_length ;
211 perf[5] = flops_per_block;
// Benchmark fragment for a block matrix whose per-entry block storage size
// comes from matrix.block.matrix_size() (function name not visible in this
// excerpt; presumably the "diagonal" variant called by the driver -- confirm).
// x and y are stoch_length x fem_length views allocated without
// initialization.
// Result layout (perf): [0] fem_length*stoch_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter with flops pre-scaled by 1e-9,
// [3] Cijk->num_entries(), [4] stoch_length,
// [5] flops per block = 2*stoch_length*stoch_length.
216 template<
typename ScalarType ,
class Device >
219 const std::vector<int> & var_degree ,
221 const int iterCount ,
222 const bool symmetric )
225 typedef Kokkos::View< value_type**,
227 Device > block_vector_type ;
234 typedef typename matrix_type::graph_type graph_type ;
// One stochastic variable per entry of var_degree.
248 const size_t num_KL = var_degree.
size();
249 Array< RCP<const abstract_basis_type> > bases(num_KL);
250 for (
size_t i=0; i<num_KL; i++) {
256 RCP<const product_basis_type> basis =
257 rcp(
new product_basis_type(
259 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
264 std::vector< std::vector<size_t> > fem_graph ;
266 const size_t fem_length = nGrid * nGrid * nGrid ;
269 const size_t stoch_length = basis->size();
273 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), stoch_length , fem_length );
274 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), stoch_length , fem_length );
284 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
285 std::string(
"test product tensor graph") , fem_graph );
286 matrix.values = block_vector_type(
287 Kokkos::ViewAllocateWithoutInitializing(
"matrix"), matrix.block.matrix_size() , fem_graph_length );
294 Kokkos::Timer clock ;
295 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time of one iteration of the timed loop.
300 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
301 const double flops_per_block = 2.0*stoch_length*stoch_length;
302 const double flops = 1e-9*fem_graph_length*flops_per_block;
304 std::vector<double> perf(6);
305 perf[0] = fem_length * stoch_length ;
306 perf[1] = seconds_per_iter;
307 perf[2] = flops / seconds_per_iter;
308 perf[3] = Cijk->num_entries();
309 perf[4] = stoch_length;
310 perf[5] = flops_per_block;
// Benchmark fragment for a flattened scalar CRS matrix in which the FEM graph
// is the outer index and the stochastic graph the inner index (function name
// not visible in this excerpt):
//   flat row    = iInnerRow + iOuterRow * stoch_length
//   flat column = iInnerCol + iOuterCol * stoch_length
// Each flat row holds (FEM row nonzeros) * (stochastic row nonzeros) entries.
// perf has 4 slots: [0] flat_length, [1] seconds per iteration,
// [3] flat_graph_length; the assignment to slot [2] is not visible here.
320 template<
typename ScalarType ,
class Device >
323 const std::vector<int> & var_degree ,
325 const int iterCount ,
326 const bool symmetric )
329 typedef Kokkos::View< value_type* , Device > vector_type ;
334 typedef typename matrix_type::values_type matrix_values_type;
335 typedef typename matrix_type::graph_type matrix_graph_type;
// One stochastic variable per entry of var_degree.
349 const size_t num_KL = var_degree.
size();
350 Array< RCP<const abstract_basis_type> > bases(num_KL);
351 for (
size_t i=0; i<num_KL; i++) {
357 RCP<const product_basis_type> basis =
358 rcp(
new product_basis_type(
360 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
365 std::vector< std::vector<size_t> > fem_graph ;
367 const size_t fem_length = nGrid * nGrid * nGrid ;
374 const size_t stoch_length = basis->size();
375 std::vector< std::vector< int > > stoch_graph( stoch_length );
// The trailing `&& 0` makes this condition always false, so the branch it
// guards is never compiled.
376 #if defined(HAVE_MPI) && 0
382 *basis, *Cijk, comm);
383 for (
size_t i = 0 ; i < stoch_length ; ++i ) {
385 stoch_graph[i].resize(len);
393 const size_t flat_length = fem_length * stoch_length ;
395 std::vector< std::vector<size_t> > flat_graph( flat_length );
// Outer loop over FEM rows, inner loop over stochastic rows.
397 for (
size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
399 const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
401 for (
size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
403 const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
404 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
405 const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
407 flat_graph[iFlatRow].resize( iFlatRowNZ );
// Running write position inside the current flat row; its increment is not
// visible in this excerpt.
409 size_t iFlatEntry = 0 ;
411 for (
size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
413 const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
415 for (
size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
417 const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
418 const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
420 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
430 vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), flat_length );
431 vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), flat_length );
439 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
440 std::string(
"testing") , flat_graph );
442 const size_t flat_graph_length = matrix.graph.entries.extent(0);
444 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), flat_graph_length );
453 Kokkos::Timer clock ;
454 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
459 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Unlike the block variants, `flops` here is already a rate: two operations
// per stored entry, scaled by 1e-9 and divided by the time per iteration.
460 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
462 std::vector<double> perf(4);
463 perf[0] = flat_length ;
464 perf[1] = seconds_per_iter;
466 perf[3] = flat_graph_length ;
// Benchmark fragment for a flattened scalar CRS matrix in which the
// stochastic graph is the outer index and the FEM graph the inner index
// (function name not visible in this excerpt):
//   flat row    = iInnerRow + iOuterRow * fem_length
//   flat column = iInnerCol + iOuterCol * fem_length
// Each flat row holds (stochastic row nonzeros) * (FEM row nonzeros) entries.
// perf has 4 slots: [0] flat_length, [1] seconds per iteration,
// [3] flat_graph_length; the assignment to slot [2] is not visible here.
477 template<
typename ScalarType ,
class Device >
480 const std::vector<int> & var_degree ,
482 const int iterCount ,
483 const bool symmetric )
486 typedef Kokkos::View< value_type* , Device > vector_type ;
491 typedef typename matrix_type::values_type matrix_values_type;
492 typedef typename matrix_type::graph_type matrix_graph_type;
// One stochastic variable per entry of var_degree.
506 const size_t num_KL = var_degree.
size();
507 Array< RCP<const abstract_basis_type> > bases(num_KL);
508 for (
size_t i=0; i<num_KL; i++) {
514 RCP<const product_basis_type> basis =
515 rcp(
new product_basis_type(
517 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
522 std::vector< std::vector<size_t> > fem_graph ;
524 const size_t fem_length = nGrid * nGrid * nGrid ;
531 const size_t stoch_length = basis->size();
532 std::vector< std::vector< int > > stoch_graph( stoch_length );
// The trailing `&& 0` makes this condition always false, so the branch it
// guards is never compiled.
533 #if defined(HAVE_MPI) && 0
539 *basis, *Cijk, comm);
540 for (
size_t i = 0 ; i < stoch_length ; ++i ) {
542 stoch_graph[i].resize(len);
550 const size_t flat_length = fem_length * stoch_length ;
552 std::vector< std::vector<size_t> > flat_graph( flat_length );
// Outer loop over stochastic rows, inner loop over FEM rows.
554 for (
size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
556 const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
558 for (
size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
560 const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
561 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
562 const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
564 flat_graph[iFlatRow].resize( iFlatRowNZ );
// Running write position inside the current flat row; its increment is not
// visible in this excerpt.
566 size_t iFlatEntry = 0 ;
568 for (
size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
570 const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
572 for (
size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
574 const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
575 const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
577 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
586 vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), flat_length );
587 vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), flat_length );
595 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string(
"testing") , flat_graph );
597 const size_t flat_graph_length = matrix.graph.entries.extent(0);
599 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), flat_graph_length );
608 Kokkos::Timer clock ;
609 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
614 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// `flops` here is already a rate: two operations per stored entry, scaled by
// 1e-9 and divided by the time per iteration.
615 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
617 std::vector<double> perf(4);
618 perf[0] = flat_length ;
619 perf[1] = seconds_per_iter;
621 perf[3] = flat_graph_length ;
// Benchmark fragment for a block-CRS matrix whose tensor is created with the
// parameters "Tile Size" = 128 and "Max Tiles" = 10000 (function name and the
// definition of TensorType are not visible in this excerpt).
// Views are sized with the aligned block dimension and allocated without
// initialization.
// Result layout (perf): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter with flops pre-scaled by 1e-9,
// [3] tensor entry_count(), [4] inner_length, [5] flops per block.
625 template<
typename ScalarType ,
class Device >
628 const std::vector<int> & var_degree ,
630 const int iterCount ,
631 const bool symmetric )
634 typedef Kokkos::View< value_type** ,
636 Device > block_vector_type ;
642 typedef typename matrix_type::graph_type graph_type ;
// One stochastic variable per entry of var_degree.
656 const size_t num_KL = var_degree.
size();
657 Array< RCP<const abstract_basis_type> > bases(num_KL);
658 for (
size_t i=0; i<num_KL; i++) {
664 RCP<const product_basis_type> basis =
665 rcp(
new product_basis_type(
667 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
672 std::vector< std::vector<size_t> > graph ;
674 const size_t outer_length = nGrid * nGrid * nGrid ;
// Hard-coded tiling parameters for the tensor created below.
683 params.
set(
"Tile Size", 128);
684 params.
set(
"Max Tiles", 10000);
686 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
688 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
690 const size_t inner_length = matrix.block.dimension();
691 const size_t inner_length_aligned = matrix.block.aligned_dimension();
694 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
696 block_vector_type x =
697 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
698 block_vector_type y =
699 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
711 Kokkos::Timer clock ;
712 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time of one iteration of the timed loop.
717 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
718 const double flops_per_block = matrix.block.tensor().num_flops();
719 const double flops = 1.0e-9*graph_length*flops_per_block;
724 std::vector<double> perf(6) ;
726 perf[0] = outer_length * inner_length ;
727 perf[1] = seconds_per_iter ;
728 perf[2] = flops / seconds_per_iter;
729 perf[3] = matrix.block.tensor().entry_count();
730 perf[4] = inner_length ;
731 perf[5] = flops_per_block;
// Benchmark fragment for a block-CRS matrix whose tensor is created with the
// single visible parameter "Tile Size" = 128 (function name and the
// definition of TensorType are not visible in this excerpt).
// Views are sized with the aligned block dimension and allocated without
// initialization.
// Result layout (perf): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter with flops pre-scaled by 1e-9,
// [3] tensor entry_count(), [4] inner_length, [5] flops per block.
736 template<
typename ScalarType ,
class Device >
739 const std::vector<int> & var_degree ,
741 const int iterCount ,
742 const bool symmetric )
745 typedef Kokkos::View< value_type** ,
747 Device > block_vector_type ;
753 typedef typename matrix_type::graph_type graph_type ;
// One stochastic variable per entry of var_degree.
767 const size_t num_KL = var_degree.
size();
768 Array< RCP<const abstract_basis_type> > bases(num_KL);
769 for (
size_t i=0; i<num_KL; i++) {
775 RCP<const product_basis_type> basis =
776 rcp(
new product_basis_type(
778 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
783 std::vector< std::vector<size_t> > graph ;
785 const size_t outer_length = nGrid * nGrid * nGrid ;
// Hard-coded tile size for the tensor created below.
794 params.
set(
"Tile Size", 128);
796 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
798 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
800 const size_t inner_length = matrix.block.dimension();
801 const size_t inner_length_aligned = matrix.block.aligned_dimension();
804 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
806 block_vector_type x =
807 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
808 block_vector_type y =
809 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
821 Kokkos::Timer clock ;
822 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time of one iteration of the timed loop.
827 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
828 const double flops_per_block = matrix.block.tensor().num_flops();
829 const double flops = 1.0e-9*graph_length*flops_per_block;
834 std::vector<double> perf(6) ;
836 perf[0] = outer_length * inner_length ;
837 perf[1] = seconds_per_iter ;
838 perf[2] = flops / seconds_per_iter;
839 perf[3] = matrix.block.tensor().entry_count();
840 perf[4] = inner_length ;
841 perf[5] = flops_per_block;
// Benchmark fragment for a block-CRS matrix with a product-tensor block
// (function name and the definition of TensorType are not visible in this
// excerpt).
// Unlike the neighbouring variants, the views here are sized with the plain
// block dimension (no aligned dimension is used) and perf[3] reports
// num_non_zeros() rather than entry_count().
// Result layout (perf): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter with flops pre-scaled by 1e-9,
// [3] tensor num_non_zeros(), [4] inner_length, [5] flops per block.
846 template<
typename ScalarType ,
class Device >
849 const std::vector<int> & var_degree ,
851 const int iterCount ,
852 const bool symmetric )
855 typedef Kokkos::View< value_type** ,
857 Device > block_vector_type ;
863 typedef typename matrix_type::graph_type graph_type ;
// One stochastic variable per entry of var_degree.
877 const size_t num_KL = var_degree.
size();
878 Array< RCP<const abstract_basis_type> > bases(num_KL);
879 for (
size_t i=0; i<num_KL; i++) {
885 RCP<const product_basis_type> basis =
886 rcp(
new product_basis_type(
888 RCP<Cijk_type> Cijk =
894 std::vector< std::vector<size_t> > graph ;
896 const size_t outer_length = nGrid * nGrid * nGrid ;
905 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
907 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
909 const size_t inner_length = matrix.block.dimension();
911 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length , graph_length );
913 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length , outer_length );
914 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length , outer_length );
926 Kokkos::Timer clock ;
927 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time of one iteration of the timed loop.
932 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
933 const double flops_per_block = matrix.block.tensor().num_flops();
934 const double flops = 1.0e-9*graph_length*flops_per_block;
939 std::vector<double> perf(6) ;
941 perf[0] = outer_length * inner_length ;
942 perf[1] = seconds_per_iter ;
943 perf[2] = flops / seconds_per_iter;
944 perf[3] = matrix.block.tensor().num_non_zeros();
945 perf[4] = inner_length ;
946 perf[5] = flops_per_block;
// Benchmark fragment for a block-CRS matrix whose tensor is created with the
// "Symmetric" parameter set from the `symmetric` argument (function name and
// the definition of TensorType are not visible in this excerpt).
// Block dimensions are read from matrix.block.tensor(); views are sized with
// the aligned dimension and allocated without initialization.
// Result layout (perf): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter with flops pre-scaled by 1e-9,
// [3] tensor num_non_zeros(), [4] inner_length, [5] flops per block.
951 template<
typename ScalarType ,
class Device >
954 const std::vector<int> & var_degree ,
956 const int iterCount ,
957 const bool symmetric )
960 typedef Kokkos::View< value_type** ,
962 Device > block_vector_type ;
968 typedef typename matrix_type::graph_type graph_type ;
// One stochastic variable per entry of var_degree.
982 const size_t num_KL = var_degree.
size();
983 Array< RCP<const abstract_basis_type> > bases(num_KL);
984 for (
size_t i=0; i<num_KL; i++) {
990 RCP<const product_basis_type> basis =
991 rcp(
new product_basis_type(
993 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
998 std::vector< std::vector<size_t> > graph ;
1000 const size_t outer_length = nGrid * nGrid * nGrid ;
1006 matrix_type matrix ;
// Forwards the caller's `symmetric` flag to the tensor construction.
1009 params.
set(
"Symmetric", symmetric);
1011 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1014 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
1016 const size_t inner_length = matrix.block.tensor().dimension();
1017 const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1019 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
1021 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
1022 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
1034 Kokkos::Timer clock ;
1035 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time of one iteration of the timed loop.
1040 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1041 const double flops_per_block = matrix.block.tensor().num_flops();
1042 const double flops = 1.0e-9*graph_length*flops_per_block;
1047 std::vector<double> perf(6) ;
1049 perf[0] = outer_length * inner_length ;
1050 perf[1] = seconds_per_iter ;
1051 perf[2] = flops / seconds_per_iter;
1052 perf[3] = matrix.block.tensor().num_non_zeros();
1053 perf[4] = inner_length ;
1054 perf[5] = flops_per_block;
// Matrix-free benchmark fragment (function name not visible in this excerpt).
// Here outer_length is the stochastic basis size and inner_length the number
// of grid points (nGrid^3). One CRS matrix and one x / y / tmp vector are
// allocated per basis function; every matrix is built from the same
// fem_graph. The timed loop walks the sparse triple-product tensor Cijk
// through its k, j and i iterators.
// Result (perf, 4 slots): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter where
// flops = 1e-9 * (2*n_apply*graph_length + n_add*inner_length).
// NOTE(review): the updates of n_apply and n_add and the assignment to
// perf[3] are not visible here.
1059 template<
typename ScalarType ,
class Device ,
class SparseMatOps >
1062 const std::vector<int> & var_degree ,
1064 const int iterCount ,
1065 const bool test_block ,
1066 const bool symmetric )
// One stochastic variable per entry of var_degree.
1080 const size_t num_KL = var_degree.
size();
1081 Array< RCP<const abstract_basis_type> > bases(num_KL);
1082 for (
size_t i=0; i<num_KL; i++) {
1088 RCP<const product_basis_type> basis =
1089 rcp(
new product_basis_type(
1091 const size_t outer_length = basis->size();
1092 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1097 typedef typename matrix_type::values_type matrix_values_type;
1098 typedef typename matrix_type::graph_type matrix_graph_type;
1103 std::vector< std::vector<size_t> > fem_graph ;
1105 const size_t inner_length = nGrid * nGrid * nGrid ;
1106 const size_t graph_length =
1111 typedef Kokkos::View<value_type*,Device>
vec_type ;
1113 std::vector<matrix_type> matrix( outer_length ) ;
1114 std::vector<vec_type> x( outer_length ) ;
1115 std::vector<vec_type> y( outer_length ) ;
1116 std::vector<vec_type> tmp( outer_length ) ;
// Per-block setup: graph from fem_graph plus uninitialized value and vector
// storage.
1118 for (
size_t block=0; block<outer_length; ++block) {
1119 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string(
"testing") , fem_graph );
1121 matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length );
1123 x[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length );
1124 y[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length );
1125 tmp[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"tmp"), inner_length );
1135 Kokkos::Timer clock ;
1138 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1143 typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1144 typename Cijk_type::k_iterator k_end = Cijk->k_end();
1145 for (
typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1146 int nj = Cijk->num_j(k_it);
1148 int k = index(k_it);
1149 typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1150 typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
// Scratch lists with one slot per j entry of the current k.
1151 std::vector<vec_type> xx(nj), yy(nj);
1153 for (
typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1155 int j = index(j_it);
1163 for (
typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1165 typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1166 typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1167 for (
typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1169 int i = index(i_it);
1170 value_type c = value(i_it);
1182 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1183 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1184 static_cast<double>(n_add)*inner_length);
1189 std::vector<double> perf(4);
1190 perf[0] = outer_length * inner_length;
1191 perf[1] = seconds_per_iter ;
1192 perf[2] = flops/seconds_per_iter;
// Matrix-free benchmark fragment that stores the vectors as columns of 2-D
// LayoutLeft views (function name not visible in this excerpt; presumably
// the *_view variant called by the driver -- confirm).
// x, y, tmp_x and tmp_y are inner_length x outer_length; single columns are
// taken as unmanaged 1-D subviews. x is allocated without initialization,
// the other three use the label-only constructor.
// Result (perf, 4 slots): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter where
// flops = 1e-9 * (2*n_apply*graph_length + n_add*inner_length).
// NOTE(review): the declarations of jdx, n_apply and n_add and the
// assignment to perf[3] are not visible here.
1198 template<
typename ScalarType ,
class Device ,
class SparseMatOps >
1201 const std::vector<int> & var_degree ,
1203 const int iterCount ,
1204 const bool test_block ,
1205 const bool symmetric )
// One stochastic variable per entry of var_degree.
1219 const size_t num_KL = var_degree.
size();
1220 Array< RCP<const abstract_basis_type> > bases(num_KL);
1221 for (
size_t i=0; i<num_KL; i++) {
1227 RCP<const product_basis_type> basis =
1228 rcp(
new product_basis_type(
1230 const size_t outer_length = basis->size();
1231 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1236 typedef typename matrix_type::values_type matrix_values_type;
1237 typedef typename matrix_type::graph_type matrix_graph_type;
1242 std::vector< std::vector<size_t> > fem_graph ;
1244 const size_t inner_length = nGrid * nGrid * nGrid ;
1245 const size_t graph_length =
1250 typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged>
vec_type ;
1251 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1253 std::vector<matrix_type> matrix( outer_length ) ;
1254 multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing(
"x"),
1255 inner_length, outer_length ) ;
1256 multi_vec_type y(
"y", inner_length, outer_length ) ;
1257 multi_vec_type tmp_x(
"tmp_x", inner_length, outer_length ) ;
1258 multi_vec_type tmp_y(
"tmp_y", inner_length, outer_length ) ;
// Every block matrix is built from the same fem_graph.
1262 for (
size_t block=0; block<outer_length; ++block) {
1263 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1264 std::string(
"testing") , fem_graph );
1266 matrix[block].values = matrix_values_type(
"matrix" , graph_length );
1273 Kokkos::Timer clock ;
1276 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1279 typedef typename Cijk_type::k_iterator k_iterator;
1280 typedef typename Cijk_type::kj_iterator kj_iterator;
1281 typedef typename Cijk_type::kji_iterator kji_iterator;
1284 k_iterator k_begin = Cijk->k_begin();
1285 k_iterator k_end = Cijk->k_end();
1286 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1287 unsigned nj = Cijk->num_j(k_it);
1289 int k = index(k_it);
1290 kj_iterator j_begin = Cijk->j_begin(k_it);
1291 kj_iterator j_end = Cijk->j_end(k_it);
1292 std::vector<int> j_indices(nj);
// For each j of this k: column j of x is paired with the next free column
// (jdx) of tmp_x.
1294 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1295 int j = index(j_it);
1296 vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1297 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
// Views over the first nj columns of the scratch multi-vectors.
1300 multi_vec_type tmp_x_view =
1301 Kokkos::subview( tmp_x, Kokkos::ALL(),
1302 std::make_pair(0u,nj));
1303 multi_vec_type tmp_y_view =
1304 Kokkos::subview( tmp_y, Kokkos::ALL(),
1305 std::make_pair(0u,nj));
1309 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1310 vec_type tmp_y_view =
1311 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1312 kji_iterator i_begin = Cijk->i_begin(j_it);
1313 kji_iterator i_end = Cijk->i_end(j_it);
1314 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1315 int i = index(i_it);
1316 value_type c = value(i_it);
1317 vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1328 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1329 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1330 static_cast<double>(n_add)*inner_length);
1335 std::vector<double> perf(4);
1336 perf[0] = outer_length * inner_length;
1337 perf[1] = seconds_per_iter ;
1338 perf[2] = flops/seconds_per_iter;
1344 #ifdef HAVE_STOKHOS_KOKKOSLINALG
// Matrix-free benchmark using KokkosSparse::CrsMatrix for the per-basis
// blocks. Vectors are stored as columns of inner_length x outer_length
// LayoutLeft views; single columns are taken as unmanaged 1-D subviews.
// Result (perf, 4 slots): [0] outer_length*inner_length, [1] seconds per
// iteration, [2] flops/seconds_per_iter where
// flops = 1e-9 * (2*n_apply*graph_length + n_add*inner_length).
// NOTE(review): the declarations of jdx, n_apply and n_add and the
// assignment to perf[3] are not visible in this excerpt.
1345 template<
typename ScalarType ,
class Device >
1347 test_original_matrix_free_kokkos(
1348 const std::vector<int> & var_degree ,
1350 const int iterCount ,
1351 const bool test_block ,
1352 const bool symmetric )
// One stochastic variable per entry of var_degree.
1366 const size_t num_KL = var_degree.
size();
1367 Array< RCP<const abstract_basis_type> > bases(num_KL);
1368 for (
size_t i=0; i<num_KL; i++) {
1374 RCP<const product_basis_type> basis =
1375 rcp(
new product_basis_type(
1376 bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1377 const size_t outer_length = basis->size();
1378 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1383 typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1384 typedef typename matrix_type::values_type matrix_values_type;
1385 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1390 std::vector< std::vector<size_t> > fem_graph ;
1392 const size_t inner_length = nGrid * nGrid * nGrid ;
1393 const size_t graph_length =
1398 typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged>
vec_type ;
1399 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1401 std::vector<matrix_type> matrix( outer_length ) ;
1402 multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing(
"x"),
1403 inner_length, outer_length ) ;
1404 multi_vec_type y(
"y", inner_length, outer_length ) ;
1405 multi_vec_type tmp_x(
"tmp_x", inner_length, outer_length ) ;
1406 multi_vec_type tmp_y(
"tmp_y", inner_length, outer_length ) ;
// Every block matrix is built from the same fem_graph with uninitialized
// values.
1410 for (
size_t block=0; block<outer_length; ++block) {
1411 matrix_graph_type matrix_graph =
1412 Kokkos::create_staticcrsgraph<matrix_graph_type>(
1413 std::string(
"test crs graph") , fem_graph );
1415 matrix_values_type matrix_values = matrix_values_type(
1416 Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length );
// NOTE(review): the second constructor argument (presumably the column
// count) is outer_length, the stochastic basis size, while the vectors these
// matrices act on have inner_length rows -- confirm this is intended.
1418 matrix[block] = matrix_type(
"matrix", outer_length, matrix_values,
1423 Kokkos::Timer clock ;
1426 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1429 typedef typename Cijk_type::k_iterator k_iterator;
1430 typedef typename Cijk_type::kj_iterator kj_iterator;
1431 typedef typename Cijk_type::kji_iterator kji_iterator;
1434 k_iterator k_begin = Cijk->k_begin();
1435 k_iterator k_end = Cijk->k_end();
1436 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1437 unsigned nj = Cijk->num_j(k_it);
1439 int k = index(k_it);
1440 kj_iterator j_begin = Cijk->j_begin(k_it);
1441 kj_iterator j_end = Cijk->j_end(k_it);
// For each j of this k: column j of x is paired with the next free column
// (jdx) of tmp_x.
1443 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1444 int j = index(j_it);
1445 vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1446 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
// Views over the first nj columns of the scratch multi-vectors.
1449 multi_vec_type tmp_x_view =
1450 Kokkos::subview( tmp_x, Kokkos::ALL(),
1451 std::make_pair(0u,nj));
1452 multi_vec_type tmp_y_view =
1453 Kokkos::subview( tmp_y, Kokkos::ALL(),
1454 std::make_pair(0u,nj));
1458 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1459 vec_type tmp_y_view =
1460 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1461 kji_iterator i_begin = Cijk->i_begin(j_it);
1462 kji_iterator i_end = Cijk->i_end(j_it);
1463 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1464 int i = index(i_it);
1465 value_type c = value(i_it);
1466 vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1478 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1479 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1480 static_cast<double>(n_add)*inner_length);
1485 std::vector<double> perf(4);
1486 perf[0] = outer_length * inner_length;
1487 perf[1] = seconds_per_iter ;
1488 perf[2] = flops/seconds_per_iter;
// Driver fragment that prints a comma-separated performance table comparing
// the flat-original, flat-commuted, block-diagonal and CRS-tensor variants
// (function name not visible in this excerpt).
// For each nvar in [minvar, maxvar] every variable gets the same degree pdeg,
// the four benchmarks are run, and one row is printed. All speedups are
// relative to the flat-original time per iteration (perf_flat_original[1]).
1495 template<
class Scalar,
class Device >
1501 const bool test_block ,
1502 const bool symmetric )
1507 std::vector< std::vector<size_t> > fem_graph ;
1509 const size_t fem_nonzeros =
1514 std::cout.precision(8);
1518 std::cout << std::endl <<
"\"FEM NNZ = " << fem_nonzeros <<
"\"" << std::endl;
// Column headers of the table; each is emitted as a quoted string.
1520 std::cout << std::endl
1522 <<
"\"#Variable\" , "
1523 <<
"\"PolyDegree\" , "
1525 <<
"\"#TensorEntry\" , "
1526 <<
"\"VectorSize\" , "
1527 <<
"\"Original-Flat MXV-Time\" , "
1528 <<
"\"Original-Flat MXV-Speedup\" , "
1529 <<
"\"Original-Flat MXV-GFLOPS\" , "
1530 <<
"\"Commuted-Flat MXV-Speedup\" , "
1531 <<
"\"Commuted-Flat MXV-GFLOPS\" , "
1532 <<
"\"Block-Diagonal MXV-Speedup\" , "
1533 <<
"\"Block-Diagonal MXV-GFLOPS\" , "
1534 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1535 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , "
1538 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
// nvar variables, all with polynomial degree pdeg.
1540 std::vector<int> var_degree( nvar , pdeg );
1544 const std::vector<double> perf_flat_original =
1545 test_product_flat_original_matrix<Scalar,Device>(
1546 var_degree , nGrid , nIter , symmetric );
1548 const std::vector<double> perf_flat_commuted =
1549 test_product_flat_commuted_matrix<Scalar,Device>(
1550 var_degree , nGrid , nIter , symmetric );
1552 const std::vector<double> perf_matrix =
1553 test_product_tensor_diagonal_matrix<Scalar,Device>(
1554 var_degree , nGrid , nIter , symmetric );
1556 const std::vector<double> perf_crs_tensor =
1557 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1558 var_degree , nGrid , nIter , symmetric );
// Sanity check: slots [0] and [3] of the two flat results must agree.
1560 if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1561 perf_flat_commuted[3] != perf_flat_original[3] ) {
1562 std::cout <<
"ERROR: Original and commuted matrix sizes do not match"
1564 <<
" original size = " << perf_flat_original[0]
1565 <<
" , nonzero = " << perf_flat_original[3]
1567 <<
" commuted size = " << perf_flat_commuted[0]
1568 <<
" , nonzero = " << perf_flat_commuted[3]
1572 std::cout << nGrid <<
" , "
1575 << perf_crs_tensor[4] <<
" , "
1576 << perf_crs_tensor[3] <<
" , "
1577 << perf_flat_original[0] <<
" , "
1578 << perf_flat_original[1] <<
" , "
// Baseline speedup column: the flat-original time divided by itself.
1579 << perf_flat_original[1] / perf_flat_original[1] <<
" , "
1580 << perf_flat_original[2] <<
" , "
1581 << perf_flat_original[1] / perf_flat_commuted[1] <<
" , "
1582 << perf_flat_commuted[2] <<
" , "
1583 << perf_flat_original[1] / perf_matrix[1] <<
" , "
1584 << perf_matrix[2] <<
" , "
1585 << perf_flat_original[1] / perf_crs_tensor[1] <<
" , "
1586 << perf_crs_tensor[2] <<
" , "
// Driver fragment that prints a comma-separated table comparing the
// matrix-free block benchmark with the CRS-tensor benchmark (function name
// not visible in this excerpt).
// For each nvar in [minvar, maxvar] every variable gets the same degree pdeg.
// Speedups are relative to the matrix-free time per iteration
// (perf_original_mat_free_block[1]).
// NOTE(review): three assignments to perf_original_mat_free_block are
// visible; the preprocessor / is_cuda logic that selects between them is not.
1593 template<
class Scalar,
class Device ,
class SparseMatOps >
1599 const bool test_block ,
1600 const bool symmetric )
1602 std::cout.precision(8);
1606 std::vector< std::vector<size_t> > fem_graph ;
1607 const size_t graph_length =
1609 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
// Column headers of the table; each is emitted as a quoted string.
1611 std::cout << std::endl
1613 <<
"\"#Variable\" , "
1614 <<
"\"PolyDegree\" , "
1616 <<
"\"#TensorEntry\" , "
1617 <<
"\"VectorSize\" , "
1618 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
1619 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1620 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1621 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1622 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , "
1629 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
// nvar variables, all with polynomial degree pdeg.
1630 std::vector<int> var_degree( nvar , pdeg );
1632 const std::vector<double> perf_crs_tensor =
1633 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1634 var_degree , nGrid , nIter , symmetric );
1645 std::vector<double> perf_original_mat_free_block;
1646 #if defined(HAVE_STOKHOS_KOKKOSLINALG)
1647 #if defined( KOKKOS_ENABLE_CUDA )
// Compile-time flag: true only when Device is Kokkos::Cuda.
1648 enum { is_cuda = std::is_same<Device,Kokkos::Cuda>::value };
1650 enum { is_cuda =
false };
1653 perf_original_mat_free_block =
1654 test_original_matrix_free_kokkos<Scalar,Device>(
1655 var_degree , nGrid , nIter , test_block , symmetric );
1657 perf_original_mat_free_block =
1658 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1659 var_degree , nGrid , nIter , test_block , symmetric );
1661 perf_original_mat_free_block =
1662 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1663 var_degree , nGrid , nIter , test_block , symmetric );
1666 std::cout << nGrid <<
" , "
1669 << perf_crs_tensor[4] <<
" , "
1670 << perf_crs_tensor[3] <<
" , "
1671 << perf_original_mat_free_block[0] <<
" , "
1672 << perf_original_mat_free_block[1] <<
" , "
// Baseline speedup column: the matrix-free time divided by itself.
1673 << perf_original_mat_free_block[1] /
1674 perf_original_mat_free_block[1] <<
" , "
1675 << perf_original_mat_free_block[2] <<
" , "
1676 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , "
1677 << perf_crs_tensor[2] <<
" , "
// Performance driver: prints one result row per polynomial degree p in
// [minp, maxp], with nvar stochastic variables all at degree p.
// NOTE(review): this listing omits several source lines (the function name and
// leading parameters, the expression that initialises graph_length, some
// header/row columns and the closing braces). The symbol index at the end of
// this file lists a matching routine as performance_test_driver_poly_deg(nvar,
// minp, maxp, nGrid, nIter, test_block, symmetric) -- confirm against the
// complete source.
1688 template<
class Scalar,
class Device ,
class SparseMatOps >
1694 const bool test_block ,
1695 const bool symmetric )
// The lexicographic sparse-3-tensor benchmark is only run when Device's
// memory space is Kokkos::HostSpace.
1697 bool do_flat_sparse =
1698 std::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
// Stream precision of 8 for the numeric columns printed below.
1700 std::cout.precision(8);
// Per-row adjacency lists of the FEM graph; graph_length is reported as the
// FEM nonzero count. NOTE(review): its initialiser is not visible here.
1704 std::vector< std::vector<size_t> > fem_graph ;
1705 const size_t graph_length =
1707 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
// Quoted, comma-separated column titles for the result table.
1709 std::cout << std::endl
1711 <<
"\"#Variable\" , "
1712 <<
"\"PolyDegree\" , "
1714 <<
"\"#TensorEntry\" , "
1715 <<
"\"VectorSize\" , "
1716 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
1717 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1718 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1719 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1720 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , ";
// Extra column titles for the lexicographic tensor results.
// NOTE(review): the line just before this statement is missing from the
// listing; it may guard these titles with do_flat_sparse -- confirm.
1722 std::cout <<
"\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1723 <<
"\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1724 <<
"\"Lexicographic FLOPS / Crs FLOPS\" , ";
1725 std::cout << std::endl ;
// Sweep the polynomial degree; all nvar entries of var_degree are set to p.
1727 for (
int p = minp ; p <= maxp ; ++p ) {
1728 std::vector<int> var_degree( nvar , p );
// Benchmark the block matrix built on Stokhos::CrsProductTensor.
1730 const std::vector<double> perf_crs_tensor =
1731 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1732 var_degree , nGrid , nIter , symmetric );
// Stays empty unless do_flat_sparse is set; it is only indexed under the same
// flag when the row is printed.
1734 std::vector<double> perf_lexo_sparse_3_tensor;
1735 if (do_flat_sparse) {
1736 perf_lexo_sparse_3_tensor =
1737 test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
// Baseline ("original matrix-free") benchmark used as the speedup reference.
1740 const std::vector<double> perf_original_mat_free_block =
1741 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1742 var_degree , nGrid , nIter , test_block , symmetric );
// One comma-separated result row. Each speedup column is the baseline's [1]
// entry divided by the compared kernel's [1] entry; the baseline is also
// divided by itself so that it fills its own "Speedup" column.
// NOTE(review): [1] is presumably elapsed time and [2] GFLOPS, judging by the
// column titles -- confirm against the functions that build these vectors.
1744 std::cout << nGrid <<
" , "
1747 << perf_crs_tensor[4] <<
" , "
1748 << perf_crs_tensor[3] <<
" , "
1749 << perf_original_mat_free_block[0] <<
" , "
1750 << perf_original_mat_free_block[1] <<
" , "
1751 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] <<
" , "
1752 << perf_original_mat_free_block[2] <<
" , "
1753 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , "
1754 << perf_crs_tensor[2] <<
" , ";
// Lexicographic columns: speedup vs. baseline, the tensor's [2] entry, and the
// ratio of the two kernels' [5] entries (titled "Lexicographic FLOPS / Crs
// FLOPS" in the header).
1755 if (do_flat_sparse) {
1756 std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] <<
" , "
1757 << perf_lexo_sparse_3_tensor[2] <<
" , "
1758 << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1762 std::cout << std::endl ;
// Performance driver for degree-1 (linear) expansions: prints one result row
// per stochastic-variable count from minvar to maxvar in steps of varinc.
// NOTE(review): this listing omits several source lines (the function name and
// leading parameters, the expression that initialises graph_length, some
// header/row columns and the closing braces). The symbol index at the end of
// this file lists a matching routine as performance_test_driver_linear(minvar,
// maxvar, varinc, nGrid, nIter, test_block, symmetric) -- confirm against the
// complete source.
1768 template<
class Scalar,
class Device ,
class SparseMatOps >
1774 const bool test_block ,
1775 const bool symmetric )
// Stream precision of 8 for the numeric columns printed below.
1777 std::cout.precision(8);
// Per-row adjacency lists of the FEM graph; graph_length is reported as the
// FEM nonzero count. NOTE(review): its initialiser is not visible here.
1781 std::vector< std::vector<size_t> > fem_graph ;
1782 const size_t graph_length =
1784 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
// Quoted, comma-separated column titles for the result table.
1786 std::cout << std::endl
1788 <<
"\"#Variable\" , "
1789 <<
"\"PolyDegree\" , "
1791 <<
"\"#TensorEntry\" , "
1792 <<
"\"VectorSize\" , "
1793 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
1794 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1795 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1796 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1797 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , "
1798 <<
"\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1799 <<
"\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
// Sweep the number of stochastic variables; every entry of var_degree is 1.
// NOTE(review): a non-positive varinc would keep this loop from terminating;
// nothing visible here validates it.
1802 for (
int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1803 std::vector<int> var_degree( nvar , 1 );
// Benchmark the block matrix built on Stokhos::CrsProductTensor.
1805 const std::vector<double> perf_crs_tensor =
1806 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1807 var_degree , nGrid , nIter , symmetric );
// Benchmark the linear sparse-3-tensor variant on the same problem.
1809 const std::vector<double> perf_linear_sparse_3_tensor =
1810 test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
// Baseline ("original matrix-free") benchmark used as the speedup reference.
1812 const std::vector<double> perf_original_mat_free_block =
1813 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1814 var_degree , nGrid , nIter , test_block , symmetric );
// One comma-separated result row. Each speedup column is the baseline's [1]
// entry divided by the compared kernel's [1] entry; the baseline is also
// divided by itself so that it fills its own "Speedup" column.
// NOTE(review): [1] is presumably elapsed time and [2] GFLOPS, judging by the
// column titles -- confirm against the functions that build these vectors.
1816 std::cout << nGrid <<
" , "
1819 << perf_crs_tensor[4] <<
" , "
1820 << perf_crs_tensor[3] <<
" , "
1821 << perf_original_mat_free_block[0] <<
" , "
1822 << perf_original_mat_free_block[1] <<
" , "
1823 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] <<
" , "
1824 << perf_original_mat_free_block[2] <<
" , "
1825 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , "
1826 << perf_crs_tensor[2] <<
" , "
1827 << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] <<
" , "
1828 << perf_linear_sparse_3_tensor[2] <<
" , "
1835 template<
class Scalar,
class Device >
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
int NumGlobalIndices(long long Row) const
Data structure storing a sparse 3-tensor C(i,j,k) in a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static scalar_type sparse_cijk_tol()
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
static scalar_type sparse_cijk_tol()
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP...> >::value >::type update(const typename Kokkos::View< XD, XP...>::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP...> &x, const typename Kokkos::View< YD, YP...>::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP...> &y, const typename Kokkos::View< ZD, ZP...>::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP...> &z)
CRS matrix of dense blocks.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(KokkosKernels::Experimental::Controls, const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
A comparison functor implementing a strict weak ordering based on lexicographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
int ExtractGlobalRowCopy(int_type Row, int LenOfIndices, int &NumIndices, int_type *Indices) const
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)