45 #include "Kokkos_Core.hpp"
46 #include "impl/Kokkos_Timer.hpp"
62 #if defined(HAVE_MPI) && 0
75 #ifdef HAVE_STOKHOS_KOKKOSLINALG
76 #include "KokkosSparse_CrsMatrix.hpp"
77 #include "KokkosSparse_spmv.hpp"
78 #include "KokkosBlas1_update.hpp"
83 template<
typename IntType >
90 return k + N * ( j + N * i );
95 std::vector< std::vector<size_t> > & graph )
97 graph.resize( N * N * N , std::vector<size_t>() );
101 for (
int i = 0 ; i < (
int) N ; ++i ) {
102 for (
int j = 0 ;
j < (
int) N ; ++
j ) {
103 for (
int k = 0 ; k < (
int) N ; ++k ) {
107 graph[row].reserve(27);
109 for (
int ii = -1 ; ii < 2 ; ++ii ) {
110 for (
int jj = -1 ; jj < 2 ; ++jj ) {
111 for (
int kk = -1 ; kk < 2 ; ++kk ) {
112 if ( 0 <= i + ii && i + ii < (
int) N &&
113 0 <=
j + jj &&
j + jj < (
int) N &&
114 0 <= k + kk && k + kk < (
int) N ) {
117 graph[row].push_back(col);
120 total += graph[row].size();
138 template<
typename ScalarType ,
typename TensorType,
class Device >
141 const std::vector<int> & var_degree ,
143 const int iterCount ,
144 const bool symmetric )
147 typedef Kokkos::View< value_type** ,
149 Device > block_vector_type ;
154 typedef typename matrix_type::graph_type graph_type ;
168 const size_t num_KL = var_degree.
size();
169 Array< RCP<const abstract_basis_type> > bases(num_KL);
170 for (
size_t i=0; i<num_KL; i++) {
176 RCP<const product_basis_type> basis =
177 rcp(
new product_basis_type(
179 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
184 std::vector< std::vector<size_t> > graph ;
186 const size_t outer_length = nGrid * nGrid * nGrid ;
195 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
197 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
199 const size_t inner_length = matrix.block.dimension();
200 const size_t inner_length_aligned = matrix.block.aligned_dimension();
203 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
205 block_vector_type x =
206 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
207 block_vector_type y =
208 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
216 block_vector_type x0 =
217 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"),
218 inner_length_aligned , outer_length );
224 Kokkos::Impl::Timer clock ;
225 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
231 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
232 const double flops_per_block = matrix.block.tensor().num_flops();
233 const double flops = 1.0e-9*graph_length*flops_per_block;
235 std::vector<double> perf(6) ;
237 perf[0] = outer_length * inner_length ;
238 perf[1] = seconds_per_iter ;
239 perf[2] = flops / seconds_per_iter;
240 perf[3] = matrix.block.tensor().entry_count();
241 perf[4] = inner_length ;
242 perf[5] = flops_per_block;
247 template<
typename ScalarType ,
class Device >
250 const std::vector<int> & var_degree ,
252 const int iterCount ,
253 const bool symmetric )
256 typedef Kokkos::View< value_type**,
258 Device > block_vector_type ;
265 typedef typename matrix_type::graph_type graph_type ;
279 const size_t num_KL = var_degree.
size();
280 Array< RCP<const abstract_basis_type> > bases(num_KL);
281 for (
size_t i=0; i<num_KL; i++) {
287 RCP<const product_basis_type> basis =
288 rcp(
new product_basis_type(
290 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
295 std::vector< std::vector<size_t> > fem_graph ;
297 const size_t fem_length = nGrid * nGrid * nGrid ;
300 const size_t stoch_length = basis->size();
304 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), stoch_length , fem_length );
305 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), stoch_length , fem_length );
315 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
316 std::string(
"test product tensor graph") , fem_graph );
317 matrix.values = block_vector_type(
318 Kokkos::ViewAllocateWithoutInitializing(
"matrix"), matrix.block.matrix_size() , fem_graph_length );
325 Kokkos::Impl::Timer clock ;
326 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
331 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
332 const double flops_per_block = 2.0*stoch_length*stoch_length;
333 const double flops = 1e-9*fem_graph_length*flops_per_block;
335 std::vector<double> perf(6);
336 perf[0] = fem_length * stoch_length ;
337 perf[1] = seconds_per_iter;
338 perf[2] = flops / seconds_per_iter;
339 perf[3] = Cijk->num_entries();
340 perf[4] = stoch_length;
341 perf[5] = flops_per_block;
351 template<
typename ScalarType ,
class Device >
354 const std::vector<int> & var_degree ,
356 const int iterCount ,
357 const bool symmetric )
360 typedef Kokkos::View< value_type* , Device > vector_type ;
365 typedef typename matrix_type::values_type matrix_values_type;
366 typedef typename matrix_type::graph_type matrix_graph_type;
380 const size_t num_KL = var_degree.
size();
381 Array< RCP<const abstract_basis_type> > bases(num_KL);
382 for (
size_t i=0; i<num_KL; i++) {
388 RCP<const product_basis_type> basis =
389 rcp(
new product_basis_type(
391 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
396 std::vector< std::vector<size_t> > fem_graph ;
398 const size_t fem_length = nGrid * nGrid * nGrid ;
405 const size_t stoch_length = basis->size();
406 std::vector< std::vector< int > > stoch_graph( stoch_length );
407 #if defined(HAVE_MPI) && 0
413 *basis, *Cijk, comm);
414 for (
size_t i = 0 ; i < stoch_length ; ++i ) {
416 stoch_graph[i].resize(len);
424 const size_t flat_length = fem_length * stoch_length ;
426 std::vector< std::vector<size_t> > flat_graph( flat_length );
428 for (
size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
430 const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
432 for (
size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
434 const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
435 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
436 const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
438 flat_graph[iFlatRow].resize( iFlatRowNZ );
440 size_t iFlatEntry = 0 ;
442 for (
size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
444 const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
446 for (
size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
448 const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
449 const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
451 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
461 vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), flat_length );
462 vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), flat_length );
470 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
471 std::string(
"testing") , flat_graph );
473 const size_t flat_graph_length = matrix.graph.entries.extent(0);
475 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), flat_graph_length );
484 Kokkos::Impl::Timer clock ;
485 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
490 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
491 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
493 std::vector<double> perf(4);
494 perf[0] = flat_length ;
495 perf[1] = seconds_per_iter;
497 perf[3] = flat_graph_length ;
508 template<
typename ScalarType ,
class Device >
511 const std::vector<int> & var_degree ,
513 const int iterCount ,
514 const bool symmetric )
517 typedef Kokkos::View< value_type* , Device > vector_type ;
522 typedef typename matrix_type::values_type matrix_values_type;
523 typedef typename matrix_type::graph_type matrix_graph_type;
537 const size_t num_KL = var_degree.
size();
538 Array< RCP<const abstract_basis_type> > bases(num_KL);
539 for (
size_t i=0; i<num_KL; i++) {
545 RCP<const product_basis_type> basis =
546 rcp(
new product_basis_type(
548 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
553 std::vector< std::vector<size_t> > fem_graph ;
555 const size_t fem_length = nGrid * nGrid * nGrid ;
562 const size_t stoch_length = basis->size();
563 std::vector< std::vector< int > > stoch_graph( stoch_length );
564 #if defined(HAVE_MPI) && 0
570 *basis, *Cijk, comm);
571 for (
size_t i = 0 ; i < stoch_length ; ++i ) {
573 stoch_graph[i].resize(len);
581 const size_t flat_length = fem_length * stoch_length ;
583 std::vector< std::vector<size_t> > flat_graph( flat_length );
585 for (
size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
587 const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
589 for (
size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
591 const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
592 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
593 const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
595 flat_graph[iFlatRow].resize( iFlatRowNZ );
597 size_t iFlatEntry = 0 ;
599 for (
size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
601 const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
603 for (
size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
605 const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
606 const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
608 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
617 vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), flat_length );
618 vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), flat_length );
626 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string(
"testing") , flat_graph );
628 const size_t flat_graph_length = matrix.graph.entries.extent(0);
630 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), flat_graph_length );
639 Kokkos::Impl::Timer clock ;
640 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
645 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
646 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
648 std::vector<double> perf(4);
649 perf[0] = flat_length ;
650 perf[1] = seconds_per_iter;
652 perf[3] = flat_graph_length ;
656 template<
typename ScalarType ,
class Device >
659 const std::vector<int> & var_degree ,
661 const int iterCount ,
662 const bool symmetric )
665 typedef Kokkos::View< value_type** ,
667 Device > block_vector_type ;
673 typedef typename matrix_type::graph_type graph_type ;
687 const size_t num_KL = var_degree.
size();
688 Array< RCP<const abstract_basis_type> > bases(num_KL);
689 for (
size_t i=0; i<num_KL; i++) {
695 RCP<const product_basis_type> basis =
696 rcp(
new product_basis_type(
698 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
703 std::vector< std::vector<size_t> > graph ;
705 const size_t outer_length = nGrid * nGrid * nGrid ;
714 params.
set(
"Tile Size", 128);
715 params.
set(
"Max Tiles", 10000);
717 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
719 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
721 const size_t inner_length = matrix.block.dimension();
722 const size_t inner_length_aligned = matrix.block.aligned_dimension();
725 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
727 block_vector_type x =
728 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
729 block_vector_type y =
730 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
742 Kokkos::Impl::Timer clock ;
743 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
748 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
749 const double flops_per_block = matrix.block.tensor().num_flops();
750 const double flops = 1.0e-9*graph_length*flops_per_block;
755 std::vector<double> perf(6) ;
757 perf[0] = outer_length * inner_length ;
758 perf[1] = seconds_per_iter ;
759 perf[2] = flops / seconds_per_iter;
760 perf[3] = matrix.block.tensor().entry_count();
761 perf[4] = inner_length ;
762 perf[5] = flops_per_block;
767 template<
typename ScalarType ,
class Device >
770 const std::vector<int> & var_degree ,
772 const int iterCount ,
773 const bool symmetric )
776 typedef Kokkos::View< value_type** ,
778 Device > block_vector_type ;
784 typedef typename matrix_type::graph_type graph_type ;
798 const size_t num_KL = var_degree.
size();
799 Array< RCP<const abstract_basis_type> > bases(num_KL);
800 for (
size_t i=0; i<num_KL; i++) {
806 RCP<const product_basis_type> basis =
807 rcp(
new product_basis_type(
809 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
814 std::vector< std::vector<size_t> > graph ;
816 const size_t outer_length = nGrid * nGrid * nGrid ;
825 params.
set(
"Tile Size", 128);
827 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
829 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
831 const size_t inner_length = matrix.block.dimension();
832 const size_t inner_length_aligned = matrix.block.aligned_dimension();
835 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
837 block_vector_type x =
838 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
839 block_vector_type y =
840 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
852 Kokkos::Impl::Timer clock ;
853 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
858 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
859 const double flops_per_block = matrix.block.tensor().num_flops();
860 const double flops = 1.0e-9*graph_length*flops_per_block;
865 std::vector<double> perf(6) ;
867 perf[0] = outer_length * inner_length ;
868 perf[1] = seconds_per_iter ;
869 perf[2] = flops / seconds_per_iter;
870 perf[3] = matrix.block.tensor().entry_count();
871 perf[4] = inner_length ;
872 perf[5] = flops_per_block;
877 template<
typename ScalarType ,
class Device >
880 const std::vector<int> & var_degree ,
882 const int iterCount ,
883 const bool symmetric )
886 typedef Kokkos::View< value_type** ,
888 Device > block_vector_type ;
894 typedef typename matrix_type::graph_type graph_type ;
908 const size_t num_KL = var_degree.
size();
909 Array< RCP<const abstract_basis_type> > bases(num_KL);
910 for (
size_t i=0; i<num_KL; i++) {
916 RCP<const product_basis_type> basis =
917 rcp(
new product_basis_type(
919 RCP<Cijk_type> Cijk =
925 std::vector< std::vector<size_t> > graph ;
927 const size_t outer_length = nGrid * nGrid * nGrid ;
936 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
938 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
940 const size_t inner_length = matrix.block.dimension();
942 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length , graph_length );
944 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length , outer_length );
945 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length , outer_length );
957 Kokkos::Impl::Timer clock ;
958 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
963 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
964 const double flops_per_block = matrix.block.tensor().num_flops();
965 const double flops = 1.0e-9*graph_length*flops_per_block;
970 std::vector<double> perf(6) ;
972 perf[0] = outer_length * inner_length ;
973 perf[1] = seconds_per_iter ;
974 perf[2] = flops / seconds_per_iter;
975 perf[3] = matrix.block.tensor().num_non_zeros();
976 perf[4] = inner_length ;
977 perf[5] = flops_per_block;
982 template<
typename ScalarType ,
class Device >
985 const std::vector<int> & var_degree ,
987 const int iterCount ,
988 const bool symmetric )
991 typedef Kokkos::View< value_type** ,
993 Device > block_vector_type ;
999 typedef typename matrix_type::graph_type graph_type ;
1013 const size_t num_KL = var_degree.
size();
1014 Array< RCP<const abstract_basis_type> > bases(num_KL);
1015 for (
size_t i=0; i<num_KL; i++) {
1021 RCP<const product_basis_type> basis =
1022 rcp(
new product_basis_type(
1024 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1029 std::vector< std::vector<size_t> > graph ;
1031 const size_t outer_length = nGrid * nGrid * nGrid ;
1037 matrix_type matrix ;
1040 params.
set(
"Symmetric", symmetric);
1042 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1045 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
1047 const size_t inner_length = matrix.block.tensor().dimension();
1048 const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1050 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
1052 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
1053 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
1065 Kokkos::Impl::Timer clock ;
1066 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1071 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1072 const double flops_per_block = matrix.block.tensor().num_flops();
1073 const double flops = 1.0e-9*graph_length*flops_per_block;
1078 std::vector<double> perf(6) ;
1080 perf[0] = outer_length * inner_length ;
1081 perf[1] = seconds_per_iter ;
1082 perf[2] = flops / seconds_per_iter;
1083 perf[3] = matrix.block.tensor().num_non_zeros();
1084 perf[4] = inner_length ;
1085 perf[5] = flops_per_block;
1090 template<
typename ScalarType ,
class Device ,
class SparseMatOps >
1093 const std::vector<int> & var_degree ,
1095 const int iterCount ,
1096 const bool test_block ,
1097 const bool symmetric )
1111 const size_t num_KL = var_degree.
size();
1112 Array< RCP<const abstract_basis_type> > bases(num_KL);
1113 for (
size_t i=0; i<num_KL; i++) {
1119 RCP<const product_basis_type> basis =
1120 rcp(
new product_basis_type(
1122 const size_t outer_length = basis->size();
1123 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1128 typedef typename matrix_type::values_type matrix_values_type;
1129 typedef typename matrix_type::graph_type matrix_graph_type;
1134 std::vector< std::vector<size_t> > fem_graph ;
1136 const size_t inner_length = nGrid * nGrid * nGrid ;
1137 const size_t graph_length =
1142 typedef Kokkos::View<value_type*,Device>
vec_type ;
1144 std::vector<matrix_type> matrix( outer_length ) ;
1145 std::vector<vec_type> x( outer_length ) ;
1146 std::vector<vec_type> y( outer_length ) ;
1147 std::vector<vec_type> tmp( outer_length ) ;
1149 for (
size_t block=0; block<outer_length; ++block) {
1150 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string(
"testing") , fem_graph );
1152 matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length );
1154 x[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length );
1155 y[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length );
1156 tmp[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"tmp"), inner_length );
1166 Kokkos::Impl::Timer clock ;
1169 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1174 typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1175 typename Cijk_type::k_iterator k_end = Cijk->k_end();
1176 for (
typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1177 int nj = Cijk->num_j(k_it);
1179 int k = index(k_it);
1180 typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1181 typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1182 std::vector<vec_type> xx(nj), yy(nj);
1184 for (
typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1186 int j = index(j_it);
1194 for (
typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1196 typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1197 typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1198 for (
typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1200 int i = index(i_it);
1201 value_type c = value(i_it);
1213 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1214 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1215 static_cast<double>(n_add)*inner_length);
1220 std::vector<double> perf(4);
1221 perf[0] = outer_length * inner_length;
1222 perf[1] = seconds_per_iter ;
1223 perf[2] = flops/seconds_per_iter;
1229 template<
typename ScalarType ,
class Device ,
class SparseMatOps >
1232 const std::vector<int> & var_degree ,
1234 const int iterCount ,
1235 const bool test_block ,
1236 const bool symmetric )
1250 const size_t num_KL = var_degree.
size();
1251 Array< RCP<const abstract_basis_type> > bases(num_KL);
1252 for (
size_t i=0; i<num_KL; i++) {
1258 RCP<const product_basis_type> basis =
1259 rcp(
new product_basis_type(
1261 const size_t outer_length = basis->size();
1262 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1267 typedef typename matrix_type::values_type matrix_values_type;
1268 typedef typename matrix_type::graph_type matrix_graph_type;
1273 std::vector< std::vector<size_t> > fem_graph ;
1275 const size_t inner_length = nGrid * nGrid * nGrid ;
1276 const size_t graph_length =
1281 typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged>
vec_type ;
1282 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1284 std::vector<matrix_type> matrix( outer_length ) ;
1285 multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing(
"x"),
1286 inner_length, outer_length ) ;
1287 multi_vec_type y(
"y", inner_length, outer_length ) ;
1288 multi_vec_type tmp_x(
"tmp_x", inner_length, outer_length ) ;
1289 multi_vec_type tmp_y(
"tmp_y", inner_length, outer_length ) ;
1293 for (
size_t block=0; block<outer_length; ++block) {
1294 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1295 std::string(
"testing") , fem_graph );
1297 matrix[block].values = matrix_values_type(
"matrix" , graph_length );
1304 Kokkos::Impl::Timer clock ;
1307 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1310 typedef typename Cijk_type::k_iterator k_iterator;
1311 typedef typename Cijk_type::kj_iterator kj_iterator;
1312 typedef typename Cijk_type::kji_iterator kji_iterator;
1315 k_iterator k_begin = Cijk->k_begin();
1316 k_iterator k_end = Cijk->k_end();
1317 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1318 unsigned nj = Cijk->num_j(k_it);
1320 int k = index(k_it);
1321 kj_iterator j_begin = Cijk->j_begin(k_it);
1322 kj_iterator j_end = Cijk->j_end(k_it);
1323 std::vector<int> j_indices(nj);
1325 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1326 int j = index(j_it);
1327 vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1328 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1331 multi_vec_type tmp_x_view =
1332 Kokkos::subview( tmp_x, Kokkos::ALL(),
1333 std::make_pair(0u,nj));
1334 multi_vec_type tmp_y_view =
1335 Kokkos::subview( tmp_y, Kokkos::ALL(),
1336 std::make_pair(0u,nj));
1340 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1341 vec_type tmp_y_view =
1342 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1343 kji_iterator i_begin = Cijk->i_begin(j_it);
1344 kji_iterator i_end = Cijk->i_end(j_it);
1345 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1346 int i = index(i_it);
1347 value_type c = value(i_it);
1348 vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1359 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1360 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1361 static_cast<double>(n_add)*inner_length);
1366 std::vector<double> perf(4);
1367 perf[0] = outer_length * inner_length;
1368 perf[1] = seconds_per_iter ;
1369 perf[2] = flops/seconds_per_iter;
1375 #ifdef HAVE_STOKHOS_KOKKOSLINALG
1376 template<
typename ScalarType ,
class Device >
1378 test_original_matrix_free_kokkos(
1379 const std::vector<int> & var_degree ,
1381 const int iterCount ,
1382 const bool test_block ,
1383 const bool symmetric )
1397 const size_t num_KL = var_degree.
size();
1398 Array< RCP<const abstract_basis_type> > bases(num_KL);
1399 for (
size_t i=0; i<num_KL; i++) {
1405 RCP<const product_basis_type> basis =
1406 rcp(
new product_basis_type(
1407 bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1408 const size_t outer_length = basis->size();
1409 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1414 typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1415 typedef typename matrix_type::values_type matrix_values_type;
1416 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1421 std::vector< std::vector<size_t> > fem_graph ;
1423 const size_t inner_length = nGrid * nGrid * nGrid ;
1424 const size_t graph_length =
1429 typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged>
vec_type ;
1430 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1432 std::vector<matrix_type> matrix( outer_length ) ;
1433 multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing(
"x"),
1434 inner_length, outer_length ) ;
1435 multi_vec_type y(
"y", inner_length, outer_length ) ;
1436 multi_vec_type tmp_x(
"tmp_x", inner_length, outer_length ) ;
1437 multi_vec_type tmp_y(
"tmp_y", inner_length, outer_length ) ;
1441 for (
size_t block=0; block<outer_length; ++block) {
1442 matrix_graph_type matrix_graph =
1443 Kokkos::create_staticcrsgraph<matrix_graph_type>(
1444 std::string(
"test crs graph") , fem_graph );
1446 matrix_values_type matrix_values = matrix_values_type(
1447 Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length );
1449 matrix[block] = matrix_type(
"matrix", outer_length, matrix_values,
1454 Kokkos::Impl::Timer clock ;
1457 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1460 typedef typename Cijk_type::k_iterator k_iterator;
1461 typedef typename Cijk_type::kj_iterator kj_iterator;
1462 typedef typename Cijk_type::kji_iterator kji_iterator;
1465 k_iterator k_begin = Cijk->k_begin();
1466 k_iterator k_end = Cijk->k_end();
1467 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1468 unsigned nj = Cijk->num_j(k_it);
1470 int k = index(k_it);
1471 kj_iterator j_begin = Cijk->j_begin(k_it);
1472 kj_iterator j_end = Cijk->j_end(k_it);
1474 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1475 int j = index(j_it);
1476 vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1477 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1480 multi_vec_type tmp_x_view =
1481 Kokkos::subview( tmp_x, Kokkos::ALL(),
1482 std::make_pair(0u,nj));
1483 multi_vec_type tmp_y_view =
1484 Kokkos::subview( tmp_y, Kokkos::ALL(),
1485 std::make_pair(0u,nj));
1489 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1490 vec_type tmp_y_view =
1491 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1492 kji_iterator i_begin = Cijk->i_begin(j_it);
1493 kji_iterator i_end = Cijk->i_end(j_it);
1494 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1495 int i = index(i_it);
1496 value_type c = value(i_it);
1497 vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1509 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1510 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1511 static_cast<double>(n_add)*inner_length);
1516 std::vector<double> perf(4);
1517 perf[0] = outer_length * inner_length;
1518 perf[1] = seconds_per_iter ;
1519 perf[2] = flops/seconds_per_iter;
1526 template<
class Scalar,
class Device >
1532 const bool test_block ,
1533 const bool symmetric )
1538 std::vector< std::vector<size_t> > fem_graph ;
1540 const size_t fem_nonzeros =
1545 std::cout.precision(8);
1549 std::cout << std::endl <<
"\"FEM NNZ = " << fem_nonzeros <<
"\"" << std::endl;
1551 std::cout << std::endl
1553 <<
"\"#Variable\" , "
1554 <<
"\"PolyDegree\" , "
1556 <<
"\"#TensorEntry\" , "
1557 <<
"\"VectorSize\" , "
1558 <<
"\"Original-Flat MXV-Time\" , "
1559 <<
"\"Original-Flat MXV-Speedup\" , "
1560 <<
"\"Original-Flat MXV-GFLOPS\" , "
1561 <<
"\"Commuted-Flat MXV-Speedup\" , "
1562 <<
"\"Commuted-Flat MXV-GFLOPS\" , "
1563 <<
"\"Block-Diagonal MXV-Speedup\" , "
1564 <<
"\"Block-Diagonal MXV-GFLOPS\" , "
1565 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1566 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , "
1569 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1571 std::vector<int> var_degree( nvar , pdeg );
1575 const std::vector<double> perf_flat_original =
1576 test_product_flat_original_matrix<Scalar,Device>(
1577 var_degree , nGrid , nIter , symmetric );
1579 const std::vector<double> perf_flat_commuted =
1580 test_product_flat_commuted_matrix<Scalar,Device>(
1581 var_degree , nGrid , nIter , symmetric );
1583 const std::vector<double> perf_matrix =
1584 test_product_tensor_diagonal_matrix<Scalar,Device>(
1585 var_degree , nGrid , nIter , symmetric );
1587 const std::vector<double> perf_crs_tensor =
1588 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1589 var_degree , nGrid , nIter , symmetric );
1591 if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1592 perf_flat_commuted[3] != perf_flat_original[3] ) {
1593 std::cout <<
"ERROR: Original and commuted matrix sizes do not match"
1595 <<
" original size = " << perf_flat_original[0]
1596 <<
" , nonzero = " << perf_flat_original[3]
1598 <<
" commuted size = " << perf_flat_commuted[0]
1599 <<
" , nonzero = " << perf_flat_commuted[3]
1603 std::cout << nGrid <<
" , "
1606 << perf_crs_tensor[4] <<
" , "
1607 << perf_crs_tensor[3] <<
" , "
1608 << perf_flat_original[0] <<
" , "
1609 << perf_flat_original[1] <<
" , "
1610 << perf_flat_original[1] / perf_flat_original[1] <<
" , "
1611 << perf_flat_original[2] <<
" , "
1612 << perf_flat_original[1] / perf_flat_commuted[1] <<
" , "
1613 << perf_flat_commuted[2] <<
" , "
1614 << perf_flat_original[1] / perf_matrix[1] <<
" , "
1615 << perf_matrix[2] <<
" , "
1616 << perf_flat_original[1] / perf_crs_tensor[1] <<
" , "
1617 << perf_crs_tensor[2] <<
" , "
1624 template<
class Scalar,
class Device ,
class SparseMatOps >
1630 const bool test_block ,
1631 const bool symmetric )
1633 std::cout.precision(8);
1637 std::vector< std::vector<size_t> > fem_graph ;
1638 const size_t graph_length =
1640 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
1642 std::cout << std::endl
1644 <<
"\"#Variable\" , "
1645 <<
"\"PolyDegree\" , "
1647 <<
"\"#TensorEntry\" , "
1648 <<
"\"VectorSize\" , "
1649 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
1650 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1651 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1652 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1653 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , "
1660 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1661 std::vector<int> var_degree( nvar , pdeg );
1663 const std::vector<double> perf_crs_tensor =
1664 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1665 var_degree , nGrid , nIter , symmetric );
1676 std::vector<double> perf_original_mat_free_block;
1677 #if defined(HAVE_STOKHOS_KOKKOSLINALG)
1678 #if defined( KOKKOS_ENABLE_CUDA )
1679 enum { is_cuda = Kokkos::Impl::is_same<Device,Kokkos::Cuda>::value };
1681 enum { is_cuda =
false };
1684 perf_original_mat_free_block =
1685 test_original_matrix_free_kokkos<Scalar,Device>(
1686 var_degree , nGrid , nIter , test_block , symmetric );
1688 perf_original_mat_free_block =
1689 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1690 var_degree , nGrid , nIter , test_block , symmetric );
1692 perf_original_mat_free_block =
1693 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1694 var_degree , nGrid , nIter , test_block , symmetric );
1697 std::cout << nGrid <<
" , "
1700 << perf_crs_tensor[4] <<
" , "
1701 << perf_crs_tensor[3] <<
" , "
1702 << perf_original_mat_free_block[0] <<
" , "
1703 << perf_original_mat_free_block[1] <<
" , "
1704 << perf_original_mat_free_block[1] /
1705 perf_original_mat_free_block[1] <<
" , "
1706 << perf_original_mat_free_block[2] <<
" , "
1707 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , "
1708 << perf_crs_tensor[2] <<
" , "
1719 template<
class Scalar,
class Device ,
class SparseMatOps >
1725 const bool test_block ,
1726 const bool symmetric )
1728 bool do_flat_sparse =
1729 Kokkos::Impl::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1731 std::cout.precision(8);
1735 std::vector< std::vector<size_t> > fem_graph ;
1736 const size_t graph_length =
1738 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
1740 std::cout << std::endl
1742 <<
"\"#Variable\" , "
1743 <<
"\"PolyDegree\" , "
1745 <<
"\"#TensorEntry\" , "
1746 <<
"\"VectorSize\" , "
1747 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
1748 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1749 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1750 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1751 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1753 std::cout <<
"\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1754 <<
"\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1755 <<
"\"Lexicographic FLOPS / Crs FLOPS\" , ";
1756 std::cout << std::endl ;
1758 for (
int p = minp ; p <= maxp ; ++p ) {
1759 std::vector<int> var_degree( nvar , p );
1761 const std::vector<double> perf_crs_tensor =
1762 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1763 var_degree , nGrid , nIter , symmetric );
1765 std::vector<double> perf_lexo_sparse_3_tensor;
1766 if (do_flat_sparse) {
1767 perf_lexo_sparse_3_tensor =
1768 test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1771 const std::vector<double> perf_original_mat_free_block =
1772 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1773 var_degree , nGrid , nIter , test_block , symmetric );
1775 std::cout << nGrid <<
" , "
1778 << perf_crs_tensor[4] <<
" , "
1779 << perf_crs_tensor[3] <<
" , "
1780 << perf_original_mat_free_block[0] <<
" , "
1781 << perf_original_mat_free_block[1] <<
" , "
1782 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] <<
" , "
1783 << perf_original_mat_free_block[2] <<
" , "
1784 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , "
1785 << perf_crs_tensor[2] <<
" , ";
1786 if (do_flat_sparse) {
1787 std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] <<
" , "
1788 << perf_lexo_sparse_3_tensor[2] <<
" , "
1789 << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1793 std::cout << std::endl ;
1799 template<
class Scalar,
class Device ,
class SparseMatOps >
1805 const bool test_block ,
1806 const bool symmetric )
1808 std::cout.precision(8);
1812 std::vector< std::vector<size_t> > fem_graph ;
1813 const size_t graph_length =
1815 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
1817 std::cout << std::endl
1819 <<
"\"#Variable\" , "
1820 <<
"\"PolyDegree\" , "
1822 <<
"\"#TensorEntry\" , "
1823 <<
"\"VectorSize\" , "
1824 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
1825 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1826 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1827 <<
"\"Block-Crs-Tensor MXV-Speedup\" , "
1828 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , "
1829 <<
"\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1830 <<
"\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
1833 for (
int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1834 std::vector<int> var_degree( nvar , 1 );
1836 const std::vector<double> perf_crs_tensor =
1837 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1838 var_degree , nGrid , nIter , symmetric );
1840 const std::vector<double> perf_linear_sparse_3_tensor =
1841 test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1843 const std::vector<double> perf_original_mat_free_block =
1844 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1845 var_degree , nGrid , nIter , test_block , symmetric );
1847 std::cout << nGrid <<
" , "
1850 << perf_crs_tensor[4] <<
" , "
1851 << perf_crs_tensor[3] <<
" , "
1852 << perf_original_mat_free_block[0] <<
" , "
1853 << perf_original_mat_free_block[1] <<
" , "
1854 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] <<
" , "
1855 << perf_original_mat_free_block[2] <<
" , "
1856 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , "
1857 << perf_crs_tensor[2] <<
" , "
1858 << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] <<
" , "
1859 << perf_linear_sparse_3_tensor[2] <<
" , "
1866 template<
class Scalar,
class Device >
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
int NumGlobalIndices(long long Row) const
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
static scalar_type sparse_cijk_tol()
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
static scalar_type sparse_cijk_tol()
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP...> >::value >::type update(const typename Kokkos::View< XD, XP...>::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP...> &x, const typename Kokkos::View< YD, YP...>::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP...> &y, const typename Kokkos::View< ZD, ZP...>::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP...> &z)
CRS matrix of dense blocks.
A comparison functor implementing a strict weak ordering based lexographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
int ExtractGlobalRowCopy(int_type Row, int LenOfIndices, int &NumIndices, int_type *Indices) const
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)