14 #include "KokkosSparse_CrsMatrix.hpp"
15 #include "KokkosSparse_spmv.hpp"
19 #include "Sacado_mpl_range_c.hpp"
20 #include "Sacado_mpl_for_each.hpp"
21 #include "Sacado_mpl_integral_c.hpp"
24 #include "Kokkos_Timer.hpp"
// Flattens a 3-D grid coordinate (i, j, k) into a single linear index.
// The formula uses N as the extent of each dimension: k varies fastest
// (stride 1), then j (stride N), then i (stride N * N).
// NOTE(review): the function signature and braces are not visible in this
// excerpt; only the template header and the return statement are shown.
26 template<
typename IntType >
33 return k + N * ( j + N * i );
38 std::vector< std::vector<size_t> > & graph )
// Fills `graph` with one row per node of an N x N x N grid. For each node
// (i, j, k), every offset (ii, jj, kk) in {-1, 0, 1}^3 that stays inside the
// grid contributes one entry `col` to that node's row; the offset (0, 0, 0)
// passes the bounds test as well, so a row also receives an entry for the
// node itself.
// NOTE(review): the first line of the signature, the definitions of `row`,
// `col` and `total`, and the closing braces are not visible in this excerpt.
// Presumably `row` is the linear index of (i, j, k) and `col` the linear
// index of (i + ii, j + jj, k + kk) -- confirm against the full file.
40 graph.resize( N * N * N , std::vector<size_t>() );
// Visit every grid node. The loop indices are signed ints (and N is cast to
// int) because the neighbour offsets below can make i + ii etc. equal to -1.
44 for (
int i = 0 ; i < (int) N ; ++i ) {
45 for (
int j = 0 ;
j < (int) N ; ++
j ) {
46 for (
int k = 0 ; k < (int) N ; ++k ) {
// A row can hold at most 3 * 3 * 3 = 27 entries, so reserve that up front.
50 graph[row].reserve(27);
// Enumerate all 27 offset combinations.
52 for (
int ii = -1 ; ii < 2 ; ++ii ) {
53 for (
int jj = -1 ; jj < 2 ; ++jj ) {
54 for (
int kk = -1 ; kk < 2 ; ++kk ) {
// Keep only neighbours whose coordinates lie in [0, N) in all three dimensions.
55 if ( 0 <= i + ii && i + ii < (
int) N &&
56 0 <=
j + jj &&
j + jj < (
int) N &&
57 0 <= k + kk && k + kk < (
int) N ) {
60 graph[row].push_back(col);
// Add this row's entry count to the running total.
63 total += graph[row].size();
// Times a sparse matrix-vector product whose matrix values and vectors are
// views of `VectorType`, and reports the result as a 5-element vector:
//   perf[0] = fem_length (nGrid^3), perf[1] = ensemble_length,
//   perf[2] = graph_length, perf[3] = seconds per iteration,
//   perf[4] = flops / seconds per iteration (flops carries a 1e-9 factor).
// NOTE(review): most of the signature, the definitions of `VectorType`,
// `ordinal_type`, `execution_space` and `graph_length`, the initialisation
// of the views, and the body of the timed loop are not visible in this
// excerpt. `graph_length` is presumably the entry count returned when
// `fem_graph` is generated -- confirm against the full file.
69 template <
typename StorageType,
typename MultiplyTag>
74 KokkosSparse::DeviceConfig dev_config,
81 typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
83 typedef Kokkos::LayoutRight Layout;
84 typedef Kokkos::View< VectorType*, Layout, execution_space > vector_type;
85 typedef KokkosSparse::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type;
86 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
87 typedef typename matrix_type::values_type matrix_values_type;
// Host-side adjacency list for the grid; the number of rows is nGrid cubed.
// The x and y views that follow are allocated without initialisation, with
// fem_length and ensemble_length passed as their size arguments.
92 std::vector< std::vector<size_t> > fem_graph;
93 const size_t fem_length = nGrid * nGrid * nGrid;
100 vector_type(Kokkos::ViewAllocateWithoutInitializing(
"x"), fem_length, ensemble_length);
102 vector_type(Kokkos::ViewAllocateWithoutInitializing(
"y"), fem_length, ensemble_length);
// Build the CRS graph from the host adjacency list, allocate one value per
// graph entry (again uninitialised), and assemble the matrix from both.
106 matrix_graph_type matrix_graph =
107 Kokkos::create_staticcrsgraph<matrix_graph_type>(
108 std::string(
"test crs graph"), fem_graph);
109 matrix_values_type matrix_values =
110 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length, ensemble_length);
111 matrix_type matrix(
"block_matrix", fem_length, matrix_values, matrix_graph);
// Attach the caller-supplied device configuration to the matrix.
112 matrix.dev_config = dev_config;
// `array_type` views constructed from x, y and the matrix values.
// NOTE(review): presumably these expose the underlying scalar storage so it
// can be filled; their use is not visible in this excerpt.
121 typename vector_type::array_type xx( x );
122 typename vector_type::array_type yy( y );
123 typename matrix_values_type::array_type mm( matrix_values );
// Timed section: run iterCount iterations, then average the elapsed time.
136 Kokkos::Timer clock ;
137 for (
int iter = 0; iter < iterCount; ++iter) {
142 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Two flops per stored matrix entry per ensemble member (presumably one
// multiply and one add), scaled by 1e-9 so perf[4] comes out in GFLOP/s.
143 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
// Pack the measurements in the order documented at the top of this function.
145 std::vector<double> perf(5);
146 perf[0] = fem_length;
147 perf[1] = ensemble_length;
148 perf[2] = graph_length;
149 perf[3] = seconds_per_iter;
150 perf[4] = flops / seconds_per_iter;
// Scalar counterpart of the ensemble benchmark: instead of one matrix and
// vector pair with ensemble-valued entries, it allocates ensemble_length
// separate vectors and matrices of `value_type` and loops over them.
// Returns a 5-element vector laid out as:
//   perf[0] = fem_length (nGrid^3), perf[1] = ensemble_length,
//   perf[2] = graph_length, perf[3] = seconds per iteration,
//   perf[4] = flops / seconds per iteration (flops carries a 1e-9 factor).
// NOTE(review): most of the signature, the definitions of `value_type`,
// `ordinal_type`, `execution_space` and `graph_length`, the initialisation
// of the views, and the bodies of both iteration loops are not visible in
// this excerpt.
154 template <
typename ScalarType,
typename OrdinalType,
typename Device>
159 KokkosSparse::DeviceConfig dev_config)
164 typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
165 typedef Kokkos::View< value_type*, execution_space > vector_type;
166 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, device_type > matrix_type;
167 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
168 typedef typename matrix_type::values_type matrix_values_type;
// Host-side adjacency list for the grid; the number of rows is nGrid cubed.
173 std::vector< std::vector<size_t> > fem_graph;
174 const size_t fem_length = nGrid * nGrid * nGrid;
// One x and one y vector of length fem_length per ensemble member,
// allocated without initialisation.
180 std::vector<vector_type> x(ensemble_length);
181 std::vector<vector_type> y(ensemble_length);
182 for (
int e=0; e<ensemble_length; ++e) {
183 x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing(
"x"), fem_length);
184 y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing(
"y"), fem_length);
// One matrix per ensemble member. Each iteration builds its own CRS graph
// from the same host adjacency list and its own uninitialised values view.
192 std::vector<matrix_type> matrix(ensemble_length);
193 for (
int e=0; e<ensemble_length; ++e) {
194 matrix_graph_type matrix_graph =
195 Kokkos::create_staticcrsgraph<matrix_graph_type>(
196 std::string(
"test crs graph"), fem_graph);
197 matrix_values_type matrix_values =
198 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length);
199 matrix[e] = matrix_type(
"matrix", fem_length, matrix_values, matrix_graph);
// First pass over all iterations and ensemble members, before the timer is
// created. NOTE(review): presumably an untimed warm-up -- the loop body is
// not visible here.
207 for (
int iter = 0; iter < iterCount; ++iter) {
208 for (
int e=0; e<ensemble_length; ++e) {
// Timed pass with the same loop structure; elapsed time is averaged over
// iterCount afterwards.
214 Kokkos::Timer clock ;
215 for (
int iter = 0; iter < iterCount; ++iter) {
216 for (
int e=0; e<ensemble_length; ++e) {
222 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Two flops per stored matrix entry per ensemble member (presumably one
// multiply and one add), scaled by 1e-9 so perf[4] comes out in GFLOP/s.
223 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
// Pack the measurements in the order documented at the top of this function.
225 std::vector<double> perf(5);
226 perf[0] = fem_length;
227 perf[1] = ensemble_length;
228 perf[2] = graph_length;
229 perf[3] = seconds_per_iter;
230 perf[4] = flops / seconds_per_iter;
// Functor parameterised on a Storage type. Its templated call body takes the
// ensemble size from the compile-time constant ArgT::value, rebinds Storage
// to that size through apply_N, runs the scalar and the ensemble SpMV
// benchmarks, and prints one comma-separated result row to std::cout.
// NOTE(review): the struct name, its members, most of the constructor, the
// call operator's signature, the `storage_type` typedef and the argument
// lists of both benchmark calls are not visible in this excerpt.
234 template <
class Storage>
243 KokkosSparse::DeviceConfig dev_config_) :
246 template <
typename ArgT>
// Ensemble size is a compile-time constant carried by the argument type.
248 const int ensemble = ArgT::value;
249 typedef typename Storage::template apply_N<ensemble> NewStorageApply;
// Baseline: scalar benchmark.
252 const std::vector<double> perf_scalar =
253 test_scalar_spmv<Scalar,Ordinal,Device>(
// Ensemble benchmark using the rebound storage type.
256 const std::vector<double> perf_mpvector =
257 test_mpvector_spmv<storage_type>(
// Row layout, using the perf[] indices filled in by the benchmark functions:
//   nGrid, perf_scalar[0] (fem_length), perf_scalar[2] (graph_length),
//   perf_scalar[1] (ensemble_length), perf_scalar[3] (seconds per iteration),
//   scalar rate / scalar rate, scalar rate, ensemble rate / scalar rate,
//   ensemble rate.
// The scalar "speedup" divides perf_scalar[4] by itself, so it is 1 for any
// finite non-zero rate; presumably the scalar run is the reference case.
260 std::cout << nGrid <<
" , "
261 << perf_scalar[0] <<
" , "
262 << perf_scalar[2] <<
" , "
263 << perf_scalar[1] <<
" , "
264 << perf_scalar[3] <<
" , "
265 << perf_scalar[4] / perf_scalar[4] <<
" , "
266 << perf_scalar[4] <<
" , "
267 << perf_mpvector[4]/ perf_scalar[4] <<
" , "
268 << perf_mpvector[4] <<
" , "
// Prints the header line of the comma-separated results table and then
// drives the benchmark over a compile-time range of ensemble sizes built
// from entry_min, entry_max + 1 and entry_step.
// NOTE(review): most of the signature, the construction of `op` and the end
// of the header statement are not visible in this excerpt. Original line 281
// is also missing: eight header fields are visible here, while the row
// printed per ensemble size has nine values -- confirm the full header has a
// matching field (presumably for the FEM size column).
273 template <
class Storage,
int entry_min,
int entry_max,
int entry_step>
276 KokkosSparse::DeviceConfig dev_config)
// Print floating-point results with 8 digits of precision.
278 std::cout.precision(8);
// Header fields are quoted and separated by " , " to match the data rows.
279 std::cout << std::endl
280 <<
"\"Grid Size\" , "
282 <<
"\"FEM Graph Size\" , "
283 <<
"\"Ensemble Size\" , "
284 <<
"\"Scalar SpMv Time\" , "
285 <<
"\"Scalar SpMv Speedup\" , "
286 <<
"\"Scalar SpMv GFLOPS\" , "
287 <<
"\"MPVector SpMv Speedup\" , "
288 <<
"\"MPVector SpMv GFLOPS\" , "
// Compile-time integer range; the upper bound is passed as entry_max + 1,
// presumably because the range is half-open and entry_max should be included.
292 typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
// NOTE(review): presumably invokes `op` once per element of Range -- confirm
// against the Sacado for_each_no_kokkos documentation.
294 Sacado::mpl::for_each_no_kokkos<Range>
f(op);
Stokhos::StandardStorage< int, double > storage_type
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
Stokhos_MV_Multiply_Op< Stokhos::DefaultMultiply > DefaultMultiply
Kokkos::DefaultExecutionSpace execution_space
std::vector< double > test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config)
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< double > test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config, MultiplyTag tag)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(KokkosKernels::Experimental::Controls, const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)