44 #include "KokkosSparse_CrsMatrix.hpp"
45 #include "KokkosSparse_spmv.hpp"
49 #include "impl/Kokkos_Timer.hpp"
// Flattens a 3-D grid coordinate (i, j, k) on a grid of extent N per
// dimension into a single linear index. k has stride 1, j has stride N and
// i has stride N*N, so k varies fastest.
51 template<
typename IntType >
58 return k + N * ( j + N * i );
// Fills `graph` with one adjacency list per node of an N x N x N grid.
// For every node (i, j, k) the offsets (ii, jj, kk) in {-1, 0, 1}^3 are
// visited and the neighbor is recorded when it lies inside the grid.
// NOTE(review): the computation of `row` and `col` and the declaration and
// final use of `total` are not visible here; `row`/`col` are presumably the
// linearized indices of (i, j, k) and (i+ii, j+jj, k+kk) -- confirm.
63 std::vector< std::vector<size_t> > & graph )
// One (initially empty) adjacency list per grid node.
65 graph.resize( N * N * N , std::vector<size_t>() );
// Loop counters are signed ints so that the i + ii style sums below can be
// compared against 0 without unsigned wrap-around.
69 for (
int i = 0 ; i < (
int) N ; ++i ) {
70 for (
int j = 0 ;
j < (
int) N ; ++
j ) {
71 for (
int k = 0 ; k < (
int) N ; ++k ) {
// A 3x3x3 neighborhood holds at most 27 entries.
75 graph[row].reserve(27);
77 for (
int ii = -1 ; ii < 2 ; ++ii ) {
78 for (
int jj = -1 ; jj < 2 ; ++jj ) {
79 for (
int kk = -1 ; kk < 2 ; ++kk ) {
// Keep only neighbors whose three coordinates all fall in [0, N).
80 if ( 0 <= i + ii && i + ii < (
int) N &&
81 0 <=
j + jj &&
j + jj < (
int) N &&
82 0 <= k + kk && k + kk < (
int) N ) {
85 graph[row].push_back(col);
// Running count of all adjacency entries added so far.
88 total += graph[row].size();
// Times a sparse matrix product applied to `ensemble_length` vectors in
// three variants and reports each result in its own 5-entry vector:
//   [0] fem_length (nGrid^3 rows), [1] ensemble_length, [2] graph_length,
//   [3] seconds per iteration, [4] flops / seconds_per_iter (GFLOP/s).
// scalar_perf covers the one-vector-at-a-time variant (loops over e);
// block_left_perf and block_right_perf cover the two multi-vector variants.
// NOTE(review): the kernel calls inside the timing loops, the definition of
// `graph_length`, and the remaining typedefs are not visible here.
94 template <
typename ScalarType,
typename OrdinalType,
typename Device>
97 const OrdinalType nGrid,
98 const OrdinalType iterCount,
99 std::vector<double>& scalar_perf,
100 std::vector<double>& block_left_perf,
101 std::vector<double>& block_right_perf)
106 typedef Kokkos::View< value_type*, execution_space > vector_type;
107 typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
109 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
110 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
111 typedef typename matrix_type::values_type matrix_values_type;
116 std::vector< std::vector<size_t> > fem_graph;
117 const size_t fem_length = nGrid * nGrid * nGrid;
// Scalar variant storage: one rank-1 view of length fem_length per ensemble
// member, allocated without initialization.
123 std::vector<vector_type> x(ensemble_length);
124 std::vector<vector_type> y(ensemble_length);
125 for (ordinal_type e=0; e<ensemble_length; ++e) {
126 x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing(
"x"), fem_length);
127 y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing(
"y"), fem_length);
// Block-left variant storage: rank-2 LayoutLeft views of extent
// fem_length x ensemble_length.
132 left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing(
"xl"), fem_length, ensemble_length);
133 left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing(
"yl"), fem_length, ensemble_length);
// Build the CRS matrix: the sparsity graph comes from fem_graph and the
// values array holds graph_length entries (allocated uninitialized).
144 matrix_graph_type matrix_graph =
145 Kokkos::create_staticcrsgraph<matrix_graph_type>(
146 std::string(
"test crs graph"), fem_graph);
147 matrix_values_type matrix_values =
148 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length);
149 matrix_type matrix(
"matrix", fem_length, matrix_values, matrix_graph);
// --- Scalar variant ---
// Untimed pass that runs before the timer is created (presumably a warm-up).
157 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
158 for (ordinal_type e=0; e<ensemble_length; ++e) {
// Fence before creating the timer and again before reading it, so the
// measured interval is bracketed by device synchronization.
163 execution_space::fence();
164 Kokkos::Impl::Timer clock ;
165 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
166 for (ordinal_type e=0; e<ensemble_length; ++e) {
170 execution_space::fence();
172 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Two operations per matrix entry per ensemble member, scaled by 1e-9.
173 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
175 scalar_perf.resize(5);
176 scalar_perf[0] = fem_length;
177 scalar_perf[1] = ensemble_length;
178 scalar_perf[2] = graph_length;
179 scalar_perf[3] = seconds_per_iter;
180 scalar_perf[4] = flops / seconds_per_iter;
// --- Block-left variant --- same untimed-then-timed structure as above.
188 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
192 execution_space::fence();
193 Kokkos::Impl::Timer clock ;
194 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
197 execution_space::fence();
199 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
200 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
202 block_left_perf.resize(5);
203 block_left_perf[0] = fem_length;
204 block_left_perf[1] = ensemble_length;
205 block_left_perf[2] = graph_length;
206 block_left_perf[3] = seconds_per_iter;
207 block_left_perf[4] = flops / seconds_per_iter;
// --- Block-right variant --- same untimed-then-timed structure as above.
216 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
220 execution_space::fence();
221 Kokkos::Impl::Timer clock ;
222 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
225 execution_space::fence();
227 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
228 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
230 block_right_perf.resize(5);
231 block_right_perf[0] = fem_length;
232 block_right_perf[1] = ensemble_length;
233 block_right_perf[2] = graph_length;
234 block_right_perf[3] = seconds_per_iter;
235 block_right_perf[4] = flops / seconds_per_iter;
// Driver: prints a comma-separated header, then runs test_spmm once per
// ensemble size in [ensemble_min, ensemble_max] (inclusive, stepping by
// ensemble_step) and prints one comma-separated row per run.
// NOTE(review): the function signature and parts of the header/row output
// are not visible here.
241 template <
typename Scalar,
typename Ordinal,
typename Device>
248 std::cout.precision(8);
249 std::cout << std::endl
250 <<
"\"Grid Size\" , "
252 <<
"\"FEM Graph Size\" , "
253 <<
"\"Ensemble Size\" , "
254 <<
"\"Scalar SpMM Time\" , "
255 <<
"\"Scalar SpMM Speedup\" , "
256 <<
"\"Scalar SpMM GFLOPS\" , "
257 <<
"\"Block-Left SpMM Speedup\" , "
258 <<
"\"Block-Left SpMM GFLOPS\" , "
// Result vectors are reused across ensemble sizes; test_spmm resizes and
// overwrites them on every call.
263 std::vector<double> perf_scalar, perf_block_left, perf_block_right;
264 for (
Ordinal e=ensemble_min; e<=ensemble_max; e+=ensemble_step) {
266 test_spmm<Scalar,Ordinal,Device>(
267 e, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right );
// Row layout follows the header: index [2] (graph size) is printed before
// index [1] (ensemble size). Speedups are GFLOPS ratios against the scalar
// variant, so the scalar entry is its own baseline (the ratio of
// perf_scalar[4] to itself).
269 std::cout << nGrid <<
" , "
270 << perf_scalar[0] <<
" , "
271 << perf_scalar[2] <<
" , "
272 << perf_scalar[1] <<
" , "
273 << perf_scalar[3] <<
" , "
274 << perf_scalar[4] / perf_scalar[4] <<
" , "
275 << perf_scalar[4] <<
" , "
276 << perf_block_left[4]/ perf_scalar[4] <<
" , "
277 << perf_block_left[4] <<
" , "
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
void test_spmm(const OrdinalType ensemble_length, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf)
Kokkos::DefaultExecutionSpace execution_space
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)