45 #include <cusp/array2d.h>
46 #include <cusp/csr_matrix.h>
48 #include <cusp/gallery/poisson.h>
49 #include <cusp/detail/timer.h>
// Type aliases shared by the sparse matrix and the dense multivector
// declarations in this benchmark.

// Integer type: first template argument of the csr_matrix, and the type of the
// right-hand-side counts and iteration counters.
57 typedef int IndexType;
// Floating-point type: value type of the csr_matrix and of the array2d
// multivectors.
58 typedef double ValueType;
// CUSP memory-space tag passed to every container declared with these aliases.
59 typedef cusp::device_memory MemorySpace;
68 CLP.
setDocString(
"This test performance of block multiply routines.\n");
70 CLP.
setOption(
"n", &n,
"Number of mesh points in the each direction");
71 IndexType nrhs_begin = 32;
73 "Staring number of right-hand-sides");
74 IndexType nrhs_end = 512;
76 "Ending number of right-hand-sides");
77 IndexType nrhs_step = 32;
79 "Increment in number of right-hand-sides");
82 "Number of multiply iterations");
84 CLP.
setOption(
"device", &device_id,
"CUDA device ID");
85 CLP.
parse( argc, argv );
88 cudaSetDevice(device_id);
89 cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
92 cusp::csr_matrix<IndexType, ValueType, MemorySpace>
A;
93 cusp::gallery::poisson27pt(A, n, n, n);
95 std::cout <<
"nrhs , num_rows , num_entries , row_time , row_gflops , "
96 <<
"col_time , col_gflops" << std::endl;
98 for (IndexType nrhs = nrhs_begin; nrhs <= nrhs_end; nrhs += nrhs_step) {
101 2.0 *
static_cast<double>(A.num_entries) * static_cast<double>(nrhs);
104 cusp::array2d<ValueType, MemorySpace, cusp::row_major> x_row(
105 A.num_rows, nrhs, 1);
106 cusp::array2d<ValueType, MemorySpace, cusp::row_major> y_row(
107 A.num_rows, nrhs, 0);
109 cusp::detail::timer row_timer;
111 for (IndexType iter=0; iter<nits; ++iter) {
114 cudaDeviceSynchronize();
115 double row_time = row_timer.seconds_elapsed() / nits;
116 double row_gflops = 1.0e-9 * flops / row_time;
119 cusp::array2d<ValueType, MemorySpace, cusp::column_major> x_col(
120 A.num_rows, nrhs, 1);
121 cusp::array2d<ValueType, MemorySpace, cusp::column_major> y_col(
122 A.num_rows, nrhs, 0);
124 cusp::detail::timer col_timer;
126 for (IndexType iter=0; iter<nits; ++iter) {
129 cudaDeviceSynchronize();
130 double col_time = col_timer.seconds_elapsed() / nits;
131 double col_gflops = 1.0e-9 * flops / col_time;
133 std::cout << nrhs <<
" , "
134 << A.num_rows <<
" , " << A.num_entries <<
" , "
135 << row_time <<
" , " << row_gflops <<
" , "
136 << col_time <<
" , " << col_gflops
void MVmultiply(LinearOperator &A, MatrixOrVector1 &B, MatrixOrVector2 &C)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void setDocString(const char doc_string[])