Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CuspSpMM/TestSpMM.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include <iostream>
11 
12 // CUSP
13 #include <cusp/array2d.h>
14 #include <cusp/csr_matrix.h>
15 #include <cusp/MVmultiply.h>
16 #include <cusp/gallery/poisson.h>
17 #include <cusp/detail/timer.h>
18 
19 // Utilities
20 #include "Teuchos_CommandLineProcessor.hpp"
21 #include "Teuchos_StandardCatchMacros.hpp"
22 
23 int main(int argc, char *argv[])
24 {
25  typedef int IndexType;
26  typedef double ValueType;
27  typedef cusp::device_memory MemorySpace;
28  //typedef cusp::row_major Orientation;
29 
30  bool success = true;
31  bool verbose = false;
32  try {
33 
34  // Setup command line options
36  CLP.setDocString("This test performance of block multiply routines.\n");
37  IndexType n = 32;
38  CLP.setOption("n", &n, "Number of mesh points in the each direction");
39  IndexType nrhs_begin = 32;
40  CLP.setOption("begin", &nrhs_begin,
41  "Staring number of right-hand-sides");
42  IndexType nrhs_end = 512;
43  CLP.setOption("end", &nrhs_end,
44  "Ending number of right-hand-sides");
45  IndexType nrhs_step = 32;
46  CLP.setOption("step", &nrhs_step,
47  "Increment in number of right-hand-sides");
48  IndexType nits = 10;
49  CLP.setOption("nits", &nits,
50  "Number of multiply iterations");
51  int device_id = 0;
52  CLP.setOption("device", &device_id, "CUDA device ID");
53  CLP.parse( argc, argv );
54 
55  // Set CUDA device
56  cudaSetDevice(device_id);
57  cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
58 
59  // create 3D Poisson problem
60  cusp::csr_matrix<IndexType, ValueType, MemorySpace> A;
61  cusp::gallery::poisson27pt(A, n, n, n);
62 
63  std::cout << "nrhs , num_rows , num_entries , row_time , row_gflops , "
64  << "col_time , col_gflops" << std::endl;
65 
66  for (IndexType nrhs = nrhs_begin; nrhs <= nrhs_end; nrhs += nrhs_step) {
67 
68  double flops =
69  2.0 * static_cast<double>(A.num_entries) * static_cast<double>(nrhs);
70 
71  // test row-major storage
72  cusp::array2d<ValueType, MemorySpace, cusp::row_major> x_row(
73  A.num_rows, nrhs, 1);
74  cusp::array2d<ValueType, MemorySpace, cusp::row_major> y_row(
75  A.num_rows, nrhs, 0);
76 
77  cusp::detail::timer row_timer;
78  row_timer.start();
79  for (IndexType iter=0; iter<nits; ++iter) {
80  cusp::MVmultiply(A, x_row, y_row);
81  }
82  cudaDeviceSynchronize();
83  double row_time = row_timer.seconds_elapsed() / nits;
84  double row_gflops = 1.0e-9 * flops / row_time;
85 
86  // test column-major storage
87  cusp::array2d<ValueType, MemorySpace, cusp::column_major> x_col(
88  A.num_rows, nrhs, 1);
89  cusp::array2d<ValueType, MemorySpace, cusp::column_major> y_col(
90  A.num_rows, nrhs, 0);
91 
92  cusp::detail::timer col_timer;
93  col_timer.start();
94  for (IndexType iter=0; iter<nits; ++iter) {
95  cusp::MVmultiply(A, x_col, y_col);
96  }
97  cudaDeviceSynchronize();
98  double col_time = col_timer.seconds_elapsed() / nits;
99  double col_gflops = 1.0e-9 * flops / col_time;
100 
101  std::cout << nrhs << " , "
102  << A.num_rows << " , " << A.num_entries << " , "
103  << row_time << " , " << row_gflops << " , "
104  << col_time << " , " << col_gflops
105  << std::endl;
106 
107  }
108 
109  }
110  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
111 
112  if (success)
113  return 0;
114  return -1;
115 }
void MVmultiply(LinearOperator &A, MatrixOrVector1 &B, MatrixOrVector2 &C)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
int main(int argc, char **argv)
void setDocString(const char doc_string[])
int n