Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_KokkosCrsMatrixMPVectorUnitTest_Cuda.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
13 
15 
16 // Instantiate test for Cuda device
17 using Kokkos::Cuda;
19 
20 template <typename Storage, typename Ordinal, typename MultiplyOp,
21  Ordinal NumPerThread, Ordinal ThreadsPerVector>
22 bool test_cuda_embedded_vector(Ordinal num_blocks,
23  Ordinal num_vec_threads,
24  Ordinal num_row_threads,
25  Teuchos::FancyOStream& out) {
26  typedef Kokkos::Cuda Device;
27 
28  const Ordinal VectorSize = NumPerThread * ThreadsPerVector;
29  typedef typename Storage::template apply_N<VectorSize>::type storage_type;
30  typedef Sacado::MP::Vector<storage_type> Vector;
31 
32  const Ordinal nGrid = 5;
33  KokkosSparse::DeviceConfig dev_config(num_blocks, num_vec_threads, num_row_threads);
34 
35  bool success = test_embedded_vector<Vector>(
36  nGrid, VectorSize, dev_config, MultiplyOp(), out);
37 
38  return success;
39 }
40 
41 // Test default configuration
43  Kokkos_CrsMatrix_MP, Multiply_Default, Storage, MultiplyOp )
44 {
45  typedef typename Storage::ordinal_type Ordinal;
46  const Ordinal NumPerThread = 1;
47  const Ordinal ThreadsPerVector = 16;
48 
49  const Ordinal num_blocks = 0;
50  const Ordinal num_vec_threads = 0;
51  const Ordinal num_row_threads = 0;
52 
53  success =
54  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
55 }
56 
58  Kokkos_CrsMatrix_MP, Multiply_1, Storage, MultiplyOp )
59 {
60  typedef typename Storage::ordinal_type Ordinal;
61  const Ordinal NumPerThread = 1;
62  const Ordinal ThreadsPerVector = 16;
63 
64  const Ordinal num_blocks = 10;
65  const Ordinal num_vec_threads = ThreadsPerVector;
66  const Ordinal num_row_threads = 4;
67 
68  success =
69  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
70 }
71 
73  Kokkos_CrsMatrix_MP, Multiply_2, Storage, MultiplyOp )
74 {
75  typedef typename Storage::ordinal_type Ordinal;
76  const Ordinal NumPerThread = 2;
77  const Ordinal ThreadsPerVector = 16;
78 
79  const Ordinal num_blocks = 10;
80  const Ordinal num_vec_threads = ThreadsPerVector;
81  const Ordinal num_row_threads = 4;
82 
83  success =
84  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
85 }
86 
88  Kokkos_CrsMatrix_MP, Multiply_3, Storage, MultiplyOp )
89 {
90  typedef typename Storage::ordinal_type Ordinal;
91  const Ordinal NumPerThread = 3;
92  const Ordinal ThreadsPerVector = 16;
93 
94  const Ordinal num_blocks = 10;
95  const Ordinal num_vec_threads = ThreadsPerVector;
96  const Ordinal num_row_threads = 4;
97 
98  success =
99  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
100 }
101 
103  Kokkos_CrsMatrix_MP, Multiply_4, Storage, MultiplyOp )
104 {
105  typedef typename Storage::ordinal_type Ordinal;
106  const Ordinal NumPerThread = 4;
107  const Ordinal ThreadsPerVector = 16;
108 
109  const Ordinal num_blocks = 10;
110  const Ordinal num_vec_threads = ThreadsPerVector;
111  const Ordinal num_row_threads = 4;
112 
113  success =
114  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
115 }
116 
// Instantiates the Kokkos_CrsMatrix_MP multiply tests (Multiply_Default and
// Multiply_1 through Multiply_4) for one (STORAGE, OP) pair.
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_Default, STORAGE, OP ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_3, STORAGE, OP ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_4, STORAGE, OP )
128 
// Instantiates the multiply tests for every combination of storage
// (SFS, DS) and multiply operator (DefaultMultiply, KokkosMultiply).
//
// Notes: SFS and DS are defined in the main test header. The ORDINAL, SCALAR
// and DEVICE arguments are not used in the expansion: we are being lazy and
// not putting ordinal/scalar/device in the test names, on the assumption that
// only one combination will ever be instantiated.
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply )                     \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply )                      \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply )                      \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
137 
139 
140 int main( int argc, char* argv[] ) {
141  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
142 
143  // Initialize Cuda
144  Kokkos::InitializationSettings init_args;
145  init_args.set_device_id(0);
146  Kokkos::initialize( init_args );
147  Kokkos::print_configuration(std::cout);
148 
149  // Run tests
151 
152  // Finish up
153  Kokkos::finalize();
154 
155  return ret;
156 }
Stokhos::StandardStorage< int, double > storage_type
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
static int runUnitTestsFromMain(int argc, char *argv[])
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_SG_SpMv, CrsProductTensorCijk, Scalar, Device)
bool test_cuda_embedded_vector(Ordinal num_blocks, Ordinal num_vec_threads, Ordinal num_row_threads, Teuchos::FancyOStream &out)
int main(int argc, char **argv)
basic_FancyOStream< char > FancyOStream