Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_KokkosCrsMatrixMPVectorUnitTest_Cuda.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
45 
47 
48 // Instantiate test for Cuda device
49 using Kokkos::Cuda;
51 
52 template <typename Storage, typename Ordinal, typename MultiplyOp,
53  Ordinal NumPerThread, Ordinal ThreadsPerVector>
54 bool test_cuda_embedded_vector(Ordinal num_blocks,
55  Ordinal num_vec_threads,
56  Ordinal num_row_threads,
57  Teuchos::FancyOStream& out) {
58  typedef Kokkos::Cuda Device;
59 
60  const Ordinal VectorSize = NumPerThread * ThreadsPerVector;
61  typedef typename Storage::template apply_N<VectorSize>::type storage_type;
62  typedef Sacado::MP::Vector<storage_type> Vector;
63 
64  const Ordinal nGrid = 5;
65  KokkosSparse::DeviceConfig dev_config(num_blocks, num_vec_threads, num_row_threads);
66 
67  bool success = test_embedded_vector<Vector>(
68  nGrid, VectorSize, dev_config, MultiplyOp(), out);
69 
70  return success;
71 }
72 
73 // Test default configuration
75  Kokkos_CrsMatrix_MP, Multiply_Default, Storage, MultiplyOp )
76 {
77  typedef typename Storage::ordinal_type Ordinal;
78  const Ordinal NumPerThread = 1;
79  const Ordinal ThreadsPerVector = 16;
80 
81  const Ordinal num_blocks = 0;
82  const Ordinal num_vec_threads = 0;
83  const Ordinal num_row_threads = 0;
84 
85  success =
86  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
87 }
88 
90  Kokkos_CrsMatrix_MP, Multiply_1, Storage, MultiplyOp )
91 {
92  typedef typename Storage::ordinal_type Ordinal;
93  const Ordinal NumPerThread = 1;
94  const Ordinal ThreadsPerVector = 16;
95 
96  const Ordinal num_blocks = 10;
97  const Ordinal num_vec_threads = ThreadsPerVector;
98  const Ordinal num_row_threads = 4;
99 
100  success =
101  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
102 }
103 
105  Kokkos_CrsMatrix_MP, Multiply_2, Storage, MultiplyOp )
106 {
107  typedef typename Storage::ordinal_type Ordinal;
108  const Ordinal NumPerThread = 2;
109  const Ordinal ThreadsPerVector = 16;
110 
111  const Ordinal num_blocks = 10;
112  const Ordinal num_vec_threads = ThreadsPerVector;
113  const Ordinal num_row_threads = 4;
114 
115  success =
116  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
117 }
118 
120  Kokkos_CrsMatrix_MP, Multiply_3, Storage, MultiplyOp )
121 {
122  typedef typename Storage::ordinal_type Ordinal;
123  const Ordinal NumPerThread = 3;
124  const Ordinal ThreadsPerVector = 16;
125 
126  const Ordinal num_blocks = 10;
127  const Ordinal num_vec_threads = ThreadsPerVector;
128  const Ordinal num_row_threads = 4;
129 
130  success =
131  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
132 }
133 
135  Kokkos_CrsMatrix_MP, Multiply_4, Storage, MultiplyOp )
136 {
137  typedef typename Storage::ordinal_type Ordinal;
138  const Ordinal NumPerThread = 4;
139  const Ordinal ThreadsPerVector = 16;
140 
141  const Ordinal num_blocks = 10;
142  const Ordinal num_vec_threads = ThreadsPerVector;
143  const Ordinal num_row_threads = 4;
144 
145  success =
146  test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
147 }
148 
// Instantiates the five Kokkos_CrsMatrix_MP multiply tests (Multiply_Default
// and Multiply_1 through Multiply_4) for one STORAGE / OP pair.
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP )                          \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_Default, STORAGE, OP )   \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP )         \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP )         \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_3, STORAGE, OP )         \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_CrsMatrix_MP, Multiply_4, STORAGE, OP )
160 
161 // Notes: SFS, DS are defined in main test header (we are also being lazy
162 // and not putting ordinal/scalar/device in the names, assuming we will only
163 // do one combination).
164 #define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
165  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply ) \
166  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply ) \
167  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply ) \
168  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
169 
171 
172 int main( int argc, char* argv[] ) {
173  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
174 
175  // Initialize Cuda
176  Kokkos::InitArguments init_args;
177  init_args.device_id = 0;
178  Kokkos::initialize( init_args );
179  Kokkos::print_configuration(std::cout);
180 
181  // Run tests
183 
184  // Finish up
185  Kokkos::finalize();
186 
187  return ret;
188 }
Stokhos::StandardStorage< int, double > storage_type
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
static int runUnitTestsFromMain(int argc, char *argv[])
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_SG_SpMv, CrsProductTensorCijk, Scalar, Device)
bool test_cuda_embedded_vector(Ordinal num_blocks, Ordinal num_vec_threads, Ordinal num_row_threads, Teuchos::FancyOStream &out)
int main(int argc, char **argv)
basic_FancyOStream< char > FancyOStream