Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_KokkosCrsMatrixMPVectorUnitTest_Threads.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
13 
15 
16 #include "Kokkos_Core.hpp"
17 
18 // Instantiate test for Threads device
19 using Kokkos::Threads;
21 
22 template <typename Storage, typename Ordinal, typename MultiplyOp,
23  Ordinal NumPerThread, Ordinal ThreadsPerVector>
25  Ordinal num_cores,
26  Teuchos::FancyOStream& out) {
27  const Ordinal VectorSize = NumPerThread * ThreadsPerVector;
28  typedef typename Storage::template apply_N<VectorSize>::type storage_type;
29  typedef Sacado::MP::Vector<storage_type> Vector;
30 
31  const Ordinal nGrid = 5;
32 
33  bool success = true;
34  if (num_hyper_threads >= ThreadsPerVector) {
35  int row_threads = num_hyper_threads / ThreadsPerVector;
36  KokkosSparse::DeviceConfig dev_config(num_cores, ThreadsPerVector, row_threads);
37 
38  success = test_embedded_vector<Vector>(
39  nGrid, VectorSize, dev_config, MultiplyOp(), out);
40  }
41  return success;
42 }
43 
45 
47  Kokkos_CrsMatrix_MP, Multiply_1, Storage, MultiplyOp )
48 {
49  typedef typename Storage::ordinal_type Ordinal;
50  const Ordinal NumPerThread = 16;
51  const Ordinal ThreadsPerVector = 1;
52  success =
53  test_host_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_hyper_threads, num_cores, out);
54 }
55 
57  Kokkos_CrsMatrix_MP, Multiply_2, Storage, MultiplyOp )
58 {
59  typedef typename Storage::ordinal_type Ordinal;
60  const Ordinal NumPerThread = 8;
61  const Ordinal ThreadsPerVector = 2;
62  success =
63  test_host_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_hyper_threads, num_cores, out);
64 }
65 
// Instantiate both multiply tests (Multiply_1 and Multiply_2) for one
// (STORAGE, OP) pair.
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP )   \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT(                                 \
    Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP )                      \
  TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT(                                 \
    Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP )
71 
// Notes: SFS and DS are defined in the main test header. (We are also being
// lazy and not putting ordinal/scalar/device in the names, assuming we will
// only do one combination.) As a consequence the ORDINAL, SCALAR and DEVICE
// arguments are accepted but never referenced in the expansion below.
//
// The original note here states: "We can't do DefaultMultiply for DS because
// it uses partitioning".
// NOTE(review): the expansion below nevertheless instantiates
// (DS, DefaultMultiply). Either the note is stale or that line should be
// removed -- confirm which before changing anything.
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply ) \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply ) \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply ) \
  CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
81 
83 
84 int main( int argc, char* argv[] ) {
85  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
86 
87  // Initialize threads
88  num_cores =
89  Kokkos::hwloc::get_available_numa_count() *
90  Kokkos::hwloc::get_available_cores_per_numa();
92  Kokkos::hwloc::get_available_threads_per_core();
93 
94  Kokkos::InitializationSettings init_args;
95  init_args.set_num_threads(num_cores*num_hyper_threads);
96  Kokkos::initialize( init_args );
97  Kokkos::print_configuration(std::cout);
98 
99  // Run tests
101 
102  // Finish up
103  Kokkos::finalize();
104 
105  return ret;
106 }
Stokhos::StandardStorage< int, double > storage_type
bool test_host_embedded_vector(Ordinal num_hyper_threads, Ordinal num_cores, Teuchos::FancyOStream &out)
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
static int runUnitTestsFromMain(int argc, char *argv[])
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_SG_SpMv, CrsProductTensorCijk, Scalar, Device)
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
int main(int argc, char **argv)
basic_FancyOStream< char > FancyOStream