48 #include "Kokkos_Core.hpp"
51 using Kokkos::Threads;
55 Ordinal NumPerThread, Ordinal ThreadsPerVector>
59 const Ordinal
VectorSize = NumPerThread * ThreadsPerVector;
60 typedef typename Storage::template apply_N<VectorSize>::type
storage_type;
63 const Ordinal nGrid = 5;
66 if (num_hyper_threads >= ThreadsPerVector) {
67 int row_threads = num_hyper_threads / ThreadsPerVector;
68 KokkosSparse::DeviceConfig dev_config(num_cores, ThreadsPerVector, row_threads);
70 success = test_embedded_vector<Vector>(
71 nGrid,
VectorSize, dev_config, MultiplyOp(), out);
79 Kokkos_CrsMatrix_MP, Multiply_1,
Storage, MultiplyOp )
82 const Ordinal NumPerThread = 16;
83 const Ordinal ThreadsPerVector = 1;
89 Kokkos_CrsMatrix_MP, Multiply_2,
Storage, MultiplyOp )
92 const Ordinal NumPerThread = 8;
93 const Ordinal ThreadsPerVector = 2;
// Instantiates both multiply unit tests of the Kokkos_CrsMatrix_MP group
// (Multiply_1 and Multiply_2) for one combination of template arguments.
//   STORAGE - forwarded as the first template argument of each test.
//   OP      - forwarded as the second template argument of each test.
// The expansion is two back-to-back TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT
// invocations with no trailing semicolon.
98 #define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP ) \
99 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
100 Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP ) \
101 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
102 Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP )
// Instantiates the multiply tests for every pairing of the two storage names
// (SFS, DS) with the two multiply operators (DefaultMultiply, KokkosMultiply),
// i.e. four expansions of CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP.
// NOTE(review): ORDINAL, SCALAR and DEVICE do not appear in the replacement
// text shown here; SFS and DS are presumably aliases built from them elsewhere
// in the file -- confirm before changing the parameter list.
108 #define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
109 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply ) \
110 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply ) \
111 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply ) \
112 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
121 Kokkos::hwloc::get_available_numa_count() *
122 Kokkos::hwloc::get_available_cores_per_numa();
124 Kokkos::hwloc::get_available_threads_per_core();
126 Kokkos::InitArguments init_args;
128 Kokkos::initialize( init_args );
129 Kokkos::print_configuration(std::cout);
Stokhos::StandardStorage< int, double > storage_type
bool test_host_embedded_vector(Ordinal num_hyper_threads, Ordinal num_cores, Teuchos::FancyOStream &out)
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
static int runUnitTestsFromMain(int argc, char *argv[])
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_SG_SpMv, CrsProductTensorCijk, Scalar, Device)
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
int main(int argc, char **argv)
basic_FancyOStream< char > FancyOStream
const unsigned VectorSize