16 #include "Kokkos_Core.hpp"
19 using Kokkos::Threads;
23 Ordinal NumPerThread, Ordinal ThreadsPerVector>
27 const Ordinal
VectorSize = NumPerThread * ThreadsPerVector;
28 typedef typename Storage::template apply_N<VectorSize>::type
storage_type;
31 const Ordinal nGrid = 5;
34 if (num_hyper_threads >= ThreadsPerVector) {
35 int row_threads = num_hyper_threads / ThreadsPerVector;
36 KokkosSparse::DeviceConfig dev_config(num_cores, ThreadsPerVector, row_threads);
38 success = test_embedded_vector<Vector>(
39 nGrid,
VectorSize, dev_config, MultiplyOp(), out);
47 Kokkos_CrsMatrix_MP, Multiply_1,
Storage, MultiplyOp )
50 const Ordinal NumPerThread = 16;
51 const Ordinal ThreadsPerVector = 1;
57 Kokkos_CrsMatrix_MP, Multiply_2,
Storage, MultiplyOp )
60 const Ordinal NumPerThread = 8;
61 const Ordinal ThreadsPerVector = 2;
// Instantiates the two templated unit tests of the Kokkos_CrsMatrix_MP group,
// Multiply_1 and Multiply_2, for one (STORAGE, OP) pair.
//   STORAGE - forwarded as the first template argument of each instantiation.
//   OP      - forwarded as the second template argument of each instantiation.
// The expansion is two back-to-back TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT
// invocations with no trailing separator.
66 #define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP ) \
67 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
68 Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP ) \
69 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
70 Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP )
// Expands CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP once for each of the
// four combinations of storage name (SFS, DS) and multiply operation
// (DefaultMultiply, KokkosMultiply).
// NOTE(review): ORDINAL, SCALAR and DEVICE are accepted but not referenced in
// the visible expansion; SFS and DS are not defined here and must already be
// in scope where this macro is used -- presumably storage typedefs declared
// elsewhere; confirm against the including test header.
76 #define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
77 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply ) \
78 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply ) \
79 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply ) \
80 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
89 Kokkos::hwloc::get_available_numa_count() *
90 Kokkos::hwloc::get_available_cores_per_numa();
92 Kokkos::hwloc::get_available_threads_per_core();
94 Kokkos::InitializationSettings init_args;
96 Kokkos::initialize( init_args );
97 Kokkos::print_configuration(std::cout);
Stokhos::StandardStorage< int, double > storage_type
bool test_host_embedded_vector(Ordinal num_hyper_threads, Ordinal num_cores, Teuchos::FancyOStream &out)
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
static int runUnitTestsFromMain(int argc, char *argv[])
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_SG_SpMv, CrsProductTensorCijk, Scalar, Device)
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
int main(int argc, char **argv)
basic_FancyOStream< char > FancyOStream
const unsigned VectorSize