12 #include "Kokkos_Core.hpp"
// Runs the CUDA performance-test drivers for the given Scalar type.
//
// With Device fixed to Kokkos::Cuda, the visible body invokes, in order:
//   - performance_test_driver_all<Scalar,Device>                              (two calls)
//   - performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>    (two calls)
//   - performance_test_driver_linear<Scalar,Device,Stokhos::DefaultMultiply>  (one call)
//
// Parameters (all bool): test_flat, test_orig, test_lin, test_block, symmetric.
// Only test_block and symmetric are forwarded by the calls visible here.
//
// NOTE(review): the leading integers on some lines look like line numbers from
// a rendered listing, and the gaps between them suggest omitted lines (e.g. the
// declarations of nGrid and nIter and the closing brace) — confirm against the
// original file before editing this function.
// NOTE(review): test_flat, test_orig and test_lin are not used in any visible
// line; they may guard the calls in the omitted lines — TODO confirm.
29 template<
typename Scalar>
31 static void run(
bool test_flat,
bool test_orig,
bool test_lin,
32 bool test_block,
bool symmetric) {
// All drivers below are instantiated for the Kokkos CUDA device.
33 typedef Kokkos::Cuda Device;
// "all" driver, two configurations. The three leading integer arguments are
// presumably a dimension and an order range — TODO confirm against the
// performance_test_driver_all signature. nGrid and nIter are not declared in
// the visible lines.
42 performance_test_driver_all<Scalar,Device>(
43 3 , 1 , 9 , nGrid , nIter , test_block , symmetric );
44 performance_test_driver_all<Scalar,Device>(
45 5 , 1 , 5 , nGrid , nIter , test_block , symmetric );
// "poly" driver using Stokhos::DefaultMultiply, two configurations.
52 performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>(
53 3 , 1 , 12 , nGrid , nIter , test_block , symmetric );
54 performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>(
55 5 , 1 , 6 , nGrid , nIter , test_block , symmetric );
// "linear" driver using Stokhos::DefaultMultiply, one configuration.
62 performance_test_driver_linear<Scalar,Device,Stokhos::DefaultMultiply>(
63 31 , 255 , 32 , nGrid , nIter , test_block , symmetric );
// Entry point for the CUDA performance tests, templated on the Scalar type.
//
// Visible steps: initialize Kokkos on the requested device, print the Kokkos
// configuration, query and print the CUDA device name, set the shared-memory
// bank size configuration, print a "Cuda Performance" banner, then forward the
// five test flags to a call whose head is not visible in this extract.
//
// Parameters:
//   test_flat, test_orig, test_lin, test_block, symmetric - bool flags
//       forwarded unchanged to the final call.
//   device_id - passed to Kokkos::InitializationSettings::set_device_id and to
//       cudaGetDeviceProperties.
// Returns: int; the return statement is not visible here.
//
// NOTE(review): the function's opening/closing braces, the end of the first
// std::cout statement, the head of the final call and any finalize/return
// code are missing from this extract — confirm against the original file.
72 template <
typename Scalar>
73 int mainCuda(
bool test_flat,
bool test_orig,
bool test_lin,
bool test_block,
74 bool symmetric,
int device_id)
// NOTE(review): IntType is not referenced in any visible line — possibly unused.
76 typedef unsigned long long int IntType ;
// Select the CUDA device for Kokkos before initializing the runtime.
78 Kokkos::InitializationSettings init_args;
79 init_args.set_device_id(device_id);
80 Kokkos::initialize( init_args );
81 Kokkos::print_configuration( std::cout );
// Query the properties of the selected device so its name can be reported.
// NOTE(review): the return value of cudaGetDeviceProperties is not checked,
// so deviceProp.name is read even if the query fails.
83 cudaDeviceProp deviceProp;
84 cudaGetDeviceProperties(&deviceProp, device_id);
// Prints "Device <device_id>: <name>"; the tail of this statement is not
// visible in the extract.
85 std::cout << std::endl
86 <<
"Device " << device_id <<
": " << deviceProp.name
// Request the eight-byte shared-memory bank size; the return value is ignored.
89 cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
// Banner; the literal includes the surrounding double quotes in the output.
91 std::cout << std::endl <<
"\"Cuda Performance\"" << std::endl ;
// Argument list of a call whose callee is on an omitted line — presumably the
// static run(...) driver; TODO confirm.
93 test_flat, test_orig, test_lin, test_block, symmetric);
// NOTE(review): the three lines below are bare signatures without terminating
// semicolons; they look like a member/reference listing appended by the
// extraction rather than compilable source — confirm against the original file.
//
// A differently-typed mainCuda declaration (takes a Teuchos communicator,
// print/trial/node settings, a check flag and a Kokkos::Example::FENL::DeviceConfig).
// It does not match the template defined in this file; presumably it belongs
// to another example — TODO confirm.
void mainCuda(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
// Explicit instantiations of the mainCuda template for double and float.
template int mainCuda< double >(bool, bool, bool, bool, bool, int)
template int mainCuda< float >(bool, bool, bool, bool, bool, int)