12 #include "Kokkos_Core.hpp" 
   29 template<
typename Scalar>
 
   31   static void run(
bool test_flat, 
bool test_orig, 
bool test_lin,
 
   32                   bool test_block, 
bool symmetric) {
 
   33     typedef Kokkos::Cuda Device;
 
   42       performance_test_driver_all<Scalar,Device>(
 
   43         3 , 1 ,  9 , nGrid , nIter , test_block , symmetric );
 
   44       performance_test_driver_all<Scalar,Device>(
 
   45         5 , 1 ,  5 , nGrid , nIter , test_block , symmetric );
 
   52       performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>(
 
   53         3 , 1 , 12 , nGrid , nIter , test_block , symmetric );
 
   54       performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>(
 
   55         5 , 1 ,  6 , nGrid , nIter , test_block , symmetric );
 
   62       performance_test_driver_linear<Scalar,Device,Stokhos::DefaultMultiply>(
 
   63         31 ,  255 , 32 , nGrid , nIter , test_block , symmetric );
 
   72 template <
typename Scalar>
 
   73 int mainCuda(
bool test_flat, 
bool test_orig, 
bool test_lin, 
bool test_block,
 
   74              bool symmetric, 
int device_id)
 
   76   typedef unsigned long long int IntType ;
 
   78   Kokkos::InitializationSettings init_args;
 
   79   init_args.set_device_id(device_id);
 
   80   Kokkos::initialize( init_args );
 
   81   Kokkos::print_configuration( std::cout );
 
   83   cudaDeviceProp deviceProp;
 
   84   cudaGetDeviceProperties(&deviceProp, device_id);
 
   85   std::cout << std::endl
 
   86             << 
"Device " << device_id << 
": " << deviceProp.name
 
   89   cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
 
   91   std::cout << std::endl << 
"\"Cuda Performance\"" << std::endl ;
 
   93     test_flat, test_orig, test_lin, test_block, symmetric);
 
void mainCuda(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
 
template int mainCuda< double >(bool, bool, bool, bool, bool, int)
 
template int mainCuda< float >(bool, bool, bool, bool, bool, int)