31 #include "Kokkos_Core.hpp" 
   // Runs a fixed set of Stokhos performance-test drivers with Kokkos::Cuda as
   // the device type.
   //
   // Only `test_block` and `symmetric` are forwarded in the calls visible here.
   // `test_flat`, `test_orig` and `test_lin` are not referenced on any visible
   // line -- presumably they guard the groups of calls below on lines missing
   // from this listing; TODO confirm against the full file.
   //
   // NOTE(review): this listing is incomplete. The embedded line numbers jump
   // (52 -> 61, 64 -> 71, 74 -> 81), and the declarations of `nGrid` and
   // `nIter` as well as the closing brace of this function are not shown.
   48 template<
typename Scalar>
 
   50   static void run(
bool test_flat, 
bool test_orig, 
bool test_lin,
 
   51                   bool test_block, 
bool symmetric) {
 
   // Every driver call below is instantiated on the CUDA execution space.
   52     typedef Kokkos::Cuda Device;
 
   // Two "all" driver runs; the leading integer triples are (3, 1, 9) and
   // (5, 1, 5). Their meaning is defined by performance_test_driver_all, which
   // is not visible here.
   61       performance_test_driver_all<Scalar,Device>(
 
   62         3 , 1 ,  9 , nGrid , nIter , test_block , symmetric );
 
   63       performance_test_driver_all<Scalar,Device>(
 
   64         5 , 1 ,  5 , nGrid , nIter , test_block , symmetric );
 
   // Two "poly" driver runs using Stokhos::DefaultMultiply, with leading
   // triples (3, 1, 12) and (5, 1, 6).
   71       performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>(
 
   72         3 , 1 , 12 , nGrid , nIter , test_block , symmetric );
 
   73       performance_test_driver_poly<Scalar,Device,Stokhos::DefaultMultiply>(
 
   74         5 , 1 ,  6 , nGrid , nIter , test_block , symmetric );
 
   // One "linear" driver run using Stokhos::DefaultMultiply, with leading
   // triple (31, 255, 32).
   81       performance_test_driver_linear<Scalar,Device,Stokhos::DefaultMultiply>(
 
   82         31 ,  255 , 32 , nGrid , nIter , test_block , symmetric );
 
   // Entry point for the CUDA performance run: initializes Kokkos on the
   // requested device, prints configuration and device information, sets the
   // CUDA shared-memory bank size, and then forwards the five test flags to a
   // call whose head is not visible in this listing.
   //
   // Parameters:
   //   test_flat, test_orig, test_lin, test_block, symmetric
   //       -- passed through unchanged, in that order, to the final call below.
   //   device_id
   //       -- stored in Kokkos::InitArguments::device_id and also passed to
   //          cudaGetDeviceProperties.
   //
   // NOTE(review): this listing is incomplete. The function's braces, the
   // remainder of the device-info output statement, the head of the final call
   // and any return statement or Kokkos finalization are not shown, although
   // the signature declares an int return.
   91 template <
typename Scalar>
 
   92 int mainCuda(
bool test_flat, 
bool test_orig, 
bool test_lin, 
bool test_block,
 
   93              bool symmetric, 
int device_id)
 
   // IntType is not referenced on any line visible here.
   95   typedef unsigned long long int IntType ;
 
   // Initialize Kokkos with the caller-selected device id.
   97   Kokkos::InitArguments init_args;
 
   98   init_args.device_id = device_id;
 
   99   Kokkos::initialize( init_args );
 
  100   Kokkos::print_configuration( std::cout );
 
   // Query the device properties so the device name can be printed.
   // NOTE(review): the return code of cudaGetDeviceProperties is ignored here,
   // so deviceProp is read below even if the query failed.
  102   cudaDeviceProp deviceProp;
 
  103   cudaGetDeviceProperties(&deviceProp, device_id);
 
   // Prints "Device <id>: <name>"; the rest of this statement (embedded lines
   // 106-107) is missing from the listing.
  104   std::cout << std::endl
 
  105             << 
"Device " << device_id << 
": " << deviceProp.name
 
   // Request the eight-byte shared-memory bank configuration.
   // NOTE(review): the return code is ignored here as well.
  108   cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
 
   // Emits the literal header "Cuda Performance", including the double quotes.
  110   std::cout << std::endl << 
"\"Cuda Performance\"" << std::endl ;
 
   // Argument tail of a call whose head (embedded line 111) is missing --
   // presumably the static run() of the performance test driver; TODO confirm.
  112     test_flat, test_orig, test_lin, test_block, symmetric);
 
// Declaration-summary lines; none carries a terminating semicolon in this
// listing.
//  - The first is a non-template mainCuda overload taking a Teuchos
//    communicator, print/trial/node settings, a check flag and a
//    Kokkos::Example::FENL::DeviceConfig. NOTE(review): its signature is
//    unrelated to the template above and may come from a different source
//    file -- confirm before relying on it.
//  - The last two are explicit instantiations of the template mainCuda for
//    Scalar = double and Scalar = float.
void mainCuda(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
template int mainCuda< double >(bool, bool, bool, bool, bool, int)
template int mainCuda< float >(bool, bool, bool, bool, bool, int)