16 use_auto_team_size_(true),
21 fad_use_shared_memory_(false)
23 #if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
24 #if defined(KOKKOS_ENABLE_CUDA)
27 #if defined(KOKKOS_ENABLE_HIP)
40 int roundDownToPowerOfTwo(
int in) {
50 const int& in_vector_size,
51 const int& in_fad_vector_size,
52 const bool force_override)
55 if ( force_override ) {
62 Kokkos::TeamPolicy<PHX::Device> policy(1, Kokkos::AUTO);
63 auto blank_functor = KOKKOS_LAMBDA (
const Kokkos::TeamPolicy<PHX::exec_space>::member_type) {};
65 int team_size_max = std::min(in_team_size, policy.team_size_max(blank_functor, Kokkos::ParallelForTag()));
66 team_size_=roundDownToPowerOfTwo(team_size_max);
68 int vec_size_max = policy.vector_length_max();
69 vector_size_ = roundDownToPowerOfTwo(std::min(vec_size_max, in_vector_size));
70 fad_vector_size_ = roundDownToPowerOfTwo(std::min(vec_size_max, in_fad_vector_size));
74 const bool& in_fad_use_shared_memory)
int fad_vector_size_
Default vector size for non-AD types.
bool use_shared_memory_
FAD vector size.
const bool use_shared_memory_
int vector_size_
User specified team size.
bool fad_use_shared_memory_
Use shared-memory Kokkos kernels for non-FAD types.
int team_size_
If true, the team size is set with Kokkos::AUTO().
void overrideSizes(const int &team_size, const int &vector_size, const int &fad_vector_size, const bool force_override_safety=false)
HP()
Use shared-memory Kokkos kernels for FAD types.
Singleton class for accessing Kokkos hierarchical parallelism parameters.
static HP & inst()
Private ctor.
void setUseSharedMemory(const bool &use_shared_memory, const bool &fad_use_shared_memory)
Tell Kokkos kernels whether they should use shared memory. The best choice is highly problem-dependent.