Panzer  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Panzer_HierarchicParallelism.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Panzer: A partial differential equation assembly
4 // engine for strongly coupled complex multiphysics systems
5 //
6 // Copyright 2011 NTESS and the Panzer contributors.
7 // SPDX-License-Identifier: BSD-3-Clause
8 // *****************************************************************************
9 // @HEADER
10 
12 
13 namespace panzer {
14 
15  HP::HP() :
16  use_auto_team_size_(true),
17  team_size_(-1),
18  vector_size_(1),
19  fad_vector_size_(1),
20  use_shared_memory_(true),
21  fad_use_shared_memory_(false)
22  {
23 #if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
24 #if defined(KOKKOS_ENABLE_CUDA)
25  fad_vector_size_ = 32;
26 #endif
27 #if defined(KOKKOS_ENABLE_HIP)
28  fad_vector_size_ = 64;
29 #endif
30 #endif
31  }
32 
34  {
35  static HP hp;
36  return hp;
37  }
38 
namespace {
  // Returns the largest power of two that does not exceed `in`.
  // Any input of 1 or less (including zero and negatives) yields 1.
  int roundDownToPowerOfTwo(int in) {
    int power = 1;
    // Each halving of the remaining value corresponds to one doubling of
    // the result.
    for (int remaining = in; remaining > 1; remaining /= 2) {
      power *= 2;
    }
    return power;
  }
}
49  void HP::overrideSizes(const int& in_team_size,
50  const int& in_vector_size,
51  const int& in_fad_vector_size,
52  const bool force_override)
53  {
54  use_auto_team_size_ = false;
55  if ( force_override ) {
56  team_size_=in_team_size;
57  vector_size_=in_vector_size;
58  fad_vector_size_=in_fad_vector_size;
59  return;
60  }
61 
62  Kokkos::TeamPolicy<PHX::Device> policy(1, Kokkos::AUTO);
63  auto blank_functor = KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<PHX::exec_space>::member_type) {};
64 
65  int team_size_max = std::min(in_team_size, policy.team_size_max(blank_functor, Kokkos::ParallelForTag()));
66  team_size_=roundDownToPowerOfTwo(team_size_max);
67 
68  int vec_size_max = policy.vector_length_max();
69  vector_size_ = roundDownToPowerOfTwo(std::min(vec_size_max, in_vector_size));
70  fad_vector_size_ = roundDownToPowerOfTwo(std::min(vec_size_max, in_fad_vector_size));
71  }
72 
73  void HP::setUseSharedMemory(const bool& in_use_shared_memory,
74  const bool& in_fad_use_shared_memory)
75  {
76  use_shared_memory_ = in_use_shared_memory;
77  fad_use_shared_memory_ = in_fad_use_shared_memory;
78  }
79 
80 }
int fad_vector_size_
FAD vector size.
bool use_shared_memory_
Use shared memory kokkos kernels for non-fad types.
const bool use_shared_memory_
int vector_size_
Default vector size for non-AD types.
bool fad_use_shared_memory_
Use shared memory kokkos kernels for fad types.
int team_size_
User specified team size.
bool use_auto_team_size_
If true, the team size is set with Kokkos::AUTO()
void overrideSizes(const int &team_size, const int &vector_size, const int &fad_vector_size, const bool force_override_safety=false)
HP()
Private ctor.
Singleton class for accessing kokkos hierarchical parallelism parameters.
static HP & inst()
void setUseSharedMemory(const bool &use_shared_memory, const bool &fad_use_shared_memory)
Tell kokkos kernels if they should use shared memory. This is very problem dependent.