Compadre  1.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Compadre_ParallelManager.hpp
Go to the documentation of this file.
1 #ifndef _COMPADRE_PARALLELMANAGER_HPP_
2 #define _COMPADRE_PARALLELMANAGER_HPP_
3 
4 #include "Compadre_Config.h"
5 #include "Compadre_Typedefs.hpp"
6 
7 namespace Compadre {
8 
9 //! Parallel Manager
10 /*!
11 * This class sets and manages thread / team levels, scratch memory sizes, and kernel executions.
12 * ex:
13 * Compadre::ConvertLayoutLeftToRight clr;
14 * Compadre::ParallelManager pm;
15 * // no tag specified
16 * pm.CallFunctorWithTeamThreads(100, clr, "MyFunctorName");
17 * // some tag specified
18 * pm.CallFunctorWithTeamThreads<DefaultTag>(100, clr);
19 */
21 protected:
22 
23  //! lowest level memory for Kokkos::parallel_for for team access memory
26 
27  //! higher (slower) level memory for Kokkos::parallel_for for team access memory
30 
31  //! lowest level memory for Kokkos::parallel_for for thread access memory
34 
35  //! higher (slower) level memory for Kokkos::parallel_for for thread access memory
38 
39  //! calculated number of threads per team
42 
43 
44 /** @name Private Modifiers
45  * Private function because information lives on the device
46  */
47 ///@{
48 ///@}
49 
50 /** @name Private Accessors
51  * Private function because information lives on the device
52  */
53 ///@{
54 ///@}
55 
56 /** @name Private Utility
57  *
58  */
59 ///@{
60 ///@}
61 
62 public:
63 
64 /** @name Instantiation / Destruction
65  *
66  */
67 ///@{
68 
71 
//! NOTE(review): presumably the tail of the constructor body (it sits in the
//! "Instantiation / Destruction" group); its signature and the remaining statements
//! are not present in this listing, so only the CUDA default below can be documented.
72 #ifdef COMPADRE_USE_CUDA
// threads-per-team value assigned when COMPADRE_USE_CUDA is defined
77  _threads_per_team = 128;
79 #else
// NOTE(review): the non-CUDA assignments are not visible in this listing
86 #endif
87  }
88 
89 ///@}
90 
91 /** @name Public Utility
92  *
93  */
94 ///@{
95 ///@}
96 
97 /** @name Accessors
98  * Retrieve member variables through public member functions
99  */
100 ///@{
101 
102  //! Calls a parallel_for over a Kokkos::TeamPolicy<Tag>, labeling the kernel with typeid(Tag).name()
103  //! parallel_for will break out over loops over teams with each vector lane executing code by default
//! \param batch_size              [in] - first TeamPolicy argument (number of teams)
//! \param threads_per_team        [in] - second TeamPolicy argument
//! \param vector_lanes_per_thread [in] - third TeamPolicy argument
//! \param functor                 [in] - functor handed to Kokkos::parallel_for
104  template<typename Tag, class C>
105  void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team,
106  const int vector_lanes_per_thread, C functor) const {
107 
// Scratch is requested once per distinct level: when the "a" and "b" levels of a kind
// (team or thread) are equal, both sizes are summed into a single request on level "a".
108  if ( (_scratch_team_level_a != _scratch_team_level_b) && (_scratch_thread_level_a != _scratch_thread_level_b) ) {
109  // all levels of each type need specified separately
110  Kokkos::parallel_for(
111  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
112  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
113  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
114  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
115  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
116  functor, typeid(Tag).name());
117  } else if (_scratch_team_level_a != _scratch_team_level_b) {
118  // scratch thread levels are the same
119  Kokkos::parallel_for(
120  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
121  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
122  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
123  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
124  functor, typeid(Tag).name());
125  } else if (_scratch_thread_level_a != _scratch_thread_level_b) {
126  // scratch team levels are the same
127  Kokkos::parallel_for(
128  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
129  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
130  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
131  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
132  functor, typeid(Tag).name());
133  } else {
134  // scratch team levels and thread levels are the same
135  Kokkos::parallel_for(
136  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
137  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
138  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
139  functor, typeid(Tag).name());
140  }
141 
142  }
143 
144  //! Calls a parallel_for over an untagged Kokkos::TeamPolicy<>, labeling the kernel with functor_name
145  //! parallel_for will break out over loops over teams with each vector lane executing code by default
//! \param batch_size              [in] - first TeamPolicy argument (number of teams)
//! \param threads_per_team        [in] - second TeamPolicy argument
//! \param vector_lanes_per_thread [in] - third TeamPolicy argument
//! \param functor                 [in] - functor handed to Kokkos::parallel_for
//! \param functor_name            [in] - kernel label; defaults to typeid(C).name()
146  template<class C>
147  void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team,
148  const int vector_lanes_per_thread, C functor, std::string functor_name = typeid(C).name()) const {
149 
// Scratch is requested once per distinct level: when the "a" and "b" levels of a kind
// (team or thread) are equal, both sizes are summed into a single request on level "a".
150  if ( (_scratch_team_level_a != _scratch_team_level_b) && (_scratch_thread_level_a != _scratch_thread_level_b) ) {
151  // all levels of each type need specified separately
152  Kokkos::parallel_for(
153  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
154  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
155  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
156  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
157  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
158  functor, functor_name);
159  } else if (_scratch_team_level_a != _scratch_team_level_b) {
160  // scratch thread levels are the same
161  Kokkos::parallel_for(
162  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
163  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
164  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
165  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
166  functor, functor_name);
167  } else if (_scratch_thread_level_a != _scratch_thread_level_b) {
168  // scratch team levels are the same
169  Kokkos::parallel_for(
170  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
171  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
172  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
173  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
174  functor, functor_name);
175  } else {
176  // scratch team levels and thread levels are the same
177  Kokkos::parallel_for(
178  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
179  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
180  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
181  functor, functor_name);
182  }
183 
184  }
185 
186  //! Calls a parallel_for for a functor dispatched with Tag, using the stored threads per team
187  //! parallel_for will break out over loops over teams with each thread executing code by default
//! \param batch_size [in] - forwarded unchanged to CallFunctorWithTeamThreadsAndVectors<Tag,C>
//! \param functor    [in] - functor forwarded unchanged to CallFunctorWithTeamThreadsAndVectors<Tag,C>
188  template<typename Tag, class C>
189  void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor) const {
190  // delegates to the vector-lane variant with getThreadsPerTeam() threads and a vector lane size of 1
191  CallFunctorWithTeamThreadsAndVectors<Tag,C>(batch_size, this->getThreadsPerTeam(), 1, functor);
192  }
193 
194  //! Calls a parallel_for for an untagged functor, using the stored threads per team
195  //! parallel_for will break out over loops over teams with each thread executing code by default
//! \param batch_size   [in] - forwarded unchanged to CallFunctorWithTeamThreadsAndVectors<C>
//! \param functor      [in] - functor forwarded unchanged to CallFunctorWithTeamThreadsAndVectors<C>
//! \param functor_name [in] - label forwarded with the functor; defaults to typeid(C).name()
196  template<class C>
197  void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor, std::string functor_name = typeid(C).name()) const {
198  // delegates to the vector-lane variant with getThreadsPerTeam() threads and a vector lane size of 1
199  CallFunctorWithTeamThreadsAndVectors<C>(batch_size, this->getThreadsPerTeam(), 1, functor, functor_name);
200  }
201 
//! Returns the scratch level stored for team access memory.
//! \param level [in] - 0 selects slot "a"; any other value selects slot "b"
202  KOKKOS_INLINE_FUNCTION
203  int getTeamScratchLevel(const int level) const {
204  if (level == 0) {
205  return _scratch_team_level_a;
206  } else {
207  return _scratch_team_level_b;
208  }
209  }
210 
//! Returns the scratch level stored for thread access memory.
//! \param level [in] - 0 selects slot "a"; any other value selects slot "b"
211  KOKKOS_INLINE_FUNCTION
212  int getThreadScratchLevel(const int level) const {
213  if (level == 0) {
214  return _scratch_thread_level_a;
215  } else {
216  return _scratch_thread_level_b;
217  }
218  }
219 
//! Returns the scratch size stored for team access memory.
//! \param level [in] - 0 selects slot "a"; any other value selects slot "b"
220  KOKKOS_INLINE_FUNCTION
221  int getTeamScratchSize(const int level) const {
222  if (level == 0) {
223  return _team_scratch_size_a;
224  } else {
225  return _team_scratch_size_b;
226  }
227  }
228 
//! Returns the scratch size stored for thread access memory.
//! \param level [in] - 0 selects slot "a"; any other value selects slot "b"
229  KOKKOS_INLINE_FUNCTION
230  int getThreadScratchSize(const int level) const {
231  if (level == 0) {
232  return _thread_scratch_size_a;
233  } else {
234  return _thread_scratch_size_b;
235  }
236  }
237 
//! Returns the stored threads per team divided by vector_lanes_per_thread (integer division).
//! \param vector_lanes_per_thread [in] - divisor, defaults to 1
//! NOTE(review): a value of 0 is not guarded against here - confirm callers never pass it.
238  KOKKOS_INLINE_FUNCTION
239  int getThreadsPerTeam(const int vector_lanes_per_thread = 1) const {
240  return _threads_per_team / vector_lanes_per_thread;
241  }
242 
//! Returns the stored number of vector lanes per thread (the value written by setVectorLanesPerThread).
243  KOKKOS_INLINE_FUNCTION
244  int getVectorLanesPerThread() const {
245  return _vector_lanes_per_thread;
246  }
247 
248 ///@}
249 
250 
251 /** @name Modifiers
252  * Change member variables through public member functions
253  */
254 ///@{
255 
//! Stores the scratch level for team access memory.
//! \param level [in] - 0 writes slot "a"; any other value writes slot "b"
//! \param value [in] - scratch level to store
256  void setTeamScratchLevel(const int level, const int value) {
257  if (level == 0) {
258  _scratch_team_level_a = value;
259  } else {
260  _scratch_team_level_b = value;
261  }
262  }
263 
//! Stores the scratch level for thread access memory.
//! \param level [in] - 0 writes slot "a"; any other value writes slot "b"
//! \param value [in] - scratch level to store
264  void setThreadScratchLevel(const int level, const int value) {
265  if (level == 0) {
266  _scratch_thread_level_a = value;
267  } else {
268  _scratch_thread_level_b = value;
269  }
270  }
271 
//! Stores the scratch size for team access memory.
//! \param level [in] - 0 writes slot "a"; any other value writes slot "b"
//! \param value [in] - size to store; later passed to Kokkos::PerTeam when a kernel is launched
272  void setTeamScratchSize(const int level, const int value) {
273  if (level == 0) {
274  _team_scratch_size_a = value;
275  } else {
276  _team_scratch_size_b = value;
277  }
278  }
279 
//! Stores the scratch size for thread access memory.
//! \param level [in] - 0 writes slot "a"; any other value writes slot "b"
//! \param value [in] - size to store; later passed to Kokkos::PerThread when a kernel is launched
280  void setThreadScratchSize(const int level, const int value) {
281  if (level == 0) {
282  _thread_scratch_size_a = value;
283  } else {
284  _thread_scratch_size_b = value;
285  }
286  }
287 
293  }
294 
//! Overwrites the stored number of threads per team (the value getThreadsPerTeam divides).
//! \param value [in] - new threads-per-team count; stored without validation
295  void setThreadsPerTeam(const int value) {
296  _threads_per_team = value;
297  }
298 
//! Overwrites the stored number of vector lanes per thread.
//! \param value [in] - new vector-lanes-per-thread count; stored without validation
299  void setVectorLanesPerThread(const int value) {
300  _vector_lanes_per_thread = value;
301  }
302 
303 ///@}
304 
305 
306 }; // ParallelManager Class
307 } // Compadre
308 
309 #endif
310 
311 
KOKKOS_INLINE_FUNCTION int getVectorLanesPerThread() const
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
void setVectorLanesPerThread(const int value)
int _scratch_thread_level_b
higher (slower) level memory for Kokkos::parallel_for for thread access memory
KOKKOS_INLINE_FUNCTION int getTeamScratchSize(const int level) const
KOKKOS_INLINE_FUNCTION int getThreadScratchSize(const int level) const
void setTeamScratchLevel(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor, std::string functor_name=typeid(C).name()) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
KOKKOS_INLINE_FUNCTION int getThreadsPerTeam(const int vector_lanes_per_thread=1) const
void setThreadScratchLevel(const int level, const int value)
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
int _scratch_team_level_b
lowest level memory for Kokkos::parallel_for for thread access memory
void setTeamScratchSize(const int level, const int value)
void setThreadScratchSize(const int level, const int value)
int _scratch_thread_level_a
higher (slower) level memory for Kokkos::parallel_for for team access memory
int _scratch_team_level_a
lowest level memory for Kokkos::parallel_for for team access memory
void setThreadsPerTeam(const int value)
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor, std::string functor_name=typeid(C).name()) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
int _threads_per_team
calculated number of threads per team
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const