Compadre  1.5.9
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Compadre_ParallelManager.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Compadre: COMpatible PArticle Discretization and REmap Toolkit
4 //
5 // Copyright 2018 NTESS and the Compadre contributors.
6 // SPDX-License-Identifier: BSD-2-Clause
7 // *****************************************************************************
8 // @HEADER
9 #ifndef _COMPADRE_PARALLELMANAGER_HPP_
10 #define _COMPADRE_PARALLELMANAGER_HPP_
11 
12 #include "Compadre_Config.h"
13 #include "Compadre_Typedefs.hpp"
14 
15 namespace Compadre {
16 
17 
18 //! Parallel Manager
19 /*!
20 * This class sets and manages thread and team levels, scratch memory sizes, and kernel executions.
21 * ex:
22 * Compadre::ConvertLayoutLeftToRight clr;
23 * Compadre::ParallelManager pm;
24 * // no tag specified
25 * pm.CallFunctorWithTeamThreads(clr, 100, "MyFunctorName");
26 * // some tag specified
27 * pm.CallFunctorWithTeamThreads<DefaultTag>(clr, 100);
28 */
30 public:
31 
32  //! lowest level memory for Kokkos::parallel_for for team access memory
35 
36  //! higher (slower) level memory for Kokkos::parallel_for for team access memory
39 
40  //! lowest level memory for Kokkos::parallel_for for thread access memory
43 
44  //! higher (slower) level memory for Kokkos::parallel_for for thread access memory
47 
48  //! largest team size
51 
52 
53 /** @name Private Modifiers
54  * Private function because information lives on the device
55  */
56 ///@{
57 ///@}
58 
59 /** @name Private Accessors
60  * Private function because information lives on the device
61  */
62 ///@{
63 ///@}
64 
65 /** @name Private Utility
66  *
67  */
68 ///@{
69 ///@}
70 
71 public:
72 
73 /** @name Instantiation / Destruction
74  *
75  */
76 ///@{
77 
80 
81 #ifdef COMPADRE_USE_CUDA
86 
87  _default_threads = 16;
89 #else
94 
95  _default_threads = 1;
97 #endif
98  if (const char* env_threads = std::getenv("THREADS")) {
99  _default_threads = std::atoi(env_threads);
100  }
101  if (const char* env_vector_lanes = std::getenv("VECTORLANES")) {
102  _default_vector_lanes = std::atoi(env_vector_lanes);
103  }
104 #ifdef COMPADRE_EXTREME_DEBUG
105  printf("threads per team: %d, vector lanes per team: %d\n", _default_threads, _default_vector_lanes);
106 #endif
107  }
108 
109 ///@}
110 
111 /** @name Public Utility
112  *
113  */
114 ///@{
115 ///@}
116 
117 /** @name Accessors
118  * Retrieve member variables through public member functions
119  */
120 ///@{
121 
122  //! Creates a team policy for a parallel_for
123  //! parallel_for will break out over loops over teams with each vector lane executing code be default
124  Kokkos::TeamPolicy<device_execution_space>
125  TeamPolicyThreadsAndVectors(const global_index_type batch_size, const int threads_per_team = -1,
126  const int vector_lanes_per_thread = -1) const {
127 
128  if (threads_per_team>0 && vector_lanes_per_thread>0) {
130  // all levels of each type need specified separately
131  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
132  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
133  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
134  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
135  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
137  // scratch thread levels are the same
138  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
139  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
140  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
141  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
143  // scratch team levels are the same
144  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
145  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
146  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
147  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
148  } else {
149  // scratch team levels and thread levels are the same
150  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
151  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
152  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
153  }
154  } else if (threads_per_team>0) {
156  // all levels of each type need specified separately
157  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
158  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
159  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
160  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
161  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
163  // scratch thread levels are the same
164  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
165  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
166  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
167  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
169  // scratch team levels are the same
170  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
171  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
172  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
173  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
174  } else {
175  // scratch team levels and thread levels are the same
176  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
177  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
178  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
179  }
180  } else if (vector_lanes_per_thread>0) {
182  // all levels of each type need specified separately
183  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
184  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
185  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
186  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
187  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
189  // scratch thread levels are the same
190  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
191  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
192  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
193  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
195  // scratch team levels are the same
196  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
197  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
198  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
199  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
200  } else {
201  // scratch team levels and thread levels are the same
202  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
203  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
204  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
205  }
206  } else {
208  // all levels of each type need specified separately
209  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
210  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
211  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
212  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
213  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
215  // scratch thread levels are the same
216  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
217  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
218  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
219  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
221  // scratch team levels are the same
222  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
223  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
224  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
225  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
226  } else {
227  // scratch team levels and thread levels are the same
228  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
229  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
230  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
231  }
232  }
233  }
234 
235  //! Calls a parallel_for
236  //! parallel_for will break out over loops over teams with each vector lane executing code be default
237  template<typename Tag, class C>
238  void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team = -1,
239  const int vector_lanes_per_thread = -1) const {
240 
241  if (threads_per_team>0 && vector_lanes_per_thread>0) {
243  // all levels of each type need specified separately
244  Kokkos::parallel_for(
245  typeid(Tag).name(),
246  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
247  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
248  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
249  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
250  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
251  functor);
253  // scratch thread levels are the same
254  Kokkos::parallel_for(
255  typeid(Tag).name(),
256  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
257  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
258  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
259  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
260  functor);
262  // scratch team levels are the same
263  Kokkos::parallel_for(
264  typeid(Tag).name(),
265  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
266  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
267  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
268  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
269  functor);
270  } else {
271  // scratch team levels and thread levels are the same
272  Kokkos::parallel_for(
273  typeid(Tag).name(),
274  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
275  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
276  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
277  functor);
278  }
279  } else if (threads_per_team>0) {
281  // all levels of each type need specified separately
282  Kokkos::parallel_for(
283  typeid(Tag).name(),
284  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
285  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
286  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
287  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
288  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
289  functor);
291  // scratch thread levels are the same
292  Kokkos::parallel_for(
293  typeid(Tag).name(),
294  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
295  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
296  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
297  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
298  functor);
300  // scratch team levels are the same
301  Kokkos::parallel_for(
302  typeid(Tag).name(),
303  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
304  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
305  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
306  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
307  functor);
308  } else {
309  // scratch team levels and thread levels are the same
310  Kokkos::parallel_for(
311  typeid(Tag).name(),
312  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
313  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
314  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
315  functor);
316  }
317  } else if (vector_lanes_per_thread>0) {
319  // all levels of each type need specified separately
320  Kokkos::parallel_for(
321  typeid(Tag).name(),
322  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
323  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
324  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
325  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
326  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
327  functor);
329  // scratch thread levels are the same
330  Kokkos::parallel_for(
331  typeid(Tag).name(),
332  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
333  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
334  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
335  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
336  functor);
338  // scratch team levels are the same
339  Kokkos::parallel_for(
340  typeid(Tag).name(),
341  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
342  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
343  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
344  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
345  functor);
346  } else {
347  // scratch team levels and thread levels are the same
348  Kokkos::parallel_for(
349  typeid(Tag).name(),
350  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
351  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
352  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
353  functor);
354  }
355  } else {
357  // all levels of each type need specified separately
358  Kokkos::parallel_for(
359  typeid(Tag).name(),
360  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
361  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
362  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
363  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
364  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
365  functor);
367  // scratch thread levels are the same
368  Kokkos::parallel_for(
369  typeid(Tag).name(),
370  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
371  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
372  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
373  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
374  functor);
376  // scratch team levels are the same
377  Kokkos::parallel_for(
378  typeid(Tag).name(),
379  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
380  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
381  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
382  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
383  functor);
384  } else {
385  // scratch team levels and thread levels are the same
386  Kokkos::parallel_for(
387  typeid(Tag).name(),
388  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
389  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
390  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
391  functor);
392  }
393  }
394  }
395 
396  //! Calls a parallel_for
397  //! parallel_for will break out over loops over teams with each vector lane executing code be default
398  template<class C>
399  void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team = -1,
400  const int vector_lanes_per_thread = -1, std::string functor_name = typeid(C).name()) const {
401 
402  if (threads_per_team>0 && vector_lanes_per_thread>0) {
404  // all levels of each type need specified separately
405  Kokkos::parallel_for(
406  functor_name,
407  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
408  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
409  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
410  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
411  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
412  functor);
414  // scratch thread levels are the same
415  Kokkos::parallel_for(
416  functor_name,
417  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
418  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
419  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
420  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
421  functor);
423  // scratch team levels are the same
424  Kokkos::parallel_for(
425  functor_name,
426  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
427  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
428  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
429  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
430  functor);
431  } else {
432  // scratch team levels and thread levels are the same
433  Kokkos::parallel_for(
434  functor_name,
435  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
436  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
437  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
438  functor);
439  }
440  } else if (threads_per_team>0) {
442  // all levels of each type need specified separately
443  Kokkos::parallel_for(
444  functor_name,
445  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
446  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
447  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
448  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
449  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
450  functor);
452  // scratch thread levels are the same
453  Kokkos::parallel_for(
454  functor_name,
455  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
456  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
457  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
458  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
459  functor);
461  // scratch team levels are the same
462  Kokkos::parallel_for(
463  functor_name,
464  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
465  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
466  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
467  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
468  functor);
469  } else {
470  // scratch team levels and thread levels are the same
471  Kokkos::parallel_for(
472  functor_name,
473  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
474  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
475  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
476  functor);
477  }
478  } else if (vector_lanes_per_thread>0) {
480  Kokkos::parallel_for(
481  functor_name,
482  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
483  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
484  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
485  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
486  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
487  functor);
489  // scratch thread levels are the same
490  Kokkos::parallel_for(
491  functor_name,
492  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
493  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
494  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
495  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
496  functor);
498  // scratch team levels are the same
499  Kokkos::parallel_for(
500  functor_name,
501  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
502  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
503  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
504  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
505  functor);
506  } else {
507  // scratch team levels and thread levels are the same
508  Kokkos::parallel_for(
509  functor_name,
510  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
511  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
512  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
513  functor);
514  }
515  } else {
517  Kokkos::parallel_for(
518  functor_name,
519  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
520  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
521  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
522  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
523  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
524  functor);
526  // scratch thread levels are the same
527  Kokkos::parallel_for(
528  functor_name,
529  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
530  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
531  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
532  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
533  functor);
535  // scratch team levels are the same
536  Kokkos::parallel_for(
537  functor_name,
538  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
539  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
540  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
541  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
542  functor);
543  } else {
544  // scratch team levels and thread levels are the same
545  Kokkos::parallel_for(
546  functor_name,
547  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
548  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
549  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
550  functor);
551  }
552  }
553  }
554 
555  //! Calls a parallel_for
556  //! parallel_for will break out over loops over teams with each thread executing code be default
557  template<typename Tag, class C>
558  void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size) const {
559  // calls breakout over vector lanes with vector lane size of 1
560  CallFunctorWithTeamThreadsAndVectors<Tag,C>(functor, batch_size, _default_threads, 1);
561  }
562 
563  //! Calls a parallel_for
564  //! parallel_for will break out over loops over teams with each thread executing code be default
565  template<class C>
566  void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size, std::string functor_name = typeid(C).name()) const {
567  // calls breakout over vector lanes with vector lane size of 1
568  CallFunctorWithTeamThreadsAndVectors<C>(functor, batch_size, _default_threads, 1, functor_name);
569  }
570 
571  KOKKOS_INLINE_FUNCTION
572  int getTeamScratchLevel(const int level) const {
573  if (level == 0) {
574  return _scratch_team_level_a;
575  } else {
576  return _scratch_team_level_b;
577  }
578  }
579 
580  KOKKOS_INLINE_FUNCTION
581  int getThreadScratchLevel(const int level) const {
582  if (level == 0) {
584  } else {
586  }
587  }
588 
589  KOKKOS_INLINE_FUNCTION
590  int getTeamScratchSize(const int level) const {
591  if (level == 0) {
592  return _team_scratch_size_a;
593  } else {
594  return _team_scratch_size_b;
595  }
596  }
597 
598  KOKKOS_INLINE_FUNCTION
599  int getThreadScratchSize(const int level) const {
600  if (level == 0) {
601  return _thread_scratch_size_a;
602  } else {
603  return _thread_scratch_size_b;
604  }
605  }
606 
607 ///@}
608 
609 
610 /** @name Modifiers
611  * Change member variables through public member functions
612  */
613 ///@{
614 
615  void setTeamScratchLevel(const int level, const int value) {
616  if (level == 0) {
617  _scratch_team_level_a = value;
618  } else {
619  _scratch_team_level_b = value;
620  }
621  }
622 
623  void setThreadScratchLevel(const int level, const int value) {
624  if (level == 0) {
625  _scratch_thread_level_a = value;
626  } else {
627  _scratch_thread_level_b = value;
628  }
629  }
630 
631  void setTeamScratchSize(const int level, const int value) {
632  if (level == 0) {
633  _team_scratch_size_a = value;
634  } else {
635  _team_scratch_size_b = value;
636  }
637  }
638 
639  void setThreadScratchSize(const int level, const int value) {
640  if (level == 0) {
641  _thread_scratch_size_a = value;
642  } else {
643  _thread_scratch_size_b = value;
644  }
645  }
646 
652  }
653 
654 ///@}
655 
656 
657 }; // ParallelManager Class
658 } // Compadre
659 
660 #endif
661 
662 
Kokkos::TeamPolicy< device_execution_space > TeamPolicyThreadsAndVectors(const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Creates a team policy for a parallel_for parallel_for will break out over loops over teams with each ...
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
int _scratch_thread_level_b
higher (slower) level memory for Kokkos::parallel_for for thread access memory
KOKKOS_INLINE_FUNCTION int getTeamScratchSize(const int level) const
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size, std::string functor_name=typeid(C).name()) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
KOKKOS_INLINE_FUNCTION int getThreadScratchSize(const int level) const
void setTeamScratchLevel(const int level, const int value)
void setThreadScratchLevel(const int level, const int value)
int _default_threads
largest team size
int _scratch_team_level_b
lowest level memory for Kokkos::parallel_for for thread access memory
void setTeamScratchSize(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
void setThreadScratchSize(const int level, const int value)
int _scratch_thread_level_a
higher (slower) level memory for Kokkos::parallel_for for team access memory
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
int _scratch_team_level_a
lowest level memory for Kokkos::parallel_for for team access memory
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1, std::string functor_name=typeid(C).name()) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const