Compadre  1.5.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Compadre_ParallelManager.hpp
Go to the documentation of this file.
1 #ifndef _COMPADRE_PARALLELMANAGER_HPP_
2 #define _COMPADRE_PARALLELMANAGER_HPP_
3 
4 #include "Compadre_Config.h"
5 #include "Compadre_Typedefs.hpp"
6 
7 namespace Compadre {
8 
9 
10 //! Parallel Manager
11 /*!
12 * This class sets and manages thread / teams levels, scratch memory sizes, and kernel executions.
13 * ex:
14 * Compadre::ConvertLayoutLeftToRight clr;
15 * Compadre::ParallelManager pm;
16 * // no tag specified
17 * pm.CallFunctorWithTeamThreads(clr, 100, "MyFunctorName");
18 * // some tag specified
19 * pm.CallFunctorWithTeamThreads<DefaultTag>(clr, 100);
20 */
22 public:
23 
24  //! lowest level memory for Kokkos::parallel_for for team access memory
27 
28  //! higher (slower) level memory for Kokkos::parallel_for for team access memory
31 
32  //! lowest level memory for Kokkos::parallel_for for thread access memory
35 
36  //! higher (slower) level memory for Kokkos::parallel_for for thread access memory
39 
40  //! largest team size
43 
44 
45 /** @name Private Modifiers
46  * Private function because information lives on the device
47  */
48 ///@{
49 ///@}
50 
51 /** @name Private Accessors
52  * Private function because information lives on the device
53  */
54 ///@{
55 ///@}
56 
57 /** @name Private Utility
58  *
59  */
60 ///@{
61 ///@}
62 
63 public:
64 
65 /** @name Instantiation / Destruction
66  *
67  */
68 ///@{
69 
72 
73 #ifdef COMPADRE_USE_CUDA
78 
79  _default_threads = 16;
81 #else
86 
87  _default_threads = 1;
89 #endif
90  if (const char* env_threads = std::getenv("THREADS")) {
91  _default_threads = std::atoi(env_threads);
92  }
93  if (const char* env_vector_lanes = std::getenv("VECTORLANES")) {
94  _default_vector_lanes = std::atoi(env_vector_lanes);
95  }
96 #ifdef COMPADRE_EXTREME_DEBUG
97  printf("threads per team: %d, vector lanes per team: %d\n", _default_threads, _default_vector_lanes);
98 #endif
99  }
100 
101 ///@}
102 
103 /** @name Public Utility
104  *
105  */
106 ///@{
107 ///@}
108 
109 /** @name Accessors
110  * Retrieve member variables through public member functions
111  */
112 ///@{
113 
114  //! Creates a team policy for a parallel_for
115  //! parallel_for will break out over loops over teams with each vector lane executing code be default
116  Kokkos::TeamPolicy<device_execution_space>
117  TeamPolicyThreadsAndVectors(const global_index_type batch_size, const int threads_per_team = -1,
118  const int vector_lanes_per_thread = -1) const {
119 
120  if (threads_per_team>0 && vector_lanes_per_thread>0) {
122  // all levels of each type need specified separately
123  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
124  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
125  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
126  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
127  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
129  // scratch thread levels are the same
130  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
131  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
132  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
133  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
135  // scratch team levels are the same
136  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
137  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
138  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
139  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
140  } else {
141  // scratch team levels and thread levels are the same
142  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, vector_lanes_per_thread)
143  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
144  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
145  }
146  } else if (threads_per_team>0) {
148  // all levels of each type need specified separately
149  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
150  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
151  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
152  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
153  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
155  // scratch thread levels are the same
156  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
157  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
158  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
159  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
161  // scratch team levels are the same
162  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
163  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
164  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
165  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
166  } else {
167  // scratch team levels and thread levels are the same
168  return Kokkos::TeamPolicy<device_execution_space>(batch_size, threads_per_team, _default_vector_lanes)
169  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
170  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
171  }
172  } else if (vector_lanes_per_thread>0) {
174  // all levels of each type need specified separately
175  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
176  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
177  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
178  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
179  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
181  // scratch thread levels are the same
182  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
183  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
184  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
185  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
187  // scratch team levels are the same
188  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
189  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
190  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
191  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
192  } else {
193  // scratch team levels and thread levels are the same
194  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, vector_lanes_per_thread)
195  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
196  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
197  }
198  } else {
200  // all levels of each type need specified separately
201  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
202  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
203  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
204  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
205  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
207  // scratch thread levels are the same
208  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
209  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
210  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
211  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
213  // scratch team levels are the same
214  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
215  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
216  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
217  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b));
218  } else {
219  // scratch team levels and thread levels are the same
220  return Kokkos::TeamPolicy<device_execution_space>(batch_size, _default_threads, _default_vector_lanes)
221  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
222  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b));
223  }
224  }
225  }
226 
227  //! Calls a parallel_for
228  //! parallel_for will break out over loops over teams with each vector lane executing code be default
229  template<typename Tag, class C>
230  void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team = -1,
231  const int vector_lanes_per_thread = -1) const {
232 
233  if (threads_per_team>0 && vector_lanes_per_thread>0) {
235  // all levels of each type need specified separately
236  Kokkos::parallel_for(
237  typeid(Tag).name(),
238  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
239  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
240  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
241  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
242  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
243  functor);
245  // scratch thread levels are the same
246  Kokkos::parallel_for(
247  typeid(Tag).name(),
248  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
249  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
250  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
251  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
252  functor);
254  // scratch team levels are the same
255  Kokkos::parallel_for(
256  typeid(Tag).name(),
257  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
258  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
259  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
260  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
261  functor);
262  } else {
263  // scratch team levels and thread levels are the same
264  Kokkos::parallel_for(
265  typeid(Tag).name(),
266  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
267  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
268  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
269  functor);
270  }
271  } else if (threads_per_team>0) {
273  // all levels of each type need specified separately
274  Kokkos::parallel_for(
275  typeid(Tag).name(),
276  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
277  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
278  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
279  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
280  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
281  functor);
283  // scratch thread levels are the same
284  Kokkos::parallel_for(
285  typeid(Tag).name(),
286  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
287  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
288  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
289  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
290  functor);
292  // scratch team levels are the same
293  Kokkos::parallel_for(
294  typeid(Tag).name(),
295  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
296  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
297  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
298  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
299  functor);
300  } else {
301  // scratch team levels and thread levels are the same
302  Kokkos::parallel_for(
303  typeid(Tag).name(),
304  Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, _default_vector_lanes)
305  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
306  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
307  functor);
308  }
309  } else if (vector_lanes_per_thread>0) {
311  // all levels of each type need specified separately
312  Kokkos::parallel_for(
313  typeid(Tag).name(),
314  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
315  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
316  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
317  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
318  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
319  functor);
321  // scratch thread levels are the same
322  Kokkos::parallel_for(
323  typeid(Tag).name(),
324  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
325  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
326  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
327  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
328  functor);
330  // scratch team levels are the same
331  Kokkos::parallel_for(
332  typeid(Tag).name(),
333  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
334  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
335  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
336  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
337  functor);
338  } else {
339  // scratch team levels and thread levels are the same
340  Kokkos::parallel_for(
341  typeid(Tag).name(),
342  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, vector_lanes_per_thread)
343  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
344  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
345  functor);
346  }
347  } else {
349  // all levels of each type need specified separately
350  Kokkos::parallel_for(
351  typeid(Tag).name(),
352  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
353  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
354  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
355  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
356  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
357  functor);
359  // scratch thread levels are the same
360  Kokkos::parallel_for(
361  typeid(Tag).name(),
362  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
363  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
364  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
365  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
366  functor);
368  // scratch team levels are the same
369  Kokkos::parallel_for(
370  typeid(Tag).name(),
371  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
372  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
373  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
374  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
375  functor);
376  } else {
377  // scratch team levels and thread levels are the same
378  Kokkos::parallel_for(
379  typeid(Tag).name(),
380  Kokkos::TeamPolicy<Tag>(batch_size, _default_threads, _default_vector_lanes)
381  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
382  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
383  functor);
384  }
385  }
386  }
387 
388  //! Calls a parallel_for
389  //! parallel_for will break out over loops over teams with each vector lane executing code be default
390  template<class C>
391  void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team = -1,
392  const int vector_lanes_per_thread = -1, std::string functor_name = typeid(C).name()) const {
393 
394  if (threads_per_team>0 && vector_lanes_per_thread>0) {
396  // all levels of each type need specified separately
397  Kokkos::parallel_for(
398  functor_name,
399  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
400  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
401  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
402  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
403  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
404  functor);
406  // scratch thread levels are the same
407  Kokkos::parallel_for(
408  functor_name,
409  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
410  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
411  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
412  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
413  functor);
415  // scratch team levels are the same
416  Kokkos::parallel_for(
417  functor_name,
418  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
419  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
420  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
421  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
422  functor);
423  } else {
424  // scratch team levels and thread levels are the same
425  Kokkos::parallel_for(
426  functor_name,
427  Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
428  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
429  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
430  functor);
431  }
432  } else if (threads_per_team>0) {
434  // all levels of each type need specified separately
435  Kokkos::parallel_for(
436  functor_name,
437  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
438  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
439  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
440  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
441  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
442  functor);
444  // scratch thread levels are the same
445  Kokkos::parallel_for(
446  functor_name,
447  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
448  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
449  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
450  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
451  functor);
453  // scratch team levels are the same
454  Kokkos::parallel_for(
455  functor_name,
456  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
457  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
458  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
459  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
460  functor);
461  } else {
462  // scratch team levels and thread levels are the same
463  Kokkos::parallel_for(
464  functor_name,
465  Kokkos::TeamPolicy<>(batch_size, threads_per_team, _default_vector_lanes)
466  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
467  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
468  functor);
469  }
470  } else if (vector_lanes_per_thread>0) {
472  Kokkos::parallel_for(
473  functor_name,
474  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
475  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
476  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
477  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
478  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
479  functor);
481  // scratch thread levels are the same
482  Kokkos::parallel_for(
483  functor_name,
484  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
485  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
486  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
487  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
488  functor);
490  // scratch team levels are the same
491  Kokkos::parallel_for(
492  functor_name,
493  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
494  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
495  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
496  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
497  functor);
498  } else {
499  // scratch team levels and thread levels are the same
500  Kokkos::parallel_for(
501  functor_name,
502  Kokkos::TeamPolicy<>(batch_size, _default_threads, vector_lanes_per_thread)
503  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
504  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
505  functor);
506  }
507  } else {
509  Kokkos::parallel_for(
510  functor_name,
511  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
512  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
513  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
514  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
515  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
516  functor);
518  // scratch thread levels are the same
519  Kokkos::parallel_for(
520  functor_name,
521  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
522  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a))
523  .set_scratch_size(_scratch_team_level_b, Kokkos::PerTeam(_team_scratch_size_b))
524  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
525  functor);
527  // scratch team levels are the same
528  Kokkos::parallel_for(
529  functor_name,
530  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
531  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
532  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a))
533  .set_scratch_size(_scratch_thread_level_b, Kokkos::PerThread(_thread_scratch_size_b)),
534  functor);
535  } else {
536  // scratch team levels and thread levels are the same
537  Kokkos::parallel_for(
538  functor_name,
539  Kokkos::TeamPolicy<>(batch_size, _default_threads, _default_vector_lanes)
540  .set_scratch_size(_scratch_team_level_a, Kokkos::PerTeam(_team_scratch_size_a + _team_scratch_size_b))
541  .set_scratch_size(_scratch_thread_level_a, Kokkos::PerThread(_thread_scratch_size_a + _thread_scratch_size_b)),
542  functor);
543  }
544  }
545  }
546 
547  //! Calls a parallel_for
548  //! parallel_for will break out over loops over teams with each thread executing code be default
549  template<typename Tag, class C>
550  void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size) const {
551  // calls breakout over vector lanes with vector lane size of 1
552  CallFunctorWithTeamThreadsAndVectors<Tag,C>(functor, batch_size, _default_threads, 1);
553  }
554 
555  //! Calls a parallel_for
556  //! parallel_for will break out over loops over teams with each thread executing code be default
557  template<class C>
558  void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size, std::string functor_name = typeid(C).name()) const {
559  // calls breakout over vector lanes with vector lane size of 1
560  CallFunctorWithTeamThreadsAndVectors<C>(functor, batch_size, _default_threads, 1, functor_name);
561  }
562 
563  KOKKOS_INLINE_FUNCTION
564  int getTeamScratchLevel(const int level) const {
565  if (level == 0) {
566  return _scratch_team_level_a;
567  } else {
568  return _scratch_team_level_b;
569  }
570  }
571 
572  KOKKOS_INLINE_FUNCTION
573  int getThreadScratchLevel(const int level) const {
574  if (level == 0) {
576  } else {
578  }
579  }
580 
581  KOKKOS_INLINE_FUNCTION
582  int getTeamScratchSize(const int level) const {
583  if (level == 0) {
584  return _team_scratch_size_a;
585  } else {
586  return _team_scratch_size_b;
587  }
588  }
589 
590  KOKKOS_INLINE_FUNCTION
591  int getThreadScratchSize(const int level) const {
592  if (level == 0) {
593  return _thread_scratch_size_a;
594  } else {
595  return _thread_scratch_size_b;
596  }
597  }
598 
599 ///@}
600 
601 
602 /** @name Modifiers
603  * Changed member variables through public member functions
604  */
605 ///@{
606 
607  void setTeamScratchLevel(const int level, const int value) {
608  if (level == 0) {
609  _scratch_team_level_a = value;
610  } else {
611  _scratch_team_level_b = value;
612  }
613  }
614 
615  void setThreadScratchLevel(const int level, const int value) {
616  if (level == 0) {
617  _scratch_thread_level_a = value;
618  } else {
619  _scratch_thread_level_b = value;
620  }
621  }
622 
623  void setTeamScratchSize(const int level, const int value) {
624  if (level == 0) {
625  _team_scratch_size_a = value;
626  } else {
627  _team_scratch_size_b = value;
628  }
629  }
630 
631  void setThreadScratchSize(const int level, const int value) {
632  if (level == 0) {
633  _thread_scratch_size_a = value;
634  } else {
635  _thread_scratch_size_b = value;
636  }
637  }
638 
644  }
645 
646 ///@}
647 
648 
649 }; // ParallelManager Class
650 } // Compadre
651 
652 #endif
653 
654 
Kokkos::TeamPolicy< device_execution_space > TeamPolicyThreadsAndVectors(const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Creates a team policy for a parallel_for parallel_for will break out over loops over teams with each ...
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
int _scratch_thread_level_b
higher (slower) level memory for Kokkos::parallel_for for thread access memory
KOKKOS_INLINE_FUNCTION int getTeamScratchSize(const int level) const
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size, std::string functor_name=typeid(C).name()) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
KOKKOS_INLINE_FUNCTION int getThreadScratchSize(const int level) const
void setTeamScratchLevel(const int level, const int value)
void setThreadScratchLevel(const int level, const int value)
int _default_threads
largest team size
int _scratch_team_level_b
lowest level memory for Kokkos::parallel_for for thread access memory
void setTeamScratchSize(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
void setThreadScratchSize(const int level, const int value)
int _scratch_thread_level_a
higher (slower) level memory for Kokkos::parallel_for for team access memory
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size) const
Calls a parallel_for parallel_for will break out over loops over teams with each thread executing cod...
int _scratch_team_level_a
lowest level memory for Kokkos::parallel_for for team access memory
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1, std::string functor_name=typeid(C).name()) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const