Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_ExecPolicy.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_EXECPOLICY_HPP
23 #define KOKKOS_EXECPOLICY_HPP
24 
25 #include <Kokkos_Core_fwd.hpp>
26 #include <impl/Kokkos_Traits.hpp>
27 #include <impl/Kokkos_Error.hpp>
28 #include <impl/Kokkos_AnalyzePolicy.hpp>
29 #include <Kokkos_BitManipulation.hpp>
30 #include <Kokkos_Concepts.hpp>
31 #include <Kokkos_TypeInfo.hpp>
32 #ifndef KOKKOS_ENABLE_IMPL_TYPEINFO
33 #include <typeinfo>
34 #endif
35 #include <limits>
36 
37 //----------------------------------------------------------------------------
38 
39 namespace Kokkos {
40 
// Empty tag types, one per parallel pattern name (for / scan / reduce).
// NOTE(review): how these tags are consumed is not visible in this header;
// presumably they select a dispatch pattern -- confirm against the callers.

/// Tag type named after the parallel "for" pattern.
struct ParallelForTag {};

/// Tag type named after the parallel "scan" pattern.
struct ParallelScanTag {};

/// Tag type named after the parallel "reduce" pattern.
struct ParallelReduceTag {};
44 
/// Strongly-typed wrapper around an integer chunk size.
///
/// The value is stored exactly as passed; no validation happens here.
struct ChunkSize {
  /// The wrapped chunk size.
  int value;

  /// Explicit construction from an integer.
  explicit ChunkSize(int value_) : value{value_} {}

#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
  /// Deprecated implicit conversion from `int`. It is a template so that the
  /// non-template explicit constructor above is preferred whenever both are
  /// viable.
  template <typename T = void>
  KOKKOS_DEPRECATED_WITH_COMMENT("ChunkSize should be constructed explicitly.")
  ChunkSize(int value_) : value{value_} {}
#endif
};
54 
76 template <class... Properties>
77 class RangePolicy : public Impl::PolicyTraits<Properties...> {
78  public:
79  using traits = Impl::PolicyTraits<Properties...>;
80 
81  private:
82  typename traits::execution_space m_space;
83  typename traits::index_type m_begin;
84  typename traits::index_type m_end;
85  typename traits::index_type m_granularity;
86  typename traits::index_type m_granularity_mask;
87 
88  template <class... OtherProperties>
89  friend class RangePolicy;
90 
91  public:
93  using execution_policy = RangePolicy<Properties...>;
94  using member_type = typename traits::index_type;
95  using index_type = typename traits::index_type;
96 
97  KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
98  return m_space;
99  }
100  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
101  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
102 
103  // TODO: find a better workaround for Clangs weird instantiation order
104  // This thing is here because of an instantiation error, where the RangePolicy
105  // is inserted into FunctorValue Traits, which tries decltype on the operator.
106  // It tries to do this even though the first argument of parallel for clearly
107  // doesn't match.
108  void operator()(const int&) const {}
109 
110  template <class... OtherProperties>
111  RangePolicy(const RangePolicy<OtherProperties...>& p)
112  : traits(p), // base class may contain data such as desired occupancy
113  m_space(p.m_space),
114  m_begin(p.m_begin),
115  m_end(p.m_end),
116  m_granularity(p.m_granularity),
117  m_granularity_mask(p.m_granularity_mask) {}
118 
119  inline RangePolicy()
120  : m_space(),
121  m_begin(0),
122  m_end(0),
123  m_granularity(0),
124  m_granularity_mask(0) {}
125 
127  template <typename IndexType1, typename IndexType2,
128  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
129  std::is_convertible_v<IndexType2, member_type>),
130  bool> = false>
131  inline RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
132  : RangePolicy(typename traits::execution_space(), work_begin, work_end) {}
133 
135  template <typename IndexType1, typename IndexType2,
136  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
137  std::is_convertible_v<IndexType2, member_type>),
138  bool> = false>
139  inline RangePolicy(const typename traits::execution_space& work_space,
140  const IndexType1 work_begin, const IndexType2 work_end)
141  : m_space(work_space),
142  m_begin(work_begin),
143  m_end(work_end),
144  m_granularity(0),
145  m_granularity_mask(0) {
146  check_conversion_safety(work_begin);
147  check_conversion_safety(work_end);
148  check_bounds_validity();
149  set_auto_chunk_size();
150  }
151 
152  template <typename IndexType1, typename IndexType2,
153  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
154  std::is_convertible_v<IndexType2, member_type>),
155  bool> = false>
156  RangePolicy(const typename traits::execution_space& work_space,
157  const IndexType1 work_begin, const IndexType2 work_end,
158  const ChunkSize chunk_size)
159  : m_space(work_space),
160  m_begin(work_begin),
161  m_end(work_end),
162  m_granularity(0),
163  m_granularity_mask(0) {
164  check_conversion_safety(work_begin);
165  check_conversion_safety(work_end);
166  check_bounds_validity();
167  set_chunk_size(chunk_size.value);
168  }
169 
171  template <typename IndexType1, typename IndexType2, typename... Args,
172  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
173  std::is_convertible_v<IndexType2, member_type>),
174  bool> = false>
175  RangePolicy(const IndexType1 work_begin, const IndexType2 work_end,
176  const ChunkSize chunk_size)
177  : RangePolicy(typename traits::execution_space(), work_begin, work_end,
178  chunk_size) {}
179 
180  public:
181 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
182  KOKKOS_DEPRECATED_WITH_COMMENT("Use set_chunk_size instead")
183  inline void set(ChunkSize chunksize) {
184  m_granularity = chunksize.value;
185  m_granularity_mask = m_granularity - 1;
186  }
187 #endif
188 
189  public:
191  inline member_type chunk_size() const { return m_granularity; }
192 
194  inline RangePolicy& set_chunk_size(int chunk_size) {
195  m_granularity = chunk_size;
196  m_granularity_mask = m_granularity - 1;
197  return *this;
198  }
199 
200  private:
202  inline void set_auto_chunk_size() {
203 #ifdef KOKKOS_ENABLE_SYCL
204  if (std::is_same_v<typename traits::execution_space, Kokkos::SYCL>) {
205  // chunk_size <=1 lets the compiler choose the workgroup size when
206  // launching kernels
207  m_granularity = 1;
208  m_granularity_mask = 0;
209  return;
210  }
211 #endif
212  auto concurrency = static_cast<int64_t>(m_space.concurrency());
213  if (concurrency == 0) concurrency = 1;
214 
215  if (m_granularity > 0 &&
216  !Kokkos::has_single_bit(static_cast<unsigned>(m_granularity))) {
217  Kokkos::abort("RangePolicy blocking granularity must be power of two");
218  }
219 
220  int64_t new_chunk_size = 1;
221  while (new_chunk_size * 100 * concurrency <
222  static_cast<int64_t>(m_end - m_begin))
223  new_chunk_size *= 2;
224  if (new_chunk_size < 128) {
225  new_chunk_size = 1;
226  while ((new_chunk_size * 40 * concurrency <
227  static_cast<int64_t>(m_end - m_begin)) &&
228  (new_chunk_size < 128))
229  new_chunk_size *= 2;
230  }
231  m_granularity = new_chunk_size;
232  m_granularity_mask = m_granularity - 1;
233  }
234 
235  void check_bounds_validity() {
236  if (m_end < m_begin) {
237  std::string msg = "Kokkos::RangePolicy bounds error: The lower bound (" +
238  std::to_string(m_begin) +
239  ") is greater than the upper bound (" +
240  std::to_string(m_end) + ").\n";
241 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
242  Kokkos::abort(msg.c_str());
243 #endif
244  m_begin = 0;
245  m_end = 0;
246 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
247  Kokkos::Impl::log_warning(msg);
248 #endif
249  }
250  }
251 
252  // To be replaced with std::in_range (c++20)
253  template <typename IndexType>
254  static void check_conversion_safety([[maybe_unused]] const IndexType bound) {
255  // Checking that the round-trip conversion preserves input index value
256  if constexpr (std::is_convertible_v<member_type, IndexType>) {
257 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \
258  defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
259  bool warn = false;
260 
261  if constexpr (std::is_arithmetic_v<member_type> &&
262  (std::is_signed_v<IndexType> !=
263  std::is_signed_v<member_type>)) {
264  // check signed to unsigned
265  if constexpr (std::is_signed_v<IndexType>)
266  warn |= (bound < static_cast<IndexType>(
267  std::numeric_limits<member_type>::min()));
268 
269  // check unsigned to signed
270  if constexpr (std::is_signed_v<member_type>)
271  warn |= (bound > static_cast<IndexType>(
272  std::numeric_limits<member_type>::max()));
273  }
274 
275  // check narrowing
276  warn |=
277  (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
278 
279  if (warn) {
280  std::string msg =
281  "Kokkos::RangePolicy bound type error: an unsafe implicit "
282  "conversion is performed on a bound (" +
283  std::to_string(bound) +
284  "), which may not preserve its original value.\n";
285 
286 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
287  Kokkos::abort(msg.c_str());
288 #endif
289 
290 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
291  Kokkos::Impl::log_warning(msg);
292 #endif
293  }
294 #endif
295  }
296  }
297 
298  public:
303  struct WorkRange {
304  using work_tag = typename RangePolicy<Properties...>::work_tag;
305  using member_type = typename RangePolicy<Properties...>::member_type;
306 
307  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
308  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
309 
314  KOKKOS_INLINE_FUNCTION
315  WorkRange(const RangePolicy& range, const int part_rank,
316  const int part_size)
317  : m_begin(0), m_end(0) {
318  if (part_size) {
319  // Split evenly among partitions, then round up to the granularity.
320  const member_type work_part =
321  ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
322  range.m_granularity_mask) &
323  ~member_type(range.m_granularity_mask);
324 
325  m_begin = range.begin() + work_part * part_rank;
326  m_end = m_begin + work_part;
327 
328  if (range.end() < m_begin) m_begin = range.end();
329  if (range.end() < m_end) m_end = range.end();
330  }
331  }
332 
333  private:
334  member_type m_begin;
335  member_type m_end;
336  WorkRange();
337  WorkRange& operator=(const WorkRange&);
338  };
339 };
340 
341 RangePolicy() -> RangePolicy<>;
342 
343 RangePolicy(int64_t, int64_t) -> RangePolicy<>;
344 RangePolicy(int64_t, int64_t, ChunkSize const&) -> RangePolicy<>;
345 
346 RangePolicy(DefaultExecutionSpace const&, int64_t, int64_t) -> RangePolicy<>;
347 RangePolicy(DefaultExecutionSpace const&, int64_t, int64_t, ChunkSize const&)
348  -> RangePolicy<>;
349 
350 template <typename ES, typename = std::enable_if_t<is_execution_space_v<ES>>>
351 RangePolicy(ES const&, int64_t, int64_t) -> RangePolicy<ES>;
352 
353 template <typename ES, typename = std::enable_if_t<is_execution_space_v<ES>>>
354 RangePolicy(ES const&, int64_t, int64_t, ChunkSize const&) -> RangePolicy<ES>;
355 
356 } // namespace Kokkos
357 
358 //----------------------------------------------------------------------------
359 //----------------------------------------------------------------------------
360 
361 namespace Kokkos {
362 
363 namespace Impl {
364 
365 template <class ExecSpace, class... Properties>
366 class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
367  private:
368  using traits = Impl::PolicyTraits<Properties...>;
369 
370  public:
371  using index_type = typename traits::index_type;
372 
373  //----------------------------------------
384  template <class FunctorType>
385  static int team_size_max(const FunctorType&);
386 
397  template <class FunctorType>
398  static int team_size_recommended(const FunctorType&);
399 
400  template <class FunctorType>
401  static int team_size_recommended(const FunctorType&, const int&);
402 
403  template <class FunctorType>
404  int team_size_recommended(const FunctorType& functor,
405  const int vector_length);
406 
407  //----------------------------------------
409  TeamPolicyInternal(const typename traits::execution_space&,
410  int league_size_request, int team_size_request,
411  int vector_length_request = 1);
412 
413  TeamPolicyInternal(const typename traits::execution_space&,
414  int league_size_request, const Kokkos::AUTO_t&,
415  int vector_length_request = 1);
416 
419  TeamPolicyInternal(int league_size_request, int team_size_request,
420  int vector_length_request = 1);
421 
422  TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
423  int vector_length_request = 1);
424 
425  /* TeamPolicyInternal( int league_size_request , int team_size_request );
426 
427  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
428 
434  KOKKOS_INLINE_FUNCTION int league_size() const;
435 
441  KOKKOS_INLINE_FUNCTION int team_size() const;
442 
445  inline bool impl_auto_team_size() const;
448  inline bool impl_auto_vector_length() const;
449 
450  static int vector_length_max();
451 
452  KOKKOS_INLINE_FUNCTION int impl_vector_length() const;
453 
454  inline typename traits::index_type chunk_size() const;
455 
456  inline TeamPolicyInternal& set_chunk_size(int chunk_size);
457 
461  struct member_type {
463  KOKKOS_INLINE_FUNCTION
464  typename traits::execution_space::scratch_memory_space team_shmem() const;
465 
467  KOKKOS_INLINE_FUNCTION int league_rank() const;
468 
470  KOKKOS_INLINE_FUNCTION int league_size() const;
471 
473  KOKKOS_INLINE_FUNCTION int team_rank() const;
474 
476  KOKKOS_INLINE_FUNCTION int team_size() const;
477 
479  KOKKOS_INLINE_FUNCTION void team_barrier() const;
480 
483  template <class JoinOp>
484  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
485  const typename JoinOp::value_type, const JoinOp&) const;
486 
492  template <typename Type>
493  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
494 
504  template <typename Type>
505  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
506  Type* const global_accum) const;
507  };
508 };
509 
/// Wrapper around a `size_t` that marks the amount as a "per team" quantity.
/// Implicitly constructible from `size_t`; the constructor is defined
/// elsewhere.
struct PerTeamValue {
  PerTeamValue(size_t arg);

  /// The wrapped amount.
  size_t value;
};
514 
/// Wrapper around a `size_t` that marks the amount as a "per thread"
/// quantity. Implicitly constructible from `size_t`; the constructor is
/// defined elsewhere.
struct PerThreadValue {
  PerThreadValue(size_t arg);

  /// The wrapped amount.
  size_t value;
};
519 
/// Extracts a vector length from the first element of an argument list.
///
/// `value(first, rest...)` returns `first` unchanged (as `iType`) when `iType`
/// is an integral type, and the `int` 1 otherwise. The remaining arguments
/// are ignored.
///
/// This used to be written as two overloads constrained with
/// `std::enable_if_t` on the *class* template parameter. Those conditions are
/// not in a SFINAE context, so instantiating the class for any `iType` made
/// one of the two declarations ill-formed. A single function with
/// `if constexpr` keeps the same call syntax and result types and is valid
/// for every `iType`.
template <class iType, class... Args>
struct ExtractVectorLength {
  static inline std::conditional_t<std::is_integral_v<iType>, iType, int> value(
      [[maybe_unused]] iType val, Args...) {
    if constexpr (std::is_integral_v<iType>) {
      return val;
    } else {
      return 1;
    }
  }
};
531 
/// Integral first argument: that argument is the vector length and is
/// returned unchanged; all further arguments are ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType val, Args...)
    -> std::enable_if_t<std::is_integral_v<iType>, iType> {
  return val;
}
537 
/// Non-integral first argument: no vector length was supplied, so the result
/// is 1; all arguments are ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType, Args...)
    -> std::enable_if_t<!std::is_integral_v<iType>, int> {
  return 1;
}
543 
544 } // namespace Impl
545 
546 Impl::PerTeamValue PerTeam(const size_t& arg);
547 Impl::PerThreadValue PerThread(const size_t& arg);
548 
549 struct ScratchRequest {
550  int level;
551 
552  size_t per_team;
553  size_t per_thread;
554 
555  inline ScratchRequest(const int& level_,
556  const Impl::PerTeamValue& team_value) {
557  level = level_;
558  per_team = team_value.value;
559  per_thread = 0;
560  }
561 
562  inline ScratchRequest(const int& level_,
563  const Impl::PerThreadValue& thread_value) {
564  level = level_;
565  per_team = 0;
566  per_thread = thread_value.value;
567  }
568 
569  inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
570  const Impl::PerThreadValue& thread_value) {
571  level = level_;
572  per_team = team_value.value;
573  per_thread = thread_value.value;
574  }
575 
576  inline ScratchRequest(const int& level_,
577  const Impl::PerThreadValue& thread_value,
578  const Impl::PerTeamValue& team_value) {
579  level = level_;
580  per_team = team_value.value;
581  per_thread = thread_value.value;
582  }
583 };
584 
/// Validates a scratch storage `level`: the program is terminated abnormally
/// unless `level` is `0` or `1`.
void team_policy_check_valid_storage_level_argument(int level);
587 
614 template <class... Properties>
616  : public Impl::TeamPolicyInternal<
617  typename Impl::PolicyTraits<Properties...>::execution_space,
618  Properties...> {
619  using internal_policy = Impl::TeamPolicyInternal<
620  typename Impl::PolicyTraits<Properties...>::execution_space,
621  Properties...>;
622 
623  template <class... OtherProperties>
624  friend class TeamPolicy;
625 
626  public:
627  using traits = Impl::PolicyTraits<Properties...>;
628 
629  using execution_policy = TeamPolicy<Properties...>;
630 
631  TeamPolicy() : internal_policy(0, AUTO) {}
632 
634  TeamPolicy(const typename traits::execution_space& space_,
635  int league_size_request, int team_size_request,
636  int vector_length_request = 1)
637  : internal_policy(space_, league_size_request, team_size_request,
638  vector_length_request) {}
639 
640  TeamPolicy(const typename traits::execution_space& space_,
641  int league_size_request, const Kokkos::AUTO_t&,
642  int vector_length_request = 1)
643  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
644  vector_length_request) {}
645 
646  TeamPolicy(const typename traits::execution_space& space_,
647  int league_size_request, const Kokkos::AUTO_t&,
648  const Kokkos::AUTO_t&)
649  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
650  Kokkos::AUTO()) {}
651  TeamPolicy(const typename traits::execution_space& space_,
652  int league_size_request, const int team_size_request,
653  const Kokkos::AUTO_t&)
654  : internal_policy(space_, league_size_request, team_size_request,
655  Kokkos::AUTO()) {}
658  TeamPolicy(int league_size_request, int team_size_request,
659  int vector_length_request = 1)
660  : internal_policy(league_size_request, team_size_request,
661  vector_length_request) {}
662 
663  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
664  int vector_length_request = 1)
665  : internal_policy(league_size_request, Kokkos::AUTO(),
666  vector_length_request) {}
667 
668  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
669  const Kokkos::AUTO_t&)
670  : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
671  TeamPolicy(int league_size_request, const int team_size_request,
672  const Kokkos::AUTO_t&)
673  : internal_policy(league_size_request, team_size_request,
674  Kokkos::AUTO()) {}
675 
676  template <class... OtherProperties>
677  TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
678  // Cannot call converting constructor in the member initializer list because
679  // it is not a direct base.
680  internal_policy::traits::operator=(p);
681  }
682 
683  private:
684  TeamPolicy(const internal_policy& p) : internal_policy(p) {}
685 
686  public:
687  inline TeamPolicy& set_chunk_size(int chunk) {
688  static_assert(
689  std::is_same_v<decltype(internal_policy::set_chunk_size(chunk)),
690  internal_policy&>,
691  "internal set_chunk_size should return a reference");
692  return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
693  }
694 
695  inline TeamPolicy& set_scratch_size(const int& level,
696  const Impl::PerTeamValue& per_team) {
697  static_assert(std::is_same_v<decltype(internal_policy::set_scratch_size(
698  level, per_team)),
699  internal_policy&>,
700  "internal set_chunk_size should return a reference");
701 
702  team_policy_check_valid_storage_level_argument(level);
703  return static_cast<TeamPolicy&>(
704  internal_policy::set_scratch_size(level, per_team));
705  }
706  inline TeamPolicy& set_scratch_size(const int& level,
707  const Impl::PerThreadValue& per_thread) {
708  team_policy_check_valid_storage_level_argument(level);
709  return static_cast<TeamPolicy&>(
710  internal_policy::set_scratch_size(level, per_thread));
711  }
712  inline TeamPolicy& set_scratch_size(const int& level,
713  const Impl::PerTeamValue& per_team,
714  const Impl::PerThreadValue& per_thread) {
715  team_policy_check_valid_storage_level_argument(level);
716  return static_cast<TeamPolicy&>(
717  internal_policy::set_scratch_size(level, per_team, per_thread));
718  }
719  inline TeamPolicy& set_scratch_size(const int& level,
720  const Impl::PerThreadValue& per_thread,
721  const Impl::PerTeamValue& per_team) {
722  team_policy_check_valid_storage_level_argument(level);
723  return static_cast<TeamPolicy&>(
724  internal_policy::set_scratch_size(level, per_team, per_thread));
725  }
726 };
727 
728 // Execution space not provided deduces to TeamPolicy<>
729 
730 TeamPolicy() -> TeamPolicy<>;
731 
732 TeamPolicy(int, int) -> TeamPolicy<>;
733 TeamPolicy(int, int, int) -> TeamPolicy<>;
734 TeamPolicy(int, Kokkos::AUTO_t const&) -> TeamPolicy<>;
735 TeamPolicy(int, Kokkos::AUTO_t const&, int) -> TeamPolicy<>;
736 TeamPolicy(int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&) -> TeamPolicy<>;
737 TeamPolicy(int, int, Kokkos::AUTO_t const&) -> TeamPolicy<>;
738 
739 // DefaultExecutionSpace deduces to TeamPolicy<>
740 
741 TeamPolicy(DefaultExecutionSpace const&, int, int) -> TeamPolicy<>;
742 TeamPolicy(DefaultExecutionSpace const&, int, int, int) -> TeamPolicy<>;
743 TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&)
744  -> TeamPolicy<>;
745 TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, int)
746  -> TeamPolicy<>;
747 TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&,
748  Kokkos::AUTO_t const&) -> TeamPolicy<>;
749 TeamPolicy(DefaultExecutionSpace const&, int, int, Kokkos::AUTO_t const&)
750  -> TeamPolicy<>;
751 
752 // ES != DefaultExecutionSpace deduces to TeamPolicy<ES>
753 
754 template <typename ES,
755  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
756 TeamPolicy(ES const&, int, int) -> TeamPolicy<ES>;
757 
758 template <typename ES,
759  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
760 TeamPolicy(ES const&, int, int, int) -> TeamPolicy<ES>;
761 
762 template <typename ES,
763  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
764 TeamPolicy(ES const&, int, Kokkos::AUTO_t const&) -> TeamPolicy<ES>;
765 
766 template <typename ES,
767  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
768 TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, int) -> TeamPolicy<ES>;
769 
770 template <typename ES,
771  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
772 TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)
773  -> TeamPolicy<ES>;
774 
775 template <typename ES,
776  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
777 TeamPolicy(ES const&, int, int, Kokkos::AUTO_t const&) -> TeamPolicy<ES>;
778 
779 namespace Impl {
780 
781 template <typename iType, class TeamMemberType>
782 struct TeamThreadRangeBoundariesStruct {
783  private:
784  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
785  const iType& arg_end,
786  const iType& arg_rank,
787  const iType& arg_size) {
788  return arg_begin +
789  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
790  }
791 
792  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
793  const iType& arg_end,
794  const iType& arg_rank,
795  const iType& arg_size) {
796  const iType end_ =
797  arg_begin +
798  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
799  return end_ < arg_end ? end_ : arg_end;
800  }
801 
802  public:
803  using index_type = iType;
804  const iType start;
805  const iType end;
806  enum { increment = 1 };
807  const TeamMemberType& member;
808 
809  KOKKOS_INLINE_FUNCTION
810  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
811  const iType& arg_count)
812  : start(ibegin(0, arg_count, arg_thread.team_rank(),
813  arg_thread.team_size())),
814  end(iend(0, arg_count, arg_thread.team_rank(), arg_thread.team_size())),
815  member(arg_thread) {}
816 
817  KOKKOS_INLINE_FUNCTION
818  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
819  const iType& arg_begin, const iType& arg_end)
820  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
821  arg_thread.team_size())),
822  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
823  arg_thread.team_size())),
824  member(arg_thread) {}
825 };
826 
827 template <typename iType, class TeamMemberType>
828 struct TeamVectorRangeBoundariesStruct {
829  private:
830  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
831  const iType& arg_end,
832  const iType& arg_rank,
833  const iType& arg_size) {
834  return arg_begin +
835  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
836  }
837 
838  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
839  const iType& arg_end,
840  const iType& arg_rank,
841  const iType& arg_size) {
842  const iType end_ =
843  arg_begin +
844  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
845  return end_ < arg_end ? end_ : arg_end;
846  }
847 
848  public:
849  using index_type = iType;
850  const iType start;
851  const iType end;
852  enum { increment = 1 };
853  const TeamMemberType& member;
854 
855  KOKKOS_INLINE_FUNCTION
856  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
857  const iType& arg_count)
858  : start(ibegin(0, arg_count, arg_thread.team_rank(),
859  arg_thread.team_size())),
860  end(iend(0, arg_count, arg_thread.team_rank(), arg_thread.team_size())),
861  member(arg_thread) {}
862 
863  KOKKOS_INLINE_FUNCTION
864  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
865  const iType& arg_begin, const iType& arg_end)
866  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
867  arg_thread.team_size())),
868  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
869  arg_thread.team_size())),
870  member(arg_thread) {}
871 };
872 
873 template <typename iType, class TeamMemberType>
874 struct ThreadVectorRangeBoundariesStruct {
875  using index_type = iType;
876  const index_type start;
877  const index_type end;
878  enum { increment = 1 };
879 
880  KOKKOS_INLINE_FUNCTION
881  constexpr ThreadVectorRangeBoundariesStruct(
882  const TeamMemberType, const index_type& arg_count) noexcept
883  : start(static_cast<index_type>(0)), end(arg_count) {}
884 
885  KOKKOS_INLINE_FUNCTION
886  constexpr ThreadVectorRangeBoundariesStruct(
887  const TeamMemberType, const index_type& arg_begin,
888  const index_type& arg_end) noexcept
889  : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
890 };
891 
892 template <class TeamMemberType>
893 struct ThreadSingleStruct {
894  const TeamMemberType& team_member;
895  KOKKOS_INLINE_FUNCTION
896  ThreadSingleStruct(const TeamMemberType& team_member_)
897  : team_member(team_member_) {}
898 };
899 
900 template <class TeamMemberType>
901 struct VectorSingleStruct {
902  const TeamMemberType& team_member;
903  KOKKOS_INLINE_FUNCTION
904  VectorSingleStruct(const TeamMemberType& team_member_)
905  : team_member(team_member_) {}
906 };
907 
908 } // namespace Impl
909 
917 template <typename iType, class TeamMemberType, class _never_use_this_overload>
918 KOKKOS_INLINE_FUNCTION_DELETED
919  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
920  TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
921 
929 template <typename iType1, typename iType2, class TeamMemberType,
930  class _never_use_this_overload>
931 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
932  std::common_type_t<iType1, iType2>, TeamMemberType>
933 TeamThreadRange(const TeamMemberType&, const iType1& begin,
934  const iType2& end) = delete;
935 
943 template <typename iType, class TeamMemberType, class _never_use_this_overload>
944 KOKKOS_INLINE_FUNCTION_DELETED
945  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
946  TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
947 
955 template <typename iType1, typename iType2, class TeamMemberType,
956  class _never_use_this_overload>
957 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
958  std::common_type_t<iType1, iType2>, TeamMemberType>
959 TeamVectorRange(const TeamMemberType&, const iType1& begin,
960  const iType2& end) = delete;
961 
969 template <typename iType, class TeamMemberType, class _never_use_this_overload>
970 KOKKOS_INLINE_FUNCTION_DELETED
971  Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
972  ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
973 
974 template <typename iType1, typename iType2, class TeamMemberType,
975  class _never_use_this_overload>
976 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
977  std::common_type_t<iType1, iType2>, TeamMemberType>
978 ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin,
979  const iType2& arg_end) = delete;
980 
981 namespace Impl {
982 
// Two-state, bool-backed flags used as template arguments by the team
// MD-range machinery. In each enum the "Not..." state is `false` and the
// other state is `true`.
enum class TeamMDRangeLastNestLevel : bool {
  NotLastNestLevel = false,
  LastNestLevel    = true
};
enum class TeamMDRangeParThread : bool {
  NotParThread = false,
  ParThread    = true
};
enum class TeamMDRangeParVector : bool {
  NotParVector = false,
  ParVector    = true
};
enum class TeamMDRangeThreadAndVector : bool { NotBoth = false, Both = true };
987 
988 template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
989 struct HostBasedNestLevel;
990 
991 template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
992 struct AcceleratorBasedNestLevel;
993 
994 // ThreadAndVectorNestLevel determines on which nested level parallelization
995 // happens.
996 // - Rank is Kokkos::Rank<TotalNestLevel, Iter>
997 // - TotalNestLevel is the total number of loop nests
998 // - Iter is whether to go forward or backward through ranks (i.e. the
999 // iteration order for MDRangePolicy)
1000 // - ThreadAndVector determines whether both vector and thread parallelism is
1001 // in use
1002 template <typename Rank, typename ExecSpace,
1003  TeamMDRangeThreadAndVector ThreadAndVector>
1004 struct ThreadAndVectorNestLevel;
1005 
1006 struct NoReductionTag {};
1007 
1008 template <typename Rank, typename TeamMDPolicy, typename Lambda,
1009  typename ReductionValueType>
1010 KOKKOS_INLINE_FUNCTION void md_parallel_impl(TeamMDPolicy const& policy,
1011  Lambda const& lambda,
1012  ReductionValueType&& val);
1013 } // namespace Impl
1014 
1015 template <typename Rank, typename TeamHandle>
1016 struct TeamThreadMDRange;
1017 
1018 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1019 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1020  using NestLevelType = int;
1021  using BoundaryType = int;
1022  using TeamHandleType = TeamHandle;
1023  using ExecutionSpace = typename TeamHandleType::execution_space;
1024  using ArrayLayout = typename ExecutionSpace::array_layout;
1025 
1026  static constexpr NestLevelType total_nest_level =
1027  Rank<N, OuterDir, InnerDir>::rank;
1028  static constexpr Iterate iter = OuterDir;
1029  static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1030  static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
1031 
1032  static constexpr Iterate direction =
1033  OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1034  ArrayLayout>::outer_iteration_pattern
1035  : iter;
1036 
1037  template <class... Args>
1038  KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
1039  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1040  static_assert(sizeof...(Args) == total_nest_level);
1041  }
1042 
1043  TeamHandleType const& team;
1044  BoundaryType boundaries[total_nest_level];
1045 };
1046 
1047 template <typename TeamHandle, typename... Args>
1048 KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle const&, Args&&...)
1049  -> TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1050 
1051 template <typename Rank, typename TeamHandle>
1052 struct ThreadVectorMDRange;
1053 
1054 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1055 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1056  using NestLevelType = int;
1057  using BoundaryType = int;
1058  using TeamHandleType = TeamHandle;
1059  using ExecutionSpace = typename TeamHandleType::execution_space;
1060  using ArrayLayout = typename ExecutionSpace::array_layout;
1061 
1062  static constexpr NestLevelType total_nest_level =
1063  Rank<N, OuterDir, InnerDir>::rank;
1064  static constexpr Iterate iter = OuterDir;
1065  static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
1066  static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1067 
1068  static constexpr Iterate direction =
1069  OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1070  ArrayLayout>::outer_iteration_pattern
1071  : iter;
1072 
1073  template <class... Args>
1074  KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
1075  Args&&... args)
1076  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1077  static_assert(sizeof...(Args) == total_nest_level);
1078  }
1079 
1080  TeamHandleType const& team;
1081  BoundaryType boundaries[total_nest_level];
1082 };
1083 
1084 template <typename TeamHandle, typename... Args>
1085 KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle const&, Args&&...)
1086  -> ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1087 
1088 template <typename Rank, typename TeamHandle>
1089 struct TeamVectorMDRange;
1090 
1091 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1092 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1093  using NestLevelType = int;
1094  using BoundaryType = int;
1095  using TeamHandleType = TeamHandle;
1096  using ExecutionSpace = typename TeamHandleType::execution_space;
1097  using ArrayLayout = typename ExecutionSpace::array_layout;
1098 
1099  static constexpr NestLevelType total_nest_level =
1100  Rank<N, OuterDir, InnerDir>::rank;
1101  static constexpr Iterate iter = OuterDir;
1102  static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1103  static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1104 
1105  static constexpr Iterate direction =
1106  iter == Iterate::Default ? Impl::layout_iterate_type_selector<
1107  ArrayLayout>::outer_iteration_pattern
1108  : iter;
1109 
1110  template <class... Args>
1111  KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
1112  Args&&... args)
1113  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1114  static_assert(sizeof...(Args) == total_nest_level);
1115  }
1116 
1117  TeamHandleType const& team;
1118  BoundaryType boundaries[total_nest_level];
1119 };
1120 
1121 template <typename TeamHandle, typename... Args>
1122 KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle const&, Args&&...)
1123  -> TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1124 
1125 template <typename Rank, typename TeamHandle, typename Lambda,
1126  typename ReducerValueType>
1127 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1128  TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1129  ReducerValueType& val) {
1130  static_assert(
1131  !std::is_array_v<ReducerValueType> &&
1132  !std::is_pointer_v<ReducerValueType> &&
1133  !Kokkos::is_reducer_v<ReducerValueType>,
1134  "Only scalar return types are allowed!");
1135 
1136  val = ReducerValueType{};
1137  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1138  policy.team.team_reduce(
1139  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1140 }
1141 
1142 template <typename Rank, typename TeamHandle, typename Lambda>
1143 KOKKOS_INLINE_FUNCTION void parallel_for(
1144  TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1145  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1146 }
1147 
1148 template <typename Rank, typename TeamHandle, typename Lambda,
1149  typename ReducerValueType>
1150 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1151  ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1152  ReducerValueType& val) {
1153  static_assert(
1154  !std::is_array_v<ReducerValueType> &&
1155  !std::is_pointer_v<ReducerValueType> &&
1156  !Kokkos::is_reducer_v<ReducerValueType>,
1157  "Only a scalar return types are allowed!");
1158 
1159  val = ReducerValueType{};
1160  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1161  if constexpr (false
1162 #ifdef KOKKOS_ENABLE_CUDA
1163  || std::is_same_v<typename TeamHandle::execution_space,
1164  Kokkos::Cuda>
1165 #elif defined(KOKKOS_ENABLE_HIP)
1166  || std::is_same_v<typename TeamHandle::execution_space,
1167  Kokkos::HIP>
1168 #elif defined(KOKKOS_ENABLE_SYCL)
1169  || std::is_same_v<typename TeamHandle::execution_space,
1170  Kokkos::SYCL>
1171 #endif
1172  )
1173  policy.team.vector_reduce(
1174  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1175  val});
1176 }
1177 
1178 template <typename Rank, typename TeamHandle, typename Lambda>
1179 KOKKOS_INLINE_FUNCTION void parallel_for(
1180  ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1181  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1182 }
1183 
1184 template <typename Rank, typename TeamHandle, typename Lambda,
1185  typename ReducerValueType>
1186 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1187  TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1188  ReducerValueType& val) {
1189  static_assert(
1190  !std::is_array_v<ReducerValueType> &&
1191  !std::is_pointer_v<ReducerValueType> &&
1192  !Kokkos::is_reducer_v<ReducerValueType>,
1193  "Only a scalar return types are allowed!");
1194 
1195  val = ReducerValueType{};
1196  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1197  if constexpr (false
1198 #ifdef KOKKOS_ENABLE_CUDA
1199  || std::is_same_v<typename TeamHandle::execution_space,
1200  Kokkos::Cuda>
1201 #elif defined(KOKKOS_ENABLE_HIP)
1202  || std::is_same_v<typename TeamHandle::execution_space,
1203  Kokkos::HIP>
1204 #elif defined(KOKKOS_ENABLE_SYCL)
1205  || std::is_same_v<typename TeamHandle::execution_space,
1206  Kokkos::SYCL>
1207 #endif
1208  )
1209  policy.team.vector_reduce(
1210  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1211  val});
1212  policy.team.team_reduce(
1213  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1214 }
1215 
1216 template <typename Rank, typename TeamHandle, typename Lambda>
1217 KOKKOS_INLINE_FUNCTION void parallel_for(
1218  TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1219  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1220 }
1221 
1222 namespace Impl {
1223 
// Chooses the name reported for a parallel construct.
//
// A non-empty user label is used as is. For an empty label a default name is
// built from the functor type and, when TagType is not void, the tag type,
// joined as "<functor>/<tag>". Type names come from TypeInfo<>::name() when
// KOKKOS_ENABLE_IMPL_TYPEINFO is defined and from typeid(...).name() otherwise.
//
// The label is held by reference (label_ref), so the string passed to the
// constructor must outlive this object.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void_v<TagType>>
struct ParallelConstructName;

// Specialization for a non-void work tag: default name is "functor/tag".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    if (label.empty()) {
#ifdef KOKKOS_ENABLE_IMPL_TYPEINFO
      default_name =
          std::string(TypeInfo<std::remove_const_t<FunctorType>>::name()) +
          "/" + std::string(TypeInfo<TagType>::name());
#else
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
#endif
    }
  }

  // Returns the user label when it is non-empty, otherwise the default name.
  // Declared const because it only reads members, which makes it callable on
  // const objects as well.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }

  std::string const& label_ref;
  std::string default_name;  // left empty when a label was supplied
};

// Specialization without a work tag: default name is the functor type name.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    if (label.empty()) {
#ifdef KOKKOS_ENABLE_IMPL_TYPEINFO
      default_name = TypeInfo<std::remove_const_t<FunctorType>>::name();
#else
      default_name = typeid(FunctorType).name();
#endif
    }
  }

  // Returns the user label when it is non-empty, otherwise the default name.
  // Declared const because it only reads members, which makes it callable on
  // const objects as well.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }

  std::string const& label_ref;
  std::string default_name;  // left empty when a label was supplied
};
1266 
1267 } // namespace Impl
1268 
1269 } // namespace Kokkos
1270 
1271 namespace Kokkos {
1272 
1273 namespace Impl {
1274 
1275 template <class PatternTag, class... Args>
1276 struct PatternImplSpecializationFromTag;
1277 
1278 template <class... Args>
1279 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1280  : type_identity<ParallelFor<Args...>> {};
1281 
1282 template <class... Args>
1283 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1284  : type_identity<ParallelReduce<Args...>> {};
1285 
1286 template <class... Args>
1287 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1288  : type_identity<ParallelScan<Args...>> {};
1289 
1290 template <class PatternImpl>
1291 struct PatternTagFromImplSpecialization;
1292 
1293 template <class... Args>
1294 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1295  : type_identity<ParallelForTag> {};
1296 
1297 template <class... Args>
1298 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1299  : type_identity<ParallelReduceTag> {};
1300 
1301 template <class... Args>
1302 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1303  : type_identity<ParallelScanTag> {};
1304 
1305 } // end namespace Impl
1306 
1307 } // namespace Kokkos
1308 #endif /* #define KOKKOS_EXECPOLICY_HPP */
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range.
member_type chunk_size() const
return chunk_size
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy & set_chunk_size(int chunk_size)
set chunk_size to a discrete value
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...