Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_ExecPolicy.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_EXECPOLICY_HPP
23 #define KOKKOS_EXECPOLICY_HPP
24 
25 #include <Kokkos_Core_fwd.hpp>
26 #include <impl/Kokkos_Traits.hpp>
27 #include <impl/Kokkos_Error.hpp>
28 #include <impl/Kokkos_AnalyzePolicy.hpp>
29 #include <Kokkos_Concepts.hpp>
30 #include <typeinfo>
31 #include <limits>
32 
33 //----------------------------------------------------------------------------
34 
35 namespace Kokkos {
36 
// Empty tag types with no members. Presumably used to tell the parallel-for,
// parallel-scan and parallel-reduce dispatch patterns apart at compile time
// (inferred from the names only -- confirm against the call sites).
37 struct ParallelForTag {};
38 struct ParallelScanTag {};
39 struct ParallelReduceTag {};
40 
/// Strongly typed wrapper around an integer chunk size. The policy
/// constructors that accept a chunk size take it as a ChunkSize argument.
struct ChunkSize {
  int value;  ///< Chunk size exactly as supplied by the caller (unvalidated).

  /// Wraps `chunk`; explicit, so a plain int does not convert silently.
  explicit ChunkSize(int chunk) : value{chunk} {}

#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
  // Deprecated implicit int -> ChunkSize conversion. It is a template so that
  // the non-template explicit constructor above is preferred whenever both
  // are viable; only implicit conversions reach this overload.
  template <typename T = void>
  KOKKOS_DEPRECATED_WITH_COMMENT("ChunkSize should be constructed explicitly.")
  ChunkSize(int chunk) : value{chunk} {}
#endif
};
50 
72 template <class... Properties>
73 class RangePolicy : public Impl::PolicyTraits<Properties...> {
74  public:
75  using traits = Impl::PolicyTraits<Properties...>;
76 
77  private:
78  typename traits::execution_space m_space;
79  typename traits::index_type m_begin;
80  typename traits::index_type m_end;
81  typename traits::index_type m_granularity;
82  typename traits::index_type m_granularity_mask;
83 
84  template <class... OtherProperties>
85  friend class RangePolicy;
86 
87  public:
89  using execution_policy = RangePolicy<Properties...>;
90  using member_type = typename traits::index_type;
91  using index_type = typename traits::index_type;
92 
93  KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
94  return m_space;
95  }
96  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
97  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
98 
99  // TODO: find a better workaround for Clangs weird instantiation order
100  // This thing is here because of an instantiation error, where the RangePolicy
101  // is inserted into FunctorValue Traits, which tries decltype on the operator.
102  // It tries to do this even though the first argument of parallel for clearly
103  // doesn't match.
104  void operator()(const int&) const {}
105 
106  template <class... OtherProperties>
107  RangePolicy(const RangePolicy<OtherProperties...>& p)
108  : traits(p), // base class may contain data such as desired occupancy
109  m_space(p.m_space),
110  m_begin(p.m_begin),
111  m_end(p.m_end),
112  m_granularity(p.m_granularity),
113  m_granularity_mask(p.m_granularity_mask) {}
114 
115  inline RangePolicy()
116  : m_space(),
117  m_begin(0),
118  m_end(0),
119  m_granularity(0),
120  m_granularity_mask(0) {}
121 
123  template <typename IndexType1, typename IndexType2,
124  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
125  std::is_convertible_v<IndexType2, member_type>),
126  bool> = false>
127  inline RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
128  : RangePolicy(typename traits::execution_space(), work_begin, work_end) {}
129 
131  template <typename IndexType1, typename IndexType2,
132  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
133  std::is_convertible_v<IndexType2, member_type>),
134  bool> = false>
135  inline RangePolicy(const typename traits::execution_space& work_space,
136  const IndexType1 work_begin, const IndexType2 work_end)
137  : m_space(work_space),
138  m_begin(work_begin),
139  m_end(work_end),
140  m_granularity(0),
141  m_granularity_mask(0) {
142  check_conversion_safety(work_begin);
143  check_conversion_safety(work_end);
144  check_bounds_validity();
145  set_auto_chunk_size();
146  }
147 
148  template <typename IndexType1, typename IndexType2,
149  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
150  std::is_convertible_v<IndexType2, member_type>),
151  bool> = false>
152  RangePolicy(const typename traits::execution_space& work_space,
153  const IndexType1 work_begin, const IndexType2 work_end,
154  const ChunkSize chunk_size)
155  : m_space(work_space),
156  m_begin(work_begin),
157  m_end(work_end),
158  m_granularity(0),
159  m_granularity_mask(0) {
160  check_conversion_safety(work_begin);
161  check_conversion_safety(work_end);
162  check_bounds_validity();
163  set_chunk_size(chunk_size.value);
164  }
165 
167  template <typename IndexType1, typename IndexType2, typename... Args,
168  std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
169  std::is_convertible_v<IndexType2, member_type>),
170  bool> = false>
171  RangePolicy(const IndexType1 work_begin, const IndexType2 work_end,
172  const ChunkSize chunk_size)
173  : RangePolicy(typename traits::execution_space(), work_begin, work_end,
174  chunk_size) {}
175 
176  public:
177 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
178  KOKKOS_DEPRECATED_WITH_COMMENT("Use set_chunk_size instead")
179  inline void set(ChunkSize chunksize) {
180  m_granularity = chunksize.value;
181  m_granularity_mask = m_granularity - 1;
182  }
183 #endif
184 
185  public:
187  inline member_type chunk_size() const { return m_granularity; }
188 
190  inline RangePolicy& set_chunk_size(int chunk_size) {
191  m_granularity = chunk_size;
192  m_granularity_mask = m_granularity - 1;
193  return *this;
194  }
195 
196  private:
198  inline void set_auto_chunk_size() {
199 #ifdef KOKKOS_ENABLE_SYCL
200  if (std::is_same_v<typename traits::execution_space,
201  Kokkos::Experimental::SYCL>) {
202  // chunk_size <=1 lets the compiler choose the workgroup size when
203  // launching kernels
204  m_granularity = 1;
205  m_granularity_mask = 0;
206  return;
207  }
208 #endif
209  auto concurrency = static_cast<int64_t>(m_space.concurrency());
210  if (concurrency == 0) concurrency = 1;
211 
212  if (m_granularity > 0) {
213  if (!Impl::is_integral_power_of_two(m_granularity))
214  Kokkos::abort("RangePolicy blocking granularity must be power of two");
215  }
216 
217  int64_t new_chunk_size = 1;
218  while (new_chunk_size * 100 * concurrency <
219  static_cast<int64_t>(m_end - m_begin))
220  new_chunk_size *= 2;
221  if (new_chunk_size < 128) {
222  new_chunk_size = 1;
223  while ((new_chunk_size * 40 * concurrency <
224  static_cast<int64_t>(m_end - m_begin)) &&
225  (new_chunk_size < 128))
226  new_chunk_size *= 2;
227  }
228  m_granularity = new_chunk_size;
229  m_granularity_mask = m_granularity - 1;
230  }
231 
232  void check_bounds_validity() {
233  if (m_end < m_begin) {
234  std::string msg = "Kokkos::RangePolicy bounds error: The lower bound (" +
235  std::to_string(m_begin) +
236  ") is greater than the upper bound (" +
237  std::to_string(m_end) + ").\n";
238 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
239  Kokkos::abort(msg.c_str());
240 #endif
241  m_begin = 0;
242  m_end = 0;
243 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
244  Kokkos::Impl::log_warning(msg);
245 #endif
246  }
247  }
248 
249  // To be replaced with std::in_range (c++20)
250  template <typename IndexType>
251  static void check_conversion_safety(const IndexType bound) {
252 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \
253  defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
254 
255  std::string msg =
256  "Kokkos::RangePolicy bound type error: an unsafe implicit conversion "
257  "is performed on a bound (" +
258  std::to_string(bound) +
259  "), which may "
260  "not preserve its original value.\n";
261  bool warn = false;
262 
263  if constexpr (std::is_signed_v<IndexType> !=
264  std::is_signed_v<member_type>) {
265  // check signed to unsigned
266  if constexpr (std::is_signed_v<IndexType>)
267  warn |= (bound < static_cast<IndexType>(
268  std::numeric_limits<member_type>::min()));
269 
270  // check unsigned to signed
271  if constexpr (std::is_signed_v<member_type>)
272  warn |= (bound > static_cast<IndexType>(
273  std::numeric_limits<member_type>::max()));
274  }
275 
276  // check narrowing
277  warn |= (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
278 
279  if (warn) {
280 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
281  Kokkos::abort(msg.c_str());
282 #endif
283 
284 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
285  Kokkos::Impl::log_warning(msg);
286 #endif
287  }
288 #else
289  (void)bound;
290 #endif
291  }
292 
293  public:
298  struct WorkRange {
299  using work_tag = typename RangePolicy<Properties...>::work_tag;
300  using member_type = typename RangePolicy<Properties...>::member_type;
301 
302  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
303  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
304 
309  KOKKOS_INLINE_FUNCTION
310  WorkRange(const RangePolicy& range, const int part_rank,
311  const int part_size)
312  : m_begin(0), m_end(0) {
313  if (part_size) {
314  // Split evenly among partitions, then round up to the granularity.
315  const member_type work_part =
316  ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
317  range.m_granularity_mask) &
318  ~member_type(range.m_granularity_mask);
319 
320  m_begin = range.begin() + work_part * part_rank;
321  m_end = m_begin + work_part;
322 
323  if (range.end() < m_begin) m_begin = range.end();
324  if (range.end() < m_end) m_end = range.end();
325  }
326  }
327 
328  private:
329  member_type m_begin;
330  member_type m_end;
331  WorkRange();
332  WorkRange& operator=(const WorkRange&);
333  };
334 };
335 
// Class template argument deduction guides for RangePolicy.
// No arguments, or bounds only (optionally with a ChunkSize): RangePolicy<>.
336 RangePolicy()->RangePolicy<>;
337 
338 RangePolicy(int64_t, int64_t)->RangePolicy<>;
339 RangePolicy(int64_t, int64_t, ChunkSize const&)->RangePolicy<>;
340 
// A DefaultExecutionSpace instance as first argument also deduces
// RangePolicy<> rather than RangePolicy<DefaultExecutionSpace>.
341 RangePolicy(DefaultExecutionSpace const&, int64_t, int64_t)->RangePolicy<>;
342 RangePolicy(DefaultExecutionSpace const&, int64_t, int64_t, ChunkSize const&)
343  ->RangePolicy<>;
344 
// Any other type ES satisfying is_execution_space_v deduces RangePolicy<ES>.
345 template <typename ES, typename = std::enable_if_t<is_execution_space_v<ES>>>
346 RangePolicy(ES const&, int64_t, int64_t)->RangePolicy<ES>;
347 
348 template <typename ES, typename = std::enable_if_t<is_execution_space_v<ES>>>
349 RangePolicy(ES const&, int64_t, int64_t, ChunkSize const&)->RangePolicy<ES>;
350 
351 } // namespace Kokkos
352 
353 //----------------------------------------------------------------------------
354 //----------------------------------------------------------------------------
355 
356 namespace Kokkos {
357 
358 namespace Impl {
359 
// Primary template of the team policy implementation class. Only declarations
// are visible here; no member is defined in this part of the file.
// NOTE(review): presumably each execution space backend provides its own
// specialization and this primary template documents the required interface
// -- confirm against the backend headers.
360 template <class ExecSpace, class... Properties>
361 class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
362  private:
363  using traits = Impl::PolicyTraits<Properties...>;
364 
365  public:
366  using index_type = typename traits::index_type;
367 
368  //----------------------------------------
// Team size queries that take the functor to be executed.
379  template <class FunctorType>
380  static int team_size_max(const FunctorType&);
381 
392  template <class FunctorType>
393  static int team_size_recommended(const FunctorType&);
394 
395  template <class FunctorType>
396  static int team_size_recommended(const FunctorType&, const int&);
397 
// NOTE(review): non-static overload that differs from the static one above
// only in `const int` vs `const int&`; a call with an int argument looks
// ambiguous between the two -- confirm which one is intended.
398  template <class FunctorType>
399  int team_size_recommended(const FunctorType& functor,
400  const int vector_length);
401 
402  //----------------------------------------
// Constructors: with or without an explicit execution space instance, and
// with either an integer team size or Kokkos::AUTO_t. vector_length_request
// defaults to 1.
404  TeamPolicyInternal(const typename traits::execution_space&,
405  int league_size_request, int team_size_request,
406  int vector_length_request = 1);
407 
408  TeamPolicyInternal(const typename traits::execution_space&,
409  int league_size_request, const Kokkos::AUTO_t&,
410  int vector_length_request = 1);
411 
414  TeamPolicyInternal(int league_size_request, int team_size_request,
415  int vector_length_request = 1);
416 
417  TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
418  int vector_length_request = 1);
419 
420  /* TeamPolicyInternal( int league_size_request , int team_size_request );
421 
422  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
423 
// Accessors for the configured league size and team size.
429  KOKKOS_INLINE_FUNCTION int league_size() const;
430 
436  KOKKOS_INLINE_FUNCTION int team_size() const;
437 
// Presumably report whether team size / vector length were requested as AUTO
// (inferred from the names -- confirm).
440  inline bool impl_auto_team_size() const;
443  inline bool impl_auto_vector_length() const;
444 
445  static int vector_length_max();
446 
447  KOKKOS_INLINE_FUNCTION int impl_vector_length() const;
448 
449  inline typename traits::index_type chunk_size() const;
450 
// Returns a reference to the policy itself; TeamPolicy::set_chunk_size relies
// on this via a static_assert.
451  inline TeamPolicyInternal& set_chunk_size(int chunk_size);
452 
// Handle type describing the operations available to a team member: scratch
// memory access, rank/size queries, a team barrier, and team-wide reduce and
// scan operations.
456  struct member_type {
458  KOKKOS_INLINE_FUNCTION
459  typename traits::execution_space::scratch_memory_space team_shmem() const;
460 
462  KOKKOS_INLINE_FUNCTION int league_rank() const;
463 
465  KOKKOS_INLINE_FUNCTION int league_size() const;
466 
468  KOKKOS_INLINE_FUNCTION int team_rank() const;
469 
471  KOKKOS_INLINE_FUNCTION int team_size() const;
472 
474  KOKKOS_INLINE_FUNCTION void team_barrier() const;
475 
478  template <class JoinOp>
479  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
480  const typename JoinOp::value_type, const JoinOp&) const;
481 
487  template <typename Type>
488  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
489 
499  template <typename Type>
500  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
501  Type* const global_accum) const;
502  };
503 };
504 
// Wrapper around a size_t that marks the value as a per-team amount.
// ScratchRequest and TeamPolicy::set_scratch_size accept it for their
// per-team scratch argument. The constructor is only declared here and is
// not explicit, so a size_t converts implicitly.
505 struct PerTeamValue {
506  size_t value;
507  PerTeamValue(size_t arg);
508 };
509 
// Wrapper around a size_t that marks the value as a per-thread amount.
// ScratchRequest and TeamPolicy::set_scratch_size accept it for their
// per-thread scratch argument. The constructor is only declared here and is
// not explicit, so a size_t converts implicitly.
510 struct PerThreadValue {
511  size_t value;
512  PerThreadValue(size_t arg);
513 };
514 
/// Extracts a vector length from the first of a list of arguments.
///
/// `value(val, rest...)` returns `val` (as iType) when iType is an integral
/// type and the int `1` otherwise; the remaining arguments are ignored.
///
/// The earlier form declared two `value` overloads guarded by enable_if_t on
/// the *class* template parameter. Those are not SFINAE contexts, so
/// instantiating the class always made one of the two declarations
/// ill-formed. A single member using `if constexpr` keeps the same call syntax
/// and return types and is instantiable for every iType.
template <class iType, class... Args>
struct ExtractVectorLength {
  static inline auto value([[maybe_unused]] iType val, Args...) {
    if constexpr (std::is_integral_v<iType>) {
      return val;  // deduced return type: iType
    } else {
      return 1;  // deduced return type: int
    }
  }
};
526 
/// Overload selected when the first argument has an integral type: that
/// argument is returned unchanged as the vector length and every trailing
/// argument is ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType length, Args...)
    -> std::enable_if_t<std::is_integral_v<iType>, iType> {
  return length;
}
532 
/// Overload selected when the first argument is not of integral type: no
/// vector length was supplied, so the result is always 1. All arguments are
/// ignored.
template <class iType, class... Args>
inline auto extract_vector_length(iType, Args...)
    -> std::enable_if_t<!std::is_integral_v<iType>, int> {
  return 1;
}
538 
539 } // namespace Impl
540 
541 Impl::PerTeamValue PerTeam(const size_t& arg);
542 Impl::PerThreadValue PerThread(const size_t& arg);
543 
544 struct ScratchRequest {
545  int level;
546 
547  size_t per_team;
548  size_t per_thread;
549 
550  inline ScratchRequest(const int& level_,
551  const Impl::PerTeamValue& team_value) {
552  level = level_;
553  per_team = team_value.value;
554  per_thread = 0;
555  }
556 
557  inline ScratchRequest(const int& level_,
558  const Impl::PerThreadValue& thread_value) {
559  level = level_;
560  per_team = 0;
561  per_thread = thread_value.value;
562  }
563 
564  inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
565  const Impl::PerThreadValue& thread_value) {
566  level = level_;
567  per_team = team_value.value;
568  per_thread = thread_value.value;
569  }
570 
571  inline ScratchRequest(const int& level_,
572  const Impl::PerThreadValue& thread_value,
573  const Impl::PerTeamValue& team_value) {
574  level = level_;
575  per_team = team_value.value;
576  per_thread = thread_value.value;
577  }
578 };
579 
580 // Throws a runtime exception if level is not `0` or `1`
581 void team_policy_check_valid_storage_level_argument(int level);
582 
609 template <class... Properties>
611  : public Impl::TeamPolicyInternal<
612  typename Impl::PolicyTraits<Properties...>::execution_space,
613  Properties...> {
614  using internal_policy = Impl::TeamPolicyInternal<
615  typename Impl::PolicyTraits<Properties...>::execution_space,
616  Properties...>;
617 
618  template <class... OtherProperties>
619  friend class TeamPolicy;
620 
621  public:
622  using traits = Impl::PolicyTraits<Properties...>;
623 
624  using execution_policy = TeamPolicy<Properties...>;
625 
626  TeamPolicy() : internal_policy(0, AUTO) {}
627 
629  TeamPolicy(const typename traits::execution_space& space_,
630  int league_size_request, int team_size_request,
631  int vector_length_request = 1)
632  : internal_policy(space_, league_size_request, team_size_request,
633  vector_length_request) {}
634 
635  TeamPolicy(const typename traits::execution_space& space_,
636  int league_size_request, const Kokkos::AUTO_t&,
637  int vector_length_request = 1)
638  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
639  vector_length_request) {}
640 
641  TeamPolicy(const typename traits::execution_space& space_,
642  int league_size_request, const Kokkos::AUTO_t&,
643  const Kokkos::AUTO_t&)
644  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
645  Kokkos::AUTO()) {}
646  TeamPolicy(const typename traits::execution_space& space_,
647  int league_size_request, const int team_size_request,
648  const Kokkos::AUTO_t&)
649  : internal_policy(space_, league_size_request, team_size_request,
650  Kokkos::AUTO()) {}
653  TeamPolicy(int league_size_request, int team_size_request,
654  int vector_length_request = 1)
655  : internal_policy(league_size_request, team_size_request,
656  vector_length_request) {}
657 
658  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
659  int vector_length_request = 1)
660  : internal_policy(league_size_request, Kokkos::AUTO(),
661  vector_length_request) {}
662 
663  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
664  const Kokkos::AUTO_t&)
665  : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
666  TeamPolicy(int league_size_request, const int team_size_request,
667  const Kokkos::AUTO_t&)
668  : internal_policy(league_size_request, team_size_request,
669  Kokkos::AUTO()) {}
670 
671  template <class... OtherProperties>
672  TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
673  // Cannot call converting constructor in the member initializer list because
674  // it is not a direct base.
675  internal_policy::traits::operator=(p);
676  }
677 
678  private:
679  TeamPolicy(const internal_policy& p) : internal_policy(p) {}
680 
681  public:
682  inline TeamPolicy& set_chunk_size(int chunk) {
683  static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
684  internal_policy&>::value,
685  "internal set_chunk_size should return a reference");
686  return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
687  }
688 
689  inline TeamPolicy& set_scratch_size(const int& level,
690  const Impl::PerTeamValue& per_team) {
691  static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
692  level, per_team)),
693  internal_policy&>::value,
694  "internal set_chunk_size should return a reference");
695 
696  team_policy_check_valid_storage_level_argument(level);
697  return static_cast<TeamPolicy&>(
698  internal_policy::set_scratch_size(level, per_team));
699  }
700  inline TeamPolicy& set_scratch_size(const int& level,
701  const Impl::PerThreadValue& per_thread) {
702  team_policy_check_valid_storage_level_argument(level);
703  return static_cast<TeamPolicy&>(
704  internal_policy::set_scratch_size(level, per_thread));
705  }
706  inline TeamPolicy& set_scratch_size(const int& level,
707  const Impl::PerTeamValue& per_team,
708  const Impl::PerThreadValue& per_thread) {
709  team_policy_check_valid_storage_level_argument(level);
710  return static_cast<TeamPolicy&>(
711  internal_policy::set_scratch_size(level, per_team, per_thread));
712  }
713  inline TeamPolicy& set_scratch_size(const int& level,
714  const Impl::PerThreadValue& per_thread,
715  const Impl::PerTeamValue& per_team) {
716  team_policy_check_valid_storage_level_argument(level);
717  return static_cast<TeamPolicy&>(
718  internal_policy::set_scratch_size(level, per_team, per_thread));
719  }
720 };
721 
// Class template argument deduction guides for TeamPolicy. Each group below
// lists the same six argument shapes: (league, team), (league, team, vector),
// and the variants with Kokkos::AUTO_t in the team and/or vector position.
722 // Execution space not provided deduces to TeamPolicy<>
723 
724 TeamPolicy()->TeamPolicy<>;
725 
726 TeamPolicy(int, int)->TeamPolicy<>;
727 TeamPolicy(int, int, int)->TeamPolicy<>;
728 TeamPolicy(int, Kokkos::AUTO_t const&)->TeamPolicy<>;
729 TeamPolicy(int, Kokkos::AUTO_t const&, int)->TeamPolicy<>;
730 TeamPolicy(int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)->TeamPolicy<>;
731 TeamPolicy(int, int, Kokkos::AUTO_t const&)->TeamPolicy<>;
732 
733 // DefaultExecutionSpace deduces to TeamPolicy<>
734 
735 TeamPolicy(DefaultExecutionSpace const&, int, int)->TeamPolicy<>;
736 TeamPolicy(DefaultExecutionSpace const&, int, int, int)->TeamPolicy<>;
737 TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&)
738  ->TeamPolicy<>;
739 TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, int)
740  ->TeamPolicy<>;
741 TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&,
742  Kokkos::AUTO_t const&)
743  ->TeamPolicy<>;
744 TeamPolicy(DefaultExecutionSpace const&, int, int, Kokkos::AUTO_t const&)
745  ->TeamPolicy<>;
746 
747 // ES != DefaultExecutionSpace deduces to TeamPolicy<ES>
748 
// These templated guides are constrained to types ES for which
// Kokkos::is_execution_space_v<ES> is true.
749 template <typename ES,
750  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
751 TeamPolicy(ES const&, int, int)->TeamPolicy<ES>;
752 
753 template <typename ES,
754  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
755 TeamPolicy(ES const&, int, int, int)->TeamPolicy<ES>;
756 
757 template <typename ES,
758  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
759 TeamPolicy(ES const&, int, Kokkos::AUTO_t const&)->TeamPolicy<ES>;
760 
761 template <typename ES,
762  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
763 TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, int)->TeamPolicy<ES>;
764 
765 template <typename ES,
766  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
767 TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)
768  ->TeamPolicy<ES>;
769 
770 template <typename ES,
771  typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
772 TeamPolicy(ES const&, int, int, Kokkos::AUTO_t const&)->TeamPolicy<ES>;
773 
774 namespace Impl {
775 
776 template <typename iType, class TeamMemberType>
777 struct TeamThreadRangeBoundariesStruct {
778  private:
779  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
780  const iType& arg_end,
781  const iType& arg_rank,
782  const iType& arg_size) {
783  return arg_begin +
784  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
785  }
786 
787  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
788  const iType& arg_end,
789  const iType& arg_rank,
790  const iType& arg_size) {
791  const iType end_ =
792  arg_begin +
793  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
794  return end_ < arg_end ? end_ : arg_end;
795  }
796 
797  public:
798  using index_type = iType;
799  const iType start;
800  const iType end;
801  enum { increment = 1 };
802  const TeamMemberType& thread;
803 
804  KOKKOS_INLINE_FUNCTION
805  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
806  const iType& arg_end)
807  : start(
808  ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
809  end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
810  thread(arg_thread) {}
811 
812  KOKKOS_INLINE_FUNCTION
813  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
814  const iType& arg_begin, const iType& arg_end)
815  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
816  arg_thread.team_size())),
817  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
818  arg_thread.team_size())),
819  thread(arg_thread) {}
820 };
821 
822 template <typename iType, class TeamMemberType>
823 struct TeamVectorRangeBoundariesStruct {
824  private:
825  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
826  const iType& arg_end,
827  const iType& arg_rank,
828  const iType& arg_size) {
829  return arg_begin +
830  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
831  }
832 
833  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
834  const iType& arg_end,
835  const iType& arg_rank,
836  const iType& arg_size) {
837  const iType end_ =
838  arg_begin +
839  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
840  return end_ < arg_end ? end_ : arg_end;
841  }
842 
843  public:
844  using index_type = iType;
845  const iType start;
846  const iType end;
847  enum { increment = 1 };
848  const TeamMemberType& thread;
849 
850  KOKKOS_INLINE_FUNCTION
851  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
852  const iType& arg_end)
853  : start(
854  ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
855  end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
856  thread(arg_thread) {}
857 
858  KOKKOS_INLINE_FUNCTION
859  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
860  const iType& arg_begin, const iType& arg_end)
861  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
862  arg_thread.team_size())),
863  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
864  arg_thread.team_size())),
865  thread(arg_thread) {}
866 };
867 
868 template <typename iType, class TeamMemberType>
869 struct ThreadVectorRangeBoundariesStruct {
870  using index_type = iType;
871  const index_type start;
872  const index_type end;
873  enum { increment = 1 };
874 
875  KOKKOS_INLINE_FUNCTION
876  constexpr ThreadVectorRangeBoundariesStruct(const TeamMemberType,
877  const index_type& count) noexcept
878  : start(static_cast<index_type>(0)), end(count) {}
879 
880  KOKKOS_INLINE_FUNCTION
881  constexpr ThreadVectorRangeBoundariesStruct(
882  const TeamMemberType, const index_type& arg_begin,
883  const index_type& arg_end) noexcept
884  : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
885 };
886 
887 template <class TeamMemberType>
888 struct ThreadSingleStruct {
889  const TeamMemberType& team_member;
890  KOKKOS_INLINE_FUNCTION
891  ThreadSingleStruct(const TeamMemberType& team_member_)
892  : team_member(team_member_) {}
893 };
894 
895 template <class TeamMemberType>
896 struct VectorSingleStruct {
897  const TeamMemberType& team_member;
898  KOKKOS_INLINE_FUNCTION
899  VectorSingleStruct(const TeamMemberType& team_member_)
900  : team_member(team_member_) {}
901 };
902 
903 } // namespace Impl
904 
// Deleted placeholder overloads of TeamThreadRange, TeamVectorRange and
// ThreadVectorRange. Each has a template parameter `_never_use_this_overload`
// that appears in no function parameter and has no default, so it cannot be
// deduced from a call; together with `= delete` these overloads can never be
// the ones that get called.
// NOTE(review): presumably they only introduce the function names at
// namespace scope so that the usable overloads defined elsewhere can be found
// -- confirm.
912 template <typename iType, class TeamMemberType, class _never_use_this_overload>
913 KOKKOS_INLINE_FUNCTION_DELETED
914  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
915  TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
916 
924 template <typename iType1, typename iType2, class TeamMemberType,
925  class _never_use_this_overload>
926 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
927  std::common_type_t<iType1, iType2>, TeamMemberType>
928 TeamThreadRange(const TeamMemberType&, const iType1& begin,
929  const iType2& end) = delete;
930 
// NOTE(review): the two TeamVectorRange placeholders below name
// Impl::TeamThreadRangeBoundariesStruct as their return type rather than
// Impl::TeamVectorRangeBoundariesStruct. Harmless while the overloads are
// deleted, but it looks like a copy-paste leftover -- confirm.
938 template <typename iType, class TeamMemberType, class _never_use_this_overload>
939 KOKKOS_INLINE_FUNCTION_DELETED
940  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
941  TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
942 
950 template <typename iType1, typename iType2, class TeamMemberType,
951  class _never_use_this_overload>
952 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
953  std::common_type_t<iType1, iType2>, TeamMemberType>
954 TeamVectorRange(const TeamMemberType&, const iType1& begin,
955  const iType2& end) = delete;
956 
964 template <typename iType, class TeamMemberType, class _never_use_this_overload>
965 KOKKOS_INLINE_FUNCTION_DELETED
966  Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
967  ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
968 
969 template <typename iType1, typename iType2, class TeamMemberType,
970  class _never_use_this_overload>
971 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
972  std::common_type_t<iType1, iType2>, TeamMemberType>
973 ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin,
974  const iType2& arg_end) = delete;
975 
// Implementation-detail declarations for the team-level MD range policies.
// Everything in this namespace block is a declaration only; the definitions
// are not visible in this part of the file.
976 namespace Impl {
977 
// Two-valued, bool-backed flags used as compile-time switches. The
// ParThread/ParVector flags are the types of the par_thread / par_vector
// constants in the *MDRange structs.
978 enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
979 enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
980 enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
981 enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
982 
983 template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
984 struct HostBasedNestLevel;
985 
986 template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
987 struct AcceleratorBasedNestLevel;
988 
989 // ThreadAndVectorNestLevel determines on which nested level parallelization
990 // happens.
991 // - Rank is Kokkos::Rank<TotalNestLevel, Iter>
992 // - TotalNestLevel is the total number of loop nests
993 // - Iter is whether to go forward or backward through ranks (i.e. the
994 // iteration order for MDRangePolicy)
995 // - ThreadAndVector determines whether both vector and thread parallelism is
996 // in use
997 template <typename Rank, typename ExecSpace,
998  TeamMDRangeThreadAndVector ThreadAndVector>
999 struct ThreadAndVectorNestLevel;
1000 
// Empty tag type; presumably passed as the reduction value when an MD
// parallel loop performs no reduction (inferred from the name -- confirm).
1001 struct NoReductionTag {};
1002 
// Forward declaration of the shared MD loop implementation.
1003 template <typename Rank, typename TeamMDPolicy, typename Lambda,
1004  typename ReductionValueType>
1005 KOKKOS_INLINE_FUNCTION void md_parallel_impl(TeamMDPolicy const& policy,
1006  Lambda const& lambda,
1007  ReductionValueType&& val);
1008 } // namespace Impl
1009 
// Multi-dimensional range over the threads of a team. The primary template is
// only declared; the partial specialization for Rank<N, OuterDir, InnerDir>
// below is the usable one.
1010 template <typename Rank, typename TeamHandle>
1011 struct TeamThreadMDRange;
1012 
1013 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1014 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1015  using NestLevelType = int;
1016  using BoundaryType = int;
1017  using TeamHandleType = TeamHandle;
1018  using ExecutionSpace = typename TeamHandleType::execution_space;
1019  using ArrayLayout = typename ExecutionSpace::array_layout;
1020 
1021  static constexpr NestLevelType total_nest_level =
1022  Rank<N, OuterDir, InnerDir>::rank;
1023  static constexpr Iterate iter = OuterDir;
// This range type is flagged as thread-parallel and not vector-parallel.
1024  static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1025  static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
1026 
// Iterate::Default is resolved to the outer iteration pattern selected for
// the execution space's array layout; any other OuterDir is used as given.
1027  static constexpr Iterate direction =
1028  OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1029  ArrayLayout>::outer_iteration_pattern
1030  : iter;
1031 
// Stores a reference to the team handle and one boundary per nest level;
// each argument is cast to BoundaryType (int). The static_assert requires
// exactly total_nest_level boundary arguments.
1032  template <class... Args>
1033  KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
1034  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1035  static_assert(sizeof...(Args) == total_nest_level);
1036  }
1037 
1038  TeamHandleType const& team;
1039  BoundaryType boundaries[total_nest_level];
1040 };
1041 
// Deduction guide: the rank is the number of boundary arguments and the
// iteration direction is Iterate::Default.
1042 template <typename TeamHandle, typename... Args>
1043 KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle const&, Args&&...)
1044  ->TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1045 
// Multi-dimensional range over the vector lanes of a thread. The primary
// template is only declared; the partial specialization for
// Rank<N, OuterDir, InnerDir> below is the usable one.
1046 template <typename Rank, typename TeamHandle>
1047 struct ThreadVectorMDRange;
1048 
1049 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1050 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1051  using NestLevelType = int;
1052  using BoundaryType = int;
1053  using TeamHandleType = TeamHandle;
1054  using ExecutionSpace = typename TeamHandleType::execution_space;
1055  using ArrayLayout = typename ExecutionSpace::array_layout;
1056 
1057  static constexpr NestLevelType total_nest_level =
1058  Rank<N, OuterDir, InnerDir>::rank;
1059  static constexpr Iterate iter = OuterDir;
// This range type is flagged as vector-parallel and not thread-parallel.
1060  static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
1061  static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1062 
// Iterate::Default is resolved to the outer iteration pattern selected for
// the execution space's array layout; any other OuterDir is used as given.
1063  static constexpr Iterate direction =
1064  OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1065  ArrayLayout>::outer_iteration_pattern
1066  : iter;
1067 
// Stores a reference to the team handle and one boundary per nest level;
// each argument is cast to BoundaryType (int). The static_assert requires
// exactly total_nest_level boundary arguments.
1068  template <class... Args>
1069  KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
1070  Args&&... args)
1071  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1072  static_assert(sizeof...(Args) == total_nest_level);
1073  }
1074 
1075  TeamHandleType const& team;
1076  BoundaryType boundaries[total_nest_level];
1077 };
1078 
// Deduction guide: the rank is the number of boundary arguments and the
// iteration direction is Iterate::Default.
1079 template <typename TeamHandle, typename... Args>
1080 KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle const&, Args&&...)
1081  ->ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1082 
1083 template <typename Rank, typename TeamHandle>
1084 struct TeamVectorMDRange;
1085 
1086 template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1087 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1088  using NestLevelType = int;
1089  using BoundaryType = int;
1090  using TeamHandleType = TeamHandle;
1091  using ExecutionSpace = typename TeamHandleType::execution_space;
1092  using ArrayLayout = typename ExecutionSpace::array_layout;
1093 
1094  static constexpr NestLevelType total_nest_level =
1095  Rank<N, OuterDir, InnerDir>::rank;
1096  static constexpr Iterate iter = OuterDir;
1097  static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1098  static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1099 
1100  static constexpr Iterate direction =
1101  iter == Iterate::Default ? Impl::layout_iterate_type_selector<
1102  ArrayLayout>::outer_iteration_pattern
1103  : iter;
1104 
1105  template <class... Args>
1106  KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
1107  Args&&... args)
1108  : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1109  static_assert(sizeof...(Args) == total_nest_level);
1110  }
1111 
1112  TeamHandleType const& team;
1113  BoundaryType boundaries[total_nest_level];
1114 };
1115 
1116 template <typename TeamHandle, typename... Args>
1117 KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle const&, Args&&...)
1118  ->TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1119 
1120 template <typename Rank, typename TeamHandle, typename Lambda,
1121  typename ReducerValueType>
1122 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1123  TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1124  ReducerValueType& val) {
1125  static_assert(
1126  !std::is_array_v<ReducerValueType> &&
1127  !std::is_pointer_v<ReducerValueType> &&
1128  !Kokkos::is_reducer_v<ReducerValueType>,
1129  "Only scalar return types are allowed!");
1130 
1131  val = ReducerValueType{};
1132  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1133  policy.team.team_reduce(
1134  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1135 }
1136 
1137 template <typename Rank, typename TeamHandle, typename Lambda>
1138 KOKKOS_INLINE_FUNCTION void parallel_for(
1139  TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1140  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1141 }
1142 
1143 template <typename Rank, typename TeamHandle, typename Lambda,
1144  typename ReducerValueType>
1145 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1146  ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1147  ReducerValueType& val) {
1148  static_assert(
1149  !std::is_array_v<ReducerValueType> &&
1150  !std::is_pointer_v<ReducerValueType> &&
1151  !Kokkos::is_reducer_v<ReducerValueType>,
1152  "Only a scalar return types are allowed!");
1153 
1154  val = ReducerValueType{};
1155  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1156  if constexpr (false
1157 #ifdef KOKKOS_ENABLE_CUDA
1158  || std::is_same_v<typename TeamHandle::execution_space,
1159  Kokkos::Cuda>
1160 #elif defined(KOKKOS_ENABLE_HIP)
1161  || std::is_same_v<typename TeamHandle::execution_space,
1162  Kokkos::HIP>
1163 #elif defined(KOKKOS_ENABLE_SYCL)
1164  || std::is_same_v<typename TeamHandle::execution_space,
1165  Kokkos::Experimental::SYCL>
1166 #endif
1167  )
1168  policy.team.vector_reduce(
1169  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1170  val});
1171 }
1172 
1173 template <typename Rank, typename TeamHandle, typename Lambda>
1174 KOKKOS_INLINE_FUNCTION void parallel_for(
1175  ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1176  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1177 }
1178 
1179 template <typename Rank, typename TeamHandle, typename Lambda,
1180  typename ReducerValueType>
1181 KOKKOS_INLINE_FUNCTION void parallel_reduce(
1182  TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1183  ReducerValueType& val) {
1184  static_assert(
1185  !std::is_array_v<ReducerValueType> &&
1186  !std::is_pointer_v<ReducerValueType> &&
1187  !Kokkos::is_reducer_v<ReducerValueType>,
1188  "Only a scalar return types are allowed!");
1189 
1190  val = ReducerValueType{};
1191  Impl::md_parallel_impl<Rank>(policy, lambda, val);
1192  if constexpr (false
1193 #ifdef KOKKOS_ENABLE_CUDA
1194  || std::is_same_v<typename TeamHandle::execution_space,
1195  Kokkos::Cuda>
1196 #elif defined(KOKKOS_ENABLE_HIP)
1197  || std::is_same_v<typename TeamHandle::execution_space,
1198  Kokkos::HIP>
1199 #elif defined(KOKKOS_ENABLE_SYCL)
1200  || std::is_same_v<typename TeamHandle::execution_space,
1201  Kokkos::Experimental::SYCL>
1202 #endif
1203  )
1204  policy.team.vector_reduce(
1205  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1206  val});
1207  policy.team.team_reduce(
1208  Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1209 }
1210 
1211 template <typename Rank, typename TeamHandle, typename Lambda>
1212 KOKKOS_INLINE_FUNCTION void parallel_for(
1213  TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1214  Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1215 }
1216 
1217 namespace Impl {
1218 
// Resolves the name of a parallel construct: the caller's label when it is
// non-empty, otherwise a default built from typeid names (whose spelling is
// implementation-defined). HasTag defaults to "TagType is not void" and
// selects whether the tag's typeid name is appended.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void<TagType>::value>
struct ParallelConstructName;

// Tagged variant: the default name is "<functor typeid>/<tag typeid>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  // Keeps a reference to `label` (no copy); the default name is only built
  // when the label is empty.
  ParallelConstructName(std::string const& label) : label_ref(label) {
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }
  // Returns the label if non-empty, else the default name. Const-qualified
  // because it only reads members, so it is callable on const objects.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  // Reference member: the label string must outlive this object.
  std::string const& label_ref;
  std::string default_name;
};

// Untagged variant: the default name is the functor's typeid name alone.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  // Keeps a reference to `label` (no copy); the default name is only built
  // when the label is empty.
  ParallelConstructName(std::string const& label) : label_ref(label) {
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }
  // Returns the label if non-empty, else the default name. Const-qualified
  // because it only reads members, so it is callable on const objects.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  // Reference member: the label string must outlive this object.
  std::string const& label_ref;
  std::string default_name;
};
1251 
1252 } // namespace Impl
1253 
1254 } // namespace Kokkos
1255 
1256 namespace Kokkos {
1257 
1258 namespace Impl {
1259 
1260 template <class PatternTag, class... Args>
1261 struct PatternImplSpecializationFromTag;
1262 
1263 template <class... Args>
1264 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1265  : type_identity<ParallelFor<Args...>> {};
1266 
1267 template <class... Args>
1268 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1269  : type_identity<ParallelReduce<Args...>> {};
1270 
1271 template <class... Args>
1272 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1273  : type_identity<ParallelScan<Args...>> {};
1274 
1275 template <class PatternImpl>
1276 struct PatternTagFromImplSpecialization;
1277 
1278 template <class... Args>
1279 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1280  : type_identity<ParallelForTag> {};
1281 
1282 template <class... Args>
1283 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1284  : type_identity<ParallelReduceTag> {};
1285 
1286 template <class... Args>
1287 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1288  : type_identity<ParallelScanTag> {};
1289 
1290 } // end namespace Impl
1291 
1292 } // namespace Kokkos
1293 #endif /* #define KOKKOS_EXECPOLICY_HPP */
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range.
member_type chunk_size() const
return chunk_size
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy & set_chunk_size(int chunk_size)
set chunk_size to a discrete value
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...