17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_EXECPOLICY_HPP
23 #define KOKKOS_EXECPOLICY_HPP
25 #include <Kokkos_Core_fwd.hpp>
26 #include <impl/Kokkos_Traits.hpp>
27 #include <impl/Kokkos_Error.hpp>
28 #include <impl/Kokkos_AnalyzePolicy.hpp>
29 #include <Kokkos_BitManipulation.hpp>
30 #include <Kokkos_Concepts.hpp>
31 #include <Kokkos_TypeInfo.hpp>
32 #ifndef KOKKOS_ENABLE_IMPL_TYPEINFO
/// Stateless tag type naming the parallel-for pattern. It is used as a
/// template argument, e.g. by PatternImplSpecializationFromTag.
struct ParallelForTag {
};
/// Stateless tag type naming the parallel-scan pattern. It is used as a
/// template argument, e.g. by PatternImplSpecializationFromTag.
struct ParallelScanTag {
};
/// Stateless tag type naming the parallel-reduce pattern. It is used as a
/// template argument, e.g. by PatternImplSpecializationFromTag.
struct ParallelReduceTag {
};
47 explicit ChunkSize(
int value_) : value(value_) {}
48 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
49 template <
typename T =
void>
50 KOKKOS_DEPRECATED_WITH_COMMENT(
"ChunkSize should be constructed explicitly.")
51 ChunkSize(
int value_) : value(value_) {}
76 template <
class... Properties>
77 class RangePolicy :
public Impl::PolicyTraits<Properties...> {
79 using traits = Impl::PolicyTraits<Properties...>;
82 typename traits::execution_space m_space;
83 typename traits::index_type m_begin;
84 typename traits::index_type m_end;
85 typename traits::index_type m_granularity;
86 typename traits::index_type m_granularity_mask;
88 template <
class... OtherProperties>
94 using member_type =
typename traits::index_type;
95 using index_type =
typename traits::index_type;
97 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space& space()
const {
100 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
101 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
108 void operator()(
const int&)
const {}
110 template <
class... OtherProperties>
111 RangePolicy(
const RangePolicy<OtherProperties...>& p)
116 m_granularity(p.m_granularity),
117 m_granularity_mask(p.m_granularity_mask) {}
124 m_granularity_mask(0) {}
127 template <
typename IndexType1,
typename IndexType2,
128 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
129 std::is_convertible_v<IndexType2, member_type>),
131 inline RangePolicy(
const IndexType1 work_begin,
const IndexType2 work_end)
132 :
RangePolicy(
typename traits::execution_space(), work_begin, work_end) {}
135 template <
typename IndexType1,
typename IndexType2,
136 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
137 std::is_convertible_v<IndexType2, member_type>),
139 inline RangePolicy(
const typename traits::execution_space& work_space,
140 const IndexType1 work_begin,
const IndexType2 work_end)
141 : m_space(work_space),
145 m_granularity_mask(0) {
146 check_conversion_safety(work_begin);
147 check_conversion_safety(work_end);
148 check_bounds_validity();
149 set_auto_chunk_size();
152 template <
typename IndexType1,
typename IndexType2,
153 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
154 std::is_convertible_v<IndexType2, member_type>),
156 RangePolicy(
const typename traits::execution_space& work_space,
157 const IndexType1 work_begin,
const IndexType2 work_end,
159 : m_space(work_space),
163 m_granularity_mask(0) {
164 check_conversion_safety(work_begin);
165 check_conversion_safety(work_end);
166 check_bounds_validity();
171 template <
typename IndexType1,
typename IndexType2,
typename... Args,
172 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
173 std::is_convertible_v<IndexType2, member_type>),
175 RangePolicy(
const IndexType1 work_begin,
const IndexType2 work_end,
176 const ChunkSize chunk_size)
177 :
RangePolicy(
typename traits::execution_space(), work_begin, work_end,
181 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
182 KOKKOS_DEPRECATED_WITH_COMMENT(
"Use set_chunk_size instead")
183 inline
void set(ChunkSize chunksize) {
184 m_granularity = chunksize.value;
185 m_granularity_mask = m_granularity - 1;
191 inline member_type
chunk_size()
const {
return m_granularity; }
196 m_granularity_mask = m_granularity - 1;
202 inline void set_auto_chunk_size() {
203 #ifdef KOKKOS_ENABLE_SYCL
204 if (std::is_same_v<typename traits::execution_space, Kokkos::SYCL>) {
208 m_granularity_mask = 0;
212 auto concurrency =
static_cast<int64_t
>(m_space.concurrency());
213 if (concurrency == 0) concurrency = 1;
215 if (m_granularity > 0 &&
216 !Kokkos::has_single_bit(static_cast<unsigned>(m_granularity))) {
217 Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
220 int64_t new_chunk_size = 1;
221 while (new_chunk_size * 100 * concurrency <
222 static_cast<int64_t>(m_end - m_begin))
224 if (new_chunk_size < 128) {
226 while ((new_chunk_size * 40 * concurrency <
227 static_cast<int64_t>(m_end - m_begin)) &&
228 (new_chunk_size < 128))
231 m_granularity = new_chunk_size;
232 m_granularity_mask = m_granularity - 1;
235 void check_bounds_validity() {
236 if (m_end < m_begin) {
237 std::string msg =
"Kokkos::RangePolicy bounds error: The lower bound (" +
238 std::to_string(m_begin) +
239 ") is greater than the upper bound (" +
240 std::to_string(m_end) +
").\n";
241 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
242 Kokkos::abort(msg.c_str());
246 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
247 Kokkos::Impl::log_warning(msg);
253 template <
typename IndexType>
254 static void check_conversion_safety([[maybe_unused]]
const IndexType bound) {
256 if constexpr (std::is_convertible_v<member_type, IndexType>) {
257 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \
258 defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
261 if constexpr (std::is_arithmetic_v<member_type> &&
262 (std::is_signed_v<IndexType> !=
263 std::is_signed_v<member_type>)) {
265 if constexpr (std::is_signed_v<IndexType>)
266 warn |= (bound < static_cast<IndexType>(
267 std::numeric_limits<member_type>::min()));
270 if constexpr (std::is_signed_v<member_type>)
271 warn |= (bound > static_cast<IndexType>(
272 std::numeric_limits<member_type>::max()));
277 (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
281 "Kokkos::RangePolicy bound type error: an unsafe implicit "
282 "conversion is performed on a bound (" +
283 std::to_string(bound) +
284 "), which may not preserve its original value.\n";
286 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
287 Kokkos::abort(msg.c_str());
290 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
291 Kokkos::Impl::log_warning(msg);
304 using work_tag =
typename RangePolicy<Properties...>::work_tag;
305 using member_type =
typename RangePolicy<Properties...>::member_type;
307 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
308 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
314 KOKKOS_INLINE_FUNCTION
317 : m_begin(0), m_end(0) {
320 const member_type work_part =
321 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
322 range.m_granularity_mask) &
323 ~member_type(range.m_granularity_mask);
325 m_begin = range.begin() + work_part * part_rank;
326 m_end = m_begin + work_part;
328 if (range.end() < m_begin) m_begin = range.end();
329 if (range.end() < m_end) m_end = range.end();
341 RangePolicy() -> RangePolicy<>;
343 RangePolicy(int64_t, int64_t) -> RangePolicy<>;
344 RangePolicy(int64_t, int64_t, ChunkSize
const&) -> RangePolicy<>;
346 RangePolicy(DefaultExecutionSpace
const&, int64_t, int64_t) -> RangePolicy<>;
347 RangePolicy(DefaultExecutionSpace
const&, int64_t, int64_t, ChunkSize
const&)
350 template <
typename ES,
typename = std::enable_if_t<is_execution_space_v<ES>>>
351 RangePolicy(ES
const&, int64_t, int64_t) -> RangePolicy<ES>;
353 template <
typename ES,
typename = std::enable_if_t<is_execution_space_v<ES>>>
354 RangePolicy(ES
const&, int64_t, int64_t, ChunkSize
const&) -> RangePolicy<ES>;
365 template <
class ExecSpace,
class... Properties>
366 class TeamPolicyInternal :
public Impl::PolicyTraits<Properties...> {
368 using traits = Impl::PolicyTraits<Properties...>;
371 using index_type =
typename traits::index_type;
384 template <
class FunctorType>
385 static int team_size_max(
const FunctorType&);
397 template <
class FunctorType>
398 static int team_size_recommended(
const FunctorType&);
400 template <
class FunctorType>
401 static int team_size_recommended(
const FunctorType&,
const int&);
403 template <
class FunctorType>
404 int team_size_recommended(
const FunctorType& functor,
405 const int vector_length);
409 TeamPolicyInternal(
const typename traits::execution_space&,
410 int league_size_request,
int team_size_request,
411 int vector_length_request = 1);
413 TeamPolicyInternal(
const typename traits::execution_space&,
414 int league_size_request,
const Kokkos::AUTO_t&,
415 int vector_length_request = 1);
419 TeamPolicyInternal(
int league_size_request,
int team_size_request,
420 int vector_length_request = 1);
422 TeamPolicyInternal(
int league_size_request,
const Kokkos::AUTO_t&,
423 int vector_length_request = 1);
434 KOKKOS_INLINE_FUNCTION
int league_size()
const;
441 KOKKOS_INLINE_FUNCTION
int team_size()
const;
445 inline bool impl_auto_team_size()
const;
448 inline bool impl_auto_vector_length()
const;
450 static int vector_length_max();
452 KOKKOS_INLINE_FUNCTION
int impl_vector_length()
const;
454 inline typename traits::index_type chunk_size()
const;
456 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
463 KOKKOS_INLINE_FUNCTION
464 typename traits::execution_space::scratch_memory_space team_shmem()
const;
467 KOKKOS_INLINE_FUNCTION
int league_rank()
const;
470 KOKKOS_INLINE_FUNCTION
int league_size()
const;
473 KOKKOS_INLINE_FUNCTION
int team_rank()
const;
476 KOKKOS_INLINE_FUNCTION
int team_size()
const;
479 KOKKOS_INLINE_FUNCTION
void team_barrier()
const;
483 template <
class JoinOp>
484 KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type team_reduce(
485 const typename JoinOp::value_type,
const JoinOp&)
const;
492 template <
typename Type>
493 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value)
const;
504 template <
typename Type>
505 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value,
506 Type*
const global_accum)
const;
510 struct PerTeamValue {
512 PerTeamValue(
size_t arg);
515 struct PerThreadValue {
517 PerThreadValue(
size_t arg);
520 template <
class iType,
class... Args>
521 struct ExtractVectorLength {
522 static inline iType value(
523 std::enable_if_t<std::is_integral_v<iType>, iType> val, Args...) {
526 static inline std::enable_if_t<!std::is_integral_v<iType>,
int> value(
527 std::enable_if_t<!std::is_integral_v<iType>, iType>, Args...) {
532 template <
class iType,
class... Args>
533 inline std::enable_if_t<std::is_integral_v<iType>, iType> extract_vector_length(
534 iType val, Args...) {
538 template <
class iType,
class... Args>
539 inline std::enable_if_t<!std::is_integral_v<iType>,
int> extract_vector_length(
546 Impl::PerTeamValue PerTeam(
const size_t& arg);
547 Impl::PerThreadValue PerThread(
const size_t& arg);
549 struct ScratchRequest {
555 inline ScratchRequest(
const int& level_,
556 const Impl::PerTeamValue& team_value) {
558 per_team = team_value.value;
562 inline ScratchRequest(
const int& level_,
563 const Impl::PerThreadValue& thread_value) {
566 per_thread = thread_value.value;
569 inline ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
570 const Impl::PerThreadValue& thread_value) {
572 per_team = team_value.value;
573 per_thread = thread_value.value;
576 inline ScratchRequest(
const int& level_,
577 const Impl::PerThreadValue& thread_value,
578 const Impl::PerTeamValue& team_value) {
580 per_team = team_value.value;
581 per_thread = thread_value.value;
586 void team_policy_check_valid_storage_level_argument(
int level);
614 template <
class... Properties>
616 :
public Impl::TeamPolicyInternal<
617 typename Impl::PolicyTraits<Properties...>::execution_space,
619 using internal_policy = Impl::TeamPolicyInternal<
620 typename Impl::PolicyTraits<Properties...>::execution_space,
623 template <
class... OtherProperties>
627 using traits = Impl::PolicyTraits<Properties...>;
635 int league_size_request,
int team_size_request,
636 int vector_length_request = 1)
637 : internal_policy(space_, league_size_request, team_size_request,
638 vector_length_request) {}
640 TeamPolicy(
const typename traits::execution_space& space_,
641 int league_size_request,
const Kokkos::AUTO_t&,
642 int vector_length_request = 1)
643 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
644 vector_length_request) {}
646 TeamPolicy(
const typename traits::execution_space& space_,
647 int league_size_request,
const Kokkos::AUTO_t&,
648 const Kokkos::AUTO_t&)
649 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
651 TeamPolicy(
const typename traits::execution_space& space_,
652 int league_size_request,
const int team_size_request,
653 const Kokkos::AUTO_t&)
654 : internal_policy(space_, league_size_request, team_size_request,
659 int vector_length_request = 1)
660 : internal_policy(league_size_request, team_size_request,
661 vector_length_request) {}
663 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
664 int vector_length_request = 1)
665 : internal_policy(league_size_request, Kokkos::AUTO(),
666 vector_length_request) {}
668 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
669 const Kokkos::AUTO_t&)
670 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
671 TeamPolicy(
int league_size_request,
const int team_size_request,
672 const Kokkos::AUTO_t&)
673 : internal_policy(league_size_request, team_size_request,
676 template <
class... OtherProperties>
677 TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
680 internal_policy::traits::operator=(p);
684 TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
687 inline TeamPolicy& set_chunk_size(
int chunk) {
689 std::is_same_v<decltype(internal_policy::set_chunk_size(chunk)),
691 "internal set_chunk_size should return a reference");
692 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
695 inline TeamPolicy& set_scratch_size(
const int& level,
696 const Impl::PerTeamValue& per_team) {
697 static_assert(std::is_same_v<decltype(internal_policy::set_scratch_size(
700 "internal set_chunk_size should return a reference");
702 team_policy_check_valid_storage_level_argument(level);
703 return static_cast<TeamPolicy&
>(
704 internal_policy::set_scratch_size(level, per_team));
706 inline TeamPolicy& set_scratch_size(
const int& level,
707 const Impl::PerThreadValue& per_thread) {
708 team_policy_check_valid_storage_level_argument(level);
709 return static_cast<TeamPolicy&
>(
710 internal_policy::set_scratch_size(level, per_thread));
712 inline TeamPolicy& set_scratch_size(
const int& level,
713 const Impl::PerTeamValue& per_team,
714 const Impl::PerThreadValue& per_thread) {
715 team_policy_check_valid_storage_level_argument(level);
716 return static_cast<TeamPolicy&
>(
717 internal_policy::set_scratch_size(level, per_team, per_thread));
719 inline TeamPolicy& set_scratch_size(
const int& level,
720 const Impl::PerThreadValue& per_thread,
721 const Impl::PerTeamValue& per_team) {
722 team_policy_check_valid_storage_level_argument(level);
723 return static_cast<TeamPolicy&
>(
724 internal_policy::set_scratch_size(level, per_team, per_thread));
730 TeamPolicy() -> TeamPolicy<>;
732 TeamPolicy(
int,
int) -> TeamPolicy<>;
733 TeamPolicy(
int,
int,
int) -> TeamPolicy<>;
734 TeamPolicy(
int, Kokkos::AUTO_t
const&) -> TeamPolicy<>;
735 TeamPolicy(
int, Kokkos::AUTO_t
const&,
int) -> TeamPolicy<>;
736 TeamPolicy(
int, Kokkos::AUTO_t
const&, Kokkos::AUTO_t
const&) -> TeamPolicy<>;
737 TeamPolicy(
int,
int, Kokkos::AUTO_t
const&) -> TeamPolicy<>;
741 TeamPolicy(DefaultExecutionSpace
const&,
int,
int) -> TeamPolicy<>;
742 TeamPolicy(DefaultExecutionSpace
const&,
int,
int,
int) -> TeamPolicy<>;
743 TeamPolicy(DefaultExecutionSpace
const&,
int, Kokkos::AUTO_t
const&)
745 TeamPolicy(DefaultExecutionSpace
const&,
int, Kokkos::AUTO_t
const&,
int)
747 TeamPolicy(DefaultExecutionSpace
const&,
int, Kokkos::AUTO_t
const&,
748 Kokkos::AUTO_t
const&) -> TeamPolicy<>;
749 TeamPolicy(DefaultExecutionSpace
const&,
int,
int, Kokkos::AUTO_t
const&)
754 template <
typename ES,
755 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
756 TeamPolicy(ES
const&,
int,
int) -> TeamPolicy<ES>;
758 template <
typename ES,
759 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
760 TeamPolicy(ES
const&,
int,
int,
int) -> TeamPolicy<ES>;
762 template <
typename ES,
763 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
764 TeamPolicy(ES
const&,
int, Kokkos::AUTO_t
const&) -> TeamPolicy<ES>;
766 template <
typename ES,
767 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
768 TeamPolicy(ES
const&,
int, Kokkos::AUTO_t
const&,
int) -> TeamPolicy<ES>;
770 template <
typename ES,
771 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
772 TeamPolicy(ES
const&,
int, Kokkos::AUTO_t
const&, Kokkos::AUTO_t
const&)
775 template <
typename ES,
776 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
777 TeamPolicy(ES
const&,
int,
int, Kokkos::AUTO_t
const&) -> TeamPolicy<ES>;
781 template <
typename iType,
class TeamMemberType>
782 struct TeamThreadRangeBoundariesStruct {
784 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
785 const iType& arg_end,
786 const iType& arg_rank,
787 const iType& arg_size) {
789 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
792 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
793 const iType& arg_end,
794 const iType& arg_rank,
795 const iType& arg_size) {
798 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
799 return end_ < arg_end ? end_ : arg_end;
803 using index_type = iType;
806 enum { increment = 1 };
807 const TeamMemberType& member;
809 KOKKOS_INLINE_FUNCTION
810 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
811 const iType& arg_count)
812 : start(ibegin(0, arg_count, arg_thread.team_rank(),
813 arg_thread.team_size())),
814 end(iend(0, arg_count, arg_thread.team_rank(), arg_thread.team_size())),
815 member(arg_thread) {}
817 KOKKOS_INLINE_FUNCTION
818 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
819 const iType& arg_begin,
const iType& arg_end)
820 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
821 arg_thread.team_size())),
822 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
823 arg_thread.team_size())),
824 member(arg_thread) {}
827 template <
typename iType,
class TeamMemberType>
828 struct TeamVectorRangeBoundariesStruct {
830 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
831 const iType& arg_end,
832 const iType& arg_rank,
833 const iType& arg_size) {
835 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
838 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
839 const iType& arg_end,
840 const iType& arg_rank,
841 const iType& arg_size) {
844 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
845 return end_ < arg_end ? end_ : arg_end;
849 using index_type = iType;
852 enum { increment = 1 };
853 const TeamMemberType& member;
855 KOKKOS_INLINE_FUNCTION
856 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
857 const iType& arg_count)
858 : start(ibegin(0, arg_count, arg_thread.team_rank(),
859 arg_thread.team_size())),
860 end(iend(0, arg_count, arg_thread.team_rank(), arg_thread.team_size())),
861 member(arg_thread) {}
863 KOKKOS_INLINE_FUNCTION
864 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
865 const iType& arg_begin,
const iType& arg_end)
866 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
867 arg_thread.team_size())),
868 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
869 arg_thread.team_size())),
870 member(arg_thread) {}
873 template <
typename iType,
class TeamMemberType>
874 struct ThreadVectorRangeBoundariesStruct {
875 using index_type = iType;
876 const index_type start;
877 const index_type end;
878 enum { increment = 1 };
880 KOKKOS_INLINE_FUNCTION
881 constexpr ThreadVectorRangeBoundariesStruct(
882 const TeamMemberType,
const index_type& arg_count) noexcept
883 : start(static_cast<index_type>(0)), end(arg_count) {}
885 KOKKOS_INLINE_FUNCTION
886 constexpr ThreadVectorRangeBoundariesStruct(
887 const TeamMemberType,
const index_type& arg_begin,
888 const index_type& arg_end) noexcept
889 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
892 template <
class TeamMemberType>
893 struct ThreadSingleStruct {
894 const TeamMemberType& team_member;
895 KOKKOS_INLINE_FUNCTION
896 ThreadSingleStruct(
const TeamMemberType& team_member_)
897 : team_member(team_member_) {}
900 template <
class TeamMemberType>
901 struct VectorSingleStruct {
902 const TeamMemberType& team_member;
903 KOKKOS_INLINE_FUNCTION
904 VectorSingleStruct(
const TeamMemberType& team_member_)
905 : team_member(team_member_) {}
917 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
918 KOKKOS_INLINE_FUNCTION_DELETED
919 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
920 TeamThreadRange(
const TeamMemberType&,
const iType& count) =
delete;
929 template <
typename iType1,
typename iType2,
class TeamMemberType,
930 class _never_use_this_overload>
931 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
932 std::common_type_t<iType1, iType2>, TeamMemberType>
933 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
934 const iType2& end) =
delete;
943 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
944 KOKKOS_INLINE_FUNCTION_DELETED
945 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
946 TeamVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
955 template <
typename iType1,
typename iType2,
class TeamMemberType,
956 class _never_use_this_overload>
957 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
958 std::common_type_t<iType1, iType2>, TeamMemberType>
959 TeamVectorRange(
const TeamMemberType&,
const iType1& begin,
960 const iType2& end) =
delete;
969 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
970 KOKKOS_INLINE_FUNCTION_DELETED
971 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
972 ThreadVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
974 template <
typename iType1,
typename iType2,
class TeamMemberType,
975 class _never_use_this_overload>
976 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
977 std::common_type_t<iType1, iType2>, TeamMemberType>
978 ThreadVectorRange(
const TeamMemberType&,
const iType1& arg_begin,
979 const iType2& arg_end) =
delete;
/// Two-valued flag with `bool` as its underlying type.
/// NOTE(review): presumably marks whether a nest level of a team-level MD
/// range is the innermost one -- confirm against the users of this enum.
enum class TeamMDRangeLastNestLevel : bool {
  NotLastNestLevel = false,
  LastNestLevel    = true
};
/// Two-valued flag with `bool` as its underlying type. TeamThreadMDRange and
/// TeamVectorMDRange expose ParThread as their `par_thread` constant, while
/// ThreadVectorMDRange exposes NotParThread.
enum class TeamMDRangeParThread : bool {
  NotParThread = false,
  ParThread    = true
};
/// Two-valued flag with `bool` as its underlying type. ThreadVectorMDRange and
/// TeamVectorMDRange expose ParVector as their `par_vector` constant, while
/// TeamThreadMDRange exposes NotParVector.
enum class TeamMDRangeParVector : bool {
  NotParVector = false,
  ParVector    = true
};
/// Two-valued flag with `bool` as its underlying type. It is the non-type
/// template parameter of HostBasedNestLevel, AcceleratorBasedNestLevel and
/// ThreadAndVectorNestLevel.
enum class TeamMDRangeThreadAndVector : bool {
  NotBoth = false,
  Both    = true
};
988 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
989 struct HostBasedNestLevel;
991 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
992 struct AcceleratorBasedNestLevel;
1002 template <
typename Rank,
typename ExecSpace,
1003 TeamMDRangeThreadAndVector ThreadAndVector>
1004 struct ThreadAndVectorNestLevel;
/// Stateless tag type. The team-level MD-range parallel_for overloads pass a
/// default-constructed instance to md_parallel_impl in place of a reduction
/// value.
struct NoReductionTag {
};
1008 template <
typename Rank,
typename TeamMDPolicy,
typename Lambda,
1009 typename ReductionValueType>
1010 KOKKOS_INLINE_FUNCTION
void md_parallel_impl(TeamMDPolicy
const& policy,
1011 Lambda
const& lambda,
1012 ReductionValueType&& val);
1015 template <
typename Rank,
typename TeamHandle>
1016 struct TeamThreadMDRange;
1018 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1019 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1020 using NestLevelType = int;
1021 using BoundaryType = int;
1022 using TeamHandleType = TeamHandle;
1023 using ExecutionSpace =
typename TeamHandleType::execution_space;
1024 using ArrayLayout =
typename ExecutionSpace::array_layout;
1026 static constexpr NestLevelType total_nest_level =
1027 Rank<N, OuterDir, InnerDir>::rank;
1028 static constexpr Iterate iter = OuterDir;
1029 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1030 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
1032 static constexpr Iterate direction =
1033 OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1034 ArrayLayout>::outer_iteration_pattern
1037 template <
class... Args>
1038 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType
const& team_, Args&&... args)
1039 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1040 static_assert(
sizeof...(Args) == total_nest_level);
1043 TeamHandleType
const& team;
1044 BoundaryType boundaries[total_nest_level];
1047 template <
typename TeamHandle,
typename... Args>
1048 KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle
const&, Args&&...)
1049 -> TeamThreadMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
1051 template <
typename Rank,
typename TeamHandle>
1052 struct ThreadVectorMDRange;
1054 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1055 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1056 using NestLevelType = int;
1057 using BoundaryType = int;
1058 using TeamHandleType = TeamHandle;
1059 using ExecutionSpace =
typename TeamHandleType::execution_space;
1060 using ArrayLayout =
typename ExecutionSpace::array_layout;
1062 static constexpr NestLevelType total_nest_level =
1063 Rank<N, OuterDir, InnerDir>::rank;
1064 static constexpr Iterate iter = OuterDir;
1065 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
1066 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1068 static constexpr Iterate direction =
1069 OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1070 ArrayLayout>::outer_iteration_pattern
1073 template <
class... Args>
1074 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType
const& team_,
1076 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1077 static_assert(
sizeof...(Args) == total_nest_level);
1080 TeamHandleType
const& team;
1081 BoundaryType boundaries[total_nest_level];
1084 template <
typename TeamHandle,
typename... Args>
1085 KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle
const&, Args&&...)
1086 -> ThreadVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
1088 template <
typename Rank,
typename TeamHandle>
1089 struct TeamVectorMDRange;
1091 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1092 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1093 using NestLevelType = int;
1094 using BoundaryType = int;
1095 using TeamHandleType = TeamHandle;
1096 using ExecutionSpace =
typename TeamHandleType::execution_space;
1097 using ArrayLayout =
typename ExecutionSpace::array_layout;
1099 static constexpr NestLevelType total_nest_level =
1100 Rank<N, OuterDir, InnerDir>::rank;
1101 static constexpr Iterate iter = OuterDir;
1102 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1103 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1105 static constexpr Iterate direction =
1106 iter == Iterate::Default ? Impl::layout_iterate_type_selector<
1107 ArrayLayout>::outer_iteration_pattern
1110 template <
class... Args>
1111 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType
const& team_,
1113 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1114 static_assert(
sizeof...(Args) == total_nest_level);
1117 TeamHandleType
const& team;
1118 BoundaryType boundaries[total_nest_level];
1121 template <
typename TeamHandle,
typename... Args>
1122 KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle
const&, Args&&...)
1123 -> TeamVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
1125 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1126 typename ReducerValueType>
1127 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1128 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1129 ReducerValueType& val) {
1131 !std::is_array_v<ReducerValueType> &&
1132 !std::is_pointer_v<ReducerValueType> &&
1133 !Kokkos::is_reducer_v<ReducerValueType>,
1134 "Only scalar return types are allowed!");
1136 val = ReducerValueType{};
1137 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1138 policy.team.team_reduce(
1139 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1142 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1143 KOKKOS_INLINE_FUNCTION
void parallel_for(
1144 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1145 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1148 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1149 typename ReducerValueType>
1150 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1151 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1152 ReducerValueType& val) {
1154 !std::is_array_v<ReducerValueType> &&
1155 !std::is_pointer_v<ReducerValueType> &&
1156 !Kokkos::is_reducer_v<ReducerValueType>,
1157 "Only a scalar return types are allowed!");
1159 val = ReducerValueType{};
1160 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1162 #ifdef KOKKOS_ENABLE_CUDA
1163 || std::is_same_v<
typename TeamHandle::execution_space,
1165 #elif defined(KOKKOS_ENABLE_HIP)
1166 || std::is_same_v<
typename TeamHandle::execution_space,
1168 #elif defined(KOKKOS_ENABLE_SYCL)
1169 || std::is_same_v<
typename TeamHandle::execution_space,
1173 policy.team.vector_reduce(
1174 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1178 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1179 KOKKOS_INLINE_FUNCTION
void parallel_for(
1180 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1181 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1184 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1185 typename ReducerValueType>
1186 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1187 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1188 ReducerValueType& val) {
1190 !std::is_array_v<ReducerValueType> &&
1191 !std::is_pointer_v<ReducerValueType> &&
1192 !Kokkos::is_reducer_v<ReducerValueType>,
1193 "Only a scalar return types are allowed!");
1195 val = ReducerValueType{};
1196 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1198 #ifdef KOKKOS_ENABLE_CUDA
1199 || std::is_same_v<
typename TeamHandle::execution_space,
1201 #elif defined(KOKKOS_ENABLE_HIP)
1202 || std::is_same_v<
typename TeamHandle::execution_space,
1204 #elif defined(KOKKOS_ENABLE_SYCL)
1205 || std::is_same_v<
typename TeamHandle::execution_space,
1209 policy.team.vector_reduce(
1210 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1212 policy.team.team_reduce(
1213 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1216 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1217 KOKKOS_INLINE_FUNCTION
void parallel_for(
1218 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1219 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1224 template <
typename FunctorType,
typename TagType,
1225 bool HasTag = !std::is_void_v<TagType>>
1226 struct ParallelConstructName;
1228 template <
typename FunctorType,
typename TagType>
1229 struct ParallelConstructName<FunctorType, TagType, true> {
1230 ParallelConstructName(std::string
const& label) : label_ref(label) {
1231 if (label.empty()) {
1232 #ifdef KOKKOS_ENABLE_IMPL_TYPEINFO
1234 std::string(TypeInfo<std::remove_const_t<FunctorType>>::name()) +
1235 "/" + std::string(TypeInfo<TagType>::name());
1237 default_name = std::string(
typeid(FunctorType).name()) +
"/" +
1238 typeid(TagType).name();
1242 std::string
const&
get() {
1243 return (label_ref.empty()) ? default_name : label_ref;
1245 std::string
const& label_ref;
1246 std::string default_name;
1249 template <
typename FunctorType,
typename TagType>
1250 struct ParallelConstructName<FunctorType, TagType, false> {
1251 ParallelConstructName(std::string
const& label) : label_ref(label) {
1252 if (label.empty()) {
1253 #ifdef KOKKOS_ENABLE_IMPL_TYPEINFO
1254 default_name = TypeInfo<std::remove_const_t<FunctorType>>::name();
1256 default_name =
typeid(FunctorType).name();
1260 std::string
const&
get() {
1261 return (label_ref.empty()) ? default_name : label_ref;
1263 std::string
const& label_ref;
1264 std::string default_name;
1275 template <
class PatternTag,
class... Args>
1276 struct PatternImplSpecializationFromTag;
1278 template <
class... Args>
1279 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1280 : type_identity<ParallelFor<Args...>> {};
1282 template <
class... Args>
1283 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1284 : type_identity<ParallelReduce<Args...>> {};
1286 template <
class... Args>
1287 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1288 : type_identity<ParallelScan<Args...>> {};
1290 template <
class PatternImpl>
1291 struct PatternTagFromImplSpecialization;
1293 template <
class... Args>
1294 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1295 : type_identity<ParallelForTag> {};
1297 template <
class... Args>
1298 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1299 : type_identity<ParallelReduceTag> {};
1301 template <
class... Args>
1302 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1303 : type_identity<ParallelScanTag> {};
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range.
member_type chunk_size() const
Returns the chunk size.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy & set_chunk_size(int chunk_size)
Sets chunk_size to a discrete value.
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...