17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_EXECPOLICY_HPP
23 #define KOKKOS_EXECPOLICY_HPP
25 #include <Kokkos_Core_fwd.hpp>
26 #include <impl/Kokkos_Traits.hpp>
27 #include <impl/Kokkos_Error.hpp>
28 #include <impl/Kokkos_AnalyzePolicy.hpp>
29 #include <Kokkos_Concepts.hpp>
// Empty tag types naming the three parallel patterns. Further down in this
// file they are mapped to and from the ParallelFor / ParallelReduce /
// ParallelScan implementation templates by PatternImplSpecializationFromTag
// and PatternTagFromImplSpecialization.
37 struct ParallelForTag {};
38 struct ParallelScanTag {};
39 struct ParallelReduceTag {};
// Explicit constructor: copies value_ into the `value` member. Being
// `explicit`, it does not allow an int to convert to ChunkSize implicitly.
43 explicit ChunkSize(
int value_) : value(value_) {}
44 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
// Deprecated, non-explicit constructor with the same effect as the explicit
// one (stores value_ in `value`). It is only compiled under
// KOKKOS_ENABLE_DEPRECATED_CODE_4 and carries a deprecation message telling
// users to construct ChunkSize explicitly.
// NOTE(review): the defaulted, otherwise unused template parameter T
// presumably makes this overload lose to the non-template explicit
// constructor whenever both are viable - confirm.
45 template <
typename T =
void>
46 KOKKOS_DEPRECATED_WITH_COMMENT(
"ChunkSize should be constructed explicitly.")
47 ChunkSize(
int value_) : value(value_) {}
72 template <
class... Properties>
73 class RangePolicy :
public Impl::PolicyTraits<Properties...> {
// Shorthand for the policy traits computed from the Properties pack (also the
// base class of RangePolicy).
75 using traits = Impl::PolicyTraits<Properties...>;
// Execution space instance this policy was constructed with.
78 typename traits::execution_space m_space;
// Range bounds, returned unchanged by begin() and end().
79 typename traits::index_type m_begin;
80 typename traits::index_type m_end;
// Chunk size (returned by chunk_size()) and its companion mask. Wherever this
// file assigns a chunk size it also sets the mask to m_granularity - 1; the
// constructors visible here start the mask at 0.
81 typename traits::index_type m_granularity;
82 typename traits::index_type m_granularity_mask;
84 template <
class... OtherProperties>
// Type returned by begin()/end()/chunk_size(): the traits' index_type.
90 using member_type =
typename traits::index_type;
// The same type re-exported under the name index_type.
91 using index_type =
typename traits::index_type;
93 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space& space()
const {
96 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
97 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
104 void operator()(
const int&)
const {}
106 template <
class... OtherProperties>
107 RangePolicy(
const RangePolicy<OtherProperties...>& p)
112 m_granularity(p.m_granularity),
113 m_granularity_mask(p.m_granularity_mask) {}
120 m_granularity_mask(0) {}
123 template <
typename IndexType1,
typename IndexType2,
124 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
125 std::is_convertible_v<IndexType2, member_type>),
127 inline RangePolicy(
const IndexType1 work_begin,
const IndexType2 work_end)
128 :
RangePolicy(
typename traits::execution_space(), work_begin, work_end) {}
131 template <
typename IndexType1,
typename IndexType2,
132 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
133 std::is_convertible_v<IndexType2, member_type>),
135 inline RangePolicy(
const typename traits::execution_space& work_space,
136 const IndexType1 work_begin,
const IndexType2 work_end)
137 : m_space(work_space),
141 m_granularity_mask(0) {
142 check_conversion_safety(work_begin);
143 check_conversion_safety(work_end);
144 check_bounds_validity();
145 set_auto_chunk_size();
148 template <
typename IndexType1,
typename IndexType2,
149 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
150 std::is_convertible_v<IndexType2, member_type>),
152 RangePolicy(
const typename traits::execution_space& work_space,
153 const IndexType1 work_begin,
const IndexType2 work_end,
155 : m_space(work_space),
159 m_granularity_mask(0) {
160 check_conversion_safety(work_begin);
161 check_conversion_safety(work_end);
162 check_bounds_validity();
167 template <
typename IndexType1,
typename IndexType2,
typename... Args,
168 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
169 std::is_convertible_v<IndexType2, member_type>),
171 RangePolicy(
const IndexType1 work_begin,
const IndexType2 work_end,
172 const ChunkSize chunk_size)
173 :
RangePolicy(
typename traits::execution_space(), work_begin, work_end,
177 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
178 KOKKOS_DEPRECATED_WITH_COMMENT(
"Use set_chunk_size instead")
179 inline
void set(ChunkSize chunksize) {
180 m_granularity = chunksize.value;
181 m_granularity_mask = m_granularity - 1;
187 inline member_type
chunk_size()
const {
return m_granularity; }
192 m_granularity_mask = m_granularity - 1;
198 inline void set_auto_chunk_size() {
199 #ifdef KOKKOS_ENABLE_SYCL
200 if (std::is_same_v<
typename traits::execution_space,
201 Kokkos::Experimental::SYCL>) {
205 m_granularity_mask = 0;
209 auto concurrency =
static_cast<int64_t
>(m_space.concurrency());
210 if (concurrency == 0) concurrency = 1;
212 if (m_granularity > 0) {
213 if (!Impl::is_integral_power_of_two(m_granularity))
214 Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
217 int64_t new_chunk_size = 1;
218 while (new_chunk_size * 100 * concurrency <
219 static_cast<int64_t>(m_end - m_begin))
221 if (new_chunk_size < 128) {
223 while ((new_chunk_size * 40 * concurrency <
224 static_cast<int64_t>(m_end - m_begin)) &&
225 (new_chunk_size < 128))
228 m_granularity = new_chunk_size;
229 m_granularity_mask = m_granularity - 1;
232 void check_bounds_validity() {
233 if (m_end < m_begin) {
234 std::string msg =
"Kokkos::RangePolicy bounds error: The lower bound (" +
235 std::to_string(m_begin) +
236 ") is greater than the upper bound (" +
237 std::to_string(m_end) +
").\n";
238 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
239 Kokkos::abort(msg.c_str());
243 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
244 Kokkos::Impl::log_warning(msg);
250 template <
typename IndexType>
251 static void check_conversion_safety(
const IndexType bound) {
252 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \
253 defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
256 "Kokkos::RangePolicy bound type error: an unsafe implicit conversion "
257 "is performed on a bound (" +
258 std::to_string(bound) +
260 "not preserve its original value.\n";
263 if constexpr (std::is_signed_v<IndexType> !=
264 std::is_signed_v<member_type>) {
266 if constexpr (std::is_signed_v<IndexType>)
267 warn |= (bound < static_cast<IndexType>(
268 std::numeric_limits<member_type>::min()));
271 if constexpr (std::is_signed_v<member_type>)
272 warn |= (bound > static_cast<IndexType>(
273 std::numeric_limits<member_type>::max()));
277 warn |= (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
280 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
281 Kokkos::abort(msg.c_str());
284 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
285 Kokkos::Impl::log_warning(msg);
// Re-export the work tag and index type of the RangePolicy this sub-range is
// carved out of.
299 using work_tag =
typename RangePolicy<Properties...>::work_tag;
300 using member_type =
typename RangePolicy<Properties...>::member_type;
302 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
303 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
309 KOKKOS_INLINE_FUNCTION
312 : m_begin(0), m_end(0) {
315 const member_type work_part =
316 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
317 range.m_granularity_mask) &
318 ~member_type(range.m_granularity_mask);
320 m_begin = range.begin() + work_part * part_rank;
321 m_end = m_begin + work_part;
323 if (range.end() < m_begin) m_begin = range.end();
324 if (range.end() < m_end) m_end = range.end();
// Deduction guides for constructor calls that name no execution-space type:
// all of them deduce RangePolicy<> (empty property list).
// Default construction.
336 RangePolicy()->RangePolicy<>;
// (begin, end) and (begin, end, chunk size).
338 RangePolicy(int64_t, int64_t)->RangePolicy<>;
339 RangePolicy(int64_t, int64_t, ChunkSize
const&)->RangePolicy<>;
// A leading DefaultExecutionSpace instance also deduces RangePolicy<>.
341 RangePolicy(DefaultExecutionSpace
const&, int64_t, int64_t)->RangePolicy<>;
342 RangePolicy(DefaultExecutionSpace
const&, int64_t, int64_t, ChunkSize
const&)
// Deduction guides for a leading execution-space instance of any type ES for
// which is_execution_space_v<ES> holds: the policy type becomes
// RangePolicy<ES>.
// (space, begin, end)
345 template <
typename ES,
typename = std::enable_if_t<is_execution_space_v<ES>>>
346 RangePolicy(ES
const&, int64_t, int64_t)->RangePolicy<ES>;
// (space, begin, end, chunk size)
348 template <
typename ES,
typename = std::enable_if_t<is_execution_space_v<ES>>>
349 RangePolicy(ES
const&, int64_t, int64_t, ChunkSize
const&)->RangePolicy<ES>;
360 template <
class ExecSpace,
class... Properties>
361 class TeamPolicyInternal :
public Impl::PolicyTraits<Properties...> {
// Shorthand for the policy traits computed from the Properties pack (also the
// base class of TeamPolicyInternal).
363 using traits = Impl::PolicyTraits<Properties...>;
// Index type taken from those traits.
366 using index_type =
typename traits::index_type;
// Team-size query declarations. Only the signatures are visible here; no
// definitions appear in this part of the file.
// NOTE(review): presumably the largest team size usable with the given
// functor - confirm against the backend specializations.
379 template <
class FunctorType>
380 static int team_size_max(
const FunctorType&);
// NOTE(review): presumably a suggested team size for the given functor -
// confirm against the backend specializations.
392 template <
class FunctorType>
393 static int team_size_recommended(
const FunctorType&);
// Static overload taking an additional, unnamed `const int&` argument.
// NOTE(review): the meaning of that second argument is not visible here.
395 template <
class FunctorType>
396 static int team_size_recommended(
const FunctorType&,
const int&);
// Non-static overload taking the functor and a vector length.
398 template <
class FunctorType>
399 int team_size_recommended(
const FunctorType& functor,
400 const int vector_length);
// Constructor declarations (no definitions in this part of the file).
// With an explicit execution space instance, a league size, a team size and
// an optional vector length (default 1).
404 TeamPolicyInternal(
const typename traits::execution_space&,
405 int league_size_request,
int team_size_request,
406 int vector_length_request = 1);
// As above, but a Kokkos::AUTO_t argument takes the place of the team size.
408 TeamPolicyInternal(
const typename traits::execution_space&,
409 int league_size_request,
const Kokkos::AUTO_t&,
410 int vector_length_request = 1);
// The same two forms without an execution space argument.
414 TeamPolicyInternal(
int league_size_request,
int team_size_request,
415 int vector_length_request = 1);
417 TeamPolicyInternal(
int league_size_request,
const Kokkos::AUTO_t&,
418 int vector_length_request = 1);
// Accessor declarations (signatures only; definitions are not in this part of
// the file, so the descriptions below follow the names and are to be
// confirmed against the backend specializations).
// NOTE(review): presumably the number of teams in the league.
429 KOKKOS_INLINE_FUNCTION
int league_size()
const;
// NOTE(review): presumably the number of threads per team.
436 KOKKOS_INLINE_FUNCTION
int team_size()
const;
// NOTE(review): presumably reports whether the team size was requested via
// Kokkos::AUTO.
440 inline bool impl_auto_team_size()
const;
// NOTE(review): presumably reports whether the vector length was requested
// via Kokkos::AUTO.
443 inline bool impl_auto_vector_length()
const;
445 static int vector_length_max();
447 KOKKOS_INLINE_FUNCTION
int impl_vector_length()
const;
// Chunk size getter, returning the traits' index_type.
449 inline typename traits::index_type chunk_size()
const;
// Chunk size setter; returns a reference to the policy type, which lets
// TeamPolicy::set_chunk_size cast the result back to TeamPolicy&.
451 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
// Declarations of per-team-member operations (signatures only).
// NOTE(review): league_size() and team_size() are also declared earlier in
// this listing; presumably the declarations below belong to a nested
// team-member handle type whose header is not visible here - confirm.
// Returns an object of the execution space's scratch_memory_space type.
458 KOKKOS_INLINE_FUNCTION
459 typename traits::execution_space::scratch_memory_space team_shmem()
const;
// NOTE(review): presumably this team's index within the league.
462 KOKKOS_INLINE_FUNCTION
int league_rank()
const;
465 KOKKOS_INLINE_FUNCTION
int league_size()
const;
// NOTE(review): presumably the calling thread's index within its team.
468 KOKKOS_INLINE_FUNCTION
int team_rank()
const;
471 KOKKOS_INLINE_FUNCTION
int team_size()
const;
// NOTE(review): presumably a barrier across the threads of one team.
474 KOKKOS_INLINE_FUNCTION
void team_barrier()
const;
// Reduction over a value of JoinOp::value_type using the given join operator;
// returns JoinOp::value_type.
478 template <
class JoinOp>
479 KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type team_reduce(
480 const typename JoinOp::value_type,
const JoinOp&)
const;
// Scan taking this thread's contribution and returning a value of the same
// type.
// NOTE(review): whether the scan is inclusive or exclusive is not visible
// here.
487 template <
typename Type>
488 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value)
const;
// Scan overload that additionally receives a pointer named global_accum.
// NOTE(review): how global_accum is read or updated is not visible here.
499 template <
typename Type>
500 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value,
501 Type*
const global_accum)
const;
505 struct PerTeamValue {
507 PerTeamValue(
size_t arg);
510 struct PerThreadValue {
512 PerThreadValue(
size_t arg);
515 template <
class iType,
class... Args>
516 struct ExtractVectorLength {
517 static inline iType value(
518 std::enable_if_t<std::is_integral<iType>::value, iType> val, Args...) {
521 static inline std::enable_if_t<!std::is_integral<iType>::value,
int> value(
522 std::enable_if_t<!std::is_integral<iType>::value, iType>, Args...) {
527 template <
class iType,
class... Args>
528 inline std::enable_if_t<std::is_integral<iType>::value, iType>
529 extract_vector_length(iType val, Args...) {
533 template <
class iType,
class... Args>
534 inline std::enable_if_t<!std::is_integral<iType>::value,
int>
535 extract_vector_length(iType, Args...) {
// Factory declarations wrapping a size_t in Impl::PerTeamValue and
// Impl::PerThreadValue respectively. ScratchRequest and
// TeamPolicy::set_scratch_size take these wrapper types to tell a per-team
// amount from a per-thread amount.
541 Impl::PerTeamValue PerTeam(
const size_t& arg);
542 Impl::PerThreadValue PerThread(
const size_t& arg);
544 struct ScratchRequest {
550 inline ScratchRequest(
const int& level_,
551 const Impl::PerTeamValue& team_value) {
553 per_team = team_value.value;
557 inline ScratchRequest(
const int& level_,
558 const Impl::PerThreadValue& thread_value) {
561 per_thread = thread_value.value;
564 inline ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
565 const Impl::PerThreadValue& thread_value) {
567 per_team = team_value.value;
568 per_thread = thread_value.value;
571 inline ScratchRequest(
const int& level_,
572 const Impl::PerThreadValue& thread_value,
573 const Impl::PerTeamValue& team_value) {
575 per_team = team_value.value;
576 per_thread = thread_value.value;
// Declaration only. TeamPolicy::set_scratch_size calls this with the
// requested scratch level before forwarding to the internal policy.
// NOTE(review): which levels are accepted and how a bad level is reported is
// defined elsewhere - confirm.
581 void team_policy_check_valid_storage_level_argument(
int level);
609 template <
class... Properties>
611 :
public Impl::TeamPolicyInternal<
612 typename Impl::PolicyTraits<Properties...>::execution_space,
614 using internal_policy = Impl::TeamPolicyInternal<
615 typename Impl::PolicyTraits<Properties...>::execution_space,
618 template <
class... OtherProperties>
622 using traits = Impl::PolicyTraits<Properties...>;
630 int league_size_request,
int team_size_request,
631 int vector_length_request = 1)
632 : internal_policy(space_, league_size_request, team_size_request,
633 vector_length_request) {}
635 TeamPolicy(
const typename traits::execution_space& space_,
636 int league_size_request,
const Kokkos::AUTO_t&,
637 int vector_length_request = 1)
638 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
639 vector_length_request) {}
641 TeamPolicy(
const typename traits::execution_space& space_,
642 int league_size_request,
const Kokkos::AUTO_t&,
643 const Kokkos::AUTO_t&)
644 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
646 TeamPolicy(
const typename traits::execution_space& space_,
647 int league_size_request,
const int team_size_request,
648 const Kokkos::AUTO_t&)
649 : internal_policy(space_, league_size_request, team_size_request,
654 int vector_length_request = 1)
655 : internal_policy(league_size_request, team_size_request,
656 vector_length_request) {}
658 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
659 int vector_length_request = 1)
660 : internal_policy(league_size_request, Kokkos::AUTO(),
661 vector_length_request) {}
663 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
664 const Kokkos::AUTO_t&)
665 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
666 TeamPolicy(
int league_size_request,
const int team_size_request,
667 const Kokkos::AUTO_t&)
668 : internal_policy(league_size_request, team_size_request,
671 template <
class... OtherProperties>
672 TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
675 internal_policy::traits::operator=(p);
679 TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
682 inline TeamPolicy& set_chunk_size(
int chunk) {
683 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
684 internal_policy&>::value,
685 "internal set_chunk_size should return a reference");
686 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
689 inline TeamPolicy& set_scratch_size(
const int& level,
690 const Impl::PerTeamValue& per_team) {
691 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
693 internal_policy&>::value,
694 "internal set_chunk_size should return a reference");
696 team_policy_check_valid_storage_level_argument(level);
697 return static_cast<TeamPolicy&
>(
698 internal_policy::set_scratch_size(level, per_team));
700 inline TeamPolicy& set_scratch_size(
const int& level,
701 const Impl::PerThreadValue& per_thread) {
702 team_policy_check_valid_storage_level_argument(level);
703 return static_cast<TeamPolicy&
>(
704 internal_policy::set_scratch_size(level, per_thread));
706 inline TeamPolicy& set_scratch_size(
const int& level,
707 const Impl::PerTeamValue& per_team,
708 const Impl::PerThreadValue& per_thread) {
709 team_policy_check_valid_storage_level_argument(level);
710 return static_cast<TeamPolicy&
>(
711 internal_policy::set_scratch_size(level, per_team, per_thread));
713 inline TeamPolicy& set_scratch_size(
const int& level,
714 const Impl::PerThreadValue& per_thread,
715 const Impl::PerTeamValue& per_team) {
716 team_policy_check_valid_storage_level_argument(level);
717 return static_cast<TeamPolicy&
>(
718 internal_policy::set_scratch_size(level, per_team, per_thread));
// Deduction guides for constructor calls without an execution space
// argument: every form deduces TeamPolicy<> (empty property list).
// Default construction.
724 TeamPolicy()->TeamPolicy<>;
// Two or three plain ints.
726 TeamPolicy(
int,
int)->TeamPolicy<>;
727 TeamPolicy(
int,
int,
int)->TeamPolicy<>;
// Kokkos::AUTO_t as the second argument, optionally followed by an int or a
// second Kokkos::AUTO_t.
728 TeamPolicy(
int, Kokkos::AUTO_t
const&)->TeamPolicy<>;
729 TeamPolicy(
int, Kokkos::AUTO_t
const&,
int)->TeamPolicy<>;
730 TeamPolicy(
int, Kokkos::AUTO_t
const&, Kokkos::AUTO_t
const&)->TeamPolicy<>;
// Two ints followed by Kokkos::AUTO_t.
731 TeamPolicy(
int,
int, Kokkos::AUTO_t
const&)->TeamPolicy<>;
// Deduction guides for a leading DefaultExecutionSpace instance followed by
// two or three ints: these also deduce TeamPolicy<>.
735 TeamPolicy(DefaultExecutionSpace
const&,
int,
int)->TeamPolicy<>;
736 TeamPolicy(DefaultExecutionSpace
const&,
int,
int,
int)->TeamPolicy<>;
737 TeamPolicy(DefaultExecutionSpace
const&,
int, Kokkos::AUTO_t
const&)
739 TeamPolicy(DefaultExecutionSpace
const&,
int, Kokkos::AUTO_t
const&,
int)
741 TeamPolicy(DefaultExecutionSpace
const&,
int, Kokkos::AUTO_t
const&,
742 Kokkos::AUTO_t
const&)
744 TeamPolicy(DefaultExecutionSpace
const&,
int,
int, Kokkos::AUTO_t
const&)
// Deduction guides for a leading execution-space instance of any type ES for
// which Kokkos::is_execution_space_v<ES> holds: the policy type becomes
// TeamPolicy<ES>.
// (space, int, int)
749 template <
typename ES,
750 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
751 TeamPolicy(ES
const&,
int,
int)->TeamPolicy<ES>;
// (space, int, int, int)
753 template <
typename ES,
754 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
755 TeamPolicy(ES
const&,
int,
int,
int)->TeamPolicy<ES>;
// (space, int, AUTO)
757 template <
typename ES,
758 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
759 TeamPolicy(ES
const&,
int, Kokkos::AUTO_t
const&)->TeamPolicy<ES>;
// (space, int, AUTO, int)
761 template <
typename ES,
762 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
763 TeamPolicy(ES
const&,
int, Kokkos::AUTO_t
const&,
int)->TeamPolicy<ES>;
765 template <
typename ES,
766 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
767 TeamPolicy(ES
const&,
int, Kokkos::AUTO_t
const&, Kokkos::AUTO_t
const&)
// Deduction guide for (space, int, int, AUTO) with any ES for which
// Kokkos::is_execution_space_v<ES> holds: deduces TeamPolicy<ES>.
770 template <
typename ES,
771 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
772 TeamPolicy(ES
const&,
int,
int, Kokkos::AUTO_t
const&)->TeamPolicy<ES>;
776 template <
typename iType,
class TeamMemberType>
777 struct TeamThreadRangeBoundariesStruct {
779 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
780 const iType& arg_end,
781 const iType& arg_rank,
782 const iType& arg_size) {
784 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
787 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
788 const iType& arg_end,
789 const iType& arg_rank,
790 const iType& arg_size) {
793 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
794 return end_ < arg_end ? end_ : arg_end;
798 using index_type = iType;
801 enum { increment = 1 };
802 const TeamMemberType& thread;
804 KOKKOS_INLINE_FUNCTION
805 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
806 const iType& arg_end)
808 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
809 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
810 thread(arg_thread) {}
812 KOKKOS_INLINE_FUNCTION
813 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
814 const iType& arg_begin,
const iType& arg_end)
815 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
816 arg_thread.team_size())),
817 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
818 arg_thread.team_size())),
819 thread(arg_thread) {}
822 template <
typename iType,
class TeamMemberType>
823 struct TeamVectorRangeBoundariesStruct {
825 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
826 const iType& arg_end,
827 const iType& arg_rank,
828 const iType& arg_size) {
830 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
833 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
834 const iType& arg_end,
835 const iType& arg_rank,
836 const iType& arg_size) {
839 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
840 return end_ < arg_end ? end_ : arg_end;
844 using index_type = iType;
847 enum { increment = 1 };
848 const TeamMemberType& thread;
850 KOKKOS_INLINE_FUNCTION
851 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
852 const iType& arg_end)
854 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
855 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
856 thread(arg_thread) {}
858 KOKKOS_INLINE_FUNCTION
859 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
860 const iType& arg_begin,
const iType& arg_end)
861 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
862 arg_thread.team_size())),
863 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
864 arg_thread.team_size())),
865 thread(arg_thread) {}
868 template <
typename iType,
class TeamMemberType>
869 struct ThreadVectorRangeBoundariesStruct {
870 using index_type = iType;
871 const index_type start;
872 const index_type end;
873 enum { increment = 1 };
875 KOKKOS_INLINE_FUNCTION
876 constexpr ThreadVectorRangeBoundariesStruct(
const TeamMemberType,
877 const index_type& count) noexcept
878 : start(static_cast<index_type>(0)), end(count) {}
880 KOKKOS_INLINE_FUNCTION
881 constexpr ThreadVectorRangeBoundariesStruct(
882 const TeamMemberType,
const index_type& arg_begin,
883 const index_type& arg_end) noexcept
884 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
887 template <
class TeamMemberType>
888 struct ThreadSingleStruct {
889 const TeamMemberType& team_member;
890 KOKKOS_INLINE_FUNCTION
891 ThreadSingleStruct(
const TeamMemberType& team_member_)
892 : team_member(team_member_) {}
895 template <
class TeamMemberType>
896 struct VectorSingleStruct {
897 const TeamMemberType& team_member;
898 KOKKOS_INLINE_FUNCTION
899 VectorSingleStruct(
const TeamMemberType& team_member_)
900 : team_member(team_member_) {}
// Deleted placeholder overload of TeamThreadRange taking a team handle and a
// count. The extra template parameter _never_use_this_overload cannot be
// deduced from the call arguments, so ordinary calls never select this
// overload, and it is deleted in any case.
// NOTE(review): presumably kept as the documented signature that
// backend-specific overloads follow - confirm.
912 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
913 KOKKOS_INLINE_FUNCTION_DELETED
914 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
915 TeamThreadRange(
const TeamMemberType&,
const iType& count) =
delete;
// Deleted placeholder overload of TeamThreadRange taking a team handle plus
// begin and end bounds of possibly different types; the declared return type
// uses std::common_type_t of the two bound types as index type. The extra
// template parameter _never_use_this_overload cannot be deduced from the call
// arguments, so ordinary calls never select this overload.
924 template <
typename iType1,
typename iType2,
class TeamMemberType,
925 class _never_use_this_overload>
926 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
927 std::common_type_t<iType1, iType2>, TeamMemberType>
928 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
929 const iType2& end) =
delete;
938 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
939 KOKKOS_INLINE_FUNCTION_DELETED
940 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
941 TeamVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
950 template <
typename iType1,
typename iType2,
class TeamMemberType,
951 class _never_use_this_overload>
952 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
953 std::common_type_t<iType1, iType2>, TeamMemberType>
954 TeamVectorRange(
const TeamMemberType&,
const iType1& begin,
955 const iType2& end) =
delete;
// Deleted placeholder overload of ThreadVectorRange taking a team handle and
// a count. The extra template parameter _never_use_this_overload cannot be
// deduced from the call arguments, so ordinary calls never select this
// overload, and it is deleted in any case.
964 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
965 KOKKOS_INLINE_FUNCTION_DELETED
966 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
967 ThreadVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
// Deleted placeholder overload of ThreadVectorRange taking a team handle plus
// begin and end bounds of possibly different types; the declared return type
// uses std::common_type_t of the two bound types as index type. The extra
// template parameter _never_use_this_overload cannot be deduced from the call
// arguments, so ordinary calls never select this overload.
969 template <
typename iType1,
typename iType2,
class TeamMemberType,
970 class _never_use_this_overload>
971 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
972 std::common_type_t<iType1, iType2>, TeamMemberType>
973 ThreadVectorRange(
const TeamMemberType&,
const iType1& arg_begin,
974 const iType2& arg_end) =
delete;
// Two-state compile-time switches for the team-level MD range code. Each enum
// has `bool` as underlying type; the first enumerator is `false`, the second
// `true`.

/// Marks whether a nest level is the last one.
enum class TeamMDRangeLastNestLevel : bool {
  NotLastNestLevel,
  LastNestLevel
};

/// Marks whether a range is parallel over threads.
enum class TeamMDRangeParThread : bool {
  NotParThread,
  ParThread
};

/// Marks whether a range is parallel over vector lanes.
enum class TeamMDRangeParVector : bool {
  NotParVector,
  ParVector
};

/// Marks whether both thread and vector parallelism apply.
enum class TeamMDRangeThreadAndVector : bool {
  NotBoth,
  Both
};
// Forward declarations of the nest-level selector templates; their
// definitions are not in this part of the file. All are parameterized on a
// Rank type and a TeamMDRangeThreadAndVector switch.
// NOTE(review): presumably the host and accelerator variants choose how many
// nest levels go to threads versus vector lanes - confirm at the definitions.
983 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
984 struct HostBasedNestLevel;
986 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
987 struct AcceleratorBasedNestLevel;
// Variant that additionally takes the execution space type.
997 template <
typename Rank,
typename ExecSpace,
998 TeamMDRangeThreadAndVector ThreadAndVector>
999 struct ThreadAndVectorNestLevel;
// Empty tag passed in the reduction-value position by the parallel_for
// overloads in this file to indicate that no reduction result exists.
1001 struct NoReductionTag {};
// Declaration of the shared implementation behind the team-level MD-range
// parallel_for / parallel_reduce overloads in this file. `val` is a
// forwarding reference: parallel_reduce passes the caller's reduction
// variable, parallel_for passes a NoReductionTag temporary.
1003 template <
typename Rank,
typename TeamMDPolicy,
typename Lambda,
1004 typename ReductionValueType>
1005 KOKKOS_INLINE_FUNCTION
void md_parallel_impl(TeamMDPolicy
const& policy,
1006 Lambda
const& lambda,
1007 ReductionValueType&& val);
// Primary template, declared only; the usable form is the partial
// specialization for Rank<N, OuterDir, InnerDir> that follows.
1010 template <
typename Rank,
typename TeamHandle>
1011 struct TeamThreadMDRange;
1013 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1014 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1015 using NestLevelType = int;
1016 using BoundaryType = int;
1017 using TeamHandleType = TeamHandle;
1018 using ExecutionSpace =
typename TeamHandleType::execution_space;
1019 using ArrayLayout =
typename ExecutionSpace::array_layout;
1021 static constexpr NestLevelType total_nest_level =
1022 Rank<N, OuterDir, InnerDir>::rank;
1023 static constexpr Iterate iter = OuterDir;
1024 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1025 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
1027 static constexpr Iterate direction =
1028 OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1029 ArrayLayout>::outer_iteration_pattern
1032 template <
class... Args>
1033 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType
const& team_, Args&&... args)
1034 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1035 static_assert(
sizeof...(Args) == total_nest_level);
1038 TeamHandleType
const& team;
1039 BoundaryType boundaries[total_nest_level];
// Deduction guide: TeamThreadMDRange(team, b0, b1, ...) deduces a rank equal
// to the number of extra arguments, with Iterate::Default as outer direction.
1042 template <
typename TeamHandle,
typename... Args>
1043 KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle
const&, Args&&...)
1044 ->TeamThreadMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// Primary template, declared only; the usable form is the partial
// specialization for Rank<N, OuterDir, InnerDir> that follows.
1046 template <
typename Rank,
typename TeamHandle>
1047 struct ThreadVectorMDRange;
1049 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1050 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1051 using NestLevelType = int;
1052 using BoundaryType = int;
1053 using TeamHandleType = TeamHandle;
1054 using ExecutionSpace =
typename TeamHandleType::execution_space;
1055 using ArrayLayout =
typename ExecutionSpace::array_layout;
1057 static constexpr NestLevelType total_nest_level =
1058 Rank<N, OuterDir, InnerDir>::rank;
1059 static constexpr Iterate iter = OuterDir;
1060 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
1061 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1063 static constexpr Iterate direction =
1064 OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1065 ArrayLayout>::outer_iteration_pattern
1068 template <
class... Args>
1069 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType
const& team_,
1071 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1072 static_assert(
sizeof...(Args) == total_nest_level);
1075 TeamHandleType
const& team;
1076 BoundaryType boundaries[total_nest_level];
// Deduction guide: ThreadVectorMDRange(team, b0, b1, ...) deduces a rank equal
// to the number of extra arguments, with Iterate::Default as outer direction.
1079 template <
typename TeamHandle,
typename... Args>
1080 KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle
const&, Args&&...)
1081 ->ThreadVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// Primary template, declared only; the usable form is the partial
// specialization for Rank<N, OuterDir, InnerDir> that follows.
1083 template <
typename Rank,
typename TeamHandle>
1084 struct TeamVectorMDRange;
1086 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1087 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1088 using NestLevelType = int;
1089 using BoundaryType = int;
1090 using TeamHandleType = TeamHandle;
1091 using ExecutionSpace =
typename TeamHandleType::execution_space;
1092 using ArrayLayout =
typename ExecutionSpace::array_layout;
1094 static constexpr NestLevelType total_nest_level =
1095 Rank<N, OuterDir, InnerDir>::rank;
1096 static constexpr Iterate iter = OuterDir;
1097 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1098 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1100 static constexpr Iterate direction =
1101 iter == Iterate::Default ? Impl::layout_iterate_type_selector<
1102 ArrayLayout>::outer_iteration_pattern
1105 template <
class... Args>
1106 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType
const& team_,
1108 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1109 static_assert(
sizeof...(Args) == total_nest_level);
1112 TeamHandleType
const& team;
1113 BoundaryType boundaries[total_nest_level];
// Deduction guide: TeamVectorMDRange(team, b0, b1, ...) deduces a rank equal
// to the number of extra arguments, with Iterate::Default as outer direction.
1116 template <
typename TeamHandle,
typename... Args>
1117 KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle
const&, Args&&...)
1118 ->TeamVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
1120 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1121 typename ReducerValueType>
1122 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1123 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1124 ReducerValueType& val) {
1126 !std::is_array_v<ReducerValueType> &&
1127 !std::is_pointer_v<ReducerValueType> &&
1128 !Kokkos::is_reducer_v<ReducerValueType>,
1129 "Only scalar return types are allowed!");
1131 val = ReducerValueType{};
1132 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1133 policy.team.team_reduce(
1134 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1137 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1138 KOKKOS_INLINE_FUNCTION
void parallel_for(
1139 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1140 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1143 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1144 typename ReducerValueType>
1145 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1146 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1147 ReducerValueType& val) {
1149 !std::is_array_v<ReducerValueType> &&
1150 !std::is_pointer_v<ReducerValueType> &&
1151 !Kokkos::is_reducer_v<ReducerValueType>,
1152 "Only a scalar return types are allowed!");
1154 val = ReducerValueType{};
1155 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1157 #ifdef KOKKOS_ENABLE_CUDA
1158 || std::is_same_v<
typename TeamHandle::execution_space,
1160 #elif defined(KOKKOS_ENABLE_HIP)
1161 || std::is_same_v<
typename TeamHandle::execution_space,
1163 #elif defined(KOKKOS_ENABLE_SYCL)
1164 || std::is_same_v<
typename TeamHandle::execution_space,
1165 Kokkos::Experimental::SYCL>
1168 policy.team.vector_reduce(
1169 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1173 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1174 KOKKOS_INLINE_FUNCTION
void parallel_for(
1175 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1176 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1179 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1180 typename ReducerValueType>
1181 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1182 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1183 ReducerValueType& val) {
1185 !std::is_array_v<ReducerValueType> &&
1186 !std::is_pointer_v<ReducerValueType> &&
1187 !Kokkos::is_reducer_v<ReducerValueType>,
1188 "Only a scalar return types are allowed!");
1190 val = ReducerValueType{};
1191 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1193 #ifdef KOKKOS_ENABLE_CUDA
1194 || std::is_same_v<
typename TeamHandle::execution_space,
1196 #elif defined(KOKKOS_ENABLE_HIP)
1197 || std::is_same_v<
typename TeamHandle::execution_space,
1199 #elif defined(KOKKOS_ENABLE_SYCL)
1200 || std::is_same_v<
typename TeamHandle::execution_space,
1201 Kokkos::Experimental::SYCL>
1204 policy.team.vector_reduce(
1205 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1207 policy.team.team_reduce(
1208 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1211 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1212 KOKKOS_INLINE_FUNCTION
void parallel_for(
1213 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1214 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
// Primary template, declared only. HasTag defaults to "TagType is not void"
// and selects between the two partial specializations below: the `true` one
// builds its default name from both the functor and tag type names, the
// `false` one from the functor type name alone.
1219 template <
typename FunctorType,
typename TagType,
1220 bool HasTag = !std::is_void<TagType>::value>
1221 struct ParallelConstructName;
1223 template <
typename FunctorType,
typename TagType>
1224 struct ParallelConstructName<FunctorType, TagType, true> {
1225 ParallelConstructName(std::string
const& label) : label_ref(label) {
1226 if (label.empty()) {
1227 default_name = std::string(
typeid(FunctorType).name()) +
"/" +
1228 typeid(TagType).name();
1231 std::string
const&
get() {
1232 return (label_ref.empty()) ? default_name : label_ref;
1234 std::string
const& label_ref;
1235 std::string default_name;
1238 template <
typename FunctorType,
typename TagType>
1239 struct ParallelConstructName<FunctorType, TagType, false> {
1240 ParallelConstructName(std::string
const& label) : label_ref(label) {
1241 if (label.empty()) {
1242 default_name = std::string(
typeid(FunctorType).name());
1245 std::string
const&
get() {
1246 return (label_ref.empty()) ? default_name : label_ref;
1248 std::string
const& label_ref;
1249 std::string default_name;
1260 template <
class PatternTag,
class... Args>
1261 struct PatternImplSpecializationFromTag;
1263 template <
class... Args>
1264 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1265 : type_identity<ParallelFor<Args...>> {};
1267 template <
class... Args>
1268 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1269 : type_identity<ParallelReduce<Args...>> {};
1271 template <
class... Args>
1272 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1273 : type_identity<ParallelScan<Args...>> {};
1275 template <
class PatternImpl>
1276 struct PatternTagFromImplSpecialization;
1278 template <
class... Args>
1279 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1280 : type_identity<ParallelForTag> {};
1282 template <
class... Args>
1283 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1284 : type_identity<ParallelReduceTag> {};
1286 template <
class... Args>
1287 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1288 : type_identity<ParallelScanTag> {};
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range.
member_type chunk_size() const
Returns the chunk size.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy & set_chunk_size(int chunk_size)
Sets the chunk size to a discrete value.
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...