17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_EXECPOLICY_HPP
23 #define KOKKOS_EXECPOLICY_HPP
25 #include <Kokkos_Core_fwd.hpp>
26 #include <impl/Kokkos_Traits.hpp>
27 #include <impl/Kokkos_Error.hpp>
28 #include <impl/Kokkos_AnalyzePolicy.hpp>
29 #include <Kokkos_Concepts.hpp>
// Empty tag types, one per parallel pattern (for, scan, reduce). Elsewhere in
// this file they select ParallelFor / ParallelScan / ParallelReduce through
// PatternImplSpecializationFromTag and are produced by
// PatternTagFromImplSpecialization.
37 struct ParallelForTag {};
38 struct ParallelScanTag {};
39 struct ParallelReduceTag {};
43 ChunkSize(
int value_) : value(value_) {}
67 template <
class... Properties>
68 class RangePolicy :
public Impl::PolicyTraits<Properties...> {
70 using traits = Impl::PolicyTraits<Properties...>;
73 typename traits::execution_space m_space;
74 typename traits::index_type m_begin;
75 typename traits::index_type m_end;
76 typename traits::index_type m_granularity;
77 typename traits::index_type m_granularity_mask;
79 template <
class... OtherProperties>
85 using member_type =
typename traits::index_type;
86 using index_type =
typename traits::index_type;
88 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space& space()
const {
91 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
92 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
99 void operator()(
const int&)
const {}
101 template <
class... OtherProperties>
102 RangePolicy(
const RangePolicy<OtherProperties...>& p)
107 m_granularity(p.m_granularity),
108 m_granularity_mask(p.m_granularity_mask) {}
115 m_granularity_mask(0) {}
118 template <
typename IndexType1,
typename IndexType2,
119 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
120 std::is_convertible_v<IndexType2, member_type>),
122 inline RangePolicy(
const IndexType1 work_begin,
const IndexType2 work_end)
123 :
RangePolicy(
typename traits::execution_space(), work_begin, work_end) {}
126 template <
typename IndexType1,
typename IndexType2,
127 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
128 std::is_convertible_v<IndexType2, member_type>),
130 inline RangePolicy(
const typename traits::execution_space& work_space,
131 const IndexType1 work_begin,
const IndexType2 work_end)
132 : m_space(work_space),
136 m_granularity_mask(0) {
137 check_conversion_safety(work_begin);
138 check_conversion_safety(work_end);
139 check_bounds_validity();
140 set_auto_chunk_size();
143 template <
typename IndexType1,
typename IndexType2,
144 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
145 std::is_convertible_v<IndexType2, member_type>),
147 RangePolicy(
const typename traits::execution_space& work_space,
148 const IndexType1 work_begin,
const IndexType2 work_end,
150 : m_space(work_space),
154 m_granularity_mask(0) {
155 check_conversion_safety(work_begin);
156 check_conversion_safety(work_end);
157 check_bounds_validity();
162 template <
typename IndexType1,
typename IndexType2,
typename... Args,
163 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
164 std::is_convertible_v<IndexType2, member_type>),
166 RangePolicy(
const IndexType1 work_begin,
const IndexType2 work_end,
167 const ChunkSize chunk_size)
168 :
RangePolicy(
typename traits::execution_space(), work_begin, work_end,
172 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
173 KOKKOS_DEPRECATED_WITH_COMMENT(
"Use set_chunk_size instead")
174 inline
void set(ChunkSize chunksize) {
175 m_granularity = chunksize.value;
176 m_granularity_mask = m_granularity - 1;
182 inline member_type
chunk_size()
const {
return m_granularity; }
187 m_granularity_mask = m_granularity - 1;
193 inline void set_auto_chunk_size() {
194 #ifdef KOKKOS_ENABLE_SYCL
195 if (std::is_same_v<
typename traits::execution_space,
196 Kokkos::Experimental::SYCL>) {
200 m_granularity_mask = 0;
204 auto concurrency =
static_cast<int64_t
>(m_space.concurrency());
205 if (concurrency == 0) concurrency = 1;
207 if (m_granularity > 0) {
208 if (!Impl::is_integral_power_of_two(m_granularity))
209 Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
212 int64_t new_chunk_size = 1;
213 while (new_chunk_size * 100 * concurrency <
214 static_cast<int64_t>(m_end - m_begin))
216 if (new_chunk_size < 128) {
218 while ((new_chunk_size * 40 * concurrency <
219 static_cast<int64_t>(m_end - m_begin)) &&
220 (new_chunk_size < 128))
223 m_granularity = new_chunk_size;
224 m_granularity_mask = m_granularity - 1;
227 void check_bounds_validity() {
228 if (m_end < m_begin) {
229 std::string msg =
"Kokkos::RangePolicy bounds error: The lower bound (" +
230 std::to_string(m_begin) +
231 ") is greater than the upper bound (" +
232 std::to_string(m_end) +
").\n";
233 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
234 Kokkos::abort(msg.c_str());
238 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
239 Kokkos::Impl::log_warning(msg);
245 template <
typename IndexType>
246 static void check_conversion_safety(
const IndexType bound) {
247 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \
248 defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
251 "Kokkos::RangePolicy bound type error: an unsafe implicit conversion "
252 "is performed on a bound (" +
253 std::to_string(bound) +
255 "not preserve its original value.\n";
258 if constexpr (std::is_signed_v<IndexType> !=
259 std::is_signed_v<member_type>) {
261 if constexpr (std::is_signed_v<IndexType>)
262 warn |= (bound < static_cast<IndexType>(
263 std::numeric_limits<member_type>::min()));
266 if constexpr (std::is_signed_v<member_type>)
267 warn |= (bound > static_cast<IndexType>(
268 std::numeric_limits<member_type>::max()));
272 warn |= (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
275 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
276 Kokkos::abort(msg.c_str());
279 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
280 Kokkos::Impl::log_warning(msg);
294 using work_tag =
typename RangePolicy<Properties...>::work_tag;
295 using member_type =
typename RangePolicy<Properties...>::member_type;
297 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
298 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
304 KOKKOS_INLINE_FUNCTION
307 : m_begin(0), m_end(0) {
310 const member_type work_part =
311 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
312 range.m_granularity_mask) &
313 ~member_type(range.m_granularity_mask);
315 m_begin = range.begin() + work_part * part_rank;
316 m_end = m_begin + work_part;
318 if (range.end() < m_begin) m_begin = range.end();
319 if (range.end() < m_end) m_end = range.end();
// Class template argument deduction guides for RangePolicy. Each signature
// below deduces the default specialization RangePolicy<>:
//   - no arguments,
//   - (int64_t, int64_t),
//   - (int64_t, int64_t, ChunkSize const&),
//   - (DefaultExecutionSpace const&, int64_t, int64_t).
331 RangePolicy()->RangePolicy<>;
333 RangePolicy(int64_t, int64_t)->RangePolicy<>;
334 RangePolicy(int64_t, int64_t, ChunkSize
const&)->RangePolicy<>;
336 RangePolicy(DefaultExecutionSpace
const&, int64_t, int64_t)->RangePolicy<>;
337 RangePolicy(DefaultExecutionSpace
const&, int64_t, int64_t, ChunkSize
const&)
// Deduction guides for an explicitly passed execution space instance: for any
// type ES for which is_execution_space_v<ES> holds, (ES const&, int64_t,
// int64_t) and (ES const&, int64_t, int64_t, ChunkSize const&) deduce
// RangePolicy<ES>. The defaulted second template parameter removes the guide
// from consideration when ES is not an execution space.
340 template <
typename ES,
typename = std::enable_if_t<is_execution_space_v<ES>>>
341 RangePolicy(ES
const&, int64_t, int64_t)->RangePolicy<ES>;
343 template <
typename ES,
typename = std::enable_if_t<is_execution_space_v<ES>>>
344 RangePolicy(ES
const&, int64_t, int64_t, ChunkSize
const&)->RangePolicy<ES>;
355 template <
class ExecSpace,
class... Properties>
356 class TeamPolicyInternal :
public Impl::PolicyTraits<Properties...> {
358 using traits = Impl::PolicyTraits<Properties...>;
361 using index_type =
typename traits::index_type;
// Team-size query declarations (no definitions are visible in this section).
// All return int and are templated on the functor type:
//   - static team_size_max(functor)
//   - static team_size_recommended(functor)
//   - static team_size_recommended(functor, const int&)
//   - non-static team_size_recommended(functor, vector_length)
// NOTE(review): the meaning of the unnamed `const int&` argument and the exact
// semantics of "max"/"recommended" are not visible here — confirm against the
// backend implementations.
374 template <
class FunctorType>
375 static int team_size_max(
const FunctorType&);
387 template <
class FunctorType>
388 static int team_size_recommended(
const FunctorType&);
390 template <
class FunctorType>
391 static int team_size_recommended(
const FunctorType&,
const int&);
393 template <
class FunctorType>
394 int team_size_recommended(
const FunctorType& functor,
395 const int vector_length);
// Constructor declarations (no definitions in this section). Four overloads:
// with or without a leading execution space instance, and with the team size
// given either as an int or as Kokkos::AUTO_t. In every overload
// vector_length_request is optional and defaults to 1.
399 TeamPolicyInternal(
const typename traits::execution_space&,
400 int league_size_request,
int team_size_request,
401 int vector_length_request = 1);
403 TeamPolicyInternal(
const typename traits::execution_space&,
404 int league_size_request,
const Kokkos::AUTO_t&,
405 int vector_length_request = 1);
409 TeamPolicyInternal(
int league_size_request,
int team_size_request,
410 int vector_length_request = 1);
412 TeamPolicyInternal(
int league_size_request,
const Kokkos::AUTO_t&,
413 int vector_length_request = 1);
// Policy accessor declarations (no definitions in this section):
//   league_size(), team_size(), impl_vector_length()  -> int
//   impl_auto_team_size(), impl_auto_vector_length()  -> bool
//   vector_length_max()                               -> int (static)
//   chunk_size()                                      -> traits::index_type
//   set_chunk_size(int) returns TeamPolicyInternal& so calls can be chained;
//   TeamPolicy::set_chunk_size in this file static_asserts on that reference
//   return type.
424 KOKKOS_INLINE_FUNCTION
int league_size()
const;
431 KOKKOS_INLINE_FUNCTION
int team_size()
const;
435 inline bool impl_auto_team_size()
const;
438 inline bool impl_auto_vector_length()
const;
440 static int vector_length_max();
442 KOKKOS_INLINE_FUNCTION
int impl_vector_length()
const;
444 inline typename traits::index_type chunk_size()
const;
446 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
// Team-member interface declarations (no definitions in this section):
// scratch-memory handle, league/team rank and size queries, a team barrier,
// a team reduction taking a value and a JoinOp, and two team_scan overloads
// (the second additionally takes a `Type* const global_accum`).
// NOTE(review): league_size() and team_size() are declared a second time
// here, so these declarations presumably belong to a nested member type whose
// opening line is not visible in this excerpt — confirm.
453 KOKKOS_INLINE_FUNCTION
454 typename traits::execution_space::scratch_memory_space team_shmem()
const;
457 KOKKOS_INLINE_FUNCTION
int league_rank()
const;
460 KOKKOS_INLINE_FUNCTION
int league_size()
const;
463 KOKKOS_INLINE_FUNCTION
int team_rank()
const;
466 KOKKOS_INLINE_FUNCTION
int team_size()
const;
469 KOKKOS_INLINE_FUNCTION
void team_barrier()
const;
473 template <
class JoinOp>
474 KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type team_reduce(
475 const typename JoinOp::value_type,
const JoinOp&)
const;
482 template <
typename Type>
483 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value)
const;
494 template <
typename Type>
495 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value,
496 Type*
const global_accum)
const;
500 struct PerTeamValue {
502 PerTeamValue(
size_t arg);
505 struct PerThreadValue {
507 PerThreadValue(
size_t arg);
510 template <
class iType,
class... Args>
511 struct ExtractVectorLength {
512 static inline iType value(
513 std::enable_if_t<std::is_integral<iType>::value, iType> val, Args...) {
516 static inline std::enable_if_t<!std::is_integral<iType>::value,
int> value(
517 std::enable_if_t<!std::is_integral<iType>::value, iType>, Args...) {
522 template <
class iType,
class... Args>
523 inline std::enable_if_t<std::is_integral<iType>::value, iType>
524 extract_vector_length(iType val, Args...) {
528 template <
class iType,
class... Args>
529 inline std::enable_if_t<!std::is_integral<iType>::value,
int>
530 extract_vector_length(iType, Args...) {
// Factory declarations producing the Impl::PerTeamValue / Impl::PerThreadValue
// wrappers from a size_t. Those wrapper types are what the ScratchRequest
// constructors and TeamPolicy::set_scratch_size overloads in this file accept.
536 Impl::PerTeamValue PerTeam(
const size_t& arg);
537 Impl::PerThreadValue PerThread(
const size_t& arg);
539 struct ScratchRequest {
545 inline ScratchRequest(
const int& level_,
546 const Impl::PerTeamValue& team_value) {
548 per_team = team_value.value;
552 inline ScratchRequest(
const int& level_,
553 const Impl::PerThreadValue& thread_value) {
556 per_thread = thread_value.value;
559 inline ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
560 const Impl::PerThreadValue& thread_value) {
562 per_team = team_value.value;
563 per_thread = thread_value.value;
566 inline ScratchRequest(
const int& level_,
567 const Impl::PerThreadValue& thread_value,
568 const Impl::PerTeamValue& team_value) {
570 per_team = team_value.value;
571 per_thread = thread_value.value;
// Declaration only. Called with the scratch `level` by every
// TeamPolicy::set_scratch_size overload in this file before the request is
// forwarded to the internal policy.
// NOTE(review): what happens for an invalid level is defined elsewhere.
576 void team_policy_check_valid_storage_level_argument(
int level);
604 template <
class... Properties>
606 :
public Impl::TeamPolicyInternal<
607 typename Impl::PolicyTraits<Properties...>::execution_space,
609 using internal_policy = Impl::TeamPolicyInternal<
610 typename Impl::PolicyTraits<Properties...>::execution_space,
613 template <
class... OtherProperties>
617 using traits = Impl::PolicyTraits<Properties...>;
625 int league_size_request,
int team_size_request,
626 int vector_length_request = 1)
627 : internal_policy(space_, league_size_request, team_size_request,
628 vector_length_request) {}
630 TeamPolicy(
const typename traits::execution_space& space_,
631 int league_size_request,
const Kokkos::AUTO_t&,
632 int vector_length_request = 1)
633 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
634 vector_length_request) {}
636 TeamPolicy(
const typename traits::execution_space& space_,
637 int league_size_request,
const Kokkos::AUTO_t&,
638 const Kokkos::AUTO_t&)
639 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
641 TeamPolicy(
const typename traits::execution_space& space_,
642 int league_size_request,
const int team_size_request,
643 const Kokkos::AUTO_t&)
644 : internal_policy(space_, league_size_request, team_size_request,
649 int vector_length_request = 1)
650 : internal_policy(league_size_request, team_size_request,
651 vector_length_request) {}
653 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
654 int vector_length_request = 1)
655 : internal_policy(league_size_request, Kokkos::AUTO(),
656 vector_length_request) {}
658 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
659 const Kokkos::AUTO_t&)
660 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
661 TeamPolicy(
int league_size_request,
const int team_size_request,
662 const Kokkos::AUTO_t&)
663 : internal_policy(league_size_request, team_size_request,
666 template <
class... OtherProperties>
667 TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
670 internal_policy::traits::operator=(p);
674 TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
677 inline TeamPolicy& set_chunk_size(
int chunk) {
678 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
679 internal_policy&>::value,
680 "internal set_chunk_size should return a reference");
681 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
684 inline TeamPolicy& set_scratch_size(
const int& level,
685 const Impl::PerTeamValue& per_team) {
686 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
688 internal_policy&>::value,
689 "internal set_chunk_size should return a reference");
691 team_policy_check_valid_storage_level_argument(level);
692 return static_cast<TeamPolicy&
>(
693 internal_policy::set_scratch_size(level, per_team));
695 inline TeamPolicy& set_scratch_size(
const int& level,
696 const Impl::PerThreadValue& per_thread) {
697 team_policy_check_valid_storage_level_argument(level);
698 return static_cast<TeamPolicy&
>(
699 internal_policy::set_scratch_size(level, per_thread));
701 inline TeamPolicy& set_scratch_size(
const int& level,
702 const Impl::PerTeamValue& per_team,
703 const Impl::PerThreadValue& per_thread) {
704 team_policy_check_valid_storage_level_argument(level);
705 return static_cast<TeamPolicy&
>(
706 internal_policy::set_scratch_size(level, per_team, per_thread));
708 inline TeamPolicy& set_scratch_size(
const int& level,
709 const Impl::PerThreadValue& per_thread,
710 const Impl::PerTeamValue& per_team) {
711 team_policy_check_valid_storage_level_argument(level);
712 return static_cast<TeamPolicy&
>(
713 internal_policy::set_scratch_size(level, per_team, per_thread));
719 template <
typename iType,
class TeamMemberType>
720 struct TeamThreadRangeBoundariesStruct {
722 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
723 const iType& arg_end,
724 const iType& arg_rank,
725 const iType& arg_size) {
727 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
730 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
731 const iType& arg_end,
732 const iType& arg_rank,
733 const iType& arg_size) {
736 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
737 return end_ < arg_end ? end_ : arg_end;
741 using index_type = iType;
744 enum { increment = 1 };
745 const TeamMemberType& thread;
747 KOKKOS_INLINE_FUNCTION
748 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
749 const iType& arg_end)
751 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
752 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
753 thread(arg_thread) {}
755 KOKKOS_INLINE_FUNCTION
756 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
757 const iType& arg_begin,
const iType& arg_end)
758 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
759 arg_thread.team_size())),
760 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
761 arg_thread.team_size())),
762 thread(arg_thread) {}
765 template <
typename iType,
class TeamMemberType>
766 struct TeamVectorRangeBoundariesStruct {
768 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
769 const iType& arg_end,
770 const iType& arg_rank,
771 const iType& arg_size) {
773 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
776 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
777 const iType& arg_end,
778 const iType& arg_rank,
779 const iType& arg_size) {
782 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
783 return end_ < arg_end ? end_ : arg_end;
787 using index_type = iType;
790 enum { increment = 1 };
791 const TeamMemberType& thread;
793 KOKKOS_INLINE_FUNCTION
794 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
795 const iType& arg_end)
797 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
798 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
799 thread(arg_thread) {}
801 KOKKOS_INLINE_FUNCTION
802 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
803 const iType& arg_begin,
const iType& arg_end)
804 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
805 arg_thread.team_size())),
806 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
807 arg_thread.team_size())),
808 thread(arg_thread) {}
811 template <
typename iType,
class TeamMemberType>
812 struct ThreadVectorRangeBoundariesStruct {
813 using index_type = iType;
814 const index_type start;
815 const index_type end;
816 enum { increment = 1 };
818 KOKKOS_INLINE_FUNCTION
819 constexpr ThreadVectorRangeBoundariesStruct(
const TeamMemberType,
820 const index_type& count) noexcept
821 : start(static_cast<index_type>(0)), end(count) {}
823 KOKKOS_INLINE_FUNCTION
824 constexpr ThreadVectorRangeBoundariesStruct(
825 const TeamMemberType,
const index_type& arg_begin,
826 const index_type& arg_end) noexcept
827 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
830 template <
class TeamMemberType>
831 struct ThreadSingleStruct {
832 const TeamMemberType& team_member;
833 KOKKOS_INLINE_FUNCTION
834 ThreadSingleStruct(
const TeamMemberType& team_member_)
835 : team_member(team_member_) {}
838 template <
class TeamMemberType>
839 struct VectorSingleStruct {
840 const TeamMemberType& team_member;
841 KOKKOS_INLINE_FUNCTION
842 VectorSingleStruct(
const TeamMemberType& team_member_)
843 : team_member(team_member_) {}
// Deleted placeholder overloads of TeamThreadRange: (team member, count) and
// (team member, begin, end). Both are `= delete`, and the trailing template
// parameter `_never_use_this_overload` does not appear in the function
// parameters, so it cannot be deduced from a call. The two-bound form returns
// a boundaries struct indexed by std::common_type_t<iType1, iType2>.
// NOTE(review): presumably these exist only to document the signature that
// backend-specific overloads provide — confirm.
855 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
856 KOKKOS_INLINE_FUNCTION_DELETED
857 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
858 TeamThreadRange(
const TeamMemberType&,
const iType& count) =
delete;
867 template <
typename iType1,
typename iType2,
class TeamMemberType,
868 class _never_use_this_overload>
869 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
870 std::common_type_t<iType1, iType2>, TeamMemberType>
871 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
872 const iType2& end) =
delete;
// Deleted placeholder overloads of TeamVectorRange: (team member, count) and
// (team member, begin, end). Both are `= delete`, and the trailing template
// parameter `_never_use_this_overload` does not appear in the function
// parameters, so it cannot be deduced from a call.
// NOTE(review): the declared return type is
// Impl::TeamThreadRangeBoundariesStruct even though this file also defines
// Impl::TeamVectorRangeBoundariesStruct — confirm whether that is intended.
881 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
882 KOKKOS_INLINE_FUNCTION_DELETED
883 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
884 TeamVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
893 template <
typename iType1,
typename iType2,
class TeamMemberType,
894 class _never_use_this_overload>
895 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
896 std::common_type_t<iType1, iType2>, TeamMemberType>
897 TeamVectorRange(
const TeamMemberType&,
const iType1& begin,
898 const iType2& end) =
delete;
// Deleted placeholder overloads of ThreadVectorRange: (team member, count) and
// (team member, arg_begin, arg_end). Both are `= delete`, and the trailing
// template parameter `_never_use_this_overload` does not appear in the
// function parameters, so it cannot be deduced from a call. The declared
// return type is Impl::ThreadVectorRangeBoundariesStruct; the two-bound form
// uses std::common_type_t<iType1, iType2> as its index type.
907 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
908 KOKKOS_INLINE_FUNCTION_DELETED
909 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
910 ThreadVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
912 template <
typename iType1,
typename iType2,
class TeamMemberType,
913 class _never_use_this_overload>
914 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
915 std::common_type_t<iType1, iType2>, TeamMemberType>
916 ThreadVectorRange(
const TeamMemberType&,
const iType1& arg_begin,
917 const iType2& arg_end) =
delete;
// Two-valued scoped enums with underlying type bool, used as named flags for
// the team-level MDRange machinery. In each enum the first enumerator is the
// "off" state (value false) and the second the "on" state (value true).
// TeamMDRangeParThread / TeamMDRangeParVector are the types of the
// par_thread / par_vector constants of the *MDRange structs in this file;
// TeamMDRangeThreadAndVector is a template parameter of the nest-level
// helpers declared below.
921 enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
922 enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
923 enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
924 enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
// Forward declarations only; definitions are not part of this section.
// HostBasedNestLevel and AcceleratorBasedNestLevel are parameterized on a Rank
// type and a TeamMDRangeThreadAndVector flag; ThreadAndVectorNestLevel takes
// an additional ExecSpace type.
// NOTE(review): presumably ThreadAndVectorNestLevel selects between the host
// and accelerator variants based on ExecSpace — confirm at the definitions.
926 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
927 struct HostBasedNestLevel;
929 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
930 struct AcceleratorBasedNestLevel;
940 template <
typename Rank,
typename ExecSpace,
941 TeamMDRangeThreadAndVector ThreadAndVector>
942 struct ThreadAndVectorNestLevel;
// Empty tag type. The MDRange parallel_for overloads in this file pass a
// NoReductionTag instance as the third argument of md_parallel_impl, in the
// position where parallel_reduce passes the reduction value.
944 struct NoReductionTag {};
// Declaration only; the definition is not part of this section. Callers in
// this file supply Rank explicitly (md_parallel_impl<Rank>(...)) and pass the
// MDRange policy, the user lambda, and either the reduction value or an
// Impl::NoReductionTag as `val`.
946 template <
typename Rank,
typename TeamMDPolicy,
typename Lambda,
947 typename ReductionValueType>
948 KOKKOS_INLINE_FUNCTION
void md_parallel_impl(TeamMDPolicy
const& policy,
949 Lambda
const& lambda,
950 ReductionValueType&& val);
// Primary template, declared but not defined; only the partial specialization
// for Rank<N, OuterDir, InnerDir> that follows provides a definition.
953 template <
typename Rank,
typename TeamHandle>
954 struct TeamThreadMDRange;
956 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
957 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
958 using NestLevelType = int;
959 using BoundaryType = int;
960 using TeamHandleType = TeamHandle;
961 using ExecutionSpace =
typename TeamHandleType::execution_space;
962 using ArrayLayout =
typename ExecutionSpace::array_layout;
964 static constexpr NestLevelType total_nest_level =
965 Rank<N, OuterDir, InnerDir>::rank;
966 static constexpr Iterate iter = OuterDir;
967 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
968 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
970 static constexpr Iterate direction =
971 OuterDir == Iterate::Default
972 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
975 template <
class... Args>
976 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType
const& team_, Args&&... args)
977 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
978 static_assert(
sizeof...(Args) == total_nest_level);
981 TeamHandleType
const& team;
982 BoundaryType boundaries[total_nest_level];
// Deduction guide: TeamThreadMDRange(team, b0, b1, ...) deduces a rank equal
// to the number of boundary arguments, with Iterate::Default as the outer
// iteration direction.
985 template <
typename TeamHandle,
typename... Args>
986 TeamThreadMDRange(TeamHandle
const&, Args&&...)
987 ->TeamThreadMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// Primary template, declared but not defined; only the partial specialization
// for Rank<N, OuterDir, InnerDir> that follows provides a definition.
989 template <
typename Rank,
typename TeamHandle>
990 struct ThreadVectorMDRange;
992 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
993 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
994 using NestLevelType = int;
995 using BoundaryType = int;
996 using TeamHandleType = TeamHandle;
997 using ExecutionSpace =
typename TeamHandleType::execution_space;
998 using ArrayLayout =
typename ExecutionSpace::array_layout;
1000 static constexpr NestLevelType total_nest_level =
1001 Rank<N, OuterDir, InnerDir>::rank;
1002 static constexpr Iterate iter = OuterDir;
1003 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
1004 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1006 static constexpr Iterate direction =
1007 OuterDir == Iterate::Default
1008 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
1011 template <
class... Args>
1012 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType
const& team_,
1014 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1015 static_assert(
sizeof...(Args) == total_nest_level);
1018 TeamHandleType
const& team;
1019 BoundaryType boundaries[total_nest_level];
// Deduction guide: ThreadVectorMDRange(team, b0, b1, ...) deduces a rank equal
// to the number of boundary arguments, with Iterate::Default as the outer
// iteration direction.
1022 template <
typename TeamHandle,
typename... Args>
1023 ThreadVectorMDRange(TeamHandle
const&, Args&&...)
1024 ->ThreadVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
// Primary template, declared but not defined; only the partial specialization
// for Rank<N, OuterDir, InnerDir> that follows provides a definition.
1026 template <
typename Rank,
typename TeamHandle>
1027 struct TeamVectorMDRange;
1029 template <
unsigned N, Iterate OuterDir, Iterate InnerDir,
typename TeamHandle>
1030 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1031 using NestLevelType = int;
1032 using BoundaryType = int;
1033 using TeamHandleType = TeamHandle;
1034 using ExecutionSpace =
typename TeamHandleType::execution_space;
1035 using ArrayLayout =
typename ExecutionSpace::array_layout;
1037 static constexpr NestLevelType total_nest_level =
1038 Rank<N, OuterDir, InnerDir>::rank;
1039 static constexpr Iterate iter = OuterDir;
1040 static constexpr
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1041 static constexpr
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1043 static constexpr Iterate direction =
1044 iter == Iterate::Default
1045 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
1048 template <
class... Args>
1049 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType
const& team_,
1051 : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
1052 static_assert(
sizeof...(Args) == total_nest_level);
1055 TeamHandleType
const& team;
1056 BoundaryType boundaries[total_nest_level];
// Deduction guide: TeamVectorMDRange(team, b0, b1, ...) deduces a rank equal
// to the number of boundary arguments, with Iterate::Default as the outer
// iteration direction.
1059 template <
typename TeamHandle,
typename... Args>
1060 TeamVectorMDRange(TeamHandle
const&, Args&&...)
1061 ->TeamVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
1063 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1064 typename ReducerValueType>
1065 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1066 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1067 ReducerValueType& val) {
1069 !std::is_array_v<ReducerValueType> &&
1070 !std::is_pointer_v<ReducerValueType> &&
1071 !Kokkos::is_reducer_v<ReducerValueType>,
1072 "Only scalar return types are allowed!");
1074 val = ReducerValueType{};
1075 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1076 policy.team.team_reduce(
1077 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1080 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1081 KOKKOS_INLINE_FUNCTION
void parallel_for(
1082 TeamThreadMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1083 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1086 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1087 typename ReducerValueType>
1088 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1089 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1090 ReducerValueType& val) {
1092 !std::is_array_v<ReducerValueType> &&
1093 !std::is_pointer_v<ReducerValueType> &&
1094 !Kokkos::is_reducer_v<ReducerValueType>,
1095 "Only a scalar return types are allowed!");
1097 val = ReducerValueType{};
1098 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1100 #ifdef KOKKOS_ENABLE_CUDA
1101 || std::is_same_v<
typename TeamHandle::execution_space,
1103 #elif defined(KOKKOS_ENABLE_HIP)
1104 || std::is_same_v<
typename TeamHandle::execution_space,
1106 #elif defined(KOKKOS_ENABLE_SYCL)
1107 || std::is_same_v<
typename TeamHandle::execution_space,
1108 Kokkos::Experimental::SYCL>
1111 policy.team.vector_reduce(
1112 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1116 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1117 KOKKOS_INLINE_FUNCTION
void parallel_for(
1118 ThreadVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1119 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1122 template <
typename Rank,
typename TeamHandle,
typename Lambda,
1123 typename ReducerValueType>
1124 KOKKOS_INLINE_FUNCTION
void parallel_reduce(
1125 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda,
1126 ReducerValueType& val) {
1128 !std::is_array_v<ReducerValueType> &&
1129 !std::is_pointer_v<ReducerValueType> &&
1130 !Kokkos::is_reducer_v<ReducerValueType>,
1131 "Only a scalar return types are allowed!");
1133 val = ReducerValueType{};
1134 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1136 #ifdef KOKKOS_ENABLE_CUDA
1137 || std::is_same_v<
typename TeamHandle::execution_space,
1139 #elif defined(KOKKOS_ENABLE_HIP)
1140 || std::is_same_v<
typename TeamHandle::execution_space,
1142 #elif defined(KOKKOS_ENABLE_SYCL)
1143 || std::is_same_v<
typename TeamHandle::execution_space,
1144 Kokkos::Experimental::SYCL>
1147 policy.team.vector_reduce(
1148 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1150 policy.team.team_reduce(
1151 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1154 template <
typename Rank,
typename TeamHandle,
typename Lambda>
1155 KOKKOS_INLINE_FUNCTION
void parallel_for(
1156 TeamVectorMDRange<Rank, TeamHandle>
const& policy, Lambda
const& lambda) {
1157 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
// Primary template, declared only. HasTag defaults to true exactly when
// TagType is not void; the two partial specializations that follow (HasTag
// true / false) build the default kernel name with or without the tag's
// typeid name appended.
1162 template <
typename FunctorType,
typename TagType,
1163 bool HasTag = !std::is_void<TagType>::value>
1164 struct ParallelConstructName;
1166 template <
typename FunctorType,
typename TagType>
1167 struct ParallelConstructName<FunctorType, TagType, true> {
1168 ParallelConstructName(std::string
const& label) : label_ref(label) {
1169 if (label.empty()) {
1170 default_name = std::string(
typeid(FunctorType).name()) +
"/" +
1171 typeid(TagType).name();
1174 std::string
const&
get() {
1175 return (label_ref.empty()) ? default_name : label_ref;
1177 std::string
const& label_ref;
1178 std::string default_name;
1181 template <
typename FunctorType,
typename TagType>
1182 struct ParallelConstructName<FunctorType, TagType, false> {
1183 ParallelConstructName(std::string
const& label) : label_ref(label) {
1184 if (label.empty()) {
1185 default_name = std::string(
typeid(FunctorType).name());
1188 std::string
const&
get() {
1189 return (label_ref.empty()) ? default_name : label_ref;
1191 std::string
const& label_ref;
1192 std::string default_name;
// Maps a pattern tag plus template arguments to the matching implementation
// template. The primary template is declared only; each specialization
// derives from type_identity of the result:
//   Kokkos::ParallelForTag    -> ParallelFor<Args...>
//   Kokkos::ParallelReduceTag -> ParallelReduce<Args...>
//   Kokkos::ParallelScanTag   -> ParallelScan<Args...>
// Any other tag leaves the primary template incomplete.
1203 template <
class PatternTag,
class... Args>
1204 struct PatternImplSpecializationFromTag;
1206 template <
class... Args>
1207 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1208 : type_identity<ParallelFor<Args...>> {};
1210 template <
class... Args>
1211 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1212 : type_identity<ParallelReduce<Args...>> {};
1214 template <
class... Args>
1215 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1216 : type_identity<ParallelScan<Args...>> {};
// Inverse of PatternImplSpecializationFromTag: recovers the pattern tag from
// an implementation specialization. The primary template is declared only;
// each specialization derives from type_identity of the tag:
//   ParallelFor<Args...>    -> ParallelForTag
//   ParallelReduce<Args...> -> ParallelReduceTag
//   ParallelScan<Args...>   -> ParallelScanTag
1218 template <
class PatternImpl>
1219 struct PatternTagFromImplSpecialization;
1221 template <
class... Args>
1222 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1223 : type_identity<ParallelForTag> {};
1225 template <
class... Args>
1226 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1227 : type_identity<ParallelReduceTag> {};
1229 template <
class... Args>
1230 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1231 : type_identity<ParallelScanTag> {};
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range.
member_type chunk_size() const
Returns the chunk size.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy & set_chunk_size(int chunk_size)
Sets chunk_size to a discrete value.
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...