44 #ifndef KOKKOS_EXECPOLICY_HPP
45 #define KOKKOS_EXECPOLICY_HPP
47 #include <Kokkos_Core_fwd.hpp>
48 #include <impl/Kokkos_Traits.hpp>
49 #include <impl/Kokkos_StaticAssert.hpp>
50 #include <impl/Kokkos_Error.hpp>
51 #include <impl/Kokkos_Tags.hpp>
52 #include <impl/Kokkos_AnalyzePolicy.hpp>
53 #include <Kokkos_Concepts.hpp>
// Empty tag types, one per parallel pattern name (for / scan / reduce).
// NOTE(review): their consumers are not visible in this chunk — presumably
// they are used for compile-time tag dispatch; confirm against callers.
60 struct ParallelForTag {};
61 struct ParallelScanTag {};
62 struct ParallelReduceTag {};
66 ChunkSize(
int value_):value(value_) {}
90 template<
class ... Properties>
92 :
public Impl::PolicyTraits<Properties ... >
95 typedef Impl::PolicyTraits<Properties ... > traits;
// Execution space instance; exposed through space().
97 typename traits::execution_space m_space ;
// Range bounds exposed through begin() / end(). The visible range
// constructors collapse both to 0 when work_begin >= work_end.
98 typename traits::index_type m_begin ;
99 typename traits::index_type m_end ;
// Chunk size. set_auto_chunk_size() aborts if a positive value is not a
// power of two.
100 typename traits::index_type m_granularity ;
// Always assigned as m_granularity - 1 wherever the chunk size is set;
// WorkRange uses it as a bit mask to round a partition's extent up.
101 typename traits::index_type m_granularity_mask ;
// Both aliases name the policy's index type; member_type is the type
// returned by begin() / end().
106 typedef typename traits::index_type member_type ;
107 typedef typename traits::index_type index_type;
109 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space & space()
const {
return m_space ; }
110 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
111 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
116 void operator()(
const int&)
const {}
118 RangePolicy(
const RangePolicy&) =
default;
119 RangePolicy(RangePolicy&&) =
default;
121 inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
126 ,
const member_type work_begin
127 ,
const member_type work_end
129 : m_space( work_space )
130 , m_begin( work_begin < work_end ? work_begin : 0 )
131 , m_end( work_begin < work_end ? work_end : 0 )
133 , m_granularity_mask(0)
135 set_auto_chunk_size();
141 ,
const member_type work_end
144 , work_begin , work_end )
146 set_auto_chunk_size();
150 template<
class ... Args>
153 ,
const member_type work_begin
154 ,
const member_type work_end
157 : m_space( work_space )
158 , m_begin( work_begin < work_end ? work_begin : 0 )
159 , m_end( work_begin < work_end ? work_end : 0 )
161 , m_granularity_mask(0)
163 set_auto_chunk_size();
168 template<
class ... Args>
171 ,
const member_type work_end
175 , work_begin , work_end )
177 set_auto_chunk_size();
185 template<
class ... Args>
186 inline void set(Args ...) {
187 static_assert( 0 ==
sizeof...(Args),
"Kokkos::RangePolicy: unhandled constructor arguments encountered.");
190 template<
class ... Args>
191 inline void set(
const ChunkSize& chunksize, Args ... args) {
192 m_granularity = chunksize.value;
193 m_granularity_mask = m_granularity - 1;
199 return m_granularity;
205 p.m_granularity = chunk_size_;
206 p.m_granularity_mask = p.m_granularity - 1;
212 inline void set_auto_chunk_size() {
214 typename traits::index_type concurrency = traits::execution_space::concurrency();
215 if( concurrency==0 ) concurrency=1;
217 if(m_granularity > 0) {
218 if(!Impl::is_integral_power_of_two( m_granularity ))
219 Kokkos::abort(
"RangePolicy blocking granularity must be power of two" );
222 member_type new_chunk_size = 1;
223 while(new_chunk_size*100*concurrency < m_end-m_begin)
225 if(new_chunk_size < 128) {
227 while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) )
230 m_granularity = new_chunk_size;
231 m_granularity_mask = m_granularity - 1;
// Re-export the enclosing RangePolicy's work tag and index (member) type so
// the subrange uses the same types as the policy it partitions.
240 typedef typename RangePolicy::work_tag work_tag ;
241 typedef typename RangePolicy::member_type member_type ;
243 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
244 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
250 KOKKOS_INLINE_FUNCTION
252 ,
const int part_rank
253 ,
const int part_size
255 : m_begin(0), m_end(0)
260 const member_type work_part =
261 ( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
262 + range.m_granularity_mask ) & ~member_type(range.m_granularity_mask);
264 m_begin = range.begin() + work_part * part_rank ;
265 m_end = m_begin + work_part ;
267 if ( range.end() < m_begin ) m_begin = range.end() ;
268 if ( range.end() < m_end ) m_end = range.end() ;
273 member_type m_begin ;
289 template<
class ExecSpace,
class ... Properties>
290 class TeamPolicyInternal:
public Impl::PolicyTraits<Properties ... > {
292 typedef Impl::PolicyTraits<Properties ... > traits;
296 typedef typename traits::index_type index_type;
// Static query taking the functor to be dispatched and returning an int.
// NOTE(review): presumably the largest team size usable with this functor on
// the execution space; the definition is backend-specific and not visible
// here — confirm.
309 template<
class FunctorType >
310 static int team_size_max(
const FunctorType & );
// Static query taking the functor and returning an int.
// NOTE(review): presumably a suggested team size for this functor; the
// definition is not visible here — confirm.
322 template<
class FunctorType >
323 static int team_size_recommended(
const FunctorType & );
// Static overload with an additional const int& argument.
// NOTE(review): the second parameter is unnamed here; it presumably is the
// vector length, as in the non-static overload below — confirm.
325 template<
class FunctorType >
326 static int team_size_recommended(
const FunctorType & ,
const int&);
// Non-static overload taking the functor and an explicit vector length.
328 template<
class FunctorType>
329 int team_size_recommended(
const FunctorType & functor ,
const int vector_length);
// Construct from an execution space instance, a league size, an explicit
// team size, and an optional vector length (default 1).
333 TeamPolicyInternal(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
// Same, but the team size is given as Kokkos::AUTO instead of an int.
// NOTE(review): presumably lets the backend choose the team size — confirm.
335 TeamPolicyInternal(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
// Overloads without an execution space argument.
// NOTE(review): presumably these use the default instance of the execution
// space, mirroring the public TeamPolicy constructors — confirm.
338 TeamPolicyInternal(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
340 TeamPolicyInternal(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
/// Number of teams in the league.
351 KOKKOS_INLINE_FUNCTION
int league_size()
const ;
/// Number of threads in each team.
358 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
/// Chunk size of this policy, expressed in the policy's index type.
/// NOTE(review): how the backend applies it is not visible here.
360 inline typename traits::index_type chunk_size()
const ;
362 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
363 inline TeamPolicyInternal set_chunk_size(
int chunk_size)
const ;
365 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
/// Handle to the currently executing team's shared scratch memory.
374 KOKKOS_INLINE_FUNCTION
375 typename traits::execution_space::scratch_memory_space
team_shmem()
const ;
/// Rank of this thread within this team.
384 KOKKOS_INLINE_FUNCTION
int team_rank()
const ;
/// Number of threads in this team.
387 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
/// Intra-team reduction: returns the join of the values contributed by all
/// team members. The first argument is the calling thread's value, the
/// second is the join operation; the result type is JoinOp::value_type.
393 template<
class JoinOp >
394 KOKKOS_INLINE_FUNCTION
395 typename JoinOp::value_type
team_reduce(
const typename JoinOp::value_type
396 ,
const JoinOp & )
const ;
/// Intra-team exclusive prefix sum of \p value, ordered by team_rank().
403 template<
typename Type >
404 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type & value )
const ;
/// Overload of the intra-team scan taking an additional \p global_accum
/// pointer.
/// NOTE(review): presumably the team total is accumulated into
/// *global_accum and the result is offset accordingly; the definition is not
/// visible here — confirm.
415 template<
typename Type >
416 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type & value , Type *
const global_accum )
const ;
421 struct PerTeamValue {
423 PerTeamValue(
int arg);
426 struct PerThreadValue {
428 PerThreadValue(
int arg);
431 template<
class iType,
class ... Args>
432 struct ExtractVectorLength {
433 static inline iType value(
typename std::enable_if<std::is_integral<iType>::value,iType>::type val, Args...) {
436 static inline typename std::enable_if<!std::is_integral<iType>::value,
int>::type value(
typename std::enable_if<!std::is_integral<iType>::value,iType>::type, Args...) {
441 template<
class iType,
class ... Args>
442 inline typename std::enable_if<std::is_integral<iType>::value,iType>::type extract_vector_length(iType val, Args...) {
446 template<
class iType,
class ... Args>
447 inline typename std::enable_if<!std::is_integral<iType>::value,
int>::type extract_vector_length(iType, Args...) {
// Factory declarations wrapping an int in the strongly typed
// Impl::PerTeamValue / Impl::PerThreadValue wrappers. Those wrapper types are
// what ScratchRequest and the set_scratch_size overloads accept to tell a
// per-team amount from a per-thread amount.
453 Impl::PerTeamValue PerTeam(
const int& arg);
454 Impl::PerThreadValue PerThread(
const int& arg);
456 struct ScratchRequest {
463 ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value) {
465 per_team = team_value.value;
470 ScratchRequest(
const int& level_,
const Impl::PerThreadValue& thread_value) {
473 per_thread = thread_value.value;;
477 ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
const Impl::PerThreadValue& thread_value) {
479 per_team = team_value.value;
480 per_thread = thread_value.value;;
484 ScratchRequest(
const int& level_,
const Impl::PerThreadValue& thread_value,
const Impl::PerTeamValue& team_value) {
486 per_team = team_value.value;
487 per_thread = thread_value.value;;
517 template<
class ... Properties>
519 Impl::TeamPolicyInternal<
520 typename Impl::PolicyTraits<Properties ... >::execution_space,
522 typedef Impl::TeamPolicyInternal<
523 typename Impl::PolicyTraits<Properties ... >::execution_space,
524 Properties ...> internal_policy;
526 typedef Impl::PolicyTraits<Properties ... > traits;
534 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 )
535 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {first_arg =
false;}
537 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 )
538 : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(), vector_length_request) {first_arg =
false;}
541 TeamPolicy(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 )
542 : internal_policy(league_size_request,team_size_request, vector_length_request) {first_arg =
false;}
544 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 )
545 : internal_policy(league_size_request,Kokkos::AUTO(), vector_length_request) {first_arg =
false;}
547 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
549 template<
class ... Args>
550 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request,
552 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {
557 template<
class ... Args>
558 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request ,
560 : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(), vector_length_request) {
566 template<
class ... Args>
567 TeamPolicy(
int league_size_request ,
int team_size_request ,
int vector_length_request ,
569 : internal_policy(league_size_request,team_size_request, vector_length_request) {
574 template<
class ... Args>
575 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request ,
577 : internal_policy(league_size_request,Kokkos::AUTO(), vector_length_request) {
583 template<
class ... Args>
584 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
586 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request,
587 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
592 template<
class ... Args>
593 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
595 : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(),
596 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
602 template<
class ... Args>
603 TeamPolicy(
int league_size_request ,
int team_size_request ,
605 : internal_policy(league_size_request,team_size_request,
606 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
611 template<
class ... Args>
612 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
614 : internal_policy(league_size_request,Kokkos::AUTO(),
615 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
623 TeamPolicy(
const internal_policy& p):internal_policy(p) {first_arg =
false;}
625 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
630 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
631 template<
class ... Args>
632 inline void set(Args ...) {
633 static_assert( 0 ==
sizeof...(Args),
"Kokkos::TeamPolicy: unhandled constructor arguments encountered.");
636 template<
class iType,
class ... Args>
637 inline typename std::enable_if<std::is_integral<iType>::value>::type set(iType, Args ... args) {
643 Kokkos::Impl::throw_runtime_exception(
"Kokkos::TeamPolicy: integer argument to constructor in illegal place.");
647 template<
class ... Args>
648 inline void set(
const ChunkSize& chunksize, Args ... args) {
650 internal_policy::internal_set_chunk_size(chunksize.value);
654 template<
class ... Args>
655 inline void set(
const ScratchRequest& scr_request, Args ... args) {
657 internal_policy::internal_set_scratch_size(scr_request.level,Impl::PerTeamValue(scr_request.per_team),
658 Impl::PerThreadValue(scr_request.per_thread));
662 inline TeamPolicy set_chunk_size(
int chunk)
const {
663 return TeamPolicy(internal_policy::set_chunk_size(chunk));
666 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team)
const {
667 return TeamPolicy(internal_policy::set_scratch_size(level,per_team));
669 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread)
const {
670 return TeamPolicy(internal_policy::set_scratch_size(level,per_thread));
672 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team,
const Impl::PerThreadValue& per_thread)
const {
673 return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
675 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread,
const Impl::PerTeamValue& per_team)
const {
676 return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
680 inline TeamPolicy& set_chunk_size(
int chunk) {
681 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)), internal_policy&>::value,
"internal set_chunk_size should return a reference");
682 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
685 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team) {
686 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(level,per_team)), internal_policy&>::value,
"internal set_chunk_size should return a reference");
687 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level,per_team));
689 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread) {
690 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level,per_thread));
692 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team,
const Impl::PerThreadValue& per_thread) {
693 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level, per_team, per_thread));
695 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread,
const Impl::PerTeamValue& per_team) {
696 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level, per_team, per_thread));
704 template<
typename iType,
class TeamMemberType>
705 struct TeamThreadRangeBoundariesStruct {
708 KOKKOS_INLINE_FUNCTION
static
709 iType ibegin(
const iType & arg_begin
710 ,
const iType & arg_end
711 ,
const iType & arg_rank
712 ,
const iType & arg_size
715 return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
718 KOKKOS_INLINE_FUNCTION
static
719 iType iend(
const iType & arg_begin
720 ,
const iType & arg_end
721 ,
const iType & arg_rank
722 ,
const iType & arg_size
725 const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );
726 return end_ < arg_end ? end_ : arg_end ;
731 typedef iType index_type;
734 enum {increment = 1};
735 const TeamMemberType& thread;
737 KOKKOS_INLINE_FUNCTION
738 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
739 ,
const iType& arg_end
741 : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
742 , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
743 , thread( arg_thread )
746 KOKKOS_INLINE_FUNCTION
747 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
748 ,
const iType& arg_begin
749 ,
const iType& arg_end
751 : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
752 , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
753 , thread( arg_thread )
757 template<
typename iType,
class TeamMemberType>
758 struct ThreadVectorRangeBoundariesStruct {
759 typedef iType index_type;
760 const index_type start;
761 const index_type end;
762 enum {increment = 1};
764 KOKKOS_INLINE_FUNCTION
765 constexpr ThreadVectorRangeBoundariesStruct (
const TeamMemberType,
const index_type& count ) noexcept
766 : start( static_cast<index_type>(0) )
769 KOKKOS_INLINE_FUNCTION
770 constexpr ThreadVectorRangeBoundariesStruct (
const index_type& count ) noexcept
771 : start( static_cast<index_type>(0) )
774 KOKKOS_INLINE_FUNCTION
775 constexpr ThreadVectorRangeBoundariesStruct (
const TeamMemberType,
const index_type& arg_begin,
const index_type& arg_end ) noexcept
776 : start( static_cast<index_type>(arg_begin) )
779 KOKKOS_INLINE_FUNCTION
780 constexpr ThreadVectorRangeBoundariesStruct (
const index_type& arg_begin,
const index_type& arg_end ) noexcept
781 : start( static_cast<index_type>(arg_begin) )
785 template<
class TeamMemberType>
786 struct ThreadSingleStruct {
787 const TeamMemberType& team_member;
788 KOKKOS_INLINE_FUNCTION
789 ThreadSingleStruct(
const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
792 template<
class TeamMemberType>
793 struct VectorSingleStruct {
794 const TeamMemberType& team_member;
795 KOKKOS_INLINE_FUNCTION
796 VectorSingleStruct(
const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
807 template<
typename iType,
class TeamMemberType>
808 KOKKOS_INLINE_FUNCTION
809 Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
/// Execution policy for parallel work over the threads within a team,
/// covering the index range from \p begin to \p end. The two bounds may have
/// different integer types; the resulting boundaries struct is indexed with
/// their std::common_type.
818 template<
typename iType1,
typename iType2,
class TeamMemberType>
819 KOKKOS_INLINE_FUNCTION
820 Impl::TeamThreadRangeBoundariesStruct<typename std::common_type<iType1, iType2>::type, TeamMemberType>
821 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
const iType2& end );
829 template<
typename iType,
class TeamMemberType>
830 KOKKOS_INLINE_FUNCTION
831 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
/// Execution policy for a vector parallel loop over the index range from
/// \p arg_begin to \p arg_end; both bounds share the index type iType.
834 template<
typename iType,
class TeamMemberType>
835 KOKKOS_INLINE_FUNCTION
836 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
837 ThreadVectorRange(
const TeamMemberType&,
const iType& arg_begin,
const iType& arg_end );
839 #if defined(KOKKOS_ENABLE_PROFILING)
// Primary template (declaration only). HasTag defaults to true exactly when
// TagType is not void and selects one of the two partial specializations
// that follow. Both return the user-supplied label unless it is empty, in
// which case a default name built from typeid(FunctorType).name() is used;
// the HasTag == true variant appends "/" and typeid(TagType).name().
842 template<
typename FunctorType,
typename TagType,
843 bool HasTag = !std::is_same<TagType, void>::value >
844 struct ParallelConstructName;
846 template<
typename FunctorType,
typename TagType>
847 struct ParallelConstructName<FunctorType, TagType, true> {
848 ParallelConstructName(std::string
const& label):label_ref(label) {
850 default_name = std::string(
typeid(FunctorType).name()) +
"/" +
851 typeid(TagType).name();
854 std::string
const&
get() {
855 return (label_ref.empty()) ? default_name : label_ref;
857 std::string
const& label_ref;
858 std::string default_name;
861 template<
typename FunctorType,
typename TagType>
862 struct ParallelConstructName<FunctorType, TagType, false> {
863 ParallelConstructName(std::string
const& label):label_ref(label) {
865 default_name = std::string(
typeid(FunctorType).name());
868 std::string
const&
get() {
869 return (label_ref.empty()) ? default_name : label_ref;
871 std::string
const& label_ref;
872 std::string default_name;
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
RangePolicy set_chunk_size(int chunk_size_) const
Set chunk_size to a discrete value.
RangePolicy execution_policy
Tag this class as an execution policy.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
member_type chunk_size() const
Return chunk_size.
TeamPolicy(const typename traits::execution_space &, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy(const member_type work_begin, const member_type work_end, Args...args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args...args)
Total range.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
Execution policy for parallel work over a league of teams of threads.
Parallel execution of a functor calls the functor once with each member of the execution policy...