47 #ifndef KOKKOS_PARALLEL_HPP
48 #define KOKKOS_PARALLEL_HPP
51 #include <Kokkos_Core_fwd.hpp>
52 #include <Kokkos_View.hpp>
53 #include <Kokkos_ExecPolicy.hpp>
55 #if defined(KOKKOS_ENABLE_PROFILING)
56 #include <impl/Kokkos_Profiling_Interface.hpp>
60 #include <impl/Kokkos_Tags.hpp>
61 #include <impl/Kokkos_Traits.hpp>
62 #include <impl/Kokkos_FunctorAnalysis.hpp>
63 #include <impl/Kokkos_FunctorAdapter.hpp>
83 template<
class Functor
88 struct FunctorPolicyExecutionSpace {
89 typedef Kokkos::DefaultExecutionSpace execution_space ;
92 template<
class Functor ,
class Policy >
93 struct FunctorPolicyExecutionSpace
95 , typename enable_if_type< typename Functor::device_type >::type
96 , typename enable_if_type< typename Policy ::execution_space >::type
99 typedef typename Policy ::execution_space execution_space ;
102 template<
class Functor ,
class Policy >
103 struct FunctorPolicyExecutionSpace
105 , typename enable_if_type< typename Functor::execution_space >::type
106 , typename enable_if_type< typename Policy ::execution_space >::type
109 typedef typename Policy ::execution_space execution_space ;
112 template<
class Functor ,
class Policy ,
class EnableFunctor >
113 struct FunctorPolicyExecutionSpace
116 , typename enable_if_type< typename Policy::execution_space >::type
119 typedef typename Policy ::execution_space execution_space ;
122 template<
class Functor ,
class Policy ,
class EnablePolicy >
123 struct FunctorPolicyExecutionSpace
125 , typename enable_if_type< typename Functor::device_type >::type
129 typedef typename Functor::device_type execution_space ;
132 template<
class Functor ,
class Policy ,
class EnablePolicy >
133 struct FunctorPolicyExecutionSpace
135 , typename enable_if_type< typename Functor::execution_space >::type
139 typedef typename Functor::execution_space execution_space ;
171 template<
class ExecPolicy ,
class FunctorType >
174 ,
const FunctorType & functor
175 ,
const std::string& str =
""
176 ,
typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type * = 0
179 #if defined(KOKKOS_ENABLE_PROFILING)
181 if(Kokkos::Profiling::profileLibraryLoaded()) {
182 Kokkos::Impl::ParallelConstructName<FunctorType, typename ExecPolicy::work_tag> name(str);
183 Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID);
187 Kokkos::Impl::shared_allocation_tracking_disable();
189 Kokkos::Impl::shared_allocation_tracking_enable();
193 #if defined(KOKKOS_ENABLE_PROFILING)
194 if(Kokkos::Profiling::profileLibraryLoaded()) {
195 Kokkos::Profiling::endParallelFor(kpID);
200 template<
class FunctorType >
203 ,
const FunctorType & functor
204 ,
const std::string& str =
""
208 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
210 typedef RangePolicy< execution_space > policy ;
212 #if defined(KOKKOS_ENABLE_PROFILING)
214 if(Kokkos::Profiling::profileLibraryLoaded()) {
215 Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
216 Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID);
220 Kokkos::Impl::shared_allocation_tracking_disable();
221 Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) );
222 Kokkos::Impl::shared_allocation_tracking_enable();
226 #if defined(KOKKOS_ENABLE_PROFILING)
227 if(Kokkos::Profiling::profileLibraryLoaded()) {
228 Kokkos::Profiling::endParallelFor(kpID);
233 template<
class ExecPolicy ,
class FunctorType >
236 ,
const ExecPolicy & policy
237 ,
const FunctorType & functor )
239 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
241 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
246 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
248 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
255 #include <Kokkos_Parallel_Reduce.hpp>
415 template<
class ExecutionPolicy ,
class FunctorType >
417 void parallel_scan(
const ExecutionPolicy & policy
418 ,
const FunctorType & functor
419 ,
const std::string& str =
""
420 ,
typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecutionPolicy >::value >::type * = 0
423 #if defined(KOKKOS_ENABLE_PROFILING)
425 if(Kokkos::Profiling::profileLibraryLoaded()) {
426 Kokkos::Impl::ParallelConstructName<FunctorType, typename ExecutionPolicy::work_tag> name(str);
427 Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
431 Kokkos::Impl::shared_allocation_tracking_disable();
432 Impl::ParallelScan< FunctorType , ExecutionPolicy > closure( functor , policy );
433 Kokkos::Impl::shared_allocation_tracking_enable();
437 #if defined(KOKKOS_ENABLE_PROFILING)
438 if(Kokkos::Profiling::profileLibraryLoaded()) {
439 Kokkos::Profiling::endParallelScan(kpID);
445 template<
class FunctorType >
447 void parallel_scan(
const size_t work_count
448 ,
const FunctorType & functor
449 ,
const std::string& str =
"" )
452 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
457 #if defined(KOKKOS_ENABLE_PROFILING)
459 if(Kokkos::Profiling::profileLibraryLoaded()) {
460 Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
461 Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
465 Kokkos::Impl::shared_allocation_tracking_disable();
466 Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) );
467 Kokkos::Impl::shared_allocation_tracking_enable();
471 #if defined(KOKKOS_ENABLE_PROFILING)
472 if(Kokkos::Profiling::profileLibraryLoaded()) {
473 Kokkos::Profiling::endParallelScan(kpID);
479 template<
class ExecutionPolicy ,
class FunctorType >
481 void parallel_scan(
const std::string& str
482 ,
const ExecutionPolicy & policy
483 ,
const FunctorType & functor)
485 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
487 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
490 ::Kokkos::parallel_scan(policy,functor,str);
492 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
494 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
500 template<
class ExecutionPolicy ,
class FunctorType,
class ReturnType >
502 void parallel_scan(
const ExecutionPolicy & policy
503 ,
const FunctorType & functor
505 ,
const std::string& str =
""
506 ,
typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecutionPolicy >::value >::type * = 0
509 #if defined(KOKKOS_ENABLE_PROFILING)
511 if(Kokkos::Profiling::profileLibraryLoaded()) {
512 Kokkos::Impl::ParallelConstructName<FunctorType, typename ExecutionPolicy::work_tag> name(str);
513 Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
517 Kokkos::Impl::shared_allocation_tracking_disable();
518 Impl::ParallelScanWithTotal< FunctorType , ExecutionPolicy, ReturnType > closure( functor, policy, return_value );
519 Kokkos::Impl::shared_allocation_tracking_enable();
523 #if defined(KOKKOS_ENABLE_PROFILING)
524 if(Kokkos::Profiling::profileLibraryLoaded()) {
525 Kokkos::Profiling::endParallelScan(kpID);
531 template<
class FunctorType,
class ReturnType >
533 void parallel_scan(
const size_t work_count
534 ,
const FunctorType & functor
536 ,
const std::string & str =
"" )
539 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
544 #if defined(KOKKOS_ENABLE_PROFILING)
546 if(Kokkos::Profiling::profileLibraryLoaded()) {
547 Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
548 Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
552 Kokkos::Impl::shared_allocation_tracking_disable();
553 Impl::ParallelScanWithTotal< FunctorType, policy, ReturnType > closure( functor, policy(0,work_count), return_value );
554 Kokkos::Impl::shared_allocation_tracking_enable();
558 #if defined(KOKKOS_ENABLE_PROFILING)
559 if(Kokkos::Profiling::profileLibraryLoaded()) {
560 Kokkos::Profiling::endParallelScan(kpID);
566 template<
class ExecutionPolicy,
class FunctorType,
class ReturnType >
568 void parallel_scan(
const std::string& str
569 ,
const ExecutionPolicy & policy
570 ,
const FunctorType & functor
573 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
575 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
578 ::Kokkos::parallel_scan(policy,functor,return_value,str);
580 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
582 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
597 template<
class FunctorType ,
class Enable =
void >
598 struct FunctorTeamShmemSize
600 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType & ,
int ) {
return 0 ; }
603 template<
class FunctorType >
604 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
606 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.team_shmem_size( team_size ) ; }
609 template<
class FunctorType >
610 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
612 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.shmem_size( team_size ) ; }
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
Implementation of the ParallelFor operator that has a partial specialization for the device...
Execution policy for work over a range of an integral type.