48 #ifndef KOKKOS_PARALLEL_HPP
49 #define KOKKOS_PARALLEL_HPP
52 #include <Kokkos_Core_fwd.hpp>
53 #include <Kokkos_View.hpp>
54 #include <Kokkos_ExecPolicy.hpp>
56 #if defined(KOKKOS_ENABLE_PROFILING)
57 #include <impl/Kokkos_Profiling_Interface.hpp>
61 #include <impl/Kokkos_Tags.hpp>
62 #include <impl/Kokkos_Traits.hpp>
63 #include <impl/Kokkos_FunctorAnalysis.hpp>
64 #include <impl/Kokkos_FunctorAdapter.hpp>
// Given a functor type and an execution policy type, exposes a selected
// execution space as the nested typedef `execution_space`.
// Primary template: names Kokkos::DefaultExecutionSpace. The trailing
// `EnableFunctor` / `EnablePolicy` parameters are the ones the partial
// specializations of this struct key on.
// NOTE(review): the closing `};` of this struct is absent from this listing.
84 template <
class Functor,
class Policy,
class EnableFunctor,
class EnablePolicy>
85 struct FunctorPolicyExecutionSpace {
86 typedef Kokkos::DefaultExecutionSpace execution_space;
// Partial specialization keyed (through enable_if_type) on both
// `Functor::device_type` and `Policy::execution_space`.
// The policy's execution space is the one exposed.
// NOTE(review): original line 91 (presumably the leading `Functor, Policy,`
// specialization arguments) and the closing `};` are missing here — confirm
// against the original header.
89 template <
class Functor,
class Policy>
90 struct FunctorPolicyExecutionSpace<
92 typename enable_if_type<typename Functor::device_type>::type,
93 typename enable_if_type<typename Policy ::execution_space>::type> {
94 typedef typename Policy ::execution_space execution_space;
// Partial specialization keyed (through enable_if_type) on both
// `Functor::execution_space` and `Policy::execution_space`.
// The policy's execution space is the one exposed.
// NOTE(review): original line 99 (presumably the leading `Functor, Policy,`
// specialization arguments) and the closing `};` are missing here — confirm
// against the original header.
97 template <
class Functor,
class Policy>
98 struct FunctorPolicyExecutionSpace<
100 typename enable_if_type<typename Functor::execution_space>::type,
101 typename enable_if_type<typename Policy ::execution_space>::type> {
102 typedef typename Policy ::execution_space execution_space;
// Partial specialization keyed only on `Policy::execution_space`;
// `EnableFunctor` is left as a free template parameter.
// The policy's execution space is the one exposed.
// NOTE(review): the closing `};` is absent from this listing.
105 template <
class Functor,
class Policy,
class EnableFunctor>
106 struct FunctorPolicyExecutionSpace<
107 Functor, Policy, EnableFunctor,
108 typename enable_if_type<typename Policy::execution_space>::type> {
109 typedef typename Policy ::execution_space execution_space;
// Partial specialization keyed on `Functor::device_type`; `EnablePolicy` is
// left as a free template parameter.
// The exposed execution space is `Functor::device_type::execution_space`.
// NOTE(review): original lines 114 and 116 (the remaining specialization
// arguments and the opening `{`) and the closing `};` are missing from this
// listing — confirm against the original header.
112 template <
class Functor,
class Policy,
class EnablePolicy>
113 struct FunctorPolicyExecutionSpace<
115 typename enable_if_type<typename Functor::device_type>::type,
117 typedef typename Functor::device_type::execution_space execution_space;
// Partial specialization keyed on `Functor::execution_space`; `EnablePolicy`
// is left as a free template parameter.
// The exposed execution space is `Functor::execution_space`.
// NOTE(review): original lines 122 and 124 (the remaining specialization
// arguments and the opening `{`) and the closing `};` are missing from this
// listing — confirm against the original header.
120 template <
class Functor,
class Policy,
class EnablePolicy>
121 struct FunctorPolicyExecutionSpace<
123 typename enable_if_type<typename Functor::execution_space>::type,
125 typedef typename Functor::execution_space execution_space;
// Policy-based dispatch overload: takes an execution policy, a functor and an
// optional label `str` (default ""). The trailing defaulted pointer parameter
// constrains the overload to types for which
// Kokkos::Impl::is_execution_policy<ExecPolicy>::value is true.
// NOTE(review): the profiling calls below (beginParallelFor/endParallelFor)
// indicate this is parallel_for, but the declaration line itself (original
// line 158) is missing, as are lines 163, 165, 169, 172-177, 179, 181-183 and
// the closing lines — consult the original header before editing the code.
157 template <
class ExecPolicy,
class FunctorType>
159 const ExecPolicy& policy,
const FunctorType& functor,
160 const std::string& str =
"",
161 typename std::enable_if<
162 Kokkos::Impl::is_execution_policy<ExecPolicy>::value>::type* =
// Profiling hook (only compiled with KOKKOS_ENABLE_PROFILING): if a profiling
// library is loaded, announce the start of the kernel. The name object is
// parameterized on the functor type and the policy's work tag, and the device
// id is derived from policy.space().
164 #if defined(KOKKOS_ENABLE_PROFILING)
166 if (Kokkos::Profiling::profileLibraryLoaded()) {
167 Kokkos::Impl::ParallelConstructName<FunctorType,
168 typename ExecPolicy::work_tag>
170 Kokkos::Profiling::beginParallelFor(
171 name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
// Shared-allocation tracking is switched off and then back on.
// NOTE(review): the statement that sat between these two calls (original
// line 179) is missing from this listing.
178 Kokkos::Impl::shared_allocation_tracking_disable();
180 Kokkos::Impl::shared_allocation_tracking_enable();
// Profiling hook: announce the end of the kernel identified by kpID.
184 #if defined(KOKKOS_ENABLE_PROFILING)
185 if (Kokkos::Profiling::profileLibraryLoaded()) {
186 Kokkos::Profiling::endParallelFor(kpID);
// parallel_for overload taking a work count instead of a policy.
//   work_count : upper bound of the range; the policy is built as
//                policy(0, work_count).
//   functor    : the functor handed to the Impl::ParallelFor closure.
//   str        : optional label (default ""), used for the profiling name.
// NOTE(review): some original lines (e.g. 199, 203, 205-209, 214-216 and the
// closing lines) are missing from this listing, including whatever runs the
// closure — consult the original header.
191 template <
class FunctorType>
192 inline void parallel_for(
const size_t work_count,
const FunctorType& functor,
193 const std::string& str =
"") {
// The execution space is looked up from the functor alone (policy argument is
// `void`), and a RangePolicy over that space is used as the policy type.
194 typedef typename Impl::FunctorPolicyExecutionSpace<
195 FunctorType,
void>::execution_space execution_space;
196 typedef RangePolicy<execution_space> policy;
// Profiling hook: if a profiling library is loaded, announce the start of the
// kernel using a name built from `str`; the device id comes from a
// default-constructed policy's space.
198 #if defined(KOKKOS_ENABLE_PROFILING)
200 if (Kokkos::Profiling::profileLibraryLoaded()) {
201 Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
202 Kokkos::Profiling::beginParallelFor(
204 Kokkos::Profiling::Experimental::device_id(policy().space()), &kpID);
// The closure is constructed while shared-allocation tracking is disabled;
// tracking is re-enabled right after construction.
210 Kokkos::Impl::shared_allocation_tracking_disable();
211 Impl::ParallelFor<FunctorType, policy> closure(functor,
212 policy(0, work_count));
213 Kokkos::Impl::shared_allocation_tracking_enable();
// Profiling hook: announce the end of the kernel identified by kpID.
217 #if defined(KOKKOS_ENABLE_PROFILING)
218 if (Kokkos::Profiling::profileLibraryLoaded()) {
219 Kokkos::Profiling::endParallelFor(kpID);
// Label-first parallel_for overload: (str, policy, functor).
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero, a start message
// and an end message containing `str` are written to std::cout.
// NOTE(review): the statement between the two debug sections (original line
// 232, presumably the forwarding call to the policy-first overload) and the
// `#endif` / closing lines are missing from this listing — confirm.
224 template <
class ExecPolicy,
class FunctorType>
225 inline void parallel_for(
const std::string& str,
const ExecPolicy& policy,
226 const FunctorType& functor) {
227 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
229 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
234 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
236 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
243 #include <Kokkos_Parallel_Reduce.hpp>
// parallel_scan overload taking an execution policy, a functor and an
// optional label `str` (default ""). The trailing defaulted pointer parameter
// constrains the overload to types for which
// Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value is true.
// NOTE(review): some original lines (e.g. 417, 419, 423, 426-431, 435-437 and
// the closing lines) are missing from this listing, including whatever runs
// the closure — consult the original header.
411 template <
class ExecutionPolicy,
class FunctorType>
412 inline void parallel_scan(
413 const ExecutionPolicy& policy,
const FunctorType& functor,
414 const std::string& str =
"",
415 typename std::enable_if<
416 Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
// Profiling hook: if a profiling library is loaded, announce the start of the
// scan. The name object is parameterized on the functor type and the policy's
// work tag; the device id is derived from policy.space().
418 #if defined(KOKKOS_ENABLE_PROFILING)
420 if (Kokkos::Profiling::profileLibraryLoaded()) {
421 Kokkos::Impl::ParallelConstructName<FunctorType,
422 typename ExecutionPolicy::work_tag>
424 Kokkos::Profiling::beginParallelScan(
425 name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
// The closure is constructed while shared-allocation tracking is disabled;
// tracking is re-enabled right after construction.
432 Kokkos::Impl::shared_allocation_tracking_disable();
433 Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor, policy);
434 Kokkos::Impl::shared_allocation_tracking_enable();
// Profiling hook: announce the end of the scan identified by kpID.
438 #if defined(KOKKOS_ENABLE_PROFILING)
439 if (Kokkos::Profiling::profileLibraryLoaded()) {
440 Kokkos::Profiling::endParallelScan(kpID);
// parallel_scan overload taking a work count instead of a policy.
//   work_count : upper bound of the range; the policy is built as
//                policy(0, work_count).
//   functor    : the functor handed to the Impl::ParallelScan closure.
//   str        : optional label (default ""), used for the profiling name.
// NOTE(review): original lines 448 and 450-452 (the start of the
// execution_space typedef and the definition of `policy`) are missing from
// this listing, as are 454, 458, 460-464, 469-471 and the closing lines.
445 template <
class FunctorType>
446 inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
447 const std::string& str =
"") {
449 FunctorType,
void>::execution_space execution_space;
// Profiling hook: if a profiling library is loaded, announce the start of the
// scan using a name built from `str`; the device id comes from a
// default-constructed policy's space.
453 #if defined(KOKKOS_ENABLE_PROFILING)
455 if (Kokkos::Profiling::profileLibraryLoaded()) {
456 Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
457 Kokkos::Profiling::beginParallelScan(
459 Kokkos::Profiling::Experimental::device_id(policy().space()), &kpID);
// The closure is constructed while shared-allocation tracking is disabled;
// tracking is re-enabled right after construction.
465 Kokkos::Impl::shared_allocation_tracking_disable();
466 Impl::ParallelScan<FunctorType, policy> closure(functor,
467 policy(0, work_count));
468 Kokkos::Impl::shared_allocation_tracking_enable();
// Profiling hook: announce the end of the scan identified by kpID.
472 #if defined(KOKKOS_ENABLE_PROFILING)
473 if (Kokkos::Profiling::profileLibraryLoaded()) {
474 Kokkos::Profiling::endParallelScan(kpID);
// Label-first parallel_scan overload: (str, policy, functor).
// Forwards to ::Kokkos::parallel_scan(policy, functor, str). When
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero, a start message and an
// end message containing `str` are written to std::cout around that call.
// NOTE(review): the `#endif` lines and the closing `}` are absent from this
// listing.
479 template <
class ExecutionPolicy,
class FunctorType>
480 inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
481 const FunctorType& functor) {
482 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
484 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
487 ::Kokkos::parallel_scan(policy, functor, str);
489 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
491 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// parallel_scan overload that additionally takes `return_value`, a non-const
// reference passed on to the Impl::ParallelScanWithTotal closure.
//   policy       : execution policy (overload constrained via
//                  Kokkos::Impl::is_execution_policy<ExecutionPolicy>).
//   functor      : the functor handed to the closure.
//   return_value : passed by reference to the closure constructor.
//   str          : optional label (default "").
// NOTE(review): some original lines (e.g. 502, 504, 508, 511-516, 521-523,
// 527-528 and the closing `}`) are missing from this listing, including
// whatever runs the closure — consult the original header.
496 template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
497 inline void parallel_scan(
498 const ExecutionPolicy& policy,
const FunctorType& functor,
499 ReturnType& return_value,
const std::string& str =
"",
500 typename std::enable_if<
501 Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
// Profiling hook: if a profiling library is loaded, announce the start of the
// scan; the device id is derived from policy.space().
503 #if defined(KOKKOS_ENABLE_PROFILING)
505 if (Kokkos::Profiling::profileLibraryLoaded()) {
506 Kokkos::Impl::ParallelConstructName<FunctorType,
507 typename ExecutionPolicy::work_tag>
509 Kokkos::Profiling::beginParallelScan(
510 name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()),
// The closure is constructed while shared-allocation tracking is disabled;
// tracking is re-enabled right after construction.
517 Kokkos::Impl::shared_allocation_tracking_disable();
518 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType> closure(
519 functor, policy, return_value);
520 Kokkos::Impl::shared_allocation_tracking_enable();
// Profiling hook: announce the end of the scan identified by kpID.
524 #if defined(KOKKOS_ENABLE_PROFILING)
525 if (Kokkos::Profiling::profileLibraryLoaded()) {
526 Kokkos::Profiling::endParallelScan(kpID);
// Fence the policy's execution space before returning (presumably so that
// `return_value` is ready for the caller — TODO confirm).
529 policy.space().fence();
// Work-count parallel_scan overload that also passes `return_value` to the
// Impl::ParallelScanWithTotal closure; the policy is built as
// policy(0, work_count).
// NOTE(review): original line 534 (presumably the `return_value` parameter
// used below), lines 536 and 538-540 (start of the execution_space typedef
// and the definition of `policy`), and several later lines are missing from
// this listing — consult the original header.
532 template <
class FunctorType,
class ReturnType>
533 inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
535 const std::string& str =
"") {
537 FunctorType,
void>::execution_space execution_space;
// Profiling hook: if a profiling library is loaded, announce the start of the
// scan using a name built from `str`; the device id comes from a
// default-constructed policy's space.
541 #if defined(KOKKOS_ENABLE_PROFILING)
543 if (Kokkos::Profiling::profileLibraryLoaded()) {
544 Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
545 Kokkos::Profiling::beginParallelScan(
547 Kokkos::Profiling::Experimental::device_id(policy().space()), &kpID);
// The closure is constructed while shared-allocation tracking is disabled;
// tracking is re-enabled right after construction.
553 Kokkos::Impl::shared_allocation_tracking_disable();
554 Impl::ParallelScanWithTotal<FunctorType, policy, ReturnType> closure(
555 functor, policy(0, work_count), return_value);
556 Kokkos::Impl::shared_allocation_tracking_enable();
// Profiling hook: announce the end of the scan identified by kpID.
560 #if defined(KOKKOS_ENABLE_PROFILING)
561 if (Kokkos::Profiling::profileLibraryLoaded()) {
562 Kokkos::Profiling::endParallelScan(kpID);
// Fence a default-constructed instance of the selected execution space before
// returning.
565 execution_space().fence();
// Label-first parallel_scan overload with a return value.
// Forwards to ::Kokkos::parallel_scan(policy, functor, return_value, str).
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero, a start message
// and an end message containing `str` are written to std::cout around that
// call.
// NOTE(review): original line 571 (presumably the `return_value` parameter
// and the opening `{`), the `#endif` lines and the closing `}` are missing
// from this listing — confirm against the original header.
568 template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
569 inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
570 const FunctorType& functor,
572 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
574 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
577 ::Kokkos::parallel_scan(policy, functor, return_value, str);
579 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
581 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// Trait-style helper exposing a static `value(functor, team_size)` that
// returns a size_t. The two bool template parameters default to
// has_member_team_shmem_size<FunctorType>::value and
// has_member_shmem_size<FunctorType>::value, and select among the
// specializations of this struct.
// Primary template: both arguments of `value` are unnamed, i.e. ignored.
// NOTE(review): the body of `value` (original lines 600-602) and the closing
// `};` are missing from this listing, so the returned value cannot be
// confirmed here.
594 template <
class FunctorType,
595 bool HasTeamShmemSize =
596 has_member_team_shmem_size<FunctorType>::value,
597 bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
598 struct FunctorTeamShmemSize {
599 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType&,
int) {
// Specialization for <true, false> (only the team_shmem_size flag is set):
// `value` returns f.team_shmem_size(team_size).
// NOTE(review): the closing braces are absent from this listing.
604 template <
class FunctorType>
605 struct FunctorTeamShmemSize<FunctorType, true, false> {
606 static inline size_t value(
const FunctorType& f,
int team_size) {
607 return f.team_shmem_size(team_size);
// Specialization for <false, true> (only the shmem_size flag is set):
// `value` returns f.shmem_size(team_size).
// NOTE(review): the closing braces are absent from this listing.
611 template <
class FunctorType>
612 struct FunctorTeamShmemSize<FunctorType, false, true> {
613 static inline size_t value(
const FunctorType& f,
int team_size) {
614 return f.shmem_size(team_size);
// Specialization for <true, true> (both flags set). Both arguments of `value`
// are unnamed, i.e. ignored; the visible body consists of a message fragment
// about a functor defining both team_shmem_size and shmem_size.
// NOTE(review): the statement that uses this message (original line 620) and
// everything after the first half of the string are missing from this
// listing — presumably this case is rejected as ambiguous; confirm against
// the original header.
617 template <
class FunctorType>
618 struct FunctorTeamShmemSize<FunctorType, true, true> {
619 static inline size_t value(
const FunctorType& ,
int ) {
621 "Functor with both team_shmem_size and shmem_size defined is "
Implementation of the ParallelFor operator that has a partial specialization for the device...
Given a functor and an execution policy, query an execution space.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.
Execution policy for work over a range of an integral type.