Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_ExecPolicy.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_EXECPOLICY_HPP
46 #define KOKKOS_EXECPOLICY_HPP
47 
48 #include <Kokkos_Core_fwd.hpp>
49 #include <impl/Kokkos_Traits.hpp>
50 #include <impl/Kokkos_Error.hpp>
51 #include <impl/Kokkos_Tags.hpp>
52 #include <impl/Kokkos_AnalyzePolicy.hpp>
53 #include <Kokkos_Concepts.hpp>
54 #include <iostream>
55 #if defined(KOKKOS_ENABLE_PROFILING)
56 #include <typeinfo>
57 #endif // KOKKOS_ENABLE_PROFILING
58 
59 //----------------------------------------------------------------------------
60 
61 namespace Kokkos {
62 
// Empty tag types named after the parallel-for, parallel-scan and
// parallel-reduce patterns. They carry no data or behavior.
// NOTE(review): where these tags are consumed is not visible in this file.
63 struct ParallelForTag {};
64 struct ParallelScanTag {};
65 struct ParallelReduceTag {};
66 
// Thin wrapper around an int so that a chunk size can be passed as a
// distinct, recognizable argument type (RangePolicy::set and TeamPolicy::set
// both overload on `const ChunkSize&`). Implicitly constructible from int.
struct ChunkSize {
  int value;  // the requested chunk size, stored verbatim

  ChunkSize(int value_) { value = value_; }
};
71 
93 template <class... Properties>
94 class RangePolicy : public Impl::PolicyTraits<Properties...> {
95  public:
96  typedef Impl::PolicyTraits<Properties...> traits;
97 
98  private:
99  typename traits::execution_space m_space;
100  typename traits::index_type m_begin;
101  typename traits::index_type m_end;
102  typename traits::index_type m_granularity;
103  typename traits::index_type m_granularity_mask;
104 
105  template <class... OtherProperties>
106  friend class RangePolicy;
107 
108  public:
111  typedef typename traits::index_type member_type;
112  typedef typename traits::index_type index_type;
113 
114  KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
115  return m_space;
116  }
117  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
118  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
119 
120  // TODO: find a better workaround for Clangs weird instantiation order
121  // This thing is here because of an instantiation error, where the RangePolicy
122  // is inserted into FunctorValue Traits, which tries decltype on the operator.
123  // It tries to do this even though the first argument of parallel for clearly
124  // doesn't match.
125  void operator()(const int&) const {}
126 
127  RangePolicy(const RangePolicy&) = default;
128  RangePolicy(RangePolicy&&) = default;
129 
130  template <class... OtherProperties>
131  RangePolicy(const RangePolicy<OtherProperties...> p) {
132  m_space = p.m_space;
133  m_begin = p.m_begin;
134  m_end = p.m_end;
135  m_granularity = p.m_granularity;
136  m_granularity_mask = p.m_granularity_mask;
137  }
138 
139  inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
140 
142  inline RangePolicy(const typename traits::execution_space& work_space,
143  const member_type work_begin, const member_type work_end)
144  : m_space(work_space),
145  m_begin(work_begin < work_end ? work_begin : 0),
146  m_end(work_begin < work_end ? work_end : 0),
147  m_granularity(0),
148  m_granularity_mask(0) {
149  set_auto_chunk_size();
150  }
151 
153  inline RangePolicy(const member_type work_begin, const member_type work_end)
154  : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
155  set_auto_chunk_size();
156  }
157 
159  template <class... Args>
160  inline RangePolicy(const typename traits::execution_space& work_space,
161  const member_type work_begin, const member_type work_end,
162  Args... args)
163  : m_space(work_space),
164  m_begin(work_begin < work_end ? work_begin : 0),
165  m_end(work_begin < work_end ? work_end : 0),
166  m_granularity(0),
167  m_granularity_mask(0) {
168  set_auto_chunk_size();
169  set(args...);
170  }
171 
173  template <class... Args>
174  inline RangePolicy(const member_type work_begin, const member_type work_end,
175  Args... args)
176  : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
177  set_auto_chunk_size();
178  set(args...);
179  }
180 
181  private:
182  inline void set() {}
183 
184  public:
185  template <class... Args>
186  inline void set(Args...) {
187  static_assert(
188  0 == sizeof...(Args),
189  "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
190  }
191 
192  template <class... Args>
193  inline void set(const ChunkSize& chunksize, Args... args) {
194  m_granularity = chunksize.value;
195  m_granularity_mask = m_granularity - 1;
196  set(args...);
197  }
198 
199  public:
201  inline member_type chunk_size() const { return m_granularity; }
202 
204  inline RangePolicy set_chunk_size(int chunk_size_) const {
205  RangePolicy p = *this;
206  p.m_granularity = chunk_size_;
207  p.m_granularity_mask = p.m_granularity - 1;
208  return p;
209  }
210 
211  private:
213  inline void set_auto_chunk_size() {
214  int64_t concurrency =
215  static_cast<int64_t>(traits::execution_space::concurrency());
216  if (concurrency == 0) concurrency = 1;
217 
218  if (m_granularity > 0) {
219  if (!Impl::is_integral_power_of_two(m_granularity))
220  Kokkos::abort("RangePolicy blocking granularity must be power of two");
221  }
222 
223  int64_t new_chunk_size = 1;
224  while (new_chunk_size * 100 * concurrency <
225  static_cast<int64_t>(m_end - m_begin))
226  new_chunk_size *= 2;
227  if (new_chunk_size < 128) {
228  new_chunk_size = 1;
229  while ((new_chunk_size * 40 * concurrency <
230  static_cast<int64_t>(m_end - m_begin)) &&
231  (new_chunk_size < 128))
232  new_chunk_size *= 2;
233  }
234  m_granularity = new_chunk_size;
235  m_granularity_mask = m_granularity - 1;
236  }
237 
238  public:
243  struct WorkRange {
244  typedef typename RangePolicy::work_tag work_tag;
245  typedef typename RangePolicy::member_type member_type;
246 
247  KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
248  KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
249 
254  KOKKOS_INLINE_FUNCTION
255  WorkRange(const RangePolicy& range, const int part_rank,
256  const int part_size)
257  : m_begin(0), m_end(0) {
258  if (part_size) {
259  // Split evenly among partitions, then round up to the granularity.
260  const member_type work_part =
261  ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
262  range.m_granularity_mask) &
263  ~member_type(range.m_granularity_mask);
264 
265  m_begin = range.begin() + work_part * part_rank;
266  m_end = m_begin + work_part;
267 
268  if (range.end() < m_begin) m_begin = range.end();
269  if (range.end() < m_end) m_end = range.end();
270  }
271  }
272 
273  private:
274  member_type m_begin;
275  member_type m_end;
276  WorkRange();
277  WorkRange& operator=(const WorkRange&);
278  };
279 };
280 
281 } // namespace Kokkos
282 
283 //----------------------------------------------------------------------------
284 //----------------------------------------------------------------------------
285 
286 namespace Kokkos {
287 
288 namespace Impl {
289 
// Declaration-only template listing the interface of a team policy: the
// team-size queries, the constructors and the nested member_type handle.
// No member is defined in this listing.
// NOTE(review): presumably specialized per execution space elsewhere - confirm.
290 template <class ExecSpace, class... Properties>
291 class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
292  private:
293  typedef Impl::PolicyTraits<Properties...> traits;
294 
295  public:
296  typedef typename traits::index_type index_type;
297 
298  //----------------------------------------
// Team-size queries for a given functor type.
309  template <class FunctorType>
310  static int team_size_max(const FunctorType&);
311 
322  template <class FunctorType>
323  static int team_size_recommended(const FunctorType&);
324 
325  template <class FunctorType>
326  static int team_size_recommended(const FunctorType&, const int&);
327 
// NOTE(review): non-static twin of the static overload above; they differ
// only in taking the second argument as `const int` instead of `const int&`.
328  template <class FunctorType>
329  int team_size_recommended(const FunctorType& functor,
330  const int vector_length);
331 
332  //----------------------------------------
// Constructors taking an explicit execution space instance; the team size is
// either an int or Kokkos::AUTO_t, and the vector length defaults to 1.
334  TeamPolicyInternal(const typename traits::execution_space&,
335  int league_size_request, int team_size_request,
336  int vector_length_request = 1);
337 
338  TeamPolicyInternal(const typename traits::execution_space&,
339  int league_size_request, const Kokkos::AUTO_t&,
340  int vector_length_request = 1);
341 
// Same two constructors without an execution space argument.
344  TeamPolicyInternal(int league_size_request, int team_size_request,
345  int vector_length_request = 1);
346 
347  TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
348  int vector_length_request = 1);
349 
350  /* TeamPolicyInternal( int league_size_request , int team_size_request );
351 
352  TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
353 
// Number of teams in the league.
359  KOKKOS_INLINE_FUNCTION int league_size() const;
360 
// Number of threads in a team.
366  KOKKOS_INLINE_FUNCTION int team_size() const;
367 
368  inline typename traits::index_type chunk_size() const;
369 
// With deprecated code enabled set_chunk_size returns a modified copy;
// otherwise it modifies this policy and returns a reference to it.
370 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
371  inline TeamPolicyInternal set_chunk_size(int chunk_size) const;
372 #else
373  inline TeamPolicyInternal& set_chunk_size(int chunk_size);
374 #endif
375 
// Interface of the per-thread handle.
379  struct member_type {
// Handle to the currently executing team shared scratch memory.
381  KOKKOS_INLINE_FUNCTION
382  typename traits::execution_space::scratch_memory_space team_shmem() const;
383 
// Rank of this team within the league of teams.
385  KOKKOS_INLINE_FUNCTION int league_rank() const;
386 
// Number of teams in the league.
388  KOKKOS_INLINE_FUNCTION int league_size() const;
389 
// Rank of this thread within this team.
391  KOKKOS_INLINE_FUNCTION int team_rank() const;
392 
// Number of threads in this team.
394  KOKKOS_INLINE_FUNCTION int team_size() const;
395 
// Barrier among the threads of this team.
397  KOKKOS_INLINE_FUNCTION void team_barrier() const;
398 
// Intra-team reduction. Returns join of all values of the team members.
401  template <class JoinOp>
402  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
403  const typename JoinOp::value_type, const JoinOp&) const;
404 
// Intra-team exclusive prefix sum with team_rank() ordering.
410  template <typename Type>
411  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
412 
// Overload of team_scan taking an additional `global_accum` pointer.
// NOTE(review): the accumulation semantics are not visible here - confirm.
422  template <typename Type>
423  KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
424  Type* const global_accum) const;
425  };
426 };
427 
// Wrapper around an int giving a per-team quantity its own type, so that
// overloads (e.g. the ScratchRequest constructors) can tell it apart from a
// PerThreadValue. The constructor is only declared here.
428 struct PerTeamValue {
429  int value;
430  PerTeamValue(int arg);
431 };
432 
// Wrapper around an int giving a per-thread quantity its own type, so that
// overloads (e.g. the ScratchRequest constructors) can tell it apart from a
// PerTeamValue. The constructor is only declared here.
433 struct PerThreadValue {
434  int value;
435  PerThreadValue(int arg);
436 };
437 
// Extracts a vector length from the front of an argument list.
//
// value(first, rest...) returns `first` unchanged when iType is an integral
// type, and the int constant 1 otherwise; `rest...` is always ignored.
//
// The selection is done by tag dispatch on std::is_integral<iType>. The
// earlier form put std::enable_if directly into the signatures of two
// non-template member functions; that is not a SFINAE context, so
// instantiating the struct for any iType was a hard compile error.
template <class iType, class... Args>
struct ExtractVectorLength {
 private:
  // iType for integral iType, int otherwise (same result types as before).
  typedef typename std::conditional<std::is_integral<iType>::value, iType,
                                    int>::type result_type;

  // Integral case: the leading argument is the vector length.
  static inline result_type select(std::true_type, iType val) { return val; }

  // Non-integral case: no vector length was given, fall back to 1.
  static inline result_type select(std::false_type, const iType&) { return 1; }

 public:
  static inline result_type value(iType val, Args...) {
    return select(std::is_integral<iType>(), val);
  }
};
453 
// Overload pair that reads a vector length from the front of an argument
// pack. Exactly one overload is viable for any iType (SFINAE on
// std::is_integral); every argument after the first is ignored.

// Leading argument is integral: it is returned unchanged as the length.
template <class iType, class... Args>
inline typename std::enable_if<std::is_integral<iType>::value, iType>::type
extract_vector_length(iType length, Args... /*ignored*/) {
  return length;
}

// Leading argument is not integral: no length was supplied, so report 1.
template <class iType, class... Args>
inline typename std::enable_if<!std::is_integral<iType>::value, int>::type
extract_vector_length(iType /*not_a_length*/, Args... /*ignored*/) {
  const int default_length = 1;
  return default_length;
}
465 
466 } // namespace Impl
467 
// Factory functions returning the Impl::PerTeamValue / Impl::PerThreadValue
// wrapper types; only declared here.
// NOTE(review): presumably they wrap `arg` unchanged - confirm at the
// definition.
468 Impl::PerTeamValue PerTeam(const int& arg);
469 Impl::PerThreadValue PerThread(const int& arg);
470 
471 struct ScratchRequest {
472  int level;
473 
474  int per_team;
475  int per_thread;
476 
477  inline ScratchRequest(const int& level_,
478  const Impl::PerTeamValue& team_value) {
479  level = level_;
480  per_team = team_value.value;
481  per_thread = 0;
482  }
483 
484  inline ScratchRequest(const int& level_,
485  const Impl::PerThreadValue& thread_value) {
486  level = level_;
487  per_team = 0;
488  per_thread = thread_value.value;
489  }
490 
491  inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
492  const Impl::PerThreadValue& thread_value) {
493  level = level_;
494  per_team = team_value.value;
495  per_thread = thread_value.value;
496  }
497 
498  inline ScratchRequest(const int& level_,
499  const Impl::PerThreadValue& thread_value,
500  const Impl::PerTeamValue& team_value) {
501  level = level_;
502  per_team = team_value.value;
503  per_thread = thread_value.value;
504  }
505 };
506 
533 template <class... Properties>
535  : public Impl::TeamPolicyInternal<
536  typename Impl::PolicyTraits<Properties...>::execution_space,
537  Properties...> {
538  typedef Impl::TeamPolicyInternal<
539  typename Impl::PolicyTraits<Properties...>::execution_space,
540  Properties...>
541  internal_policy;
542 
543  template <class... OtherProperties>
544  friend class TeamPolicy;
545 
546  public:
547  typedef Impl::PolicyTraits<Properties...> traits;
548 
550 
551  TeamPolicy& operator=(const TeamPolicy&) = default;
552 
554  TeamPolicy(const typename traits::execution_space& space_,
555  int league_size_request, int team_size_request,
556  int vector_length_request = 1)
557  : internal_policy(space_, league_size_request, team_size_request,
558  vector_length_request) {
559  first_arg = false;
560  }
561 
562  TeamPolicy(const typename traits::execution_space& space_,
563  int league_size_request, const Kokkos::AUTO_t&,
564  int vector_length_request = 1)
565  : internal_policy(space_, league_size_request, Kokkos::AUTO(),
566  vector_length_request) {
567  first_arg = false;
568  }
569 
572  TeamPolicy(int league_size_request, int team_size_request,
573  int vector_length_request = 1)
574  : internal_policy(league_size_request, team_size_request,
575  vector_length_request) {
576  first_arg = false;
577  }
578 
579  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
580  int vector_length_request = 1)
581  : internal_policy(league_size_request, Kokkos::AUTO(),
582  vector_length_request) {
583  first_arg = false;
584  }
585 
586 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
587 
588  template <class... Args>
589  TeamPolicy(const typename traits::execution_space&, int league_size_request,
590  int team_size_request, int vector_length_request, Args... args)
591  : internal_policy(typename traits::execution_space(), league_size_request,
592  team_size_request, vector_length_request) {
593  first_arg = false;
594  set(args...);
595  }
596 
597  template <class... Args>
598  TeamPolicy(const typename traits::execution_space&, int league_size_request,
599  const Kokkos::AUTO_t&, int vector_length_request, Args... args)
600  : internal_policy(typename traits::execution_space(), league_size_request,
601  Kokkos::AUTO(), vector_length_request) {
602  first_arg = false;
603  set(args...);
604  }
605 
608  template <class... Args>
609  TeamPolicy(int league_size_request, int team_size_request,
610  int vector_length_request, Args... args)
611  : internal_policy(league_size_request, team_size_request,
612  vector_length_request) {
613  first_arg = false;
614  set(args...);
615  }
616 
617  template <class... Args>
618  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
619  int vector_length_request, Args... args)
620  : internal_policy(league_size_request, Kokkos::AUTO(),
621  vector_length_request) {
622  first_arg = false;
623  set(args...);
624  }
625 
627  template <class... Args>
628  TeamPolicy(const typename traits::execution_space&, int league_size_request,
629  int team_size_request, Args... args)
630  : internal_policy(typename traits::execution_space(), league_size_request,
631  team_size_request,
632  Kokkos::Impl::extract_vector_length<Args...>(args...)) {
633  first_arg = true;
634  set(args...);
635  }
636 
637  template <class... Args>
638  TeamPolicy(const typename traits::execution_space&, int league_size_request,
639  const Kokkos::AUTO_t&, Args... args)
640  : internal_policy(typename traits::execution_space(), league_size_request,
641  Kokkos::AUTO(),
642  Kokkos::Impl::extract_vector_length<Args...>(args...)) {
643  first_arg = true;
644  set(args...);
645  }
646 
649  template <class... Args>
650  TeamPolicy(int league_size_request, int team_size_request, Args... args)
651  : internal_policy(league_size_request, team_size_request,
652  Kokkos::Impl::extract_vector_length<Args...>(args...)) {
653  first_arg = true;
654  set(args...);
655  }
656 
657  template <class... Args>
658  TeamPolicy(int league_size_request, const Kokkos::AUTO_t&, Args... args)
659  : internal_policy(league_size_request, Kokkos::AUTO(),
660  Kokkos::Impl::extract_vector_length<Args...>(args...)) {
661  first_arg = true;
662  set(args...);
663  }
664 #endif
665 
666  template <class... OtherProperties>
667  TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
668  first_arg = p.first_arg;
669  }
670 
671  private:
672  bool first_arg;
673  TeamPolicy(const internal_policy& p) : internal_policy(p) {
674  first_arg = false;
675  }
676 
677 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
678  inline void set() {}
679 #endif
680 
681  public:
682 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
683  template <class... Args>
684  inline void set(Args...) {
685  static_assert(
686  0 == sizeof...(Args),
687  "Kokkos::TeamPolicy: unhandled constructor arguments encountered.");
688  }
689 
690  template <class iType, class... Args>
691  inline typename std::enable_if<std::is_integral<iType>::value>::type set(
692  iType, Args... args) {
693  if (first_arg) {
694  first_arg = false;
695  set(args...);
696  } else {
697  first_arg = false;
698  Kokkos::Impl::throw_runtime_exception(
699  "Kokkos::TeamPolicy: integer argument to constructor in illegal "
700  "place.");
701  }
702  }
703 
704  template <class... Args>
705  inline void set(const ChunkSize& chunksize, Args... args) {
706  first_arg = false;
707  internal_policy::internal_set_chunk_size(chunksize.value);
708  set(args...);
709  }
710 
711  template <class... Args>
712  inline void set(const ScratchRequest& scr_request, Args... args) {
713  first_arg = false;
714  internal_policy::internal_set_scratch_size(
715  scr_request.level, Impl::PerTeamValue(scr_request.per_team),
716  Impl::PerThreadValue(scr_request.per_thread));
717  set(args...);
718  }
719 
720  inline TeamPolicy set_chunk_size(int chunk) const {
721  return TeamPolicy(internal_policy::set_chunk_size(chunk));
722  }
723 
724  inline TeamPolicy set_scratch_size(const int& level,
725  const Impl::PerTeamValue& per_team) const {
726  return TeamPolicy(internal_policy::set_scratch_size(level, per_team));
727  }
728  inline TeamPolicy set_scratch_size(
729  const int& level, const Impl::PerThreadValue& per_thread) const {
730  return TeamPolicy(internal_policy::set_scratch_size(level, per_thread));
731  }
732  inline TeamPolicy set_scratch_size(
733  const int& level, const Impl::PerTeamValue& per_team,
734  const Impl::PerThreadValue& per_thread) const {
735  return TeamPolicy(
736  internal_policy::set_scratch_size(level, per_team, per_thread));
737  }
738  inline TeamPolicy set_scratch_size(const int& level,
739  const Impl::PerThreadValue& per_thread,
740  const Impl::PerTeamValue& per_team) const {
741  return TeamPolicy(
742  internal_policy::set_scratch_size(level, per_team, per_thread));
743  }
744 
745 #else
746  inline TeamPolicy& set_chunk_size(int chunk) {
747  static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
748  internal_policy&>::value,
749  "internal set_chunk_size should return a reference");
750  return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
751  }
752 
753  inline TeamPolicy& set_scratch_size(const int& level,
754  const Impl::PerTeamValue& per_team) {
755  static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
756  level, per_team)),
757  internal_policy&>::value,
758  "internal set_chunk_size should return a reference");
759  return static_cast<TeamPolicy&>(
760  internal_policy::set_scratch_size(level, per_team));
761  }
762  inline TeamPolicy& set_scratch_size(const int& level,
763  const Impl::PerThreadValue& per_thread) {
764  return static_cast<TeamPolicy&>(
765  internal_policy::set_scratch_size(level, per_thread));
766  }
767  inline TeamPolicy& set_scratch_size(const int& level,
768  const Impl::PerTeamValue& per_team,
769  const Impl::PerThreadValue& per_thread) {
770  return static_cast<TeamPolicy&>(
771  internal_policy::set_scratch_size(level, per_team, per_thread));
772  }
773  inline TeamPolicy& set_scratch_size(const int& level,
774  const Impl::PerThreadValue& per_thread,
775  const Impl::PerTeamValue& per_team) {
776  return static_cast<TeamPolicy&>(
777  internal_policy::set_scratch_size(level, per_team, per_thread));
778  }
779 #endif
780 };
781 
782 namespace Impl {
783 
784 template <typename iType, class TeamMemberType>
785 struct TeamThreadRangeBoundariesStruct {
786  private:
787  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
788  const iType& arg_end,
789  const iType& arg_rank,
790  const iType& arg_size) {
791  return arg_begin +
792  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
793  }
794 
795  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
796  const iType& arg_end,
797  const iType& arg_rank,
798  const iType& arg_size) {
799  const iType end_ =
800  arg_begin +
801  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
802  return end_ < arg_end ? end_ : arg_end;
803  }
804 
805  public:
806  typedef iType index_type;
807  const iType start;
808  const iType end;
809  enum { increment = 1 };
810  const TeamMemberType& thread;
811 
812  KOKKOS_INLINE_FUNCTION
813  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
814  const iType& arg_end)
815  : start(
816  ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
817  end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
818  thread(arg_thread) {}
819 
820  KOKKOS_INLINE_FUNCTION
821  TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
822  const iType& arg_begin, const iType& arg_end)
823  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
824  arg_thread.team_size())),
825  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
826  arg_thread.team_size())),
827  thread(arg_thread) {}
828 };
829 
830 template <typename iType, class TeamMemberType>
831 struct TeamVectorRangeBoundariesStruct {
832  private:
833  KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
834  const iType& arg_end,
835  const iType& arg_rank,
836  const iType& arg_size) {
837  return arg_begin +
838  ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
839  }
840 
841  KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
842  const iType& arg_end,
843  const iType& arg_rank,
844  const iType& arg_size) {
845  const iType end_ =
846  arg_begin +
847  ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
848  return end_ < arg_end ? end_ : arg_end;
849  }
850 
851  public:
852  typedef iType index_type;
853  const iType start;
854  const iType end;
855  enum { increment = 1 };
856  const TeamMemberType& thread;
857 
858  KOKKOS_INLINE_FUNCTION
859  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
860  const iType& arg_end)
861  : start(
862  ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
863  end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
864  thread(arg_thread) {}
865 
866  KOKKOS_INLINE_FUNCTION
867  TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
868  const iType& arg_begin, const iType& arg_end)
869  : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
870  arg_thread.team_size())),
871  end(iend(arg_begin, arg_end, arg_thread.team_rank(),
872  arg_thread.team_size())),
873  thread(arg_thread) {}
874 };
875 
876 template <typename iType, class TeamMemberType>
877 struct ThreadVectorRangeBoundariesStruct {
878  typedef iType index_type;
879  const index_type start;
880  const index_type end;
881  enum { increment = 1 };
882 
883  KOKKOS_INLINE_FUNCTION
884  constexpr ThreadVectorRangeBoundariesStruct(const TeamMemberType,
885  const index_type& count) noexcept
886  : start(static_cast<index_type>(0)), end(count) {}
887 
888  KOKKOS_INLINE_FUNCTION
889  constexpr ThreadVectorRangeBoundariesStruct(const index_type& count) noexcept
890  : start(static_cast<index_type>(0)), end(count) {}
891 
892  KOKKOS_INLINE_FUNCTION
893  constexpr ThreadVectorRangeBoundariesStruct(
894  const TeamMemberType, const index_type& arg_begin,
895  const index_type& arg_end) noexcept
896  : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
897 
898  KOKKOS_INLINE_FUNCTION
899  constexpr ThreadVectorRangeBoundariesStruct(
900  const index_type& arg_begin, const index_type& arg_end) noexcept
901  : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
902 };
903 
// Holds a reference to a team member and nothing else; the distinct type name
// lets overloads tell it apart from VectorSingleStruct.
// The referenced team member must outlive this object.
904 template <class TeamMemberType>
905 struct ThreadSingleStruct {
906  const TeamMemberType& team_member;
907  KOKKOS_INLINE_FUNCTION
908  ThreadSingleStruct(const TeamMemberType& team_member_)
909  : team_member(team_member_) {}
910 };
911 
// Holds a reference to a team member and nothing else; the distinct type name
// lets overloads tell it apart from ThreadSingleStruct.
// The referenced team member must outlive this object.
912 template <class TeamMemberType>
913 struct VectorSingleStruct {
914  const TeamMemberType& team_member;
915  KOKKOS_INLINE_FUNCTION
916  VectorSingleStruct(const TeamMemberType& team_member_)
917  : team_member(team_member_) {}
918 };
919 
920 } // namespace Impl
921 
// Execution policy for parallel work over the threads within a team.
//
// Both declarations are deleted, and their last template parameter
// (_never_use_this_overload) does not appear in the function parameters, so
// it cannot be deduced from a call.
// NOTE(review): presumably these exist only as a documentation anchor and the
// usable overloads are supplied elsewhere - confirm.

// Count form: single `count` argument.
929 template <typename iType, class TeamMemberType, class _never_use_this_overload>
930 KOKKOS_INLINE_FUNCTION_DELETED
931  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
932  TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
933 
// Begin/end form: the two bounds may have different types; the result uses
// their std::common_type.
941 template <typename iType1, typename iType2, class TeamMemberType,
942  class _never_use_this_overload>
943 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
944  typename std::common_type<iType1, iType2>::type, TeamMemberType>
945 TeamThreadRange(const TeamMemberType&, const iType1& begin,
946  const iType2& end) = delete;
947 
// Deleted TeamVectorRange declarations, mirroring the TeamThreadRange pair.
// Their last template parameter (_never_use_this_overload) does not appear in
// the function parameters, so it cannot be deduced from a call.
// NOTE(review): the declared return type is TeamThreadRangeBoundariesStruct,
// not TeamVectorRangeBoundariesStruct. Harmless while the functions are
// deleted, but it looks like a copy-paste leftover - confirm.

// Count form: single `count` argument.
955 template <typename iType, class TeamMemberType, class _never_use_this_overload>
956 KOKKOS_INLINE_FUNCTION_DELETED
957  Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
958  TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
959 
// Begin/end form: the two bounds may have different types; the result uses
// their std::common_type.
967 template <typename iType1, typename iType2, class TeamMemberType,
968  class _never_use_this_overload>
969 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
970  typename std::common_type<iType1, iType2>::type, TeamMemberType>
971 TeamVectorRange(const TeamMemberType&, const iType1& begin,
972  const iType2& end) = delete;
973 
// Execution policy for a vector parallel loop.
//
// Both declarations are deleted, and their last template parameter
// (_never_use_this_overload) does not appear in the function parameters, so
// it cannot be deduced from a call.

// Count form: single `count` argument.
981 template <typename iType, class TeamMemberType, class _never_use_this_overload>
982 KOKKOS_INLINE_FUNCTION_DELETED
983  Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
984  ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
985 
// Begin/end form: unlike TeamThreadRange, both bounds share one type iType.
986 template <typename iType, class TeamMemberType, class _never_use_this_overload>
987 KOKKOS_INLINE_FUNCTION_DELETED
988  Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
989  ThreadVectorRange(const TeamMemberType&, const iType& arg_begin,
990  const iType& arg_end) = delete;
991 
992 #if defined(KOKKOS_ENABLE_PROFILING)
993 namespace Impl {
994 
// Chooses the name reported for a parallel construct.
//
// If the user-supplied label is non-empty it is used as is. Otherwise a
// default name is built from typeid(FunctorType).name(), followed by "/" and
// typeid(TagType).name() when a work tag is present (TagType != void).
//
// The label is held by reference: the string passed to the constructor must
// outlive this object.
//
// get() is const-qualified so that the name can be read through a const
// object or const reference; it was previously non-const although it never
// modified any state.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_same<TagType, void>::value>
struct ParallelConstructName;

// Tagged case: default name is "<functor type name>/<tag type name>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Only pay for the typeid lookups when no label was given.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }
  // Returns the label if non-empty, otherwise the generated default name.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};

// Untagged case: default name is "<functor type name>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // Only pay for the typeid lookup when no label was given.
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }
  // Returns the label if non-empty, otherwise the generated default name.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};
1027 
1028 } // namespace Impl
1029 #endif /* defined KOKKOS_ENABLE_PROFILING */
1030 
1031 } // namespace Kokkos
1032 
1033 namespace Kokkos {
1034 namespace Experimental {
1035 
1036 namespace Impl {
1037 template <class Property, class Policy>
1038 struct PolicyPropertyAdaptor;
1039 
1040 template <unsigned long P, class... Properties>
1041 struct PolicyPropertyAdaptor<WorkItemProperty::ImplWorkItemProperty<P>,
1042  RangePolicy<Properties...>> {
1043  typedef RangePolicy<Properties...> policy_in_t;
1044  typedef RangePolicy<typename policy_in_t::traits::execution_space,
1045  typename policy_in_t::traits::schedule_type,
1046  typename policy_in_t::traits::work_tag,
1047  typename policy_in_t::traits::index_type,
1048  typename policy_in_t::traits::iteration_pattern,
1049  typename policy_in_t::traits::launch_bounds,
1050  WorkItemProperty::ImplWorkItemProperty<P>>
1051  policy_out_t;
1052 };
1053 
1054 template <unsigned long P, class... Properties>
1055 struct PolicyPropertyAdaptor<WorkItemProperty::ImplWorkItemProperty<P>,
1056  TeamPolicy<Properties...>> {
1057  typedef TeamPolicy<Properties...> policy_in_t;
1058  typedef TeamPolicy<typename policy_in_t::traits::execution_space,
1059  typename policy_in_t::traits::schedule_type,
1060  typename policy_in_t::traits::work_tag,
1061  typename policy_in_t::traits::index_type,
1062  typename policy_in_t::traits::iteration_pattern,
1063  typename policy_in_t::traits::launch_bounds,
1064  WorkItemProperty::ImplWorkItemProperty<P>>
1065  policy_out_t;
1066 };
1067 } // namespace Impl
1068 
1069 template <class PolicyType, unsigned long P>
1070 constexpr typename Impl::PolicyPropertyAdaptor<
1071  WorkItemProperty::ImplWorkItemProperty<P>, PolicyType>::policy_out_t
1072 require(const PolicyType p, WorkItemProperty::ImplWorkItemProperty<P>) {
1073  return typename Impl::PolicyPropertyAdaptor<
1074  WorkItemProperty::ImplWorkItemProperty<P>, PolicyType>::policy_out_t(p);
1075 }
1076 } // namespace Experimental
1077 } // namespace Kokkos
1078 #endif /* #define KOKKOS_EXECPOLICY_HPP */
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)=delete
Execution policy for parallel work over the threads within a team.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
RangePolicy set_chunk_size(int chunk_size_) const
set chunk_size to a discrete value
RangePolicy execution_policy
Tag this class as an execution policy.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
member_type chunk_size() const
return chunk_size
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
RangePolicy(const member_type work_begin, const member_type work_end, Args...args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args...args)
Total range.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamVectorRange(const TeamMemberType &, const iType &count)=delete
Execution policy for parallel work over the threads within a team.
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
Execution policy for parallel work over a league of teams of threads.
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)=delete
Execution policy for a vector parallel loop.