KokkosExp_MDRangePolicy.hpp
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
#define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP

#include <initializer_list>

#include <Kokkos_Layout.hpp>

#include <impl/KokkosExp_Host_IterateTile.hpp>
#include <Kokkos_ExecPolicy.hpp>
#include <Kokkos_Parallel.hpp>

#if defined(__CUDACC__) && defined(KOKKOS_ENABLE_CUDA)
#include <Cuda/KokkosExp_Cuda_IterateTile.hpp>
#include <Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp>
#endif

#if defined(__HCC__) && defined(KOKKOS_ENABLE_ROCM)
//#include <ROCm/KokkosExp_ROCm_IterateTile.hpp>
#include <ROCm/KokkosExp_ROCm_IterateTile_Refactor.hpp>
#endif

#if defined(__HIPCC__) && defined(KOKKOS_ENABLE_HIP)
#include <HIP/KokkosExp_HIP_IterateTile.hpp>
#endif

namespace Kokkos {

// ------------------------------------------------------------------ //
// Moved to Kokkos_Layout.hpp for more general accessibility
/*
enum class Iterate
{
  Default, // Default for the device
  Left,    // Left indices stride fastest
  Right,   // Right indices stride fastest
};
*/

template <typename ExecSpace>
struct default_outer_direction {
  using type = Iterate;
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \
    defined(KOKKOS_ENABLE_HIP)
  static constexpr Iterate value = Iterate::Left;
#else
  static constexpr Iterate value = Iterate::Right;
#endif
};

template <typename ExecSpace>
struct default_inner_direction {
  using type = Iterate;
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \
    defined(KOKKOS_ENABLE_HIP)
  static constexpr Iterate value = Iterate::Left;
#else
  static constexpr Iterate value = Iterate::Right;
#endif
};

// Iteration Pattern
template <unsigned N, Iterate OuterDir = Iterate::Default,
          Iterate InnerDir = Iterate::Default>
struct Rank {
  static_assert(N != 0u, "Kokkos Error: rank 0 undefined");
  static_assert(N != 1u,
                "Kokkos Error: rank 1 is not a multi-dimensional range");
  static_assert(N < 7u, "Kokkos Error: Unsupported rank...");

  using iteration_pattern = Rank<N, OuterDir, InnerDir>;

  static constexpr int rank = N;
  static constexpr Iterate outer_direction = OuterDir;
  static constexpr Iterate inner_direction = InnerDir;
};
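
// Illustrative sketch (not part of this header): Rank<N, Outer, Inner> is the
// iteration pattern handed to MDRangePolicy; it fixes the number of dimensions
// and, optionally, the outer/inner tile-iteration directions. `nx`, `ny`, `nz`
// below are placeholder extents.
//
//   using policy3d = Kokkos::MDRangePolicy<
//       Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>>;
//   policy3d policy({0, 0, 0}, {nx, ny, nz});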

// multi-dimensional iteration pattern
template <typename... Properties>
struct MDRangePolicy : public Kokkos::Impl::PolicyTraits<Properties...> {
  using traits = Kokkos::Impl::PolicyTraits<Properties...>;
  using range_policy = RangePolicy<Properties...>;

  typename traits::execution_space m_space;

  using impl_range_policy =
      RangePolicy<typename traits::execution_space,
                  typename traits::schedule_type, typename traits::index_type>;

  typedef MDRangePolicy
      execution_policy;  // needed for is_execution_space interrogation

  template <class... OtherProperties>
  friend struct MDRangePolicy;

  static_assert(!std::is_same<typename traits::iteration_pattern, void>::value,
                "Kokkos Error: MD iteration pattern not defined");

  using iteration_pattern = typename traits::iteration_pattern;
  using work_tag = typename traits::work_tag;
  using launch_bounds = typename traits::launch_bounds;
  using member_type = typename range_policy::member_type;

  enum { rank = static_cast<int>(iteration_pattern::rank) };

  using index_type = typename traits::index_type;
  using array_index_type = long;
  using point_type = Kokkos::Array<array_index_type, rank>;  // was index_type
  using tile_type = Kokkos::Array<array_index_type, rank>;
  // If point_type or tile_type is not templated on a signed integral type
  // (i.e. it is unsigned), then a user who passes an initializer_list of
  // runtime-determined, non-const values of a signed integral type will get a
  // compiler error due to an invalid case for implicit conversion -
  // "conversion from integer or unscoped enumeration type to integer type that
  // cannot represent all values of the original, except where source is a
  // constant expression whose value can be stored exactly in the target type".
  // The user would then have to either pass a matching index_type as a
  // template parameter to the MDRangePolicy or static_cast the individual
  // values.
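
  // Illustrative sketch of the two workarounds mentioned above; `n0` and `n1`
  // are placeholder runtime values whose type differs from the policy's index
  // type.
  //
  //   // (a) static_cast each value to the policy's index type:
  //   Kokkos::MDRangePolicy<Kokkos::Rank<2>> pa(
  //       {0, 0}, {static_cast<long>(n0), static_cast<long>(n1)});
  //
  //   // (b) pass a matching index type as a policy template argument:
  //   Kokkos::MDRangePolicy<Kokkos::Rank<2>, Kokkos::IndexType<int>> pb(
  //       {0, 0}, {n0, n1});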

  point_type m_lower;
  point_type m_upper;
  tile_type m_tile;
  point_type m_tile_end;
  index_type m_num_tiles;
  index_type m_prod_tile_dims;

  /*
  // NDE enum impl definition alternative - replace static constexpr int ?
  enum { outer_direction = static_cast<int> (
      (iteration_pattern::outer_direction != Iterate::Default)
    ? iteration_pattern::outer_direction
    : default_outer_direction< typename traits::execution_space>::value ) };

  enum { inner_direction = static_cast<int> (
      iteration_pattern::inner_direction != Iterate::Default
    ? iteration_pattern::inner_direction
    : default_inner_direction< typename traits::execution_space>::value ) };

  enum { Right = static_cast<int>( Iterate::Right ) };
  enum { Left  = static_cast<int>( Iterate::Left ) };
  */
  // static constexpr int rank = iteration_pattern::rank;

  static constexpr int outer_direction = static_cast<int>(
      (iteration_pattern::outer_direction != Iterate::Default)
          ? iteration_pattern::outer_direction
          : default_outer_direction<typename traits::execution_space>::value);

  static constexpr int inner_direction = static_cast<int>(
      iteration_pattern::inner_direction != Iterate::Default
          ? iteration_pattern::inner_direction
          : default_inner_direction<typename traits::execution_space>::value);

  // Ugly workaround for Intel 14 not handling scoped enums correctly
  static constexpr int Right = static_cast<int>(Iterate::Right);
  static constexpr int Left = static_cast<int>(Iterate::Left);

  KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
    return m_space;
  }
  template <typename LT, typename UT, typename TT = array_index_type>
  MDRangePolicy(std::initializer_list<LT> const& lower,
                std::initializer_list<UT> const& upper,
                std::initializer_list<TT> const& tile = {})
      : m_space() {
    init(lower, upper, tile);
  }

  template <typename LT, typename UT, typename TT = array_index_type>
  MDRangePolicy(const typename traits::execution_space& work_space,
                std::initializer_list<LT> const& lower,
                std::initializer_list<UT> const& upper,
                std::initializer_list<TT> const& tile = {})
      : m_space(work_space) {
    init(lower, upper, tile);
  }

  MDRangePolicy(point_type const& lower, point_type const& upper,
                tile_type const& tile = tile_type{})
      : m_space(),
        m_lower(lower),
        m_upper(upper),
        m_tile(tile),
        m_num_tiles(1),
        m_prod_tile_dims(1) {
    init();
  }

  MDRangePolicy(const typename traits::execution_space& work_space,
                point_type const& lower, point_type const& upper,
                tile_type const& tile = tile_type{})
      : m_space(work_space),
        m_lower(lower),
        m_upper(upper),
        m_tile(tile),
        m_num_tiles(1),
        m_prod_tile_dims(1) {
    init();
  }

  template <class... OtherProperties>
  MDRangePolicy(const MDRangePolicy<OtherProperties...> p)
      : m_space(p.m_space),
        m_lower(p.m_lower),
        m_upper(p.m_upper),
        m_tile(p.m_tile),
        m_tile_end(p.m_tile_end),
        m_num_tiles(p.m_num_tiles),
        m_prod_tile_dims(p.m_prod_tile_dims) {}
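
  // Illustrative sketch of the constructors above (assumes the CUDA backend is
  // enabled; `cuda_instance` is a placeholder Kokkos::Cuda execution space
  // instance). A tile argument that is omitted, or whose entries are <= 0,
  // asks init() below to pick default tile sizes.
  //
  //   using policy2d = Kokkos::MDRangePolicy<Kokkos::Cuda, Kokkos::Rank<2>>;
  //
  //   // Default execution space instance and default tiles:
  //   policy2d pa({0, 0}, {128, 128});
  //
  //   // Explicit execution space instance and explicit 16x16 tiles:
  //   policy2d pb(cuda_instance, {0, 0}, {128, 128}, {16, 16});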

 private:
  void init() {
    // Host
    if (true
#if defined(KOKKOS_ENABLE_CUDA)
        && !std::is_same<typename traits::execution_space, Kokkos::Cuda>::value
#endif
#if defined(KOKKOS_ENABLE_ROCM)
        && !std::is_same<typename traits::execution_space,
                         Kokkos::Experimental::ROCm>::value
#endif
#if defined(KOKKOS_ENABLE_HIP)
        && !std::is_same<typename traits::execution_space,
                         Kokkos::Experimental::HIP>::value
#endif
    ) {
      index_type span;
      for (int i = 0; i < rank; ++i) {
        span = m_upper[i] - m_lower[i];
        if (m_tile[i] <= 0) {
          if (((int)inner_direction == (int)Right && (i < rank - 1)) ||
              ((int)inner_direction == (int)Left && (i > 0))) {
            m_tile[i] = 2;
          } else {
            m_tile[i] = (span == 0 ? 1 : span);
          }
        }
        m_tile_end[i] =
            static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
        m_num_tiles *= m_tile_end[i];
        m_prod_tile_dims *= m_tile[i];
      }
    }
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
    else  // Cuda or HIP
    {
      index_type span;
      int increment = 1;
      int rank_start = 0;
      int rank_end = rank;
      if ((int)inner_direction == (int)Right) {
        increment = -1;
        rank_start = rank - 1;
        rank_end = -1;
      }
      bool is_cuda_exec_space =
#if defined(KOKKOS_ENABLE_CUDA)
          std::is_same<typename traits::execution_space, Kokkos::Cuda>::value;
#else
          false;
#endif
      for (int i = rank_start; i != rank_end; i += increment) {
        span = m_upper[i] - m_lower[i];
        if (m_tile[i] <= 0) {
          // TODO: determine what is a good default tile size for Cuda and HIP;
          // may be rank dependent
          if (((int)inner_direction == (int)Right && (i < rank - 1)) ||
              ((int)inner_direction == (int)Left && (i > 0))) {
            if (m_prod_tile_dims < 256) {
              m_tile[i] = (is_cuda_exec_space) ? 2 : 4;
            } else {
              m_tile[i] = 1;
            }
          } else {
            m_tile[i] = 16;
          }
        }
        m_tile_end[i] =
            static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
        m_num_tiles *= m_tile_end[i];
        m_prod_tile_dims *= m_tile[i];
      }
      if (m_prod_tile_dims >
          1024) {  // Match Cuda restriction for ParallelReduce; 1024,1024,64
                   // max per dim (Kepler), but product num_threads < 1024
        if (is_cuda_exec_space) {
          printf(" Tile dimensions exceed Cuda limits\n");
          Kokkos::abort(
              " Cuda ExecSpace Error: MDRange tile dims exceed maximum number "
              "of threads per block - choose smaller tile dims");
        } else {
          printf(" Tile dimensions exceed HIP limits\n");
          Kokkos::abort(
              "HIP ExecSpace Error: MDRange tile dims exceed maximum number "
              "of threads per block - choose smaller tile dims");
        }
      }
    }
#endif
#if defined(KOKKOS_ENABLE_ROCM)
    else  // ROCm
    {
      index_type span;
      int increment = 1;
      int rank_start = 0;
      int rank_end = rank;
      if ((int)inner_direction == (int)Right) {
        increment = -1;
        rank_start = rank - 1;
        rank_end = -1;
      }
      for (int i = rank_start; i != rank_end; i += increment) {
        span = m_upper[i] - m_lower[i];
        if (m_tile[i] <= 0) {
          // TODO: determine what is a good default tile size for ROCm;
          // may be rank dependent
          if (((int)inner_direction == (int)Right && (i < rank - 1)) ||
              ((int)inner_direction == (int)Left && (i > 0))) {
            if (m_prod_tile_dims < 256) {
              m_tile[i] = 4;
            } else {
              m_tile[i] = 1;
            }
          } else {
            m_tile[i] = 16;
          }
        }
        m_tile_end[i] =
            static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
        m_num_tiles *= m_tile_end[i];
        m_prod_tile_dims *= m_tile[i];
      }
      if (m_prod_tile_dims > 1024) {  // but product num_threads < 1024
        printf(" Tile dimensions exceed ROCm limits\n");
        Kokkos::abort(
            " ROCm ExecSpace Error: MDRange tile dims exceed maximum number "
            "of threads per block - choose smaller tile dims");
        // Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error:
        // MDRange tile dims exceed maximum number of threads per block -
        // choose smaller tile dims");
      }
    }
#endif
  }

  template <typename LT, typename UT, typename TT = array_index_type>
  void init(std::initializer_list<LT> const& lower,
            std::initializer_list<UT> const& upper,
            std::initializer_list<TT> const& tile = {}) {
    if (static_cast<int>(m_lower.size()) != rank ||
        static_cast<int>(m_upper.size()) != rank)
      Kokkos::abort(
          "MDRangePolicy: Constructor initializer lists have wrong size");

    for (auto i = 0; i < rank; ++i) {
      m_lower[i] = static_cast<array_index_type>(lower.begin()[i]);
      m_upper[i] = static_cast<array_index_type>(upper.begin()[i]);
      if (static_cast<int>(tile.size()) == rank)
        m_tile[i] = static_cast<array_index_type>(tile.begin()[i]);
      else
        m_tile[i] = 0;
    }

    m_num_tiles = 1;
    m_prod_tile_dims = 1;

    // Host
    if (true
#if defined(KOKKOS_ENABLE_CUDA)
        && !std::is_same<typename traits::execution_space, Kokkos::Cuda>::value
#endif
#if defined(KOKKOS_ENABLE_ROCM)
        && !std::is_same<typename traits::execution_space,
                         Kokkos::Experimental::ROCm>::value
#endif
#if defined(KOKKOS_ENABLE_HIP)
        && !std::is_same<typename traits::execution_space,
                         Kokkos::Experimental::HIP>::value
#endif
    ) {
      index_type span;
      for (int i = 0; i < rank; ++i) {
        span = m_upper[i] - m_lower[i];
        if (m_tile[i] <= 0) {
          if (((int)inner_direction == (int)Right && (i < rank - 1)) ||
              ((int)inner_direction == (int)Left && (i > 0))) {
            m_tile[i] = 2;
          } else {
            m_tile[i] = (span == 0 ? 1 : span);
          }
        }
        m_tile_end[i] =
            static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
        m_num_tiles *= m_tile_end[i];
        m_prod_tile_dims *= m_tile[i];
      }
    }
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
    else  // Cuda or HIP
    {
      index_type span;
      int increment = 1;
      int rank_start = 0;
      int rank_end = rank;
      if ((int)inner_direction == (int)Right) {
        increment = -1;
        rank_start = rank - 1;
        rank_end = -1;
      }
      for (int i = rank_start; i != rank_end; i += increment) {
        span = m_upper[i] - m_lower[i];
        if (m_tile[i] <= 0) {
          // TODO: determine what is a good default tile size for Cuda and HIP;
          // may be rank dependent
          if (((int)inner_direction == (int)Right && (i < rank - 1)) ||
              ((int)inner_direction == (int)Left && (i > 0))) {
            if (m_prod_tile_dims < 256) {
              m_tile[i] = 2;
            } else {
              m_tile[i] = 1;
            }
          } else {
            m_tile[i] = 16;
          }
        }
        m_tile_end[i] =
            static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
        m_num_tiles *= m_tile_end[i];
        m_prod_tile_dims *= m_tile[i];
      }
      if (m_prod_tile_dims >
          1024) {  // Match Cuda restriction for ParallelReduce; 1024,1024,64
                   // max per dim (Kepler), but product num_threads < 1024
#if defined(KOKKOS_ENABLE_CUDA)
        printf(" Tile dimensions exceed Cuda limits\n");
        Kokkos::abort(
            " Cuda ExecSpace Error: MDRange tile dims exceed maximum number "
            "of threads per block - choose smaller tile dims");
#else
        printf(" Tile dimensions exceed HIP limits\n");
        Kokkos::abort(
            " HIP ExecSpace Error: MDRange tile dims exceed maximum number "
            "of threads per block - choose smaller tile dims");
#endif
      }
    }
#endif
#if defined(KOKKOS_ENABLE_ROCM)
    else  // ROCm
    {
      index_type span;
      int increment = 1;
      int rank_start = 0;
      int rank_end = rank;
      if ((int)inner_direction == (int)Right) {
        increment = -1;
        rank_start = rank - 1;
        rank_end = -1;
      }
      for (int i = rank_start; i != rank_end; i += increment) {
        span = m_upper[i] - m_lower[i];
        if (m_tile[i] <= 0) {
          // TODO: determine what is a good default tile size for ROCm;
          // may be rank dependent
          if (((int)inner_direction == (int)Right && (i < rank - 1)) ||
              ((int)inner_direction == (int)Left && (i > 0))) {
            if (m_prod_tile_dims < 256) {
              m_tile[i] = 2;
            } else {
              m_tile[i] = 1;
            }
          } else {
            m_tile[i] = 16;
          }
        }
        m_tile_end[i] =
            static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
        m_num_tiles *= m_tile_end[i];
        m_prod_tile_dims *= m_tile[i];
      }
      if (m_prod_tile_dims >
          1024) {  // Match ROCm restriction for ParallelReduce; 1024,1024,1024
                   // max per dim, but product num_threads < 1024
        printf(" Tile dimensions exceed ROCm limits\n");
        Kokkos::abort(
            " ROCm ExecSpace Error: MDRange tile dims exceed maximum number "
            "of threads per block - choose smaller tile dims");
        // Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error:
        // MDRange tile dims exceed maximum number of threads per block -
        // choose smaller tile dims");
      }
    }
#endif
  }
};
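
// Illustrative end-to-end sketch (not part of this header): an MDRangePolicy
// is passed directly to parallel_for / parallel_reduce. `A` is a placeholder
// rank-2 View and `N`, `M` are placeholder extents.
//
//   Kokkos::parallel_for(
//       "init_A", Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {N, M}),
//       KOKKOS_LAMBDA(const int i, const int j) { A(i, j) = i * M + j; });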

}  // namespace Kokkos

// For backward compatibility
namespace Kokkos {
namespace Experimental {
using Kokkos::Iterate;
using Kokkos::MDRangePolicy;
using Kokkos::Rank;
}  // namespace Experimental
}  // namespace Kokkos
// ------------------------------------------------------------------ //

#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
// ------------------------------------------------------------------ //
// md_parallel_for - deprecated; use parallel_for instead
// ------------------------------------------------------------------ //

namespace Kokkos {
namespace Experimental {

template <typename MDRange, typename Functor, typename Enable = void>
void md_parallel_for(
    MDRange const& range, Functor const& f, const std::string& str = "",
    typename std::enable_if<
        (true
#if defined(KOKKOS_ENABLE_CUDA)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Cuda>::value
#endif
#if defined(KOKKOS_ENABLE_ROCM)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Experimental::ROCm>::value
#endif
         )>::type* = 0) {
  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);

  using range_policy = typename MDRange::impl_range_policy;

  Kokkos::parallel_for(range_policy(0, range.m_num_tiles).set_chunk_size(1), g,
                       str);
}

template <typename MDRange, typename Functor>
void md_parallel_for(
    const std::string& str, MDRange const& range, Functor const& f,
    typename std::enable_if<
        (true
#if defined(KOKKOS_ENABLE_CUDA)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Cuda>::value
#endif
#if defined(KOKKOS_ENABLE_ROCM)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Experimental::ROCm>::value
#endif
         )>::type* = 0) {
  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);

  using range_policy = typename MDRange::impl_range_policy;

  Kokkos::parallel_for(range_policy(0, range.m_num_tiles).set_chunk_size(1), g,
                       str);
}

// Cuda specialization
#if defined(__CUDACC__) && defined(KOKKOS_ENABLE_CUDA)
template <typename MDRange, typename Functor>
void md_parallel_for(
    const std::string& str, MDRange const& range, Functor const& f,
    typename std::enable_if<
        (true
#if defined(KOKKOS_ENABLE_CUDA)
         && std::is_same<typename MDRange::range_policy::execution_space,
                         Kokkos::Cuda>::value
#endif
         )>::type* = 0) {
  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag>
      closure(range, f);
  closure.execute();
}

template <typename MDRange, typename Functor>
void md_parallel_for(
    MDRange const& range, Functor const& f, const std::string& str = "",
    typename std::enable_if<
        (true
#if defined(KOKKOS_ENABLE_CUDA)
         && std::is_same<typename MDRange::range_policy::execution_space,
                         Kokkos::Cuda>::value
#endif
         )>::type* = 0) {
  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag>
      closure(range, f);
  closure.execute();
}
#endif
// ------------------------------------------------------------------ //
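
// Illustrative migration sketch: a deprecated md_parallel_for call and its
// parallel_for replacement. `range` is a placeholder MDRangePolicy and `f` a
// placeholder functor.
//
//   Kokkos::Experimental::md_parallel_for(range, f, "label");  // deprecated
//   Kokkos::parallel_for("label", range, f);                   // preferred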

// ------------------------------------------------------------------ //
// md_parallel_reduce - deprecated; use parallel_reduce instead
// ------------------------------------------------------------------ //
template <typename MDRange, typename Functor, typename ValueType>
void md_parallel_reduce(
    MDRange const& range, Functor const& f, ValueType& v,
    const std::string& str = "",
    typename std::enable_if<
        (true
#if defined(KOKKOS_ENABLE_CUDA)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Cuda>::value
#endif
#if defined(KOKKOS_ENABLE_ROCM)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Experimental::ROCm>::value
#endif
         )>::type* = 0) {
  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range,
                                                                       f);

  using range_policy = typename MDRange::impl_range_policy;
  Kokkos::parallel_reduce(
      str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v);
}

template <typename MDRange, typename Functor, typename ValueType>
void md_parallel_reduce(
    const std::string& str, MDRange const& range, Functor const& f,
    ValueType& v,
    typename std::enable_if<
        (true
#if defined(KOKKOS_ENABLE_CUDA)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Cuda>::value
#endif
#if defined(KOKKOS_ENABLE_ROCM)
         && !std::is_same<typename MDRange::range_policy::execution_space,
                          Kokkos::Experimental::ROCm>::value
#endif
         )>::type* = 0) {
  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range,
                                                                       f);

  using range_policy = typename MDRange::impl_range_policy;

  Kokkos::parallel_reduce(
      str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v);
}

// Cuda - md_parallel_reduce not implemented - use parallel_reduce
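
// Illustrative migration sketch: a deprecated md_parallel_reduce call and its
// parallel_reduce replacement, which also covers the Cuda backend. `range`,
// `f`, and `sum` are placeholders.
//
//   Kokkos::Experimental::md_parallel_reduce(range, f, sum, "label");  // deprecated
//   Kokkos::parallel_reduce("label", range, f, sum);                   // preferred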

}  // namespace Experimental
}  // namespace Kokkos
#endif

namespace Kokkos {
namespace Experimental {
namespace Impl {

template <unsigned long P, class... Properties>
struct PolicyPropertyAdaptor<WorkItemProperty::ImplWorkItemProperty<P>,
                             MDRangePolicy<Properties...>> {
  typedef MDRangePolicy<Properties...> policy_in_t;
  typedef MDRangePolicy<typename policy_in_t::traits::execution_space,
                        typename policy_in_t::traits::schedule_type,
                        typename policy_in_t::traits::work_tag,
                        typename policy_in_t::traits::index_type,
                        typename policy_in_t::traits::iteration_pattern,
                        typename policy_in_t::traits::launch_bounds,
                        WorkItemProperty::ImplWorkItemProperty<P>>
      policy_out_t;
};

}  // namespace Impl
}  // namespace Experimental
}  // namespace Kokkos
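
// Illustrative sketch: PolicyPropertyAdaptor above is the hook used when a
// work-item property is attached to an MDRangePolicy, e.g. via
// Kokkos::Experimental::require (the property name below is an assumed
// example).
//
//   auto hinted = Kokkos::Experimental::require(
//       Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {N, M}),
//       Kokkos::Experimental::WorkItemProperty::HintLightWeight);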

#endif  // KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP