Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
KokkosExp_MDRangePolicy.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
45 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
46 
47 #include <initializer_list>
48 
49 #include <Kokkos_Layout.hpp>
50 
51 #include<impl/KokkosExp_Host_IterateTile.hpp>
52 #include <Kokkos_ExecPolicy.hpp>
53 #include <Kokkos_Parallel.hpp>
54 
55 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
56 #include<Cuda/KokkosExp_Cuda_IterateTile.hpp>
57 #include <Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp>
58 #endif
59 
60 #if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM )
61 //#include<ROCm/KokkosExp_ROCm_IterateTile.hpp>
62 #include <ROCm/KokkosExp_ROCm_IterateTile_Refactor.hpp>
63 #endif
64 
65 namespace Kokkos {
66 
67 // ------------------------------------------------------------------ //
68 // Moved to Kokkos_Layout.hpp for more general accessibility
69 /*
70 enum class Iterate
71 {
72  Default, // Default for the device
73  Left, // Left indices stride fastest
74  Right, // Right indices stride fastest
75 };
76 */
77 
78 template <typename ExecSpace>
79 struct default_outer_direction
80 {
81  using type = Iterate;
82  #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM)
83  static constexpr Iterate value = Iterate::Left;
84  #else
85  static constexpr Iterate value = Iterate::Right;
86  #endif
87 };
88 
89 template <typename ExecSpace>
90 struct default_inner_direction
91 {
92  using type = Iterate;
93  #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM)
94  static constexpr Iterate value = Iterate::Left;
95  #else
96  static constexpr Iterate value = Iterate::Right;
97  #endif
98 };
99 
100 
101 // Iteration Pattern
102 template < unsigned N
103  , Iterate OuterDir = Iterate::Default
104  , Iterate InnerDir = Iterate::Default
105  >
106 struct Rank
107 {
108  static_assert( N != 0u, "Kokkos Error: rank 0 undefined");
109  static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range");
110  static_assert( N < 7u, "Kokkos Error: Unsupported rank...");
111 
112  using iteration_pattern = Rank<N, OuterDir, InnerDir>;
113 
114  static constexpr int rank = N;
115  static constexpr Iterate outer_direction = OuterDir;
116  static constexpr Iterate inner_direction = InnerDir;
117 };
118 
119 
120 // multi-dimensional iteration pattern
121 template <typename... Properties>
122 struct MDRangePolicy
123  : public Kokkos::Impl::PolicyTraits<Properties ...>
124 {
125  using traits = Kokkos::Impl::PolicyTraits<Properties ...>;
126  using range_policy = RangePolicy<Properties...>;
127 
128  typename traits::execution_space m_space;
129 
130  using impl_range_policy = RangePolicy< typename traits::execution_space
131  , typename traits::schedule_type
132  , typename traits::index_type
133  > ;
134 
135  typedef MDRangePolicy execution_policy; // needed for is_execution_space interrogation
136 
137  template<class ... OtherProperties>
138  friend struct MDRangePolicy;
139 
140  static_assert( !std::is_same<typename traits::iteration_pattern,void>::value
141  , "Kokkos Error: MD iteration pattern not defined" );
142 
143  using iteration_pattern = typename traits::iteration_pattern;
144  using work_tag = typename traits::work_tag;
145  using launch_bounds = typename traits::launch_bounds;
146  using member_type = typename range_policy::member_type;
147 
148  enum { rank = static_cast<int>(iteration_pattern::rank) };
149 
150  using index_type = typename traits::index_type;
151  using array_index_type = long;
152  using point_type = Kokkos::Array<array_index_type,rank>; //was index_type
153  using tile_type = Kokkos::Array<array_index_type,rank>;
154  // If point_type or tile_type is not templated on a signed integral type (if it is unsigned),
155  // then if user passes in intializer_list of runtime-determined values of
156  // signed integral type that are not const will receive a compiler error due
157  // to an invalid case for implicit conversion -
158  // "conversion from integer or unscoped enumeration type to integer type that cannot represent all values of the original, except where source is a constant expression whose value can be stored exactly in the target type"
159  // This would require the user to either pass a matching index_type parameter
160  // as template parameter to the MDRangePolicy or static_cast the individual values
161 
162  point_type m_lower;
163  point_type m_upper;
164  tile_type m_tile;
165  point_type m_tile_end;
166  index_type m_num_tiles;
167  index_type m_prod_tile_dims;
168 
169 /*
170  // NDE enum impl definition alternative - replace static constexpr int ?
171  enum { outer_direction = static_cast<int> (
172  (iteration_pattern::outer_direction != Iterate::Default)
173  ? iteration_pattern::outer_direction
174  : default_outer_direction< typename traits::execution_space>::value ) };
175 
176  enum { inner_direction = static_cast<int> (
177  iteration_pattern::inner_direction != Iterate::Default
178  ? iteration_pattern::inner_direction
179  : default_inner_direction< typename traits::execution_space>::value ) };
180 
181  enum { Right = static_cast<int>( Iterate::Right ) };
182  enum { Left = static_cast<int>( Iterate::Left ) };
183 */
184  //static constexpr int rank = iteration_pattern::rank;
185 
186  static constexpr int outer_direction = static_cast<int> (
187  (iteration_pattern::outer_direction != Iterate::Default)
188  ? iteration_pattern::outer_direction
189  : default_outer_direction< typename traits::execution_space>::value );
190 
191  static constexpr int inner_direction = static_cast<int> (
192  iteration_pattern::inner_direction != Iterate::Default
193  ? iteration_pattern::inner_direction
194  : default_inner_direction< typename traits::execution_space>::value ) ;
195 
196  // Ugly ugly workaround intel 14 not handling scoped enum correctly
197  static constexpr int Right = static_cast<int>( Iterate::Right );
198  static constexpr int Left = static_cast<int>( Iterate::Left );
199 
200  KOKKOS_INLINE_FUNCTION const typename traits::execution_space & space() const { return m_space ; }
201  template < typename LT , typename UT , typename TT = array_index_type >
202  MDRangePolicy(std::initializer_list<LT> const& lower, std::initializer_list<UT> const& upper, std::initializer_list<TT> const& tile = {} )
203  : m_space() {
204  init(lower, upper, tile);
205  }
206 
207  template < typename LT , typename UT , typename TT = array_index_type >
208  MDRangePolicy(const typename traits::execution_space & work_space,
209  std::initializer_list<LT> const& lower, std::initializer_list<UT> const& upper, std::initializer_list<TT> const& tile = {} )
210  : m_space( work_space ) {
211  init(lower, upper, tile);
212  }
213 
214  MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} )
215  : m_space()
216  , m_lower(lower)
217  , m_upper(upper)
218  , m_tile(tile)
219  , m_num_tiles(1)
220  , m_prod_tile_dims(1) {
221  init();
222  }
223 
224  MDRangePolicy( const typename traits::execution_space & work_space,
225  point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} )
226  : m_space( work_space )
227  , m_lower(lower)
228  , m_upper(upper)
229  , m_tile(tile)
230  , m_num_tiles(1)
231  , m_prod_tile_dims(1) {
232  init();
233  }
234 
235  template<class ... OtherProperties>
236  MDRangePolicy( const MDRangePolicy<OtherProperties...> p ):
237  m_space(p.m_space),
238  m_lower(p.m_lower),
239  m_upper(p.m_upper),
240  m_tile(p.m_tile),
241  m_tile_end(p.m_tile_end),
242  m_num_tiles(p.m_num_tiles),
243  m_prod_tile_dims(p.m_prod_tile_dims) {}
244 
245 private:
246 
247  void init() {
248  // Host
249  if ( true
250  #if defined(KOKKOS_ENABLE_CUDA)
251  && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
252  #endif
253  #if defined(KOKKOS_ENABLE_ROCM)
254  && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
255  #endif
256  )
257  {
258  index_type span;
259  for (int i=0; i<rank; ++i) {
260  span = m_upper[i] - m_lower[i];
261  if ( m_tile[i] <= 0 ) {
262  if ( ((int)inner_direction == (int)Right && (i < rank-1))
263  || ((int)inner_direction == (int)Left && (i > 0)) )
264  {
265  m_tile[i] = 2;
266  }
267  else {
268  m_tile[i] = (span == 0 ? 1 : span);
269  }
270  }
271  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
272  m_num_tiles *= m_tile_end[i];
273  m_prod_tile_dims *= m_tile[i];
274  }
275  }
276  #if defined(KOKKOS_ENABLE_CUDA)
277  else // Cuda
278  {
279  index_type span;
280  int increment = 1;
281  int rank_start = 0;
282  int rank_end = rank;
283  if((int)inner_direction == (int)Right) {
284  increment = -1;
285  rank_start = rank-1;
286  rank_end = -1;
287  }
288  for (int i=rank_start; i!=rank_end; i+=increment) {
289  span = m_upper[i] - m_lower[i];
290  if ( m_tile[i] <= 0 ) {
291  // TODO: determine what is a good default tile size for cuda
292  // may be rank dependent
293  if ( ((int)inner_direction == (int)Right && (i < rank-1))
294  || ((int)inner_direction == (int)Left && (i > 0)) )
295  {
296  if ( m_prod_tile_dims < 256 ) {
297  m_tile[i] = 2;
298  } else {
299  m_tile[i] = 1;
300  }
301  }
302  else {
303  m_tile[i] = 16;
304  }
305  }
306  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
307  m_num_tiles *= m_tile_end[i];
308  m_prod_tile_dims *= m_tile[i];
309  }
310  if ( m_prod_tile_dims > 1024 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
311  printf(" Tile dimensions exceed Cuda limits\n");
312  Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
313  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
314  }
315  }
316  #endif
317  #if defined(KOKKOS_ENABLE_ROCM)
318  else // ROCm
319  {
320  index_type span;
321  int increment = 1;
322  int rank_start = 0;
323  int rank_end = rank;
324  if((int)inner_direction == (int)Right) {
325  increment = -1;
326  rank_start = rank-1;
327  rank_end = -1;
328  }
329  for (int i=rank_start; i!=rank_end; i+=increment) {
330  span = m_upper[i] - m_lower[i];
331  if ( m_tile[i] <= 0 ) {
332  // TODO: determine what is a good default tile size for rocm
333  // may be rank dependent
334  if ( ((int)inner_direction == (int)Right && (i < rank-1))
335  || ((int)inner_direction == (int)Left && (i > 0)) )
336  {
337  if ( m_prod_tile_dims < 256 ) {
338  m_tile[i] = 4;
339  } else {
340  m_tile[i] = 1;
341  }
342  }
343  else {
344  m_tile[i] = 16;
345  }
346  }
347  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
348  m_num_tiles *= m_tile_end[i];
349  m_prod_tile_dims *= m_tile[i];
350  }
351  if ( m_prod_tile_dims > 1024 ) { //but product num_threads < 1024
352  printf(" Tile dimensions exceed ROCm limits\n");
353  Kokkos::abort(" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
354  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
355  }
356  }
357  #endif
358  }
359 
360  template < typename LT , typename UT , typename TT = array_index_type >
361  void init( std::initializer_list<LT> const& lower, std::initializer_list<UT> const& upper, std::initializer_list<TT> const& tile = {} )
362  {
363  if(static_cast<int>(m_lower.size()) != rank || static_cast<int>(m_upper.size()) != rank)
364  Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size");
365 
366  for ( auto i = 0; i < rank; ++i ) {
367  m_lower[i] = static_cast<array_index_type>(lower.begin()[i]);
368  m_upper[i] = static_cast<array_index_type>(upper.begin()[i]);
369  if(static_cast<int>(tile.size())==rank)
370  m_tile[i] = static_cast<array_index_type>(tile.begin()[i]);
371  else
372  m_tile[i] = 0;
373  }
374 
375  m_num_tiles = 1;
376  m_prod_tile_dims = 1;
377 
378  // Host
379  if ( true
380  #if defined(KOKKOS_ENABLE_CUDA)
381  && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
382  #endif
383  #if defined(KOKKOS_ENABLE_ROCM)
384  && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
385  #endif
386  )
387  {
388  index_type span;
389  for (int i=0; i<rank; ++i) {
390  span = m_upper[i] - m_lower[i];
391  if ( m_tile[i] <= 0 ) {
392  if ( ((int)inner_direction == (int)Right && (i < rank-1))
393  || ((int)inner_direction == (int)Left && (i > 0)) )
394  {
395  m_tile[i] = 2;
396  }
397  else {
398  m_tile[i] = (span == 0 ? 1 : span);
399  }
400  }
401  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
402  m_num_tiles *= m_tile_end[i];
403  m_prod_tile_dims *= m_tile[i];
404  }
405  }
406  #if defined(KOKKOS_ENABLE_CUDA)
407  else // Cuda
408  {
409  index_type span;
410  int increment = 1;
411  int rank_start = 0;
412  int rank_end = rank;
413  if((int)inner_direction == (int)Right) {
414  increment = -1;
415  rank_start = rank-1;
416  rank_end = -1;
417  }
418  for (int i=rank_start; i!=rank_end; i+=increment) {
419  span = m_upper[i] - m_lower[i];
420  if ( m_tile[i] <= 0 ) {
421  // TODO: determine what is a good default tile size for cuda
422  // may be rank dependent
423  if ( ((int)inner_direction == (int)Right && (i < rank-1))
424  || ((int)inner_direction == (int)Left && (i > 0)) )
425  {
426  if ( m_prod_tile_dims < 256 ) {
427  m_tile[i] = 2;
428  } else {
429  m_tile[i] = 1;
430  }
431  }
432  else {
433  m_tile[i] = 16;
434  }
435  }
436  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
437  m_num_tiles *= m_tile_end[i];
438  m_prod_tile_dims *= m_tile[i];
439  }
440  if ( m_prod_tile_dims > 1024 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
441  printf(" Tile dimensions exceed Cuda limits\n");
442  Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
443  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
444  }
445  }
446  #endif
447  #if defined(KOKKOS_ENABLE_ROCM)
448  else // ROCm
449  {
450  index_type span;
451  int increment = 1;
452  int rank_start = 0;
453  int rank_end = rank;
454  if((int)inner_direction == (int)Right) {
455  increment = -1;
456  rank_start = rank-1;
457  rank_end = -1;
458  }
459  for (int i=rank_start; i!=rank_end; i+=increment) {
460  span = m_upper[i] - m_lower[i];
461  if ( m_tile[i] <= 0 ) {
462  // TODO: determine what is a good default tile size for cuda
463  // may be rank dependent
464  if ( ((int)inner_direction == (int)Right && (i < rank-1))
465  || ((int)inner_direction == (int)Left && (i > 0)) )
466  {
467  if ( m_prod_tile_dims < 256 ) {
468  m_tile[i] = 2;
469  } else {
470  m_tile[i] = 1;
471  }
472  }
473  else {
474  m_tile[i] = 16;
475  }
476  }
477  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
478  m_num_tiles *= m_tile_end[i];
479  m_prod_tile_dims *= m_tile[i];
480  }
481  if ( m_prod_tile_dims > 1024 ) { // Match ROCm restriction for ParallelReduce; 1024,1024,1024 max per dim , but product num_threads < 1024
482  printf(" Tile dimensions exceed ROCm limits\n");
483  Kokkos::abort(" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
484  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
485  }
486  }
487  #endif
488  }
489 
490 };
491 
492 } // namespace Kokkos
493 
494 // For backward compatibility
495 namespace Kokkos { namespace Experimental {
496  using Kokkos::MDRangePolicy;
497  using Kokkos::Rank;
498  using Kokkos::Iterate;
499 } } // end Kokkos::Experimental
500 // ------------------------------------------------------------------ //
501 
502 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
503 // ------------------------------------------------------------------ //
504 //md_parallel_for - deprecated use parallel_for
505 // ------------------------------------------------------------------ //
506 
507 namespace Kokkos { namespace Experimental {
508 
509 template <typename MDRange, typename Functor, typename Enable = void>
510 void md_parallel_for( MDRange const& range
511  , Functor const& f
512  , const std::string& str = ""
513  , typename std::enable_if<( true
514  #if defined( KOKKOS_ENABLE_CUDA)
515  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
516  #endif
517  #if defined( KOKKOS_ENABLE_ROCM)
518  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
519  #endif
520  ) >::type* = 0
521  )
522 {
523  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
524 
525  using range_policy = typename MDRange::impl_range_policy;
526 
527  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
528 }
529 
530 template <typename MDRange, typename Functor>
531 void md_parallel_for( const std::string& str
532  , MDRange const& range
533  , Functor const& f
534  , typename std::enable_if<( true
535  #if defined( KOKKOS_ENABLE_CUDA)
536  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
537  #endif
538  #if defined( KOKKOS_ENABLE_ROCM)
539  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
540  #endif
541  ) >::type* = 0
542  )
543 {
544  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
545 
546  using range_policy = typename MDRange::impl_range_policy;
547 
548  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
549 }
550 
551 // Cuda specialization
552 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
553 template <typename MDRange, typename Functor>
554 void md_parallel_for( const std::string& str
555  , MDRange const& range
556  , Functor const& f
557  , typename std::enable_if<( true
558  #if defined( KOKKOS_ENABLE_CUDA)
559  && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
560  #endif
561  ) >::type* = 0
562  )
563 {
564  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
565  closure.execute();
566 }
567 
568 template <typename MDRange, typename Functor>
569 void md_parallel_for( MDRange const& range
570  , Functor const& f
571  , const std::string& str = ""
572  , typename std::enable_if<( true
573  #if defined( KOKKOS_ENABLE_CUDA)
574  && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
575  #endif
576  ) >::type* = 0
577  )
578 {
579  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
580  closure.execute();
581 }
582 #endif
583 // ------------------------------------------------------------------ //
584 
585 // ------------------------------------------------------------------ //
586 //md_parallel_reduce - deprecated use parallel_reduce
587 // ------------------------------------------------------------------ //
588 template <typename MDRange, typename Functor, typename ValueType>
589 void md_parallel_reduce( MDRange const& range
590  , Functor const& f
591  , ValueType & v
592  , const std::string& str = ""
593  , typename std::enable_if<( true
594  #if defined( KOKKOS_ENABLE_CUDA)
595  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
596  #endif
597  #if defined( KOKKOS_ENABLE_ROCM)
598  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
599  #endif
600  ) >::type* = 0
601  )
602 {
603  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
604 
605  using range_policy = typename MDRange::impl_range_policy;
606  Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
607 }
608 
609 template <typename MDRange, typename Functor, typename ValueType>
610 void md_parallel_reduce( const std::string& str
611  , MDRange const& range
612  , Functor const& f
613  , ValueType & v
614  , typename std::enable_if<( true
615  #if defined( KOKKOS_ENABLE_CUDA)
616  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
617  #endif
618  #if defined( KOKKOS_ENABLE_ROCM)
619  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
620  #endif
621  ) >::type* = 0
622  )
623 {
624  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
625 
626  using range_policy = typename MDRange::impl_range_policy;
627 
628  Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
629 }
630 
631 // Cuda - md_parallel_reduce not implemented - use parallel_reduce
632 
633 } } // namespace Kokkos::Experimental
634 #endif
635 
636 namespace Kokkos {
637 namespace Experimental {
638 namespace Impl {
639 
640 template<unsigned long P, class ... Properties>
641 struct PolicyPropertyAdaptor<WorkItemProperty::ImplWorkItemProperty<P>,MDRangePolicy<Properties...>> {
642  typedef MDRangePolicy<Properties...> policy_in_t;
643  typedef MDRangePolicy<typename policy_in_t::traits::execution_space,
644  typename policy_in_t::traits::schedule_type,
645  typename policy_in_t::traits::work_tag,
646  typename policy_in_t::traits::index_type,
647  typename policy_in_t::traits::iteration_pattern,
648  typename policy_in_t::traits::launch_bounds,
649  WorkItemProperty::ImplWorkItemProperty<P>> policy_out_t;
650 };
651 
652 }
653 }
654 }
655 
656 
657 #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
658 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
Parallel reduction.
Declaration of various MemoryLayout options.
Declaration of parallel operators.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P...> &V)
Temporary free function rank() until rank() is implemented in the View.