Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
KokkosExp_MDRangePolicy.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
45 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
46 
47 #include <initializer_list>
48 
49 #include <Kokkos_Layout.hpp>
50 
51 #include<impl/KokkosExp_Host_IterateTile.hpp>
52 #include <Kokkos_ExecPolicy.hpp>
53 #include <Kokkos_Parallel.hpp>
54 
55 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
56 #include<Cuda/KokkosExp_Cuda_IterateTile.hpp>
57 #include <Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp>
58 #endif
59 
60 #if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM )
61 //#include<ROCm/KokkosExp_ROCm_IterateTile.hpp>
62 #include <ROCm/KokkosExp_ROCm_IterateTile_Refactor.hpp>
63 #endif
64 
65 namespace Kokkos {
66 
67 // ------------------------------------------------------------------ //
68 // Moved to Kokkos_Layout.hpp for more general accessibility
69 /*
70 enum class Iterate
71 {
72  Default, // Default for the device
73  Left, // Left indices stride fastest
74  Right, // Right indices stride fastest
75 };
76 */
77 
78 template <typename ExecSpace>
79 struct default_outer_direction
80 {
81  using type = Iterate;
82  #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM)
83  static constexpr Iterate value = Iterate::Left;
84  #else
85  static constexpr Iterate value = Iterate::Right;
86  #endif
87 };
88 
89 template <typename ExecSpace>
90 struct default_inner_direction
91 {
92  using type = Iterate;
93  #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM)
94  static constexpr Iterate value = Iterate::Left;
95  #else
96  static constexpr Iterate value = Iterate::Right;
97  #endif
98 };
99 
100 
101 // Iteration Pattern
102 template < unsigned N
103  , Iterate OuterDir = Iterate::Default
104  , Iterate InnerDir = Iterate::Default
105  >
106 struct Rank
107 {
108  static_assert( N != 0u, "Kokkos Error: rank 0 undefined");
109  static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range");
110  static_assert( N < 7u, "Kokkos Error: Unsupported rank...");
111 
112  using iteration_pattern = Rank<N, OuterDir, InnerDir>;
113 
114  static constexpr int rank = N;
115  static constexpr Iterate outer_direction = OuterDir;
116  static constexpr Iterate inner_direction = InnerDir;
117 };
118 
119 
120 // multi-dimensional iteration pattern
121 template <typename... Properties>
122 struct MDRangePolicy
123  : public Kokkos::Impl::PolicyTraits<Properties ...>
124 {
125  using traits = Kokkos::Impl::PolicyTraits<Properties ...>;
126  using range_policy = RangePolicy<Properties...>;
127 
128  using impl_range_policy = RangePolicy< typename traits::execution_space
129  , typename traits::schedule_type
130  , typename traits::index_type
131  > ;
132 
133  typedef MDRangePolicy execution_policy; // needed for is_execution_space interrogation
134 
135  static_assert( !std::is_same<typename traits::iteration_pattern,void>::value
136  , "Kokkos Error: MD iteration pattern not defined" );
137 
138  using iteration_pattern = typename traits::iteration_pattern;
139  using work_tag = typename traits::work_tag;
140  using launch_bounds = typename traits::launch_bounds;
141  using member_type = typename range_policy::member_type;
142 
143  enum { rank = static_cast<int>(iteration_pattern::rank) };
144 
145  using index_type = typename traits::index_type;
146  using array_index_type = long;
147  using point_type = Kokkos::Array<array_index_type,rank>; //was index_type
148  using tile_type = Kokkos::Array<array_index_type,rank>;
149  // If point_type or tile_type is not templated on a signed integral type (if it is unsigned),
150  // then if user passes in intializer_list of runtime-determined values of
151  // signed integral type that are not const will receive a compiler error due
152  // to an invalid case for implicit conversion -
153  // "conversion from integer or unscoped enumeration type to integer type that cannot represent all values of the original, except where source is a constant expression whose value can be stored exactly in the target type"
154  // This would require the user to either pass a matching index_type parameter
155  // as template parameter to the MDRangePolicy or static_cast the individual values
156 
157  point_type m_lower;
158  point_type m_upper;
159  tile_type m_tile;
160  point_type m_tile_end;
161  index_type m_num_tiles;
162  index_type m_prod_tile_dims;
163 
164 /*
165  // NDE enum impl definition alternative - replace static constexpr int ?
166  enum { outer_direction = static_cast<int> (
167  (iteration_pattern::outer_direction != Iterate::Default)
168  ? iteration_pattern::outer_direction
169  : default_outer_direction< typename traits::execution_space>::value ) };
170 
171  enum { inner_direction = static_cast<int> (
172  iteration_pattern::inner_direction != Iterate::Default
173  ? iteration_pattern::inner_direction
174  : default_inner_direction< typename traits::execution_space>::value ) };
175 
176  enum { Right = static_cast<int>( Iterate::Right ) };
177  enum { Left = static_cast<int>( Iterate::Left ) };
178 */
179  //static constexpr int rank = iteration_pattern::rank;
180 
181  static constexpr int outer_direction = static_cast<int> (
182  (iteration_pattern::outer_direction != Iterate::Default)
183  ? iteration_pattern::outer_direction
184  : default_outer_direction< typename traits::execution_space>::value );
185 
186  static constexpr int inner_direction = static_cast<int> (
187  iteration_pattern::inner_direction != Iterate::Default
188  ? iteration_pattern::inner_direction
189  : default_inner_direction< typename traits::execution_space>::value ) ;
190 
191  // Ugly ugly workaround intel 14 not handling scoped enum correctly
192  static constexpr int Right = static_cast<int>( Iterate::Right );
193  static constexpr int Left = static_cast<int>( Iterate::Left );
194 
195  MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} )
196  : m_lower(lower)
197  , m_upper(upper)
198  , m_tile(tile)
199  , m_num_tiles(1)
200  , m_prod_tile_dims(1)
201  {
202  // Host
203  if ( true
204  #if defined(KOKKOS_ENABLE_CUDA)
205  && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
206  #endif
207  #if defined(KOKKOS_ENABLE_ROCM)
208  && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
209  #endif
210  )
211  {
212  index_type span;
213  for (int i=0; i<rank; ++i) {
214  span = upper[i] - lower[i];
215  if ( m_tile[i] <= 0 ) {
216  if ( ((int)inner_direction == (int)Right && (i < rank-1))
217  || ((int)inner_direction == (int)Left && (i > 0)) )
218  {
219  m_tile[i] = 2;
220  }
221  else {
222  m_tile[i] = (span == 0 ? 1 : span);
223  }
224  }
225  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
226  m_num_tiles *= m_tile_end[i];
227  m_prod_tile_dims *= m_tile[i];
228  }
229  }
230  #if defined(KOKKOS_ENABLE_CUDA)
231  else // Cuda
232  {
233  index_type span;
234  int increment = 1;
235  int rank_start = 0;
236  int rank_end = rank;
237  if((int)inner_direction == (int)Right) {
238  increment = -1;
239  rank_start = rank-1;
240  rank_end = -1;
241  }
242  for (int i=rank_start; i!=rank_end; i+=increment) {
243  span = m_upper[i] - m_lower[i];
244  if ( m_tile[i] <= 0 ) {
245  // TODO: determine what is a good default tile size for cuda
246  // may be rank dependent
247  if ( ((int)inner_direction == (int)Right && (i < rank-1))
248  || ((int)inner_direction == (int)Left && (i > 0)) )
249  {
250  if ( m_prod_tile_dims < 256 ) {
251  m_tile[i] = 2;
252  } else {
253  m_tile[i] = 1;
254  }
255  }
256  else {
257  m_tile[i] = 16;
258  }
259  }
260  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
261  m_num_tiles *= m_tile_end[i];
262  m_prod_tile_dims *= m_tile[i];
263  }
264  if ( m_prod_tile_dims > 1024 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
265  printf(" Tile dimensions exceed Cuda limits\n");
266  Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
267  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
268  }
269  }
270  #endif
271  #if defined(KOKKOS_ENABLE_ROCM)
272  else // ROCm
273  {
274  index_type span;
275  int increment = 1;
276  int rank_start = 0;
277  int rank_end = rank;
278  if((int)inner_direction == (int)Right) {
279  increment = -1;
280  rank_start = rank-1;
281  rank_end = -1;
282  }
283  for (int i=rank_start; i!=rank_end; i+=increment) {
284  span = m_upper[i] - m_lower[i];
285  if ( m_tile[i] <= 0 ) {
286  // TODO: determine what is a good default tile size for rocm
287  // may be rank dependent
288  if ( ((int)inner_direction == (int)Right && (i < rank-1))
289  || ((int)inner_direction == (int)Left && (i > 0)) )
290  {
291  if ( m_prod_tile_dims < 256 ) {
292  m_tile[i] = 4;
293  } else {
294  m_tile[i] = 1;
295  }
296  }
297  else {
298  m_tile[i] = 16;
299  }
300  }
301  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
302  m_num_tiles *= m_tile_end[i];
303  m_prod_tile_dims *= m_tile[i];
304  }
305  if ( m_prod_tile_dims > 1024 ) { //but product num_threads < 1024
306  printf(" Tile dimensions exceed ROCm limits\n");
307  Kokkos::abort(" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
308  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
309  }
310  }
311  #endif
312  }
313 
314 
315  template < typename LT , typename UT , typename TT = array_index_type >
316  MDRangePolicy( std::initializer_list<LT> const& lower, std::initializer_list<UT> const& upper, std::initializer_list<TT> const& tile = {} )
317  {
318 
319  if(static_cast<int>(m_lower.size()) != rank || static_cast<int>(m_upper.size()) != rank)
320  Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size");
321 
322  for ( auto i = 0; i < rank; ++i ) {
323  m_lower[i] = static_cast<array_index_type>(lower.begin()[i]);
324  m_upper[i] = static_cast<array_index_type>(upper.begin()[i]);
325  if(static_cast<int>(tile.size())==rank)
326  m_tile[i] = static_cast<array_index_type>(tile.begin()[i]);
327  else
328  m_tile[i] = 0;
329  }
330 
331  m_num_tiles = 1;
332  m_prod_tile_dims = 1;
333 
334  // Host
335  if ( true
336  #if defined(KOKKOS_ENABLE_CUDA)
337  && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
338  #endif
339  #if defined(KOKKOS_ENABLE_ROCM)
340  && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
341  #endif
342  )
343  {
344  index_type span;
345  for (int i=0; i<rank; ++i) {
346  span = m_upper[i] - m_lower[i];
347  if ( m_tile[i] <= 0 ) {
348  if ( ((int)inner_direction == (int)Right && (i < rank-1))
349  || ((int)inner_direction == (int)Left && (i > 0)) )
350  {
351  m_tile[i] = 2;
352  }
353  else {
354  m_tile[i] = (span == 0 ? 1 : span);
355  }
356  }
357  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
358  m_num_tiles *= m_tile_end[i];
359  m_prod_tile_dims *= m_tile[i];
360  }
361  }
362  #if defined(KOKKOS_ENABLE_CUDA)
363  else // Cuda
364  {
365  index_type span;
366  int increment = 1;
367  int rank_start = 0;
368  int rank_end = rank;
369  if((int)inner_direction == (int)Right) {
370  increment = -1;
371  rank_start = rank-1;
372  rank_end = -1;
373  }
374  for (int i=rank_start; i!=rank_end; i+=increment) {
375  span = m_upper[i] - m_lower[i];
376  if ( m_tile[i] <= 0 ) {
377  // TODO: determine what is a good default tile size for cuda
378  // may be rank dependent
379  if ( ((int)inner_direction == (int)Right && (i < rank-1))
380  || ((int)inner_direction == (int)Left && (i > 0)) )
381  {
382  if ( m_prod_tile_dims < 256 ) {
383  m_tile[i] = 2;
384  } else {
385  m_tile[i] = 1;
386  }
387  }
388  else {
389  m_tile[i] = 16;
390  }
391  }
392  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
393  m_num_tiles *= m_tile_end[i];
394  m_prod_tile_dims *= m_tile[i];
395  }
396  if ( m_prod_tile_dims > 1024 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
397  printf(" Tile dimensions exceed Cuda limits\n");
398  Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
399  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
400  }
401  }
402  #endif
403  #if defined(KOKKOS_ENABLE_ROCM)
404  else // ROCm
405  {
406  index_type span;
407  int increment = 1;
408  int rank_start = 0;
409  int rank_end = rank;
410  if((int)inner_direction == (int)Right) {
411  increment = -1;
412  rank_start = rank-1;
413  rank_end = -1;
414  }
415  for (int i=rank_start; i!=rank_end; i+=increment) {
416  span = m_upper[i] - m_lower[i];
417  if ( m_tile[i] <= 0 ) {
418  // TODO: determine what is a good default tile size for cuda
419  // may be rank dependent
420  if ( ((int)inner_direction == (int)Right && (i < rank-1))
421  || ((int)inner_direction == (int)Left && (i > 0)) )
422  {
423  if ( m_prod_tile_dims < 256 ) {
424  m_tile[i] = 2;
425  } else {
426  m_tile[i] = 1;
427  }
428  }
429  else {
430  m_tile[i] = 16;
431  }
432  }
433  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
434  m_num_tiles *= m_tile_end[i];
435  m_prod_tile_dims *= m_tile[i];
436  }
437  if ( m_prod_tile_dims > 1024 ) { // Match ROCm restriction for ParallelReduce; 1024,1024,1024 max per dim , but product num_threads < 1024
438  printf(" Tile dimensions exceed ROCm limits\n");
439  Kokkos::abort(" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
440  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
441  }
442  }
443  #endif
444  }
445 
446 };
447 
448 } // namespace Kokkos
449 
450 // For backward compatibility
451 namespace Kokkos { namespace Experimental {
452  using Kokkos::MDRangePolicy;
453  using Kokkos::Rank;
454  using Kokkos::Iterate;
455 } } // end Kokkos::Experimental
456 // ------------------------------------------------------------------ //
457 
458 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
459 // ------------------------------------------------------------------ //
460 // md_parallel_for - deprecated; use parallel_for instead
461 // ------------------------------------------------------------------ //
462 
463 namespace Kokkos { namespace Experimental {
464 
465 template <typename MDRange, typename Functor, typename Enable = void>
466 void md_parallel_for( MDRange const& range
467  , Functor const& f
468  , const std::string& str = ""
469  , typename std::enable_if<( true
470  #if defined( KOKKOS_ENABLE_CUDA)
471  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
472  #endif
473  #if defined( KOKKOS_ENABLE_ROCM)
474  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
475  #endif
476  ) >::type* = 0
477  )
478 {
479  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
480 
481  using range_policy = typename MDRange::impl_range_policy;
482 
483  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
484 }
485 
486 template <typename MDRange, typename Functor>
487 void md_parallel_for( const std::string& str
488  , MDRange const& range
489  , Functor const& f
490  , typename std::enable_if<( true
491  #if defined( KOKKOS_ENABLE_CUDA)
492  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
493  #endif
494  #if defined( KOKKOS_ENABLE_ROCM)
495  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
496  #endif
497  ) >::type* = 0
498  )
499 {
500  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
501 
502  using range_policy = typename MDRange::impl_range_policy;
503 
504  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
505 }
506 
507 // Cuda specialization
508 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
509 template <typename MDRange, typename Functor>
510 void md_parallel_for( const std::string& str
511  , MDRange const& range
512  , Functor const& f
513  , typename std::enable_if<( true
514  #if defined( KOKKOS_ENABLE_CUDA)
515  && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
516  #endif
517  ) >::type* = 0
518  )
519 {
520  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
521  closure.execute();
522 }
523 
524 template <typename MDRange, typename Functor>
525 void md_parallel_for( MDRange const& range
526  , Functor const& f
527  , const std::string& str = ""
528  , typename std::enable_if<( true
529  #if defined( KOKKOS_ENABLE_CUDA)
530  && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
531  #endif
532  ) >::type* = 0
533  )
534 {
535  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
536  closure.execute();
537 }
538 #endif
539 // ------------------------------------------------------------------ //
540 
541 // ------------------------------------------------------------------ //
542 // md_parallel_reduce - deprecated; use parallel_reduce instead
543 // ------------------------------------------------------------------ //
544 template <typename MDRange, typename Functor, typename ValueType>
545 void md_parallel_reduce( MDRange const& range
546  , Functor const& f
547  , ValueType & v
548  , const std::string& str = ""
549  , typename std::enable_if<( true
550  #if defined( KOKKOS_ENABLE_CUDA)
551  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
552  #endif
553  #if defined( KOKKOS_ENABLE_ROCM)
554  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
555  #endif
556  ) >::type* = 0
557  )
558 {
559  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
560 
561  using range_policy = typename MDRange::impl_range_policy;
562  Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
563 }
564 
565 template <typename MDRange, typename Functor, typename ValueType>
566 void md_parallel_reduce( const std::string& str
567  , MDRange const& range
568  , Functor const& f
569  , ValueType & v
570  , typename std::enable_if<( true
571  #if defined( KOKKOS_ENABLE_CUDA)
572  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
573  #endif
574  #if defined( KOKKOS_ENABLE_ROCM)
575  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
576  #endif
577  ) >::type* = 0
578  )
579 {
580  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
581 
582  using range_policy = typename MDRange::impl_range_policy;
583 
584  Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
585 }
586 
587 // Cuda - md_parallel_reduce not implemented - use parallel_reduce
588 
589 } } // namespace Kokkos::Experimental
590 #endif
591 
592 #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
593 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
Parallel reduction.
Declaration of various MemoryLayout options.
Declaration of parallel operators.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P...> &V)
Temporary free function rank() until rank() is implemented in the View.