44 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP 
   45 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP 
   47 #include <initializer_list> 
   51 #include<impl/KokkosExp_Host_IterateTile.hpp> 
   52 #include <Kokkos_ExecPolicy.hpp> 
   55 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) 
   56 #include<Cuda/KokkosExp_Cuda_IterateTile.hpp> 
   57 #include <Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp> 
   60 #if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM ) 
   62 #include <ROCm/KokkosExp_ROCm_IterateTile_Refactor.hpp> 
   // Default outer iteration direction. MDRangePolicy falls back to this value
   // when its iteration pattern's outer_direction is Iterate::Default.
   // In the lines visible here the choice depends only on build macros, not on
   // ExecSpace: Iterate::Left when KOKKOS_ENABLE_CUDA or KOKKOS_ENABLE_ROCM is
   // defined.
   // NOTE(review): the Iterate::Right definition is presumably the #else branch;
   // the #else/#endif lines and the braces are missing from this listing -- confirm.
   78 template <
typename ExecSpace>
 
   79 struct default_outer_direction
 
   82   #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM) 
   83   static constexpr Iterate value = Iterate::Left;
 
   85   static constexpr Iterate value = Iterate::Right;
 
   // Default inner iteration direction. MDRangePolicy falls back to this value
   // when its iteration pattern's inner_direction is Iterate::Default.
   // In the lines visible here the choice depends only on build macros, not on
   // ExecSpace: Iterate::Left when KOKKOS_ENABLE_CUDA or KOKKOS_ENABLE_ROCM is
   // defined.
   // NOTE(review): the Iterate::Right definition is presumably the #else branch;
   // the #else/#endif lines and the braces are missing from this listing -- confirm.
   89 template <
typename ExecSpace>
 
   90 struct default_inner_direction
 
   93   #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM) 
   94   static constexpr Iterate value = Iterate::Left;
 
   96   static constexpr Iterate value = Iterate::Right;
 
  // Compile-time iteration pattern tag: N is the rank, OuterDir / InnerDir are
  // the requested iteration directions (both default to Iterate::Default).
  // NOTE(review): the `struct Rank` header and braces are missing from this
  // listing; the name is taken from the iteration_pattern alias below -- confirm.
  102 template < 
unsigned N
 
  103          , Iterate OuterDir = Iterate::Default
 
  104          , Iterate InnerDir = Iterate::Default
 
  // Accepted ranks are 2 through 6: 0 and 1 are rejected explicitly, 7 and
  // above by the N < 7 check.
  108   static_assert( N != 0u, 
"Kokkos Error: rank 0 undefined");
 
  109   static_assert( N != 1u, 
"Kokkos Error: rank 1 is not a multi-dimensional range");
 
  110   static_assert( N < 7u, 
"Kokkos Error: Unsupported rank...");
 
  // Self-referential alias so the tag can be queried for its own pattern type.
  112   using iteration_pattern = Rank<N, OuterDir, InnerDir>;
 
  // The template arguments re-exported as constants; rank is exposed as int
  // although N is declared unsigned.
  114   static constexpr 
int rank = N;
 
  115   static constexpr Iterate outer_direction = OuterDir;
 
  116   static constexpr Iterate inner_direction = InnerDir;
 
  121 template <
typename... Properties>
 
  123   : 
public Kokkos::Impl::PolicyTraits<Properties ...>
 
  125   using traits = Kokkos::Impl::PolicyTraits<Properties ...>;
 
  126   using range_policy = RangePolicy<Properties...>;
 
  128   typename traits::execution_space m_space;
 
  130   using impl_range_policy = RangePolicy< 
typename traits::execution_space
 
  131                                        , 
typename traits::schedule_type
 
  132                                        , 
typename traits::index_type
 
  135   typedef MDRangePolicy execution_policy; 
 
  137   template<
class ... OtherProperties>
 
  138   friend struct MDRangePolicy;
 
  140   static_assert( !std::is_same<typename traits::iteration_pattern,void>::value
 
  141                , 
"Kokkos Error: MD iteration pattern not defined" );
 
  143   using iteration_pattern   = 
typename traits::iteration_pattern;
 
  144   using work_tag            = 
typename traits::work_tag;
 
  145   using launch_bounds       = 
typename traits::launch_bounds;
 
  146   using member_type = 
typename range_policy::member_type;
 
  148   enum { 
rank = 
static_cast<int>(iteration_pattern::rank) };
 
  150   using index_type  = 
typename traits::index_type;
 
  151   using array_index_type = long;
 
  165   point_type m_tile_end;
 
  166   index_type m_num_tiles;
 
  167   index_type m_prod_tile_dims;
 
  // Effective outer direction, stored as int: the iteration pattern's own
  // outer_direction unless that is Iterate::Default, in which case
  // default_outer_direction<execution_space>::value is substituted.
  186   static constexpr 
int outer_direction = 
static_cast<int> (
 
  187       (iteration_pattern::outer_direction != Iterate::Default)
 
  188     ? iteration_pattern::outer_direction
 
  189     : default_outer_direction< typename traits::execution_space>::value );
 
  // Effective inner direction, resolved the same way through
  // default_inner_direction<execution_space>::value.
  191   static constexpr 
int inner_direction = 
static_cast<int> (
 
  192       iteration_pattern::inner_direction != Iterate::Default
 
  193     ? iteration_pattern::inner_direction
 
  194     : default_inner_direction< typename traits::execution_space>::value ) ;
 
  197   static constexpr 
int Right = 
static_cast<int>( Iterate::Right );
 
  198   static constexpr 
int Left  = 
static_cast<int>( Iterate::Left );
 
  // Returns a const reference to the execution space instance held in m_space.
  200   KOKKOS_INLINE_FUNCTION 
const typename traits::execution_space & space()
 const { 
return m_space ; }
 
  201   template < 
typename LT , 
typename UT , 
typename TT = array_index_type >
 
  202   MDRangePolicy(std::initializer_list<LT> 
const& lower, std::initializer_list<UT> 
const& upper, std::initializer_list<TT> 
const& tile = {} )
 
  204     init(lower, upper, tile);
 
  207   template < 
typename LT , 
typename UT , 
typename TT = array_index_type >
 
  208   MDRangePolicy(
const typename traits::execution_space & work_space,
 
  209     std::initializer_list<LT> 
const& lower, std::initializer_list<UT> 
const& upper, std::initializer_list<TT> 
const& tile = {} )
 
  210     : m_space( work_space ) {
 
  211     init(lower, upper, tile);
 
  214   MDRangePolicy( point_type 
const& lower, point_type 
const& upper, tile_type 
const& tile = tile_type{} )
 
  220     , m_prod_tile_dims(1) {
 
  224   MDRangePolicy( 
const typename traits::execution_space & work_space,
 
  225     point_type 
const& lower, point_type 
const& upper, tile_type 
const& tile = tile_type{} )
 
  226     : m_space( work_space )
 
  231     , m_prod_tile_dims(1) {
 
  235   template<
class ... OtherProperties>
 
  236   MDRangePolicy( 
const MDRangePolicy<OtherProperties...> p ):
 
  241      m_tile_end(p.m_tile_end),
 
  242      m_num_tiles(p.m_num_tiles),
 
  243      m_prod_tile_dims(p.m_prod_tile_dims) {}
 
  250        #
if defined(KOKKOS_ENABLE_CUDA)
 
  251          && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
 
  253        #
if defined(KOKKOS_ENABLE_ROCM)
 
  254          && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
 
  259       for (
int i=0; i<
rank; ++i) {
 
  260         span = m_upper[i] - m_lower[i];
 
  261         if ( m_tile[i] <= 0 ) {
 
  262           if (  ((
int)inner_direction == (
int)Right && (i < rank-1))
 
  263               || ((
int)inner_direction == (
int)Left && (i > 0)) )
 
  268             m_tile[i] = (span == 0 ? 1 : span);
 
  271         m_tile_end[i] = 
static_cast<index_type
>((span + m_tile[i] - 1) / m_tile[i]);
 
  272         m_num_tiles *= m_tile_end[i];
 
  273         m_prod_tile_dims *= m_tile[i];
 
  276     #if defined(KOKKOS_ENABLE_CUDA) 
  283       if((
int)inner_direction == (int)Right) {
 
  288       for (
int i=rank_start; i!=rank_end; i+=increment) {
 
  289         span = m_upper[i] - m_lower[i];
 
  290         if ( m_tile[i] <= 0 ) {
 
  293           if (  ((
int)inner_direction == (int)Right && (i < rank-1))
 
  294               || ((
int)inner_direction == (int)Left && (i > 0)) )
 
  296             if ( m_prod_tile_dims < 256 ) {
 
  306         m_tile_end[i] = 
static_cast<index_type
>((span + m_tile[i] - 1) / m_tile[i]);
 
  307         m_num_tiles *= m_tile_end[i];
 
  308         m_prod_tile_dims *= m_tile[i];
 
  310       if ( m_prod_tile_dims > 1024 ) { 
 
  311         printf(
" Tile dimensions exceed Cuda limits\n");
 
  312         Kokkos::abort(
" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
 
  317     #if defined(KOKKOS_ENABLE_ROCM) 
  324       if((
int)inner_direction == (
int)Right) {
 
  329       for (
int i=rank_start; i!=rank_end; i+=increment) {
 
  330         span = m_upper[i] - m_lower[i];
 
  331         if ( m_tile[i] <= 0 ) {
 
  334           if (  ((
int)inner_direction == (
int)Right && (i < rank-1))
 
  335               || ((
int)inner_direction == (
int)Left && (i > 0)) )
 
  337             if ( m_prod_tile_dims < 256 ) {
 
  347         m_tile_end[i] = 
static_cast<index_type
>((span + m_tile[i] - 1) / m_tile[i]);
 
  348         m_num_tiles *= m_tile_end[i];
 
  349         m_prod_tile_dims *= m_tile[i];
 
  351       if ( m_prod_tile_dims > 1024 ) { 
 
  352         printf(
" Tile dimensions exceed ROCm limits\n");
 
  353         Kokkos::abort(
" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
 
  360   template < 
typename LT , 
typename UT , 
typename TT = array_index_type >
 
  361   void init( std::initializer_list<LT> 
const& lower, std::initializer_list<UT> 
const& upper, std::initializer_list<TT> 
const& tile = {} )
 
  // Validate the caller-supplied initializer lists rather than the member
  // arrays: the loop that follows reads lower.begin()[i] and upper.begin()[i]
  // for every i in [0, rank), so a list with fewer than rank entries would be
  // read out of bounds. The previous check inspected m_lower / m_upper, which
  // are the destinations of that copy and say nothing about the arguments.
  363     if(static_cast<int>(lower.size()) != rank || static_cast<int>(upper.size()) != rank)
 
  364       Kokkos::abort(
"MDRangePolicy: Constructor initializer lists have wrong size");
 
  366     for ( 
auto i = 0; i < 
rank; ++i ) {
 
  367       m_lower[i] = 
static_cast<array_index_type
>(lower.begin()[i]);
 
  368       m_upper[i] = 
static_cast<array_index_type
>(upper.begin()[i]);
 
  369       if(static_cast<int>(tile.size())==rank)
 
  370         m_tile[i] = 
static_cast<array_index_type
>(tile.begin()[i]);
 
  376     m_prod_tile_dims = 1;
 
  380        #
if defined(KOKKOS_ENABLE_CUDA)
 
  381          && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
 
  383        #if defined(KOKKOS_ENABLE_ROCM) 
  384          && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
 
  389       for (
int i=0; i<
rank; ++i) {
 
  390         span = m_upper[i] - m_lower[i];
 
  391         if ( m_tile[i] <= 0 ) {
 
  392           if (  ((
int)inner_direction == (
int)Right && (i < rank-1))
 
  393               || ((int)inner_direction == (
int)Left && (i > 0)) )
 
  398             m_tile[i] = (span == 0 ? 1 : span);
 
  401         m_tile_end[i] = 
static_cast<index_type
>((span + m_tile[i] - 1) / m_tile[i]);
 
  402         m_num_tiles *= m_tile_end[i];
 
  403         m_prod_tile_dims *= m_tile[i];
 
  406     #if defined(KOKKOS_ENABLE_CUDA) 
  413       if((
int)inner_direction == (int)Right) {
 
  418       for (
int i=rank_start; i!=rank_end; i+=increment) {
 
  419         span = m_upper[i] - m_lower[i];
 
  420         if ( m_tile[i] <= 0 ) {
 
  423           if (  ((
int)inner_direction == (int)Right && (i < rank-1))
 
  424               || ((
int)inner_direction == (int)Left && (i > 0)) )
 
  426             if ( m_prod_tile_dims < 256 ) {
 
  436         m_tile_end[i] = 
static_cast<index_type
>((span + m_tile[i] - 1) / m_tile[i]);
 
  437         m_num_tiles *= m_tile_end[i];
 
  438         m_prod_tile_dims *= m_tile[i];
 
  440       if ( m_prod_tile_dims > 1024 ) { 
 
  441         printf(
" Tile dimensions exceed Cuda limits\n");
 
  442         Kokkos::abort(
" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
 
  447     #if defined(KOKKOS_ENABLE_ROCM) 
  454       if((
int)inner_direction == (
int)Right) {
 
  459       for (
int i=rank_start; i!=rank_end; i+=increment) {
 
  460         span = m_upper[i] - m_lower[i];
 
  461         if ( m_tile[i] <= 0 ) {
 
  464           if (  ((
int)inner_direction == (
int)Right && (i < rank-1))
 
  465               || ((
int)inner_direction == (
int)Left && (i > 0)) )
 
  467             if ( m_prod_tile_dims < 256 ) {
 
  477         m_tile_end[i] = 
static_cast<index_type
>((span + m_tile[i] - 1) / m_tile[i]);
 
  478         m_num_tiles *= m_tile_end[i];
 
  479         m_prod_tile_dims *= m_tile[i];
 
  481       if ( m_prod_tile_dims > 1024 ) { 
 
  482         printf(
" Tile dimensions exceed ROCm limits\n");
 
  483         Kokkos::abort(
" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
 
  495 namespace Kokkos { 
namespace Experimental {
 
  496   using Kokkos::MDRangePolicy;
 
  498   using Kokkos::Iterate;
 
  502 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 
  507 namespace Kokkos { 
namespace Experimental {
 
  509 template <
typename MDRange, 
typename Functor, 
typename Enable = 
void>
 
  510 void md_parallel_for( MDRange 
const& range
 
  512                     , 
const std::string& str = 
"" 
  513                     , 
typename std::enable_if<( 
true 
  514                       #
if defined( KOKKOS_ENABLE_CUDA)
 
  515                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
 
  517                       #
if defined( KOKKOS_ENABLE_ROCM)
 
  518                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
 
  523   Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
 
  525   using range_policy = 
typename MDRange::impl_range_policy;
 
  530 template <
typename MDRange, 
typename Functor>
 
  531 void md_parallel_for( 
const std::string& str
 
  532                     , MDRange 
const& range
 
  534                     , 
typename std::enable_if<( 
true 
  535                       #
if defined( KOKKOS_ENABLE_CUDA)
 
  536                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
 
  538                       #
if defined( KOKKOS_ENABLE_ROCM)
 
  539                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
 
  544   Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
 
  546   using range_policy = 
typename MDRange::impl_range_policy;
 
  552 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) 
  553 template <
typename MDRange, 
typename Functor>
 
  554 void md_parallel_for( 
const std::string& str
 
  555                     , MDRange 
const& range
 
  557                     , 
typename std::enable_if<( 
true 
  558                       #
if defined( KOKKOS_ENABLE_CUDA)
 
  559                       && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
 
  564   Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
 
  568 template <
typename MDRange, 
typename Functor>
 
  569 void md_parallel_for( MDRange 
const& range
 
  571                     , 
const std::string& str = 
"" 
  572                     , 
typename std::enable_if<( 
true 
  573                       #
if defined( KOKKOS_ENABLE_CUDA)
 
  574                       && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
 
  579   Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
 
  588 template <
typename MDRange, 
typename Functor, 
typename ValueType>
 
  589 void md_parallel_reduce( MDRange 
const& range
 
  592                     , 
const std::string& str = 
"" 
  593                     , 
typename std::enable_if<( 
true 
  594                       #
if defined( KOKKOS_ENABLE_CUDA)
 
  595                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
 
  597                       #
if defined( KOKKOS_ENABLE_ROCM)
 
  598                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
 
  603   Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
 
  605   using range_policy = 
typename MDRange::impl_range_policy;
 
  609 template <
typename MDRange, 
typename Functor, 
typename ValueType>
 
  610 void md_parallel_reduce( 
const std::string& str
 
  611                     , MDRange 
const& range
 
  614                     , 
typename std::enable_if<( 
true 
  615                       #
if defined( KOKKOS_ENABLE_CUDA)
 
  616                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
 
  618                       #
if defined( KOKKOS_ENABLE_ROCM)
 
  619                       && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
 
  624   Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
 
  626   using range_policy = 
typename MDRange::impl_range_policy;
 
  637 namespace Experimental {
 
  // Specialization of PolicyPropertyAdaptor for attaching a
  // WorkItemProperty::ImplWorkItemProperty<P> to an MDRangePolicy.
  // NOTE(review): the closing brace and any further members are missing from
  // this listing -- confirm against the full header.
  640 template<
unsigned long P, 
class ... Properties>
 
  641 struct PolicyPropertyAdaptor<WorkItemProperty::ImplWorkItemProperty<P>,MDRangePolicy<Properties...>> {
 
  // The incoming policy type, exactly as supplied.
  642   typedef MDRangePolicy<Properties...> policy_in_t;
 
  // The resulting policy type: rebuilt from policy_in_t's traits (execution
  // space, schedule, work tag, index type, iteration pattern, launch bounds)
  // with ImplWorkItemProperty<P> appended as the last property.
  643   typedef MDRangePolicy<
typename policy_in_t::traits::execution_space,
 
  644                       typename policy_in_t::traits::schedule_type,
 
  645                       typename policy_in_t::traits::work_tag,
 
  646                       typename policy_in_t::traits::index_type,
 
  647                       typename policy_in_t::traits::iteration_pattern,
 
  648                       typename policy_in_t::traits::launch_bounds,
 
  649                       WorkItemProperty::ImplWorkItemProperty<P>> policy_out_t;
 
  657 #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy. 
 
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
Parallel reduction. 
 
Declaration of various MemoryLayout options. 
 
Declaration of parallel operators. 
 
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P...> &V)
Temporary free function rank() until rank() is implemented in the View.