44 #ifndef KOKKOS_WORKGRAPHPOLICY_HPP
45 #define KOKKOS_WORKGRAPHPOLICY_HPP
// Forward declaration of the per-backend work-graph executor; each enabled
// backend supplies a partial specialization (see the backend headers
// included at the bottom of this file).
// NOTE(review): the declared entity's name was lost in extraction; upstream
// Kokkos declares `class WorkGraphExec` here — confirm against the repo.
template< class functor_type ,
          class execution_space ,
          class ... policy_args >
class WorkGraphExec ;
57 template<
class ... Properties >
62 using execution_policy = WorkGraphPolicy<Properties ... >;
63 using self_type = WorkGraphPolicy<Properties ... >;
64 using traits = Kokkos::Impl::PolicyTraits<Properties ... >;
65 using index_type =
typename traits::index_type;
66 using member_type = index_type;
67 using work_tag =
typename traits::work_tag;
68 using execution_space =
typename traits::execution_space;
69 using memory_space =
typename execution_space::memory_space;
75 COMPLETED_TOKEN = -3 };
86 graph_type
const m_graph;
89 KOKKOS_INLINE_FUNCTION
90 void push_work(
const std::int32_t w )
const noexcept
92 const std::int32_t N = m_graph.numRows();
94 std::int32_t
volatile *
const ready_queue = & m_queue[0] ;
95 std::int32_t
volatile *
const end_hint = & m_queue[2*N+1] ;
98 const std::int32_t j = atomic_fetch_add( end_hint , 1 );
101 ( END_TOKEN != atomic_exchange(ready_queue+j,w) ) ) {
103 Kokkos::abort(
"WorkGraphPolicy push_work error");
125 KOKKOS_INLINE_FUNCTION
126 std::int32_t pop_work() const noexcept
128 const std::int32_t N = m_graph.numRows();
130 std::int32_t
volatile *
const ready_queue = & m_queue[0] ;
131 std::int32_t
volatile *
const begin_hint = & m_queue[2*N] ;
136 for ( std::int32_t i = *begin_hint ; i < N ; ++i ) {
138 const std::int32_t w = ready_queue[i] ;
140 if ( w == END_TOKEN ) {
return END_TOKEN ; }
142 if ( ( w != BEGIN_TOKEN ) &&
143 ( w == atomic_compare_exchange(ready_queue+i,w,(std::int32_t)BEGIN_TOKEN) ) ) {
146 atomic_increment( begin_hint );
152 return COMPLETED_TOKEN ;
156 KOKKOS_INLINE_FUNCTION
157 void completed_work( std::int32_t w )
const noexcept
159 Kokkos::memory_fence();
163 const std::int32_t N = m_graph.numRows();
165 std::int32_t
volatile *
const count_queue = & m_queue[N] ;
167 const std::int32_t B = m_graph.row_map(w);
168 const std::int32_t E = m_graph.row_map(w+1);
170 for ( std::int32_t i = B ; i < E ; ++i ) {
171 const std::int32_t j = m_graph.entries(i);
172 if ( 1 == atomic_fetch_add(count_queue+j,-1) ) {
188 KOKKOS_INLINE_FUNCTION
189 void operator()(
const TagInit ,
int i )
const noexcept
190 { m_queue[i] = i < m_graph.numRows() ? END_TOKEN : 0 ; }
192 KOKKOS_INLINE_FUNCTION
193 void operator()(
const TagCount ,
int i )
const noexcept
195 std::int32_t
volatile *
const count_queue =
196 & m_queue[ m_graph.numRows() ] ;
198 atomic_increment( count_queue + m_graph.entries[i] );
201 KOKKOS_INLINE_FUNCTION
202 void operator()(
const TagReady ,
int w )
const noexcept
204 std::int32_t
const *
const count_queue =
205 & m_queue[ m_graph.numRows() ] ;
207 if ( 0 == count_queue[w] ) push_work(w);
210 WorkGraphPolicy(
const graph_type & arg_graph )
212 , m_queue(
view_alloc(
"queue" , WithoutInitializing )
213 , arg_graph.numRows() * 2 + 2 )
216 using policy_type = RangePolicy<std::int32_t, execution_space, TagInit>;
218 const closure_type closure(*
this, policy_type(0, m_queue.size()));
220 execution_space::fence();
224 using policy_type = RangePolicy<std::int32_t, execution_space, TagCount>;
226 const closure_type closure(*
this,policy_type(0,m_graph.entries.size()));
228 execution_space::fence();
232 using policy_type = RangePolicy<std::int32_t, execution_space, TagReady>;
234 const closure_type closure(*
this,policy_type(0,m_graph.numRows()));
236 execution_space::fence();
243 #ifdef KOKKOS_ENABLE_SERIAL
244 #include "impl/Kokkos_Serial_WorkGraphPolicy.hpp"
247 #ifdef KOKKOS_ENABLE_OPENMP
248 #include "OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp"
251 #ifdef KOKKOS_ENABLE_CUDA
252 #include "Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp"
255 #ifdef KOKKOS_ENABLE_THREADS
256 #include "Threads/Kokkos_Threads_WorkGraphPolicy.hpp"
// NOTE(review): the following lines are documentation-viewer tooltip text
// that leaked into the source during extraction; preserved here as comments
// so the file remains compilable:
//   Impl::ViewCtorProp< typename Impl::ViewCtorProp< void, Args >::type... > view_alloc(Args const &...args)
//     Create View allocation parameter bundle from argument list.
//   Crs: Compressed row storage array.
//   ParallelFor: Implementation of the ParallelFor operator that has a
//     partial specialization for the device.