17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_WORKGRAPHPOLICY_HPP
23 #define KOKKOS_WORKGRAPHPOLICY_HPP
25 #include <impl/Kokkos_AnalyzePolicy.hpp>
26 #include <Kokkos_Crs.hpp>
31 template <
class functor_type,
class execution_space,
class... policy_args>
// Execution policy class template taking a pack of policy properties; it
// derives from Kokkos::Impl::PolicyTraits instantiated with the same pack.
39 template <
class... Properties>
40 class WorkGraphPolicy :
public Kokkos::Impl::PolicyTraits<Properties...> {
// execution_policy and self_type both name this exact specialization.
42 using execution_policy = WorkGraphPolicy<Properties...>;
43 using self_type = WorkGraphPolicy<Properties...>;
// Trait bundle from which the index and execution-space types are taken.
44 using traits = Kokkos::Impl::PolicyTraits<Properties...>;
45 using index_type =
typename traits::index_type;
// The member type is the index type itself.
46 using member_type = index_type;
47 using execution_space =
typename traits::execution_space;
// Memory space associated with the selected execution space.
48 using memory_space =
typename execution_space::memory_space;
// Graph stored by value as a const member. graph_type is declared on a line
// that is not visible in this listing.
65 graph_type
const m_graph;
// Appends work item `w` to the ready queue.
//
// With N = m_graph.numRows(), the ready queue starts at m_queue[0] and the
// "end hint" counter lives at m_queue[2 * N + 1]. A slot index j is reserved
// by atomically adding 1 to the end hint, then `w` is atomically exchanged
// into ready_queue[j]. The call aborts when j is not below N or when the
// exchange reports that the slot did not hold END_TOKEN beforehand.
// NOTE(review): the lines following the abort call are not visible in this
// listing.
68 KOKKOS_INLINE_FUNCTION
69 void push_work(
const std::int32_t w)
const noexcept {
70 const std::int32_t N = m_graph.numRows();
72 std::int32_t*
const ready_queue = &m_queue[0];
73 std::int32_t*
const end_hint = &m_queue[2 * N + 1];
// Reserve the next slot; j is the value of the end hint before the add.
76 const std::int32_t j = atomic_fetch_add(end_hint, 1);
// Short-circuit: the exchange is only attempted when j is inside [0, N).
78 if ((N <= j) || (END_TOKEN != atomic_exchange(ready_queue + j, w))) {
80 Kokkos::abort(
"WorkGraphPolicy push_work error");
// Tries to take one work item from the ready queue.
//
// With N = m_graph.numRows(), the ready queue starts at m_queue[0] and the
// "begin hint" counter lives at m_queue[2 * N]. The scan starts at the index
// atomically loaded from the begin hint and walks forward to N.
// NOTE(review): several lines of the loop body (the END_TOKEN branch body and
// the value returned after a successful claim) are not visible in this
// listing; the comments below describe only the visible statements.
101 KOKKOS_INLINE_FUNCTION
102 std::int32_t pop_work() const noexcept {
103 const std::int32_t N = m_graph.numRows();
105 std::int32_t*
const ready_queue = &m_queue[0];
106 std::int32_t*
const begin_hint = &m_queue[2 * N];
111 for (std::int32_t i = Kokkos::atomic_load(begin_hint); i < N; ++i) {
112 const std::int32_t w = Kokkos::atomic_load(&ready_queue[i]);
// Slot i still holds END_TOKEN (the value written by the TagInit pass).
114 if (w == END_TOKEN) {
// Slot i is not already marked BEGIN_TOKEN: try to claim it by swapping the
// observed value w for BEGIN_TOKEN. The claim succeeds only when the
// compare-exchange reports the slot still contained w.
118 if ((w != BEGIN_TOKEN) &&
119 (w == atomic_compare_exchange(ready_queue + i, w,
120 (std::int32_t)BEGIN_TOKEN))) {
// Advance the begin hint by one after a successful claim.
123 atomic_increment(begin_hint);
// Presumably reached once the scan runs past the last slot without finding
// anything to claim -- the loop's closing lines are not shown; confirm.
129 return COMPLETED_TOKEN;
// Records that work item `w` has finished.
//
// Issues a memory fence first, then walks the graph entries of row `w`
// (indices row_map(w) up to, but excluding, row_map(w + 1)). For each entry j
// the counter at count_queue[j] -- count_queue starts at m_queue[N] with
// N = m_graph.numRows() -- is atomically decremented by one.
// NOTE(review): the body of the final `if` is not visible in this listing.
132 KOKKOS_INLINE_FUNCTION
133 void completed_work(std::int32_t w)
const noexcept {
134 Kokkos::memory_fence();
138 const std::int32_t N = m_graph.numRows();
140 std::int32_t*
const count_queue = &m_queue[N];
// B and E bound the entries belonging to row w.
142 const std::int32_t B = m_graph.row_map(w);
143 const std::int32_t E = m_graph.row_map(w + 1);
145 for (std::int32_t i = B; i < E; ++i) {
146 const std::int32_t j = m_graph.entries(i);
// The fetch-add yields the counter value before the decrement, so a result
// of 1 means this decrement brought the counter for j down to zero.
147 if (1 == atomic_fetch_add(count_queue + j, -1)) {
// TagInit pass: initializes element i of m_queue.
// Indices below m_graph.numRows() receive END_TOKEN; every other index
// receives 0.
163 KOKKOS_INLINE_FUNCTION
164 void operator()(
const TagInit,
int i)
const noexcept {
165 m_queue[i] = i < m_graph.numRows() ? END_TOKEN : 0;
// TagCount pass: for graph entry index i, atomically increments the counter
// belonging to the value stored in m_graph.entries[i]. The counters start at
// m_queue[m_graph.numRows()].
168 KOKKOS_INLINE_FUNCTION
169 void operator()(
const TagCount,
int i)
const noexcept {
170 std::int32_t*
const count_queue = &m_queue[m_graph.numRows()];
172 atomic_increment(count_queue + m_graph.entries[i]);
// TagReady pass: pushes work item w onto the ready queue when its counter is
// zero. The counters are read (not modified) through a pointer-to-const that
// starts at m_queue[m_graph.numRows()].
175 KOKKOS_INLINE_FUNCTION
176 void operator()(
const TagReady,
int w)
const noexcept {
177 std::int32_t
const*
const count_queue = &m_queue[m_graph.numRows()];
179 if (0 == count_queue[w]) push_work(w);
// Returns a default-constructed execution_space instance; no state of this
// policy object is consulted.
182 execution_space space()
const {
return execution_space(); }
// Constructs the policy from a graph.
//
// Copies `arg_graph` into m_graph and allocates m_queue, labelled "queue" and
// requested WithoutInitializing, with 2 * arg_graph.numRows() + 2 elements.
// Three closures are then built over this policy object, each with a
// RangePolicy carrying a different tag, and each is followed by a fence on a
// default-constructed execution space:
//   1. TagInit  over [0, m_queue.size())
//   2. TagCount over [0, m_graph.entries.size())
//   3. TagReady over [0, m_graph.numRows())
// NOTE(review): the declarations of closure_type, the statements that run each
// closure, the tails of the fence label strings, and the enclosing scope
// braces are not visible in this listing.
184 WorkGraphPolicy(
const graph_type& arg_graph)
185 : m_graph(arg_graph),
186 m_queue(view_alloc(
"queue", WithoutInitializing),
187 arg_graph.numRows() * 2 + 2) {
// Phase 1: one index per m_queue element, dispatched to operator()(TagInit).
189 using policy_type = RangePolicy<std::int32_t, execution_space, TagInit>;
191 const closure_type closure(*
this, policy_type(0, m_queue.size()));
193 execution_space().fence(
194 "Kokkos::WorkGraphPolicy::WorkGraphPolicy: fence after executing "
// Phase 2: one index per graph entry, dispatched to operator()(TagCount).
199 using policy_type = RangePolicy<std::int32_t, execution_space, TagCount>;
201 const closure_type closure(*
this, policy_type(0, m_graph.entries.size()));
203 execution_space().fence(
204 "Kokkos::WorkGraphPolicy::WorkGraphPolicy: fence after executing "
// Phase 3: one index per graph row, dispatched to operator()(TagReady).
209 using policy_type = RangePolicy<std::int32_t, execution_space, TagReady>;
211 const closure_type closure(*
this, policy_type(0, m_graph.numRows()));
213 execution_space().fence(
214 "Kokkos::WorkGraphPolicy::WorkGraphPolicy: fence after executing "
222 #ifdef KOKKOS_ENABLE_SERIAL
223 #include "Serial/Kokkos_Serial_WorkGraphPolicy.hpp"
226 #ifdef KOKKOS_ENABLE_OPENMP
227 #include "OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp"
230 #ifdef KOKKOS_ENABLE_CUDA
231 #include "Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp"
234 #ifdef KOKKOS_ENABLE_HIP
235 #include "HIP/Kokkos_HIP_WorkGraphPolicy.hpp"
238 #ifdef KOKKOS_ENABLE_THREADS
239 #include "Threads/Kokkos_Threads_WorkGraphPolicy.hpp"
242 #ifdef KOKKOS_ENABLE_HPX
243 #include "HPX/Kokkos_HPX_WorkGraphPolicy.hpp"
Compressed row storage array.
Implementation of the ParallelFor operator that has a partial specialization for the device...