Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_GraphNode.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_KOKKOS_GRAPHNODE_HPP
23 #define KOKKOS_KOKKOS_GRAPHNODE_HPP
24 
25 #include <Kokkos_Macros.hpp>
26 
27 #include <impl/Kokkos_Error.hpp> // contract macros
28 
29 #include <Kokkos_Core_fwd.hpp>
30 #include <Kokkos_Graph_fwd.hpp>
31 #include <impl/Kokkos_GraphImpl_fwd.hpp>
32 #include <Kokkos_Parallel_Reduce.hpp>
33 #include <impl/Kokkos_GraphImpl_Utilities.hpp>
34 #include <impl/Kokkos_GraphImpl.hpp> // GraphAccess
35 
36 #include <memory> // std::shared_ptr
37 
38 namespace Kokkos {
39 namespace Experimental {
40 
41 template <class ExecutionSpace, class Kernel /*= TypeErasedTag*/,
42  class Predecessor /*= TypeErasedTag*/>
43 class GraphNodeRef {
44  //----------------------------------------------------------------------------
45  // <editor-fold desc="template parameter constraints"> {{{2
46 
47  // Note: because of these assertions, instantiating this class template is not
48  // intended to be SFINAE-safe, so do validation before you instantiate.
49 
50  static_assert(
51  std::is_same_v<Predecessor, TypeErasedTag> ||
52  Kokkos::Impl::is_specialization_of<Predecessor, GraphNodeRef>::value,
53  "Invalid predecessor template parameter given to GraphNodeRef");
54 
55  static_assert(
56  Kokkos::is_execution_space<ExecutionSpace>::value,
57  "Invalid execution space template parameter given to GraphNodeRef");
58 
59  static_assert(std::is_same_v<Predecessor, TypeErasedTag> ||
60  Kokkos::Impl::is_graph_kernel<Kernel>::value,
61  "Invalid kernel template parameter given to GraphNodeRef");
62 
63  static_assert(!Kokkos::Impl::is_more_type_erased<Kernel, Predecessor>::value,
64  "The kernel of a graph node can't be more type-erased than the "
65  "predecessor");
66 
67  // </editor-fold> end template parameter constraints }}}2
68  //----------------------------------------------------------------------------
69 
70  public:
71  //----------------------------------------------------------------------------
72  // <editor-fold desc="public member types"> {{{2
73 
74  using execution_space = ExecutionSpace;
75  using graph_kernel = Kernel;
76  using graph_predecessor = Predecessor;
77 
78  // </editor-fold> end public member types }}}2
79  //----------------------------------------------------------------------------
80 
81  private:
82  //----------------------------------------------------------------------------
83  // <editor-fold desc="Friends"> {{{2
84 
85  template <class, class, class>
86  friend class GraphNodeRef;
87  friend struct Kokkos::Impl::GraphAccess;
88 
89  // </editor-fold> end Friends }}}2
90  //----------------------------------------------------------------------------
91 
92  //----------------------------------------------------------------------------
93  // <editor-fold desc="Private Data Members"> {{{2
94 
95  using graph_impl_t = Kokkos::Impl::GraphImpl<ExecutionSpace>;
96  std::weak_ptr<graph_impl_t> m_graph_impl;
97 
98  // TODO @graphs figure out if we can get away with a weak reference here?
99  // GraphNodeRef instances shouldn't be stored by users outside
100  // of the create_graph closure, and so if we restructure things
101  // slightly, we could make it so that the graph owns the
102  // node_impl_t instance and this only holds a std::weak_ptr to
103  // it.
104  using node_impl_t =
105  Kokkos::Impl::GraphNodeImpl<ExecutionSpace, Kernel, Predecessor>;
106  std::shared_ptr<node_impl_t> m_node_impl;
107 
108  // </editor-fold> end Private Data Members }}}2
109  //----------------------------------------------------------------------------
110 
111  //----------------------------------------------------------------------------
112  // <editor-fold desc="Implementation detail accessors"> {{{2
113 
114  // Internally, use shallow constness
115  node_impl_t& get_node_impl() const { return *m_node_impl.get(); }
116  std::shared_ptr<node_impl_t> const& get_node_ptr() const& {
117  return m_node_impl;
118  }
119  std::shared_ptr<node_impl_t> get_node_ptr() && {
120  return std::move(m_node_impl);
121  }
122  std::weak_ptr<graph_impl_t> get_graph_weak_ptr() const {
123  return m_graph_impl;
124  }
125 
126  // </editor-fold> end Implementation detail accessors }}}2
127  //----------------------------------------------------------------------------
128 
129  // TODO kernel name propagation and exposure
130 
131  template <class NextKernelDeduced>
132  auto _then_kernel(NextKernelDeduced&& arg_kernel) const {
133  static_assert(Kokkos::Impl::is_graph_kernel_v<
134  Kokkos::Impl::remove_cvref_t<NextKernelDeduced>>,
135  "Kokkos internal error");
136 
137  auto graph_ptr = m_graph_impl.lock();
138  KOKKOS_EXPECTS(bool(graph_ptr))
139 
140  using next_kernel_t = Kokkos::Impl::remove_cvref_t<NextKernelDeduced>;
141 
142  using return_t = GraphNodeRef<ExecutionSpace, next_kernel_t, GraphNodeRef>;
143 
144  auto rv = Kokkos::Impl::GraphAccess::make_graph_node_ref(
145  m_graph_impl,
146  Kokkos::Impl::GraphAccess::make_node_shared_ptr<
147  typename return_t::node_impl_t>(
148  m_node_impl->execution_space_instance(),
149  Kokkos::Impl::_graph_node_kernel_ctor_tag{},
150  (NextKernelDeduced&&)arg_kernel,
151  // *this is the predecessor
152  Kokkos::Impl::_graph_node_predecessor_ctor_tag{}, *this));
153 
154  // Add the node itself to the backend's graph data structure, now that
155  // everything is set up.
156  graph_ptr->add_node(rv.m_node_impl);
157  // Add the predecessaor we stored in the constructor above in the backend's
158  // data structure, now that everything is set up.
159  graph_ptr->add_predecessor(rv.m_node_impl, *this);
160  KOKKOS_ENSURES(bool(rv.m_node_impl))
161  return rv;
162  }
163 
164  //----------------------------------------------------------------------------
165  // <editor-fold desc="Private constructors"> {{{2
166 
167  GraphNodeRef(std::weak_ptr<graph_impl_t> arg_graph_impl,
168  std::shared_ptr<node_impl_t> arg_node_impl)
169  : m_graph_impl(std::move(arg_graph_impl)),
170  m_node_impl(std::move(arg_node_impl)) {}
171 
172  // </editor-fold> end Private constructors }}}2
173  //----------------------------------------------------------------------------
174 
175  public:
176  //----------------------------------------------------------------------------
177  // <editor-fold desc="Constructors, destructors, and assignment"> {{{2
178 
179  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
180  // <editor-fold desc="rule of 6 ctors"> {{{3
181 
182  // Copyable and movable (basically just shared_ptr semantics
183  GraphNodeRef() noexcept = default;
184  GraphNodeRef(GraphNodeRef const&) = default;
185  GraphNodeRef(GraphNodeRef&&) noexcept = default;
186  GraphNodeRef& operator=(GraphNodeRef const&) = default;
187  GraphNodeRef& operator=(GraphNodeRef&&) noexcept = default;
188  ~GraphNodeRef() = default;
189 
190  // </editor-fold> end rule of 6 ctors }}}3
191  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
192 
193  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
194  // <editor-fold desc="Type-erasing converting ctor and assignment"> {{{3
195 
196  template <class OtherKernel, class OtherPredecessor,
197  std::enable_if_t<
198  // Not a copy/move constructor
199  !std::is_same_v<GraphNodeRef,
200  GraphNodeRef<execution_space, OtherKernel,
201  OtherPredecessor>> &&
202  // must be an allowed type erasure of the kernel
203  Kokkos::Impl::is_compatible_type_erasure<
204  OtherKernel, graph_kernel>::value &&
205  // must be an allowed type erasure of the predecessor
206  Kokkos::Impl::is_compatible_type_erasure<
207  OtherPredecessor, graph_predecessor>::value,
208  int> = 0>
209  /* implicit */
210  GraphNodeRef(
211  GraphNodeRef<execution_space, OtherKernel, OtherPredecessor> const& other)
212  : m_graph_impl(other.m_graph_impl), m_node_impl(other.m_node_impl) {}
213 
214  // Note: because this is an implicit conversion (as is supposed to be the
215  // case with most type-erasing wrappers like this), we don't also need
216  // a converting assignment operator.
217 
218  // </editor-fold> end Type-erasing converting ctor and assignment }}}3
219  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
220 
221  // </editor-fold> end Constructors, destructors, and assignment }}}2
222  //----------------------------------------------------------------------------
223 
224  //----------------------------------------------------------------------------
225  // <editor-fold desc="then_parallel_for"> {{{2
226 
227  // TODO We should do better than a p-for (that uses registers, heavier).
228  // This should "just" launch the function on device with our driver.
229  template <typename Label, typename Functor,
230  typename = std::enable_if_t<std::is_invocable_r_v<
231  void, const Kokkos::Impl::remove_cvref_t<Functor>>>>
232  auto then(Label&& label, const ExecutionSpace& exec,
233  Functor&& functor) const {
234  using next_kernel_t =
235  Kokkos::Impl::GraphNodeThenImpl<ExecutionSpace,
236  Kokkos::Impl::remove_cvref_t<Functor>>;
237  return this->_then_kernel(next_kernel_t{std::forward<Label>(label), exec,
238  std::forward<Functor>(functor)});
239  }
240 
241  template <typename Label, typename Functor,
242  typename = std::enable_if_t<std::is_invocable_r_v<
243  void, const Kokkos::Impl::remove_cvref_t<Functor>>>>
244  auto then(Label&& label, Functor&& functor) const {
245  return this->then(std::forward<Label>(label), ExecutionSpace{},
246  std::forward<Functor>(functor));
247  }
248 
249  template <
250  class Policy, class Functor,
251  std::enable_if_t<
252  // equivalent to:
253  // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>>
254  is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value,
255  // --------------------
256  int> = 0>
257  auto then_parallel_for(std::string arg_name, Policy&& arg_policy,
258  Functor&& functor) const {
259  //----------------------------------------
260  KOKKOS_EXPECTS(!m_graph_impl.expired())
261  KOKKOS_EXPECTS(bool(m_node_impl))
262  // TODO @graph restore this expectation once we add comparability to space
263  // instances
264  // KOKKOS_EXPECTS(
265  // arg_policy.space() == m_graph_impl->get_execution_space());
266 
267  // needs to static assert constraint: DataParallelFunctor<Functor>
268 
269  using policy_t = Kokkos::Impl::remove_cvref_t<Policy>;
270  // constraint check: same execution space type (or defaulted, maybe?)
271  static_assert(
272  std::is_same_v<typename policy_t::execution_space, execution_space>,
273  // TODO @graph make defaulted execution space work
274  //|| policy_t::execution_space_is_defaulted,
275  "Execution Space mismatch between execution policy and graph");
276 
277  auto policy = Experimental::require((Policy&&)arg_policy,
278  Kokkos::Impl::KernelInGraphProperty{});
279 
280  using next_policy_t = decltype(policy);
281  using next_kernel_t =
282  Kokkos::Impl::GraphNodeKernelImpl<ExecutionSpace, next_policy_t,
283  std::decay_t<Functor>,
284  Kokkos::ParallelForTag>;
285  return this->_then_kernel(next_kernel_t{std::move(arg_name), policy.space(),
286  (Functor&&)functor,
287  (Policy&&)policy});
288  }
289 
290  template <
291  class Policy, class Functor,
292  std::enable_if_t<
293  // equivalent to:
294  // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>>
295  is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value,
296  // --------------------
297  int> = 0>
298  auto then_parallel_for(Policy&& policy, Functor&& functor) const {
299  // needs to static assert constraint: DataParallelFunctor<Functor>
300  return this->then_parallel_for("", (Policy&&)policy, (Functor&&)functor);
301  }
302 
303  template <class Functor>
304  auto then_parallel_for(std::string name, std::size_t n,
305  Functor&& functor) const {
306  // needs to static assert constraint: DataParallelFunctor<Functor>
307  return this->then_parallel_for(std::move(name),
309  (Functor&&)functor);
310  }
311 
312  template <class Functor>
313  auto then_parallel_for(std::size_t n, Functor&& functor) const {
314  // needs to static assert constraint: DataParallelFunctor<Functor>
315  return this->then_parallel_for("", n, (Functor&&)functor);
316  }
317 
318  // </editor-fold> end then_parallel_for }}}2
319  //----------------------------------------------------------------------------
320 
321  //----------------------------------------------------------------------------
322  // <editor-fold desc="then_parallel_reduce"> {{{2
323 
324  // Equivalent to std::get<I>(std::tuple) but callable on the device.
325  template <bool B, class T1, class T2>
326  static KOKKOS_FUNCTION std::conditional_t<B, T1&&, T2&&>
327  impl_forwarding_switch(T1&& v1, T2&& v2) {
328  if constexpr (B)
329  return static_cast<T1&&>(v1);
330  else
331  return static_cast<T2&&>(v2);
332  }
333 
334  template <
335  class Policy, class Functor, class ReturnType,
336  std::enable_if_t<
337  // equivalent to:
338  // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>>
339  is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value,
340  // --------------------
341  int> = 0>
342  auto then_parallel_reduce(std::string arg_name, Policy&& arg_policy,
343  Functor&& functor,
344  ReturnType&& return_value) const {
345  auto graph_impl_ptr = m_graph_impl.lock();
346  KOKKOS_EXPECTS(bool(graph_impl_ptr))
347  KOKKOS_EXPECTS(bool(m_node_impl))
348  // TODO @graph restore this expectation once we add comparability to space
349  // instances
350  // KOKKOS_EXPECTS(
351  // arg_policy.space() == m_graph_impl->get_execution_space());
352 
353  // needs static assertion of constraint:
354  // DataParallelReductionFunctor<Functor, ReturnType>
355 
356  using policy_t = std::remove_cv_t<std::remove_reference_t<Policy>>;
357  static_assert(
358  std::is_same_v<typename policy_t::execution_space, execution_space>,
359  // TODO @graph make defaulted execution space work
360  // || policy_t::execution_space_is_defaulted,
361  "Execution Space mismatch between execution policy and graph");
362 
363  // This is also just an expectation, but it's one that we expect the user
364  // to interact with (even in release mode), so we should throw an exception
365  // with an explanation rather than just doing a contract assertion.
366  // We can't static_assert this because of the way that Reducers store
367  // whether or not they point to a View as a runtime boolean rather than part
368  // of the type.
369  if (Kokkos::Impl::parallel_reduce_needs_fence(
370  graph_impl_ptr->get_execution_space(), return_value)) {
371  Kokkos::Impl::throw_runtime_exception(
372  "Parallel reductions in graphs can't operate on Reducers that "
373  "reference a scalar because they can't complete synchronously. Use a "
374  "Kokkos::View instead and keep in mind the result will only be "
375  "available once the graph is submitted (or in tasks that depend on "
376  "this one).");
377  }
378 
379  //----------------------------------------
380  // This is a disaster, but I guess it's not a my disaster to fix right now
381  using return_type_remove_cvref =
382  std::remove_cv_t<std::remove_reference_t<ReturnType>>;
383  static_assert(Kokkos::is_view<return_type_remove_cvref>::value ||
384  Kokkos::is_reducer<return_type_remove_cvref>::value,
385  "Output argument to parallel reduce in a graph must be a "
386  "View or a Reducer");
387 
388  if constexpr (Kokkos::is_reducer_v<return_type_remove_cvref>) {
389  static_assert(
391  ExecutionSpace, typename return_type_remove_cvref::
392  result_view_type::memory_space>::accessible,
393  "The reduction target must be accessible by the graph execution "
394  "space.");
395  } else {
396  static_assert(
398  ExecutionSpace,
399  typename return_type_remove_cvref::memory_space>::accessible,
400  "The reduction target must be accessible by the graph execution "
401  "space.");
402  }
403 
404  using return_type =
405  // Yes, you do really have to do this...
406  std::conditional_t<Kokkos::is_reducer<return_type_remove_cvref>::value,
407  return_type_remove_cvref,
408  const return_type_remove_cvref>;
409  using functor_type = Kokkos::Impl::remove_cvref_t<Functor>;
410  // see Kokkos_Parallel_Reduce.hpp for how these details are used there;
411  // we're just doing the same thing here
412  using return_value_adapter =
413  Kokkos::Impl::ParallelReduceReturnValue<void, return_type,
414  functor_type>;
415  // End of Kokkos reducer disaster
416  //----------------------------------------
417 
418  auto policy = Experimental::require((Policy&&)arg_policy,
419  Kokkos::Impl::KernelInGraphProperty{});
420 
421  using passed_reducer_type = typename return_value_adapter::reducer_type;
422 
423  constexpr bool passed_reducer_type_is_invalid =
424  std::is_same_v<InvalidType, passed_reducer_type>;
425  using TheReducerType =
426  std::conditional_t<passed_reducer_type_is_invalid, functor_type,
427  passed_reducer_type>;
428 
429  using analysis = Kokkos::Impl::FunctorAnalysis<
430  Kokkos::Impl::FunctorPatternInterface::REDUCE, Policy, TheReducerType,
431  typename return_value_adapter::value_type>;
432  typename analysis::Reducer final_reducer(
433  impl_forwarding_switch<passed_reducer_type_is_invalid>(functor,
434  return_value));
435  Kokkos::Impl::CombinedFunctorReducer<functor_type,
436  typename analysis::Reducer>
437  functor_reducer(functor, final_reducer);
438 
439  using next_policy_t = decltype(policy);
440  using next_kernel_t =
441  Kokkos::Impl::GraphNodeKernelImpl<ExecutionSpace, next_policy_t,
442  decltype(functor_reducer),
443  Kokkos::ParallelReduceTag>;
444 
445  return this->_then_kernel(next_kernel_t{
446  std::move(arg_name), graph_impl_ptr->get_execution_space(),
447  functor_reducer, (Policy&&)policy,
448  return_value_adapter::return_value(return_value, functor)});
449  }
450 
451  template <
452  class Policy, class Functor, class ReturnType,
453  std::enable_if_t<
454  // equivalent to:
455  // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>>
456  is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value,
457  // --------------------
458  int> = 0>
459  auto then_parallel_reduce(Policy&& arg_policy, Functor&& functor,
460  ReturnType&& return_value) const {
461  return this->then_parallel_reduce("", (Policy&&)arg_policy,
462  (Functor&&)functor,
463  (ReturnType&&)return_value);
464  }
465 
466  template <class Functor, class ReturnType>
467  auto then_parallel_reduce(std::string label,
468  typename execution_space::size_type idx_end,
469  Functor&& functor,
470  ReturnType&& return_value) const {
471  return this->then_parallel_reduce(
472  std::move(label), Kokkos::RangePolicy<execution_space>{0, idx_end},
473  (Functor&&)functor, (ReturnType&&)return_value);
474  }
475 
476  template <class Functor, class ReturnType>
477  auto then_parallel_reduce(typename execution_space::size_type idx_end,
478  Functor&& functor,
479  ReturnType&& return_value) const {
480  return this->then_parallel_reduce("", idx_end, (Functor&&)functor,
481  (ReturnType&&)return_value);
482  }
483 
484  // </editor-fold> end then_parallel_reduce }}}2
485  //----------------------------------------------------------------------------
486 
487  // TODO @graph parallel scan, deep copy, etc.
488 };
489 
490 } // end namespace Experimental
491 } // end namespace Kokkos
492 
493 #endif // KOKKOS_KOKKOS_GRAPHNODE_HPP
Can AccessSpace access MemorySpace ?
ReturnType
Execution policy for work over a range of an integral type.