Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Sacado_Fad_Kokkos_Specialization.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef SACADO_FAD_KOKKOS_VIEW_SUPPORT_INCLUDES
11 #error "This file can only be included by Sacado_Fad_Kokkos_View_Support.hpp"
12 #endif
13 
14 // This file contains pieces of Kokkos that still need to be
15 // overloaded/specialized for LayoutContiguous!
16 // -- Kokkos::Impl::SubviewLegalArgsCompileTime
17 // -- Kokkos::subview
18 // -- Kokkos::subdynrankview
19 // -- Kokkos::resize (when using Hierarchical Fad)
20 
21 // Deal with subview of LayoutContiguous which isn't yet a real mdspan Layout
22 namespace Kokkos {
23 namespace Impl {
24 
25 // Rules for subview arguments and layouts matching
26 
// Partial specialization chosen when the destination layout is
// Kokkos::LayoutContiguous<LayoutDest, StrideDst>. The enumerator is
// initialized from SubviewLegalArgsCompileTime instantiated with the wrapped
// LayoutDest, so the LayoutContiguous wrapper and its stride parameter play no
// part in the legality result.
// NOTE(review): upstream listing line 33 is absent from this extract (the
// numbering jumps from 32 to 34); presumably it reads `value =` -- confirm
// against the upstream header.
27 template <class LayoutDest, unsigned StrideDst, class LayoutSrc, int RankDest,
28  int RankSrc, int CurrentArg, class... SubViewArgs>
29 struct SubviewLegalArgsCompileTime<
30  Kokkos::LayoutContiguous<LayoutDest, StrideDst>, LayoutSrc, RankDest,
31  RankSrc, CurrentArg, SubViewArgs...> {
32  enum {
34  SubviewLegalArgsCompileTime<LayoutDest, LayoutSrc, RankDest, RankSrc,
35  CurrentArg, SubViewArgs...>::value
36  };
37 };
38 
// Partial specialization chosen when the source layout is
// Kokkos::LayoutContiguous<LayoutSrc, StrideSrc>. The enumerator is
// initialized from SubviewLegalArgsCompileTime instantiated with the wrapped
// LayoutSrc, so the LayoutContiguous wrapper and its stride parameter play no
// part in the legality result.
// NOTE(review): upstream listing line 45 is absent from this extract (the
// numbering jumps from 44 to 46); presumably it reads `value =` -- confirm
// against the upstream header.
39 template <class LayoutDest, class LayoutSrc, unsigned StrideSrc, int RankDest,
40  int RankSrc, int CurrentArg, class... SubViewArgs>
41 struct SubviewLegalArgsCompileTime<
42  LayoutDest, Kokkos::LayoutContiguous<LayoutSrc, StrideSrc>, RankDest,
43  RankSrc, CurrentArg, SubViewArgs...> {
44  enum {
46  SubviewLegalArgsCompileTime<LayoutDest, LayoutSrc, RankDest, RankSrc,
47  CurrentArg, SubViewArgs...>::value
48  };
49 };
50 
// Partial specialization chosen when both the destination and the source
// layouts are Kokkos::LayoutContiguous. The enumerator is initialized from
// SubviewLegalArgsCompileTime instantiated with the two wrapped layouts
// (LayoutDest, LayoutSrc); neither stride parameter is used.
// NOTE(review): upstream listing line 59 is absent from this extract (the
// numbering jumps from 58 to 60); presumably it reads `value =` -- confirm
// against the upstream header.
51 template <class LayoutDest, unsigned StrideDest, class LayoutSrc,
52  unsigned StrideSrc, int RankDest, int RankSrc, int CurrentArg,
53  class... SubViewArgs>
54 struct SubviewLegalArgsCompileTime<
55  Kokkos::LayoutContiguous<LayoutDest, StrideDest>,
56  Kokkos::LayoutContiguous<LayoutSrc, StrideSrc>, RankDest, RankSrc,
57  CurrentArg, SubViewArgs...> {
58  enum {
60  SubviewLegalArgsCompileTime<LayoutDest, LayoutSrc, RankDest, RankSrc,
61  CurrentArg, SubViewArgs...>::value
62  };
63 };
64 
// Partial specialization of CommonSubview for a destination View and a source
// View that both use a Kokkos::LayoutContiguous layout. It applies the same
// slice arguments (Args...) to both views via subview().
65 template <class DstT, class DstL, unsigned DstS, class... DstArgs, class SrcT,
66  class SrcL, unsigned SrcS, class... SrcArgs, class... Args>
67 struct CommonSubview<
68  Kokkos::View<DstT, Kokkos::LayoutContiguous<DstL, DstS>, DstArgs...>,
69  Kokkos::View<SrcT, Kokkos::LayoutContiguous<SrcL, SrcS>, SrcArgs...>,
70  Args...> {
 // Full View types of the two operands.
71  using DstType =
72  Kokkos::View<DstT, Kokkos::LayoutContiguous<DstL, DstS>, DstArgs...>;
73  using SrcType =
74  Kokkos::View<SrcT, Kokkos::LayoutContiguous<SrcL, SrcS>, SrcArgs...>;
 // Subview types are deduced from the return type of subview() rather than
 // spelled out by hand.
75  using dst_subview_type =
76  decltype(subview(std::declval<DstType>(), std::declval<Args>()...));
77  using src_subview_type =
78  decltype(subview(std::declval<SrcType>(), std::declval<Args>()...));
 // NOTE(review): upstream listing lines 79-80 are absent from this extract;
 // presumably they declare the members `dst_sub` and `src_sub` that the
 // constructor below initializes -- confirm against the upstream header.
 // Takes the subview of each operand with the same slice arguments.
81  CommonSubview(const DstType &dst, const SrcType &src, const Args &...args)
82  : dst_sub(subview(dst, args...)), src_sub(subview(src, args...)) {}
83 };
84 
85 } // namespace Impl
86 } // namespace Kokkos
87 
namespace {
/// Builds a pointer-style data type of a given rank from an element type:
/// `data_type_construct<T, N>::type` is `T` with `N` levels of `*` appended
/// (for example, N == 2 gives `T **`).
///
/// General case: one pointer level is added to the element type and the
/// remaining N - 1 levels are obtained from the next instance, whose `type`
/// member is inherited.
template <class T, size_t N>
struct data_type_construct : data_type_construct<T *, N - 1> {};

/// Terminal case: with no levels left to add, the result is the type itself.
template <class T> struct data_type_construct<T, 0> {
  using type = T;
};
} // namespace
96 namespace Kokkos {
97 // This is needed to deal with the return Layout Deduction for LayoutContiguous
98 // ...
99 template <class D, class LayoutSrc, unsigned StrideSrc, class... P,
100  class... Args>
101 KOKKOS_INLINE_FUNCTION auto subview(
102  const View<D, Kokkos::LayoutContiguous<LayoutSrc, StrideSrc>, P...> &src,
103  Args... args) {
104  using view_t = View<D, Kokkos::LayoutContiguous<LayoutSrc, StrideSrc>, P...>;
105  auto submapping_result = submdspan_mapping(
106  src.mapping(), Impl::transform_kokkos_slice_to_mdspan_slice(args)...);
107  using sub_data_type = typename data_type_construct<
108  typename view_t::value_type,
109  decltype(submapping_result.mapping)::extents_type::rank()>::type;
110  using layout_t = std::conditional_t<
111  std::is_same_v<typename decltype(submapping_result.mapping)::layout_type,
112  layout_stride>,
113  LayoutStride, LayoutSrc>;
114  return View<sub_data_type, LayoutContiguous<layout_t, StrideSrc>,
115  typename view_t::device_type, typename view_t::memory_traits>(
116  src.accessor().offset(src.data_handle(), submapping_result.offset),
117  submapping_result.mapping, src.accessor());
118 }
119 
120 // This is needed to deal with the return Layout Deduction for LayoutContiguous
121 // ...
// Second subview overload in this file. It checks at compile time that one
// slice argument is supplied per rank, forms the submdspan type of the source
// to see whether slicing yields layout_stride, and returns the type named by
// Kokkos::Impl::ViewMapping<void, ...>::type constructed from (src, args...).
// NOTE(review): upstream listing lines 125, 139 and 146 are absent from this
// extract. Line 125 presumably holds the function name and the start of the
// `src` parameter; lines 139 and 146 presumably hold the argument of
// RemoveAlignedMemoryTrait, which is where the two branches would differ.
// Confirm all three against the upstream header before relying on this text.
122 template <class D, class LayoutSrc, unsigned StrideSrc, class... P,
123  class... Args>
124 KOKKOS_INLINE_FUNCTION auto
126  P...> &src,
127  Args... args) {
 // NOTE(review): the assertion inspects View<D, P...>, i.e. without the
 // LayoutContiguous layout argument -- confirm that this is intended.
128  static_assert(View<D, P...>::rank == sizeof...(Args),
129  "subview requires one argument for each source View rank");
130 
 // Only the type of the submdspan is needed; nothing is evaluated here.
131  using sub_mdspan_t = decltype(submdspan(
132  src.to_mdspan(), Impl::transform_kokkos_slice_to_mdspan_slice(args)...));
 // Branch at compile time on whether the sliced layout is layout_stride.
133  if constexpr (std::is_same_v<typename sub_mdspan_t::layout_type,
134  layout_stride>) {
135  return typename Kokkos::Impl::ViewMapping<
136  void /* deduce subview type from source view traits */
137  ,
138  typename Impl::RemoveAlignedMemoryTrait<
140  Args...>::type(src, args...);
141  } else {
142  return typename Kokkos::Impl::ViewMapping<
143  void /* deduce subview type from source view traits */
144  ,
145  typename Impl::RemoveAlignedMemoryTrait<
147  Args...>::type(src, args...);
148  }
149 }
150 
// Slices a DynRankView with a LayoutContiguous layout and returns the result
// as a DynRankView. Up to seven slice arguments are accepted; each defaults to
// a value-initialized int.
// NOTE(review): upstream listing line 156 is absent from this extract;
// presumably it holds the function name and the start of the `drv` parameter
// type -- confirm against the upstream header.
151 template <class T, class LayoutSrc, unsigned StrideSrc, class... DRVArgs,
152  class SubArg0 = int, class SubArg1 = int, class SubArg2 = int,
153  class SubArg3 = int, class SubArg4 = int, class SubArg5 = int,
154  class SubArg6 = int>
155 KOKKOS_INLINE_FUNCTION auto
157  DRVArgs...> &drv,
158  SubArg0 arg0 = SubArg0{}, SubArg1 arg1 = SubArg1{},
159  SubArg2 arg2 = SubArg2{}, SubArg3 arg3 = SubArg3{},
160  SubArg4 arg4 = SubArg4{}, SubArg5 arg5 = SubArg5{},
161  SubArg6 arg6 = SubArg6{}) {
 // Slice the result of drv.DownCast() with all seven arguments.
162  auto sub = subview(drv.DownCast(), arg0, arg1, arg2, arg3, arg4, arg5, arg6);
163  using sub_t = decltype(sub);
 // Rank of the result: one for every slice position below drv.rank() whose
 // argument type is not integral.
164  size_t new_rank = (drv.rank() > 0 && !std::is_integral_v<SubArg0> ? 1 : 0) +
165  (drv.rank() > 1 && !std::is_integral_v<SubArg1> ? 1 : 0) +
166  (drv.rank() > 2 && !std::is_integral_v<SubArg2> ? 1 : 0) +
167  (drv.rank() > 3 && !std::is_integral_v<SubArg3> ? 1 : 0) +
168  (drv.rank() > 4 && !std::is_integral_v<SubArg4> ? 1 : 0) +
169  (drv.rank() > 5 && !std::is_integral_v<SubArg5> ? 1 : 0) +
170  (drv.rank() > 6 && !std::is_integral_v<SubArg6> ? 1 : 0);
171 
 // The returned DynRankView takes its value type, layout, device and memory
 // traits from the sliced view.
172  using return_type = DynRankView<
173  typename sub_t::value_type,
174  typename sub_t::array_layout, // LayoutContiguous<LayoutStride,
175  // StrideSrc>,
176  typename sub_t::device_type, typename sub_t::memory_traits>;
177 
 // Start from the layout wrapped by the sliced view's layout and set every
 // dimension from new_rank through index 7 to 1.
178  auto layout = sub.layout().base_layout();
179  for (int i = new_rank; i < 8; i++)
180  layout.dimension[i] = 1;
 // A LayoutStride also stores strides; those entries are set to 1 as well.
181  if constexpr (std::is_same_v<decltype(layout), LayoutStride>)
182  for (int i = new_rank; i < 8; i++)
183  layout.stride[i] = 1;
184 
 // Build the result from the sliced view's data handle and accessor, a
 // mapping created from the adjusted layout, and the computed rank.
185  return return_type{
186  typename return_type::view_type(
187  sub.data_handle(),
188  Impl::mapping_from_array_layout<typename return_type::mapping_type>(
189  layout),
190  sub.accessor()),
191  new_rank};
192 }
193 
// subview overload for a DynRankView with a LayoutContiguous layout. It takes
// the same parameters and defaults as subdynrankview and forwards every
// argument to it unchanged, returning whatever subdynrankview returns.
194 template <class T, class LayoutSrc, unsigned StrideSrc, class... DRVArgs,
195  class SubArg0 = int, class SubArg1 = int, class SubArg2 = int,
196  class SubArg3 = int, class SubArg4 = int, class SubArg5 = int,
197  class SubArg6 = int>
198 KOKKOS_INLINE_FUNCTION auto
199 subview(const DynRankView<T, LayoutContiguous<LayoutSrc, StrideSrc>, DRVArgs...>
200  &drv,
201  SubArg0 arg0 = SubArg0{}, SubArg1 arg1 = SubArg1{},
202  SubArg2 arg2 = SubArg2{}, SubArg3 arg3 = SubArg3{},
203  SubArg4 arg4 = SubArg4{}, SubArg5 arg5 = SubArg5{},
204  SubArg6 arg6 = SubArg6{}) {
 // Unqualified call: overload resolution picks the subdynrankview to use.
205  return subdynrankview(drv, arg0, arg1, arg2, arg3, arg4, arg5, arg6);
206 }
207 } // namespace Kokkos
208 
209 #if defined(SACADO_VIEW_CUDA_HIERARCHICAL)
210 namespace Kokkos {
211 template <class SrcT, class SrcL, unsigned SrcS, class... SrcArgs>
212 void resize(
213  Kokkos::View<SrcT, Kokkos::LayoutContiguous<SrcL, SrcS>, SrcArgs...> &src,
214  const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
215  const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
216  const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
217  const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
218  const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
219  const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
220  const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
221  const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
222  const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
223  bool size_mismatch = false;
224  using view_t =
225  Kokkos::View<SrcT, Kokkos::LayoutContiguous<SrcL, SrcS>, SrcArgs...>;
226  for (int r = 0; r < (int)src.rank(); r++) {
227  if (new_extents[r] != src.extent(r))
228  size_mismatch = true;
229  }
230  if (size_mismatch) {
231  using exec_space = typename view_t::execution_space;
232  auto dst = view_t(src.label(), n0, n1, n2);
233  size_t total_extent = 1;
234  for (size_t r = 0; r < view_t::rank(); r++)
235  total_extent *= Kokkos::min(src.extent(r), dst.extent(r));
236  size_t vector_size = 32;
237 
238  // Just arbitraryly using team_size = 1 for low concurrency backends (i.e.
239  // CPUs)
240  size_t team_size =
241  exec_space().concurrency() > 1000 ? 512 / vector_size : 1;
242 
243  size_t num_teams = (total_extent + team_size - 1) / team_size;
244 
245  Kokkos::parallel_for(
246  "Sacado::view_copy Hierarchical",
247  Kokkos::TeamPolicy<exec_space>(num_teams, team_size, vector_size),
248  KOKKOS_LAMBDA(
249  const typename Kokkos::TeamPolicy<exec_space>::member_type &team) {
250  size_t ii = team.league_rank() * team.team_size() + team.team_rank();
251  if (ii >= total_extent)
252  return;
253  // work around capture restriction
254  if (dst.data() == src.data())
255  return;
256  if constexpr (view_t::rank() == 0)
257  dst() == src();
258  else if constexpr (view_t::rank() == 1) {
259  dst(ii) == src(ii);
260  } else if constexpr (view_t::rank() == 2) {
261  int i1 = ii % src.extent(1);
262  int i0 = ii / src.extent(1);
263  dst(i0, i1) = src(i0, i1);
264  } else if constexpr (view_t::rank() == 3) {
265  int i2 = ii % src.extent(2);
266  int i1 = (ii / src.extent(2)) % src.extent(1);
267  int i0 = ii / (src.extent(2) * src.extent(1));
268  dst(i0, i1, i2) = src(i0, i1, i2);
269  } else if constexpr (view_t::rank() == 4) {
270  int i3 = ii % src.extent(3);
271  int i2 = (ii / src.extent(3)) % src.extent(2);
272  int i1 = (ii / (src.extent(3) * src.extent(2))) % src.extent(1);
273  int i0 = (ii / (src.extent(3) * src.extent(2) * src.extent(1)));
274  dst(i0, i1, i2, i3) = src(i0, i1, i2, i3);
275  } else if constexpr (view_t::rank() == 5) {
276  int i4 = ii % src.extent(4);
277  int i3 = (ii / src.extent(4)) % src.extent(3);
278  int i2 = (ii / (src.extent(4) * src.extent(3))) % src.extent(2);
279  int i1 = (ii / (src.extent(4) * src.extent(3) * src.extent(2))) %
280  src.extent(1);
281  int i0 = (ii / (src.extent(4) * src.extent(3) * src.extent(2) *
282  src.extent(1)));
283  dst(i0, i1, i2, i3, i4) = src(i0, i1, i2, i3, i4);
284  } else if constexpr (view_t::rank() == 5) {
285  int i5 = ii % src.extent(5);
286  int i4 = (ii / src.extent(5)) % src.extent(4);
287  int i3 = (ii / (src.extent(5) * src.extent(4))) % src.extent(3);
288  int i2 = (ii / (src.extent(5) * src.extent(4) * src.extent(3))) %
289  src.extent(2);
290  int i1 = (ii / (src.extent(5) * src.extent(4) * src.extent(3) *
291  src.extent(2))) %
292  src.extent(1);
293  int i0 = (ii / (src.extent(5) * src.extent(4) * src.extent(3) *
294  src.extent(2) * src.extent(1)));
295  dst(i0, i1, i2, i3, i4, i5) = src(i0, i1, i2, i3, i4, i5);
296  }
297  });
298  Kokkos::fence();
299  src = dst;
300  }
301 }
302 } // namespace Kokkos
303 #endif // SACADO_VIEW_CUDA_HIERARCHICAL
KOKKOS_INLINE_FUNCTION auto subview(const View< D, Kokkos::LayoutContiguous< LayoutSrc, StrideSrc >, P...> &src, Args...args)
#define T
Definition: Sacado_rad.hpp:553
#define D
Definition: Sacado_rad.hpp:557
SimpleFad< ValueT > min(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
int value
const int N
KOKKOS_INLINE_FUNCTION auto subdynrankview(const DynRankView< T, LayoutContiguous< LayoutSrc, StrideSrc >, DRVArgs...> &drv, SubArg0 arg0=SubArg0{}, SubArg1 arg1=SubArg1{}, SubArg2 arg2=SubArg2{}, SubArg3 arg3=SubArg3{}, SubArg4 arg4=SubArg4{}, SubArg5 arg5=SubArg5{}, SubArg6 arg6=SubArg6{})
if(first)
Definition: uninit.c:110