Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
KokkosExp_MDRangePolicy.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
23 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
24 
25 #include <initializer_list>
26 
27 #include <Kokkos_Layout.hpp>
28 #include <Kokkos_Rank.hpp>
29 #include <Kokkos_Array.hpp>
30 #include <impl/KokkosExp_Host_IterateTile.hpp>
31 #include <Kokkos_ExecPolicy.hpp>
32 #include <type_traits>
33 #include <cmath>
34 
35 namespace Kokkos {
36 
37 // ------------------------------------------------------------------ //
38 // Moved to Kokkos_Layout.hpp for more general accessibility
39 /*
40 enum class Iterate
41 {
42  Default, // Default for the device
43  Left, // Left indices stride fastest
44  Right, // Right indices stride fastest
45 };
46 */
47 
48 template <typename ExecSpace>
49 struct default_outer_direction {
50  using type = Iterate;
51  static constexpr Iterate value = Iterate::Right;
52 };
53 
54 template <typename ExecSpace>
55 struct default_inner_direction {
56  using type = Iterate;
57  static constexpr Iterate value = Iterate::Right;
58 };
59 
60 namespace Impl {
// NOTE the comparison below is encapsulated to silence warnings about
// pointless comparison of unsigned integer with zero: the `arg < T{}` test
// sits in a compile-time branch that is only instantiated for signed T.
//
// Returns true when `arg` compares less than a value-initialized T; always
// false for types that are not signed.
template <class T>
constexpr bool is_less_than_value_initialized_variable(
    [[maybe_unused]] T arg) {
  if constexpr (std::is_signed_v<T>) {
    return arg < T{};
  } else {
    return false;
  }
}
74 
75 // Checked narrowing conversion that calls abort if the cast changes the value
76 template <class To, class From>
77 constexpr To checked_narrow_cast(From arg, std::size_t idx) {
78  constexpr const bool is_different_signedness =
79  (std::is_signed_v<To> != std::is_signed_v<From>);
80  auto const ret = static_cast<To>(arg);
81  if (static_cast<From>(ret) != arg ||
82  (is_different_signedness &&
83  is_less_than_value_initialized_variable(arg) !=
84  is_less_than_value_initialized_variable(ret))) {
85  auto msg =
86  "Kokkos::MDRangePolicy bound type error: an unsafe implicit conversion "
87  "is performed on a bound (" +
88  std::to_string(arg) + ") in dimension (" + std::to_string(idx) +
89  "), which may not preserve its original value.\n";
90  Kokkos::abort(msg.c_str());
91  }
92  return ret;
93 }
94 // NOTE prefer C array U[M] to std::initalizer_list<U> so that the number of
95 // elements can be deduced (https://stackoverflow.com/q/40241370)
96 // NOTE for some unfortunate reason the policy bounds are stored as signed
97 // integer arrays (point_type which is Kokkos::Array<std::int64_t>) so we
98 // specify the index type (actual policy index_type from the traits) and check
99 // ahead of time that narrowing conversions will be safe.
100 template <class IndexType, class Array, class U, std::size_t M>
101 constexpr Array to_array_potentially_narrowing(const U (&init)[M]) {
102  using T = typename Array::value_type;
103  Array a{};
104  constexpr std::size_t N = a.size();
105  static_assert(M <= N);
106  auto* ptr = a.data();
107  // NOTE equivalent to
108  // std::transform(std::begin(init), std::end(init), a.data(),
109  // [](U x) { return static_cast<T>(x); });
110  // except that std::transform is not constexpr.
111  for (std::size_t i = 0; i < M; ++i) {
112  *ptr++ = checked_narrow_cast<T>(init[i], i);
113  (void)checked_narrow_cast<IndexType>(init[i], i); // see note above
114  }
115  return a;
116 }
117 
118 // NOTE Making a copy even when std::is_same<Array, Kokkos::Array<U, M>>::value
119 // is true to reduce code complexity. You may change this if you have a good
120 // reason to. Intentionally not enabling std::array at this time but this may
121 // change too.
122 template <class IndexType, class NVCC_WONT_LET_ME_CALL_YOU_Array, class U,
123  std::size_t M>
124 constexpr NVCC_WONT_LET_ME_CALL_YOU_Array to_array_potentially_narrowing(
125  Kokkos::Array<U, M> const& other) {
126  using T = typename NVCC_WONT_LET_ME_CALL_YOU_Array::value_type;
127  NVCC_WONT_LET_ME_CALL_YOU_Array a{};
128  constexpr std::size_t N = a.size();
129  static_assert(M <= N);
130  for (std::size_t i = 0; i < M; ++i) {
131  a[i] = checked_narrow_cast<T>(other[i], i);
132  (void)checked_narrow_cast<IndexType>(other[i], i); // see note above
133  }
134  return a;
135 }
136 
// Limits and defaults that MDRangePolicy uses when it has to pick or validate
// tile sizes.
struct TileSizeProperties {
  // Upper limit for the product of all tile extents.
  int max_threads;
  // Tile extent used for the innermost dimension; 0 means "use the full
  // length of that dimension".
  int default_largest_tile_size;
  // Tile extent used for the remaining dimensions when none was given.
  int default_tile_size;
  // Bound on the running tile-extent product when default extents are chosen.
  int max_total_tile_size;
};

// Generic (host) settings: no effective limit on the tile product, the
// innermost dimension spans its full length, other dimensions default to 2.
// The execution space instance itself is not inspected.
template <typename ExecutionSpace>
TileSizeProperties get_tile_size_properties(const ExecutionSpace&) {
  constexpr int unlimited = std::numeric_limits<int>::max();
  return TileSizeProperties{/*max_threads=*/unlimited,
                            /*default_largest_tile_size=*/0,
                            /*default_tile_size=*/2,
                            /*max_total_tile_size=*/unlimited};
}
154 
155 } // namespace Impl
156 
157 // multi-dimensional iteration pattern
158 template <typename... Properties>
159 struct MDRangePolicy;
160 
161 // Note: If MDRangePolicy has a primary template, implicit CTAD (deduction
162 // guides) are generated -> MDRangePolicy<> by some compilers, which is
163 // incorrect. By making it a template specialization instead, no implicit CTAD
164 // is generated. This works because there has to be at least one property
165 // specified (which is Rank<...>); otherwise, we'd get the static_assert
166 // "Kokkos::Error: MD iteration pattern not defined". This template
167 // specialization uses <P, Properties...> in all places for correctness.
168 template <typename P, typename... Properties>
169 struct MDRangePolicy<P, Properties...>
170  : public Kokkos::Impl::PolicyTraits<P, Properties...> {
171  using traits = Kokkos::Impl::PolicyTraits<P, Properties...>;
172  using range_policy = RangePolicy<P, Properties...>;
173 
174  typename traits::execution_space m_space;
175 
176  using impl_range_policy =
177  RangePolicy<typename traits::execution_space,
178  typename traits::schedule_type, typename traits::index_type>;
179 
180  using execution_policy =
181  MDRangePolicy<P, Properties...>; // needed for is_execution_policy
182  // interrogation
183 
184  template <class... OtherProperties>
185  friend struct MDRangePolicy;
186 
187  static_assert(!std::is_void_v<typename traits::iteration_pattern>,
188  "Kokkos Error: MD iteration pattern not defined");
189 
190  using iteration_pattern = typename traits::iteration_pattern;
191  using work_tag = typename traits::work_tag;
192  using launch_bounds = typename traits::launch_bounds;
193  using member_type = typename range_policy::member_type;
194 
195  static constexpr int rank = iteration_pattern::rank;
196  static_assert(rank < 7, "Kokkos MDRangePolicy Error: Unsupported rank...");
197 
198  using index_type = typename traits::index_type;
199  using array_index_type = std::int64_t;
200  using point_type = Kokkos::Array<array_index_type, rank>; // was index_type
201  using tile_type = Kokkos::Array<array_index_type, rank>;
202  // If point_type or tile_type is not templated on a signed integral type (if
203  // it is unsigned), then if user passes in intializer_list of
204  // runtime-determined values of signed integral type that are not const will
205  // receive a compiler error due to an invalid case for implicit conversion -
206  // "conversion from integer or unscoped enumeration type to integer type that
207  // cannot represent all values of the original, except where source is a
208  // constant expression whose value can be stored exactly in the target type"
209  // This would require the user to either pass a matching index_type parameter
210  // as template parameter to the MDRangePolicy or static_cast the individual
211  // values
212 
213  point_type m_lower = {};
214  point_type m_upper = {};
215  tile_type m_tile = {};
216  point_type m_tile_end = {};
217  index_type m_num_tiles = 1;
218  index_type m_prod_tile_dims = 1;
219  bool m_tune_tile_size = false;
220 
221  static constexpr auto outer_direction =
222  (iteration_pattern::outer_direction != Iterate::Default)
223  ? iteration_pattern::outer_direction
224  : default_outer_direction<typename traits::execution_space>::value;
225 
226  static constexpr auto inner_direction =
227  iteration_pattern::inner_direction != Iterate::Default
228  ? iteration_pattern::inner_direction
229  : default_inner_direction<typename traits::execution_space>::value;
230 
231  static constexpr auto Right = Iterate::Right;
232  static constexpr auto Left = Iterate::Left;
233 
234  KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
235  return m_space;
236  }
237 
238  MDRangePolicy() = default;
239 
240  template <typename LT, std::size_t LN, typename UT, std::size_t UN,
241  typename TT = array_index_type, std::size_t TN = rank,
242  typename = std::enable_if_t<std::is_integral_v<LT> &&
243  std::is_integral_v<UT> &&
244  std::is_integral_v<TT>>>
245  MDRangePolicy(const LT (&lower)[LN], const UT (&upper)[UN],
246  const TT (&tile)[TN] = {})
247  : MDRangePolicy(
248  Impl::to_array_potentially_narrowing<index_type, decltype(m_lower)>(
249  lower),
250  Impl::to_array_potentially_narrowing<index_type, decltype(m_upper)>(
251  upper),
252  Impl::to_array_potentially_narrowing<index_type, decltype(m_tile)>(
253  tile)) {
254  static_assert(
255  LN == rank && UN == rank && TN <= rank,
256  "MDRangePolicy: Constructor initializer lists have wrong size");
257  }
258 
259  template <typename LT, std::size_t LN, typename UT, std::size_t UN,
260  typename TT = array_index_type, std::size_t TN = rank,
261  typename = std::enable_if_t<std::is_integral_v<LT> &&
262  std::is_integral_v<UT> &&
263  std::is_integral_v<TT>>>
264  MDRangePolicy(const typename traits::execution_space& work_space,
265  const LT (&lower)[LN], const UT (&upper)[UN],
266  const TT (&tile)[TN] = {})
267  : MDRangePolicy(
268  work_space,
269  Impl::to_array_potentially_narrowing<index_type, decltype(m_lower)>(
270  lower),
271  Impl::to_array_potentially_narrowing<index_type, decltype(m_upper)>(
272  upper),
273  Impl::to_array_potentially_narrowing<index_type, decltype(m_tile)>(
274  tile)) {
275  static_assert(
276  LN == rank && UN == rank && TN <= rank,
277  "MDRangePolicy: Constructor initializer lists have wrong size");
278  }
279 
280  // NOTE: Keeping these two constructor despite the templated constructors
281  // from Kokkos arrays for backwards compability to allow construction from
282  // double-braced initializer lists.
283  MDRangePolicy(point_type const& lower, point_type const& upper,
284  tile_type const& tile = tile_type{})
285  : MDRangePolicy(typename traits::execution_space(), lower, upper, tile) {}
286 
287  MDRangePolicy(const typename traits::execution_space& work_space,
288  point_type const& lower, point_type const& upper,
289  tile_type const& tile = tile_type{})
290  : m_space(work_space), m_lower(lower), m_upper(upper), m_tile(tile) {
291  init_helper(Impl::get_tile_size_properties(work_space));
292  }
293 
294  template <typename T, std::size_t NT = rank,
295  typename = std::enable_if_t<std::is_integral_v<T>>>
296  MDRangePolicy(Kokkos::Array<T, rank> const& lower,
297  Kokkos::Array<T, rank> const& upper,
299  : MDRangePolicy(typename traits::execution_space(), lower, upper, tile) {}
300 
301  template <typename T, std::size_t NT = rank,
302  typename = std::enable_if_t<std::is_integral_v<T>>>
303  MDRangePolicy(const typename traits::execution_space& work_space,
304  Kokkos::Array<T, rank> const& lower,
305  Kokkos::Array<T, rank> const& upper,
307  : MDRangePolicy(
308  work_space,
309  Impl::to_array_potentially_narrowing<index_type, decltype(m_lower)>(
310  lower),
311  Impl::to_array_potentially_narrowing<index_type, decltype(m_upper)>(
312  upper),
313  Impl::to_array_potentially_narrowing<index_type, decltype(m_tile)>(
314  tile)) {}
315 
316  template <class... OtherProperties>
317  MDRangePolicy(const MDRangePolicy<OtherProperties...> p)
318  : traits(p), // base class may contain data such as desired occupancy
319  m_space(p.m_space),
320  m_lower(p.m_lower),
321  m_upper(p.m_upper),
322  m_tile(p.m_tile),
323  m_tile_end(p.m_tile_end),
324  m_num_tiles(p.m_num_tiles),
325  m_prod_tile_dims(p.m_prod_tile_dims),
326  m_tune_tile_size(p.m_tune_tile_size) {}
327 
328  void impl_change_tile_size(const point_type& tile) {
329  m_tile = tile;
330  init_helper(Impl::get_tile_size_properties(m_space));
331  }
332  bool impl_tune_tile_size() const { return m_tune_tile_size; }
333 
334  tile_type tile_size_recommended() const {
335  tile_type rec_tile_sizes = {};
336 
337  for (std::size_t i = 0; i < rec_tile_sizes.size(); ++i) {
338  rec_tile_sizes[i] = tile_size_recommended(i);
339  }
340  return rec_tile_sizes;
341  }
342 
343  int max_total_tile_size() const {
344  return Impl::get_tile_size_properties(m_space).max_total_tile_size;
345  }
346 
347  private:
348  int tile_size_recommended(const int tile_rank) const {
349  auto properties = Impl::get_tile_size_properties(m_space);
350  int last_rank = (inner_direction == Iterate::Right) ? rank - 1 : 0;
351  int rank_acc =
352  (inner_direction == Iterate::Right) ? tile_rank + 1 : tile_rank - 1;
353  int rec_tile_size = (std::pow(properties.default_tile_size, rank_acc) <
354  properties.max_total_tile_size)
355  ? properties.default_tile_size
356  : 1;
357 
358  if (tile_rank == last_rank) {
359  rec_tile_size = tile_size_last_rank(
360  properties, m_upper[last_rank] - m_lower[last_rank]);
361  }
362  return rec_tile_size;
363  }
364 
365  int tile_size_last_rank(const Impl::TileSizeProperties properties,
366  const index_type length) const {
367  return properties.default_largest_tile_size == 0
368  ? std::max<int>(length, 1)
369  : properties.default_largest_tile_size;
370  }
371 
372  void init_helper(Impl::TileSizeProperties properties) {
373  m_prod_tile_dims = 1;
374  int increment = 1;
375  int rank_start = 0;
376  int rank_end = rank;
377  if (inner_direction == Iterate::Right) {
378  increment = -1;
379  rank_start = rank - 1;
380  rank_end = -1;
381  }
382 
383  for (int i = rank_start; i != rank_end; i += increment) {
384  const index_type length = m_upper[i] - m_lower[i];
385 
386  if (m_upper[i] < m_lower[i]) {
387  std::string msg =
388  "Kokkos::MDRangePolicy bounds error: The lower bound (" +
389  std::to_string(m_lower[i]) + ") is greater than its upper bound (" +
390  std::to_string(m_upper[i]) + ") in dimension " + std::to_string(i) +
391  ".\n";
392 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
393  Kokkos::abort(msg.c_str());
394 #elif defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
395  Kokkos::Impl::log_warning(msg);
396 #endif
397  }
398 
399  if (m_tile[i] <= 0) {
400  m_tune_tile_size = true;
401  if ((inner_direction == Iterate::Right && (i < rank - 1)) ||
402  (inner_direction == Iterate::Left && (i > 0))) {
403  if (m_prod_tile_dims * properties.default_tile_size <
404  static_cast<index_type>(properties.max_total_tile_size)) {
405  m_tile[i] = properties.default_tile_size;
406  } else {
407  m_tile[i] = 1;
408  }
409  } else {
410  m_tile[i] = tile_size_last_rank(properties, length);
411  }
412  }
413  m_tile_end[i] =
414  static_cast<index_type>((length + m_tile[i] - 1) / m_tile[i]);
415  m_num_tiles *= m_tile_end[i];
416  m_prod_tile_dims *= m_tile[i];
417  }
418  if (m_prod_tile_dims > static_cast<index_type>(properties.max_threads)) {
419  printf(" Product of tile dimensions exceed maximum limit: %d\n",
420  static_cast<int>(properties.max_threads));
421  Kokkos::abort(
422  "ExecSpace Error: MDRange tile dims exceed maximum number "
423  "of threads per block - choose smaller tile dims");
424  }
425  }
426 };
427 
428 template <typename LT, size_t N, typename UT>
429 MDRangePolicy(const LT (&)[N], const UT (&)[N]) -> MDRangePolicy<Rank<N>>;
430 
431 template <typename LT, size_t N, typename UT, typename TT, size_t TN>
432 MDRangePolicy(const LT (&)[N], const UT (&)[N], const TT (&)[TN])
433  -> MDRangePolicy<Rank<N>>;
434 
435 template <typename LT, size_t N, typename UT>
436 MDRangePolicy(DefaultExecutionSpace const&, const LT (&)[N], const UT (&)[N])
437  -> MDRangePolicy<Rank<N>>;
438 
439 template <typename LT, size_t N, typename UT, typename TT, size_t TN>
440 MDRangePolicy(DefaultExecutionSpace const&, const LT (&)[N], const UT (&)[N],
441  const TT (&)[TN]) -> MDRangePolicy<Rank<N>>;
442 
443 template <typename ES, typename LT, size_t N, typename UT,
444  typename = std::enable_if_t<is_execution_space_v<ES>>>
445 MDRangePolicy(ES const&, const LT (&)[N], const UT (&)[N])
446  -> MDRangePolicy<ES, Rank<N>>;
447 
448 template <typename ES, typename LT, size_t N, typename UT, typename TT,
449  size_t TN, typename = std::enable_if_t<is_execution_space_v<ES>>>
450 MDRangePolicy(ES const&, const LT (&)[N], const UT (&)[N], const TT (&)[TN])
451  -> MDRangePolicy<ES, Rank<N>>;
452 
453 template <typename T, size_t N>
454 MDRangePolicy(Array<T, N> const&, Array<T, N> const&) -> MDRangePolicy<Rank<N>>;
455 
456 template <typename T, size_t N, size_t NT>
457 MDRangePolicy(Array<T, N> const&, Array<T, N> const&, Array<T, NT> const&)
458  -> MDRangePolicy<Rank<N>>;
459 
460 template <typename T, size_t N>
461 MDRangePolicy(DefaultExecutionSpace const&, Array<T, N> const&,
462  Array<T, N> const&) -> MDRangePolicy<Rank<N>>;
463 
464 template <typename T, size_t N, size_t NT>
465 MDRangePolicy(DefaultExecutionSpace const&, Array<T, N> const&,
466  Array<T, N> const&, Array<T, NT> const&)
467  -> MDRangePolicy<Rank<N>>;
468 
469 template <typename ES, typename T, size_t N,
470  typename = std::enable_if_t<is_execution_space_v<ES>>>
471 MDRangePolicy(ES const&, Array<T, N> const&, Array<T, N> const&)
472  -> MDRangePolicy<ES, Rank<N>>;
473 
474 template <typename ES, typename T, size_t N, size_t NT,
475  typename = std::enable_if_t<is_execution_space_v<ES>>>
476 MDRangePolicy(ES const&, Array<T, N> const&, Array<T, N> const&,
477  Array<T, NT> const&) -> MDRangePolicy<ES, Rank<N>>;
478 
479 } // namespace Kokkos
480 
481 #endif // KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
Derived from the C++17 'std::array'. Dropping the iterator interface.
Declaration of various MemoryLayout options.