Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_ScratchSpace.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_SCRATCHSPACE_HPP
23 #define KOKKOS_SCRATCHSPACE_HPP
24 
25 #include <cstdio>
26 #include <cstddef>
27 #include <Kokkos_Core_fwd.hpp>
28 #include <Kokkos_Concepts.hpp>
29 
30 /*--------------------------------------------------------------------------*/
31 
32 namespace Kokkos {
33 
37 template <class ExecSpace>
38 class ScratchMemorySpace {
39  static_assert(
40  is_execution_space<ExecSpace>::value,
41  "Instantiating ScratchMemorySpace on non-execution-space type.");
42 
43  public:
44  // Minimal overalignment used by view scratch allocations
45  constexpr static int ALIGN = 8;
46 
47  private:
48  mutable char* m_iter_L0 = nullptr;
49  mutable char* m_iter_L1 = nullptr;
50  char* m_end_L0 = nullptr;
51  char* m_end_L1 = nullptr;
52 
53  mutable int m_multiplier = 0;
54  mutable int m_offset = 0;
55  mutable int m_default_level = 0;
56 
57 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
58  constexpr static int DEFAULT_ALIGNMENT_MASK = ALIGN - 1;
59 #endif
60 
61  public:
63  using memory_space = ScratchMemorySpace<ExecSpace>;
64  using execution_space = ExecSpace;
66  using device_type = Kokkos::Device<execution_space, memory_space>;
67 
68  using array_layout = typename ExecSpace::array_layout;
69  using size_type = typename ExecSpace::size_type;
70 
71  static constexpr const char* name() { return "ScratchMemorySpace"; }
72 
73 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
74  // This function is unused
75  template <typename IntType>
76  KOKKOS_DEPRECATED KOKKOS_INLINE_FUNCTION static constexpr IntType align(
77  const IntType& size) {
78  return (size + DEFAULT_ALIGNMENT_MASK) & ~DEFAULT_ALIGNMENT_MASK;
79  }
80 #endif
81 
82  template <typename IntType>
83  KOKKOS_INLINE_FUNCTION void* get_shmem(const IntType& size,
84  int level = -1) const {
85  return get_shmem_common</*alignment_requested*/ false>(size, 1, level);
86  }
87 
88  template <typename IntType>
89  KOKKOS_INLINE_FUNCTION void* get_shmem_aligned(const IntType& size,
90  const ptrdiff_t alignment,
91  int level = -1) const {
92  return get_shmem_common</*alignment_requested*/ true>(size, alignment,
93  level);
94  }
95 
96  private:
97  template <bool alignment_requested, typename IntType>
98  KOKKOS_INLINE_FUNCTION void* get_shmem_common(
99  const IntType& size, [[maybe_unused]] const ptrdiff_t alignment,
100  int level = -1) const {
101  if (level == -1) level = m_default_level;
102  auto& m_iter = (level == 0) ? m_iter_L0 : m_iter_L1;
103  auto m_iter_old = m_iter;
104  if constexpr (alignment_requested) {
105  const ptrdiff_t missalign = size_t(m_iter) % alignment;
106  if (missalign) m_iter += alignment - missalign;
107  }
108 
109  // This is each thread's start pointer for its allocation
110  // Note: for team scratch m_offset is 0, since every
111  // thread will get back the same shared pointer
112  void* tmp = m_iter + m_offset * size;
113  uintptr_t increment = static_cast<uintptr_t>(size) * m_multiplier;
114 
115  // Cast to uintptr_t to avoid problems with pointer arithmetic using SYCL
116  const auto end_iter =
117  reinterpret_cast<uintptr_t>((level == 0) ? m_end_L0 : m_end_L1);
118  auto current_iter = reinterpret_cast<uintptr_t>(m_iter);
119  auto capacity = end_iter - current_iter;
120 
121  if (increment > capacity) {
122  // Request did overflow: return nullptr and reset m_iter
123  m_iter = m_iter_old;
124  tmp = nullptr;
125 #ifdef KOKKOS_ENABLE_DEBUG
126  // mfh 23 Jun 2015: printf call consumes 25 registers
127  // in a CUDA build, so only print in debug mode. The
128  // function still returns nullptr if not enough memory.
129  Kokkos::printf(
130  "ScratchMemorySpace<...>::get_shmem: Failed to allocate "
131  "%ld byte(s); remaining capacity is %ld byte(s)\n",
132  long(size), long(capacity));
133 #endif // KOKKOS_ENABLE_DEBUG
134  } else {
135  m_iter += increment;
136  }
137  return tmp;
138  }
139 
140  public:
141  KOKKOS_DEFAULTED_FUNCTION
142  ScratchMemorySpace() = default;
143 
144  template <typename IntType>
145  KOKKOS_INLINE_FUNCTION ScratchMemorySpace(void* ptr_L0,
146  const IntType& size_L0,
147  void* ptr_L1 = nullptr,
148  const IntType& size_L1 = 0)
149  : m_iter_L0(static_cast<char*>(ptr_L0)),
150  m_iter_L1(static_cast<char*>(ptr_L1)),
151  m_end_L0(static_cast<char*>(ptr_L0) + size_L0),
152  m_end_L1(static_cast<char*>(ptr_L1) + size_L1),
153  m_multiplier(1),
154  m_offset(0),
155  m_default_level(0) {}
156 
157  KOKKOS_INLINE_FUNCTION
158  const ScratchMemorySpace& set_team_thread_mode(const int& level,
159  const int& multiplier,
160  const int& offset) const {
161  m_default_level = level;
162  m_multiplier = multiplier;
163  m_offset = offset;
164  return *this;
165  }
166 };
167 
168 } // namespace Kokkos
169 
170 #endif /* #ifndef KOKKOS_SCRATCHSPACE_HPP */
ScratchMemorySpace< ExecSpace > memory_space
Kokkos::Device< execution_space, memory_space > device_type