Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_Cuda.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_CUDA_HPP
23 #define KOKKOS_CUDA_HPP
24 
25 #include <Kokkos_Macros.hpp>
26 #if defined(KOKKOS_ENABLE_CUDA)
27 
28 #include <Kokkos_Core_fwd.hpp>
29 
30 #include <iosfwd>
31 #include <vector>
32 
33 #include <impl/Kokkos_AnalyzePolicy.hpp>
34 #include <Kokkos_CudaSpace.hpp>
35 #include <Cuda/Kokkos_Cuda_Error.hpp> // CUDA_SAFE_CALL
36 
37 #include <Kokkos_Parallel.hpp>
38 #include <Kokkos_TaskScheduler.hpp>
39 #include <Kokkos_Layout.hpp>
40 #include <Kokkos_ScratchSpace.hpp>
41 #include <Kokkos_MemoryTraits.hpp>
42 #include <impl/Kokkos_HostSharedPtr.hpp>
43 #include <impl/Kokkos_InitializationSettings.hpp>
44 
45 /*--------------------------------------------------------------------------*/
46 
namespace Kokkos {
namespace Impl {
// Forward declarations only: neither class is defined in this header.
// CudaInternal is the type held (through HostSharedPtr) by Kokkos::Cuda.
class CudaInternal;
class CudaExec;
}  // namespace Impl
}  // namespace Kokkos
53 
54 /*--------------------------------------------------------------------------*/
55 
56 namespace Kokkos {
57 
58 namespace Impl {
namespace Experimental {

/// Bit-flag enumeration naming the kernel launch mechanisms.
///
/// Default is zero; every other enumerator occupies its own bit, so values
/// can be combined and tested with the operator| / operator& defined below.
enum class CudaLaunchMechanism : unsigned {
  Default        = 0u,
  ConstantMemory = 1u << 0,
  GlobalMemory   = 1u << 1,
  LocalMemory    = 1u << 2
};

/// Bitwise OR of two launch mechanisms, computed on the underlying unsigned
/// values and converted back to the enumeration type.
constexpr inline CudaLaunchMechanism operator|(CudaLaunchMechanism p1,
                                               CudaLaunchMechanism p2) {
  const unsigned combined =
      static_cast<unsigned>(p1) | static_cast<unsigned>(p2);
  return static_cast<CudaLaunchMechanism>(combined);
}

/// Bitwise AND of two launch mechanisms, computed on the underlying unsigned
/// values and converted back to the enumeration type.
constexpr inline CudaLaunchMechanism operator&(CudaLaunchMechanism p1,
                                               CudaLaunchMechanism p2) {
  const unsigned common =
      static_cast<unsigned>(p1) & static_cast<unsigned>(p2);
  return static_cast<CudaLaunchMechanism>(common);
}

/// Property object whose launch_mechanism member is default-initialized to
/// the template argument l.
template <CudaLaunchMechanism l>
struct CudaDispatchProperties {
  CudaLaunchMechanism launch_mechanism = l;
};

}  // namespace Experimental
83 } // namespace Impl
94 class Cuda {
95  public:
97 
98 
100  using execution_space = Cuda;
101 
102 #if defined(KOKKOS_ENABLE_CUDA_UVM)
103  using memory_space = CudaUVMSpace;
105 #else
106  using memory_space = CudaSpace;
108 #endif
109 
111  using device_type = Kokkos::Device<execution_space, memory_space>;
112 
114  using size_type = memory_space::size_type;
115 
117  using array_layout = LayoutLeft;
118 
120  using scratch_memory_space = ScratchMemorySpace<Cuda>;
121 
123  //--------------------------------------------------
125 
126 
129  KOKKOS_INLINE_FUNCTION static int in_parallel() {
130 #if defined(__CUDA_ARCH__)
131  return true;
132 #else
133  return false;
134 #endif
135  }
136 
148  static bool sleep();
149 
155  static bool wake();
156 
163  static void impl_static_fence(const std::string& name);
164 
165  void fence(const std::string& name =
166  "Kokkos::Cuda::fence(): Unnamed Instance Fence") const;
167 
169 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
170  static int concurrency();
171 #else
172  int concurrency() const;
173 #endif
174 
176  void print_configuration(std::ostream& os, bool verbose = false) const;
177 
179  //--------------------------------------------------
181 
182  Cuda();
183 
184  Cuda(cudaStream_t stream, bool manage_stream = false);
185 
186  //--------------------------------------------------------------------------
188  static void impl_finalize();
189 
191  static int impl_is_initialized();
192 
194  static void impl_initialize(InitializationSettings const&);
195 
199  static size_type device_arch();
200 
202  static size_type detect_device_count();
203 
207  static std::vector<unsigned> detect_device_arch();
208 
209  cudaStream_t cuda_stream() const;
210  int cuda_device() const;
211  const cudaDeviceProp& cuda_device_prop() const;
212 
214  //--------------------------------------------------------------------------
215 
216  static const char* name();
217 
218  inline Impl::CudaInternal* impl_internal_space_instance() const {
219  return m_space_instance.get();
220  }
221  uint32_t impl_instance_id() const noexcept;
222 
223  private:
224  friend bool operator==(Cuda const& lhs, Cuda const& rhs) {
225  return lhs.impl_internal_space_instance() ==
226  rhs.impl_internal_space_instance();
227  }
228  friend bool operator!=(Cuda const& lhs, Cuda const& rhs) {
229  return !(lhs == rhs);
230  }
231  Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
232 };
233 
234 namespace Tools {
235 namespace Experimental {
236 template <>
237 struct DeviceTypeTraits<Cuda> {
239  static constexpr DeviceType id = DeviceType::Cuda;
240  static int device_id(const Cuda& exec) { return exec.cuda_device(); }
241 };
242 } // namespace Experimental
243 } // namespace Tools
244 
245 namespace Impl {
246 
247 template <class DT, class... DP>
248 struct ZeroMemset<Kokkos::Cuda, DT, DP...> {
249  ZeroMemset(const Kokkos::Cuda& exec_space_instance,
250  const View<DT, DP...>& dst,
251  typename View<DT, DP...>::const_value_type&) {
252  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemsetAsync(
253  dst.data(), 0,
254  dst.size() * sizeof(typename View<DT, DP...>::value_type),
255  exec_space_instance.cuda_stream()));
256  }
257 
258  ZeroMemset(const View<DT, DP...>& dst,
259  typename View<DT, DP...>::const_value_type&) {
260  KOKKOS_IMPL_CUDA_SAFE_CALL(
261  cudaMemset(dst.data(), 0,
262  dst.size() * sizeof(typename View<DT, DP...>::value_type)));
263  }
264 };
265 } // namespace Impl
266 } // namespace Kokkos
267 
268 /*--------------------------------------------------------------------------*/
269 /*--------------------------------------------------------------------------*/
270 
271 namespace Kokkos {
272 namespace Impl {
273 
274 template <>
275 struct MemorySpaceAccess<Kokkos::CudaSpace,
276  Kokkos::Cuda::scratch_memory_space> {
277  enum : bool { assignable = false };
278  enum : bool { accessible = true };
279  enum : bool { deepcopy = false };
280 };
281 
282 #if defined(KOKKOS_ENABLE_CUDA_UVM)
283 
// If UVM use is forced everywhere,
// then we must assume that CudaUVMSpace
// can be a stand-in for CudaSpace.
// This will fail when a strange host-side execution space
// defines CudaUVMSpace as its preferred memory space.
289 
290 template <>
291 struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
292  Kokkos::Cuda::scratch_memory_space> {
293  enum : bool { assignable = false };
294  enum : bool { accessible = true };
295  enum : bool { deepcopy = false };
296 };
297 
298 #endif
299 
300 } // namespace Impl
301 } // namespace Kokkos
302 
303 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
304 #endif /* #ifndef KOKKOS_CUDA_HPP */