17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
20 "Including non-public Kokkos header files is not allowed.");
22 #ifndef KOKKOS_CUDA_HPP
23 #define KOKKOS_CUDA_HPP
25 #include <Kokkos_Macros.hpp>
26 #if defined(KOKKOS_ENABLE_CUDA)
28 #include <Kokkos_Core_fwd.hpp>
33 #include <impl/Kokkos_AnalyzePolicy.hpp>
34 #include <Kokkos_CudaSpace.hpp>
35 #include <Cuda/Kokkos_Cuda_Error.hpp>
37 #include <Kokkos_Parallel.hpp>
38 #include <Kokkos_TaskScheduler.hpp>
39 #include <Kokkos_Layout.hpp>
40 #include <Kokkos_ScratchSpace.hpp>
41 #include <Kokkos_MemoryTraits.hpp>
42 #include <impl/Kokkos_HostSharedPtr.hpp>
43 #include <impl/Kokkos_InitializationSettings.hpp>
59 namespace Experimental {
// Scoped enumeration whose underlying type is `unsigned`. The operator| and
// operator& overloads that follow combine its values bitwise.
60 enum class CudaLaunchMechanism : unsigned {
// Bitwise OR for CudaLaunchMechanism: both operands are converted to
// `unsigned`, OR-ed together, and the result is converted back to the enum
// type. Declared constexpr, so it can be used in constant expressions.
67 constexpr
inline CudaLaunchMechanism operator|(CudaLaunchMechanism p1,
68 CudaLaunchMechanism p2) {
69 return static_cast<CudaLaunchMechanism
>(
static_cast<unsigned>(p1) |
70 static_cast<unsigned>(p2));
// Bitwise AND for CudaLaunchMechanism: both operands are converted to
// `unsigned`, AND-ed together, and the result is converted back to the enum
// type. Declared constexpr, so it can be used in constant expressions.
72 constexpr
inline CudaLaunchMechanism operator&(CudaLaunchMechanism p1,
73 CudaLaunchMechanism p2) {
74 return static_cast<CudaLaunchMechanism
>(
static_cast<unsigned>(p1) &
75 static_cast<unsigned>(p2));
// Property holder parameterized on a launch mechanism: the non-type template
// argument `l` is the default initializer of the `launch_mechanism` member.
78 template <CudaLaunchMechanism l>
79 struct CudaDispatchProperties {
80 CudaLaunchMechanism launch_mechanism = l;
// Alias naming the execution space type.
100 using execution_space = Cuda;
// Memory space alias: CudaUVMSpace when KOKKOS_ENABLE_CUDA_UVM is defined.
102 #if defined(KOKKOS_ENABLE_CUDA_UVM)
103 using memory_space = CudaUVMSpace;
// NOTE(review): presumably the fallback branch (UVM not enabled) — the
// matching #else/#endif are not adjacent here; confirm.
106 using memory_space = CudaSpace;
// Pairing of the execution space with the selected memory space.
111 using device_type = Kokkos::Device<execution_space, memory_space>;
// Size type is taken from the selected memory space.
114 using size_type = memory_space::size_type;
// Array layout alias for this space: LayoutLeft.
117 using array_layout = LayoutLeft;
// Scratch memory space type, instantiated on Cuda.
120 using scratch_memory_space = ScratchMemorySpace<Cuda>;
// Static query returning an int. The body is conditionally compiled on
// __CUDA_ARCH__, i.e. it takes a different path in the device compilation
// pass than in the host pass.
// NOTE(review): presumably reports whether the caller is inside a parallel
// region — confirm against the full body.
129 KOKKOS_INLINE_FUNCTION
static int in_parallel() {
130 #if defined(__CUDA_ARCH__)
// Static fence; `name` is a caller-supplied label (no default).
163 static void impl_static_fence(
const std::string& name);
// Instance fence (const member). `name` is a label for the fence; when the
// caller passes nothing, the default string below is used.
165 void fence(
const std::string& name =
166 "Kokkos::Cuda::fence(): Unnamed Instance Fence")
const;
// With KOKKOS_ENABLE_DEPRECATED_CODE_4 defined, concurrency() is declared as
// a static member function.
169 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
170 static int concurrency();
// Const instance-member form of concurrency().
// NOTE(review): presumably the non-deprecated (#else) branch — confirm.
172 int concurrency()
const;
// Const member taking an output stream `os` and a `verbose` flag that
// defaults to false.
// NOTE(review): presumably writes a description of the CUDA configuration to
// `os`, with more detail when `verbose` is true — confirm in the definition.
176 void print_configuration(std::ostream& os,
bool verbose =
false)
const;
// Constructs an instance from an existing CUDA stream. `manage_stream`
// defaults to false.
// NOTE(review): presumably `manage_stream` decides whether the instance takes
// ownership of (and later destroys) `stream` — confirm in the definition.
184 Cuda(cudaStream_t stream,
bool manage_stream =
false);
// Static, implementation-level (impl_ prefix) lifecycle hooks.
// impl_finalize: takes no arguments and returns nothing.
188 static void impl_finalize();
// impl_is_initialized: returns an int.
// NOTE(review): presumably non-zero means initialized — confirm.
191 static int impl_is_initialized();
// impl_initialize: takes the InitializationSettings by const reference.
194 static void impl_initialize(InitializationSettings
const&);
// Static device queries (declarations only; semantics live in the
// definitions).
// NOTE(review): presumably the architecture / compute capability of the
// device in use — confirm encoding of the returned value.
199 static size_type device_arch();
// NOTE(review): presumably the number of CUDA devices detected — confirm.
202 static size_type detect_device_count();
// Returns a vector of unsigned values.
// NOTE(review): presumably one architecture entry per detected device —
// confirm.
207 static std::vector<unsigned> detect_device_arch();
// Const accessors for CUDA-level properties of this instance.
// cuda_stream: returns a cudaStream_t by value.
209 cudaStream_t cuda_stream()
const;
// cuda_device: returns the device as an int.
210 int cuda_device()
const;
// cuda_device_prop: returns the cudaDeviceProp by const reference, so the
// referenced object is not owned by the caller.
211 const cudaDeviceProp& cuda_device_prop()
const;
// Static accessor returning a C string.
// NOTE(review): presumably the printable name of this execution space —
// confirm.
216 static const char* name();
// Returns the raw Impl::CudaInternal pointer held by m_space_instance
// (obtained via get()); the returned pointer is non-owning.
218 inline Impl::CudaInternal* impl_internal_space_instance()
const {
219 return m_space_instance.get();
// Const, noexcept accessor returning a 32-bit unsigned id.
// NOTE(review): presumably an identifier for this instance — confirm.
221 uint32_t impl_instance_id() const noexcept;
// Equality is identity of the backing internal object: two Cuda handles
// compare equal when impl_internal_space_instance() returns the same pointer
// for both.
224 friend
bool operator==(Cuda const& lhs, Cuda const& rhs) {
225 return lhs.impl_internal_space_instance() ==
226 rhs.impl_internal_space_instance();
// Inequality, defined as the logical negation of operator==.
228 friend bool operator!=(Cuda
const& lhs, Cuda
const& rhs) {
229 return !(lhs == rhs);
// Handle to the Impl::CudaInternal object backing this instance; its raw
// pointer is what impl_internal_space_instance() returns and what
// operator== compares.
231 Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
235 namespace Experimental {
// DeviceTypeTraits for Cuda: exposes the device-type tag and a way to obtain
// the device id from an execution space instance.
237 struct DeviceTypeTraits<Cuda> {
// Compile-time device-type identifier for this specialization.
239 static constexpr DeviceType
id = DeviceType::Cuda;
// Returns the device id of `exec` by forwarding to exec.cuda_device().
240 static int device_id(
const Cuda& exec) {
return exec.cuda_device(); }
// ZeroMemset specialization for Kokkos::Cuda: construction performs the
// memset on the view `dst`. In both constructors the unnamed
// const_value_type& parameter is unused.
247 template <
class DT,
class... DP>
248 struct ZeroMemset<Kokkos::Cuda, DT, DP...> {
// Stream-ordered variant: issues cudaMemsetAsync on the stream of
// `exec_space_instance`, covering dst.size() * sizeof(value_type) bytes.
// Errors are routed through KOKKOS_IMPL_CUDA_SAFE_CALL.
// NOTE(review): the destination-pointer/fill-value arguments are not visible
// here; presumably dst.data() and 0 as in the overload below — confirm.
249 ZeroMemset(
const Kokkos::Cuda& exec_space_instance,
250 const View<DT, DP...>& dst,
251 typename View<DT, DP...>::const_value_type&) {
252 KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemsetAsync(
254 dst.size() *
sizeof(
typename View<DT, DP...>::value_type),
255 exec_space_instance.cuda_stream()));
// Variant without an execution space instance: calls cudaMemset to write 0
// over dst.size() * sizeof(value_type) bytes starting at dst.data().
258 ZeroMemset(
const View<DT, DP...>& dst,
259 typename View<DT, DP...>::const_value_type&) {
260 KOKKOS_IMPL_CUDA_SAFE_CALL(
261 cudaMemset(dst.data(), 0,
262 dst.size() *
sizeof(
typename View<DT, DP...>::value_type)));
// Access traits from CudaSpace to the Cuda scratch memory space:
// not assignable, accessible, and no deep copy.
275 struct MemorySpaceAccess<Kokkos::CudaSpace,
276 Kokkos::Cuda::scratch_memory_space> {
277 enum :
bool { assignable =
false };
278 enum :
bool { accessible =
true };
279 enum :
bool { deepcopy =
false };
// Only compiled when KOKKOS_ENABLE_CUDA_UVM is defined.
282 #if defined(KOKKOS_ENABLE_CUDA_UVM)
// Access traits from CudaUVMSpace to the Cuda scratch memory space:
// not assignable, accessible, and no deep copy (same values as the CudaSpace
// specialization).
291 struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
292 Kokkos::Cuda::scratch_memory_space> {
293 enum :
bool { assignable =
false };
294 enum :
bool { accessible =
true };
295 enum :
bool { deepcopy =
false };