Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_CudaSpace.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_CUDASPACE_HPP
23 #define KOKKOS_CUDASPACE_HPP
24 
25 #include <Kokkos_Macros.hpp>
26 #if defined(KOKKOS_ENABLE_CUDA)
27 
28 #include <Kokkos_Core_fwd.hpp>
29 
30 #include <iosfwd>
31 #include <typeinfo>
32 #include <string>
33 #include <memory>
34 
35 #include <Kokkos_HostSpace.hpp>
36 #include <impl/Kokkos_SharedAlloc.hpp>
37 
38 #include <impl/Kokkos_Profiling_Interface.hpp>
39 
40 #include <Cuda/Kokkos_Cuda_abort.hpp>
41 
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
// C-linkage getter/setter pair for the debug "pin UVM to host" switch.
// Declared only when KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST is defined; the
// definitions are not part of this header.
extern "C" {
bool kokkos_impl_cuda_pin_uvm_to_host();
void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
}
#endif
46 
47 /*--------------------------------------------------------------------------*/
48 
49 namespace Kokkos {
50 namespace Impl {
51 
/// Trait answering "is T one of the CUDA memory spaces?".
/// The primary template says no; the memory spaces declared in this header
/// opt in through explicit specializations deriving from std::true_type.
template <class T>
struct is_cuda_type_space : std::false_type {};
54 
55 } // namespace Impl
56 
59 class CudaSpace {
60  public:
62  using memory_space = CudaSpace;
63  using execution_space = Kokkos::Cuda;
64  using device_type = Kokkos::Device<execution_space, memory_space>;
65 
66  using size_type = unsigned int;
67 
68  /*--------------------------------*/
69 
70  CudaSpace();
71  CudaSpace(CudaSpace&& rhs) = default;
72  CudaSpace(const CudaSpace& rhs) = default;
73  CudaSpace& operator=(CudaSpace&& rhs) = default;
74  CudaSpace& operator=(const CudaSpace& rhs) = default;
75  ~CudaSpace() = default;
76 
78  void* allocate(const Cuda& exec_space, const size_t arg_alloc_size) const;
79  void* allocate(const Cuda& exec_space, const char* arg_label,
80  const size_t arg_alloc_size,
81  const size_t arg_logical_size = 0) const;
82  void* allocate(const size_t arg_alloc_size) const;
83  void* allocate(const char* arg_label, const size_t arg_alloc_size,
84  const size_t arg_logical_size = 0) const;
85 
87  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
88  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
89  const size_t arg_alloc_size,
90  const size_t arg_logical_size = 0) const;
91 
92  private:
93  template <class, class, class, class>
95  void* impl_allocate(const Cuda& exec_space, const char* arg_label,
96  const size_t arg_alloc_size,
97  const size_t arg_logical_size = 0,
98  const Kokkos::Tools::SpaceHandle =
99  Kokkos::Tools::make_space_handle(name())) const;
100  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
101  const size_t arg_logical_size = 0,
102  const Kokkos::Tools::SpaceHandle =
103  Kokkos::Tools::make_space_handle(name())) const;
104  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
105  const size_t arg_alloc_size,
106  const size_t arg_logical_size = 0,
107  const Kokkos::Tools::SpaceHandle =
108  Kokkos::Tools::make_space_handle(name())) const;
109 
110  public:
112  static constexpr const char* name() { return m_name; }
113 
114  private:
115  int m_device;
116 
117  static constexpr const char* m_name = "Cuda";
118  friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
119 };
120 
121 template <>
122 struct Impl::is_cuda_type_space<CudaSpace> : public std::true_type {};
123 
124 } // namespace Kokkos
125 
126 /*--------------------------------------------------------------------------*/
127 /*--------------------------------------------------------------------------*/
128 
129 namespace Kokkos {
130 
134 class CudaUVMSpace {
135  public:
137  using memory_space = CudaUVMSpace;
138  using execution_space = Cuda;
139  using device_type = Kokkos::Device<execution_space, memory_space>;
140  using size_type = unsigned int;
141 
142 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
143 
144  KOKKOS_DEPRECATED static bool available();
145 #endif
146 
147  /*--------------------------------*/
148 
149  /*--------------------------------*/
150 
151  CudaUVMSpace();
152  CudaUVMSpace(CudaUVMSpace&& rhs) = default;
153  CudaUVMSpace(const CudaUVMSpace& rhs) = default;
154  CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
155  CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
156  ~CudaUVMSpace() = default;
157 
159  void* allocate(const size_t arg_alloc_size) const;
160  void* allocate(const char* arg_label, const size_t arg_alloc_size,
161  const size_t arg_logical_size = 0) const;
162 
164  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
165  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
166  const size_t arg_alloc_size,
167  const size_t arg_logical_size = 0) const;
168 
169  private:
170  template <class, class, class, class>
172  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
173  const size_t arg_logical_size = 0,
174  const Kokkos::Tools::SpaceHandle =
175  Kokkos::Tools::make_space_handle(name())) const;
176  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
177  const size_t arg_alloc_size,
178  const size_t arg_logical_size = 0,
179  const Kokkos::Tools::SpaceHandle =
180  Kokkos::Tools::make_space_handle(name())) const;
181 
182  public:
184  static constexpr const char* name() { return m_name; }
185 
186 #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
187  static bool cuda_pin_uvm_to_host();
188  static void cuda_set_pin_uvm_to_host(bool val);
189 #endif
190  /*--------------------------------*/
191 
192  private:
193  int m_device;
194 
195 #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
196  static bool kokkos_impl_cuda_pin_uvm_to_host_v;
197 #endif
198  static constexpr const char* m_name = "CudaUVM";
199 };
200 
201 template <>
202 struct Impl::is_cuda_type_space<CudaUVMSpace> : public std::true_type {};
203 
204 } // namespace Kokkos
205 
206 /*--------------------------------------------------------------------------*/
207 /*--------------------------------------------------------------------------*/
208 
209 namespace Kokkos {
210 
214 class CudaHostPinnedSpace {
215  public:
217 
218  using execution_space = HostSpace::execution_space;
219  using memory_space = CudaHostPinnedSpace;
220  using device_type = Kokkos::Device<execution_space, memory_space>;
221  using size_type = unsigned int;
222 
223  /*--------------------------------*/
224 
225  CudaHostPinnedSpace();
226  CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) = default;
227  CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
228  CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
229  CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
230  ~CudaHostPinnedSpace() = default;
231 
233  void* allocate(const size_t arg_alloc_size) const;
234  void* allocate(const char* arg_label, const size_t arg_alloc_size,
235  const size_t arg_logical_size = 0) const;
236 
238  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
239  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
240  const size_t arg_alloc_size,
241  const size_t arg_logical_size = 0) const;
242 
243  private:
244  template <class, class, class, class>
246  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
247  const size_t arg_logical_size = 0,
248  const Kokkos::Tools::SpaceHandle =
249  Kokkos::Tools::make_space_handle(name())) const;
250  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
251  const size_t arg_alloc_size,
252  const size_t arg_logical_size = 0,
253  const Kokkos::Tools::SpaceHandle =
254  Kokkos::Tools::make_space_handle(name())) const;
255 
256  public:
258  static constexpr const char* name() { return m_name; }
259 
260  private:
261  static constexpr const char* m_name = "CudaHostPinned";
262 
263  /*--------------------------------*/
264 };
265 
266 template <>
267 struct Impl::is_cuda_type_space<CudaHostPinnedSpace> : public std::true_type {};
268 
269 } // namespace Kokkos
270 
271 /*--------------------------------------------------------------------------*/
272 /*--------------------------------------------------------------------------*/
273 
274 namespace Kokkos {
275 namespace Impl {
276 
277 cudaStream_t cuda_get_deep_copy_stream();
278 
279 const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
280  bool initialize = true);
281 
282 static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
283  Kokkos::CudaSpace>::assignable,
284  "");
285 static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaUVMSpace,
286  Kokkos::CudaUVMSpace>::assignable,
287  "");
288 static_assert(
289  Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
290  Kokkos::CudaHostPinnedSpace>::assignable,
291  "");
292 
293 //----------------------------------------
294 
295 template <>
296 struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
297  enum : bool { assignable = false };
298  enum : bool { accessible = false };
299  enum : bool { deepcopy = true };
300 };
301 
302 template <>
303 struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
304  // HostSpace::execution_space != CudaUVMSpace::execution_space
305  enum : bool { assignable = false };
306  enum : bool { accessible = true };
307  enum : bool { deepcopy = true };
308 };
309 
310 template <>
311 struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
312  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
313  enum : bool { assignable = true };
314  enum : bool { accessible = true };
315  enum : bool { deepcopy = true };
316 };
317 
318 //----------------------------------------
319 
320 template <>
321 struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
322  enum : bool { assignable = false };
323  enum : bool { accessible = false };
324  enum : bool { deepcopy = true };
325 };
326 
327 template <>
328 struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
329  // CudaSpace::execution_space == CudaUVMSpace::execution_space
330  enum : bool { assignable = true };
331  enum : bool { accessible = true };
332  enum : bool { deepcopy = true };
333 };
334 
335 template <>
336 struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
337  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
338  enum : bool { assignable = false };
339  enum : bool { accessible = true }; // CudaSpace::execution_space
340  enum : bool { deepcopy = true };
341 };
342 
343 //----------------------------------------
344 // CudaUVMSpace::execution_space == Cuda
345 // CudaUVMSpace accessible to both Cuda and Host
346 
347 template <>
348 struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
349  enum : bool { assignable = false };
350  enum : bool { accessible = false }; // Cuda cannot access HostSpace
351  enum : bool { deepcopy = true };
352 };
353 
354 template <>
355 struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
356  // CudaUVMSpace::execution_space == CudaSpace::execution_space
357  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
358  enum : bool { assignable = false };
359 
360  // CudaUVMSpace::execution_space can access CudaSpace
361  enum : bool { accessible = true };
362  enum : bool { deepcopy = true };
363 };
364 
365 template <>
366 struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
367  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
368  enum : bool { assignable = false };
369  enum : bool { accessible = true }; // CudaUVMSpace::execution_space
370  enum : bool { deepcopy = true };
371 };
372 
373 //----------------------------------------
374 // CudaHostPinnedSpace::execution_space == HostSpace::execution_space
375 // CudaHostPinnedSpace accessible to both Cuda and Host
376 
377 template <>
378 struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
379  enum : bool { assignable = false }; // Cannot access from Cuda
380  enum : bool { accessible = true }; // CudaHostPinnedSpace::execution_space
381  enum : bool { deepcopy = true };
382 };
383 
384 template <>
385 struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
386  enum : bool { assignable = false }; // Cannot access from Host
387  enum : bool { accessible = false };
388  enum : bool { deepcopy = true };
389 };
390 
391 template <>
392 struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
393  enum : bool { assignable = false }; // different execution_space
394  enum : bool { accessible = true }; // same accessibility
395  enum : bool { deepcopy = true };
396 };
397 
398 //----------------------------------------
399 
400 } // namespace Impl
401 } // namespace Kokkos
402 
403 /*--------------------------------------------------------------------------*/
404 /*--------------------------------------------------------------------------*/
405 
406 namespace Kokkos {
407 namespace Impl {
408 
409 void DeepCopyCuda(void* dst, const void* src, size_t n);
410 void DeepCopyAsyncCuda(const Cuda& instance, void* dst, const void* src,
411  size_t n);
412 void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);
413 
414 template <class MemSpace>
415 struct DeepCopy<MemSpace, HostSpace, Cuda,
416  std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
417  DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
418  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
419  DeepCopyAsyncCuda(instance, dst, src, n);
420  }
421 };
422 
423 template <class MemSpace>
424 struct DeepCopy<HostSpace, MemSpace, Cuda,
425  std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
426  DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
427  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
428  DeepCopyAsyncCuda(instance, dst, src, n);
429  }
430 };
431 
432 template <class MemSpace1, class MemSpace2>
433 struct DeepCopy<MemSpace1, MemSpace2, Cuda,
434  std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
435  is_cuda_type_space<MemSpace2>::value>> {
436  DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
437  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
438  DeepCopyAsyncCuda(instance, dst, src, n);
439  }
440 };
441 
442 template <class MemSpace1, class MemSpace2, class ExecutionSpace>
443 struct DeepCopy<MemSpace1, MemSpace2, ExecutionSpace,
444  std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
445  is_cuda_type_space<MemSpace2>::value &&
446  !std::is_same<ExecutionSpace, Cuda>::value>> {
447  inline DeepCopy(void* dst, const void* src, size_t n) {
448  DeepCopyCuda(dst, src, n);
449  }
450 
451  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
452  size_t n) {
453  exec.fence(fence_string());
454  DeepCopyAsyncCuda(dst, src, n);
455  }
456 
457  private:
458  static const std::string& fence_string() {
459  static const std::string string =
460  std::string("Kokkos::Impl::DeepCopy<") + MemSpace1::name() + "Space, " +
461  MemSpace2::name() +
462  "Space, ExecutionSpace>::DeepCopy: fence before copy";
463  return string;
464  }
465 };
466 
467 template <class MemSpace, class ExecutionSpace>
468 struct DeepCopy<MemSpace, HostSpace, ExecutionSpace,
469  std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
470  !std::is_same<ExecutionSpace, Cuda>::value>> {
471  inline DeepCopy(void* dst, const void* src, size_t n) {
472  DeepCopyCuda(dst, src, n);
473  }
474 
475  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
476  size_t n) {
477  exec.fence(fence_string());
478  DeepCopyAsyncCuda(dst, src, n);
479  }
480 
481  private:
482  static const std::string& fence_string() {
483  static const std::string string =
484  std::string("Kokkos::Impl::DeepCopy<") + MemSpace::name() +
485  "Space, HostSpace, ExecutionSpace>::DeepCopy: fence before copy";
486  return string;
487  }
488 };
489 
490 template <class MemSpace, class ExecutionSpace>
491 struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
492  std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
493  !std::is_same<ExecutionSpace, Cuda>::value>> {
494  inline DeepCopy(void* dst, const void* src, size_t n) {
495  DeepCopyCuda(dst, src, n);
496  }
497 
498  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
499  size_t n) {
500  exec.fence(fence_string());
501  DeepCopyAsyncCuda(dst, src, n);
502  }
503 
504  private:
505  static const std::string& fence_string() {
506  static const std::string string =
507  std::string("Kokkos::Impl::DeepCopy<HostSpace, ") + MemSpace::name() +
508  "Space, ExecutionSpace>::DeepCopy: fence before copy";
509  return string;
510  }
511 };
512 
513 } // namespace Impl
514 } // namespace Kokkos
515 
516 //----------------------------------------------------------------------------
517 //----------------------------------------------------------------------------
518 
519 namespace Kokkos {
520 namespace Impl {
521 
522 template <>
523 class SharedAllocationRecord<Kokkos::CudaSpace, void>
524  : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
525  private:
526  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
527  friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
528  friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
529 
530  using RecordBase = SharedAllocationRecord<void, void>;
531  using base_t =
532  HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
533 
534  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
535  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
536 
537  static ::cudaTextureObject_t attach_texture_object(
538  const unsigned sizeof_alias, void* const alloc_ptr,
539  const size_t alloc_size);
540 
541 #ifdef KOKKOS_ENABLE_DEBUG
542  static RecordBase s_root_record;
543 #endif
544 
545  ::cudaTextureObject_t m_tex_obj = 0;
546  const Kokkos::CudaSpace m_space;
547 
548  protected:
549  ~SharedAllocationRecord();
550  SharedAllocationRecord() = default;
551 
552  // This constructor does not forward to the one without exec_space arg
553  // in order to work around https://github.com/kokkos/kokkos/issues/5258
554  // This constructor is templated so I can't just put it into the cpp file
555  // like the other constructor.
556  template <typename ExecutionSpace>
557  SharedAllocationRecord(
558  const ExecutionSpace& /*exec_space*/, const Kokkos::CudaSpace& arg_space,
559  const std::string& arg_label, const size_t arg_alloc_size,
560  const RecordBase::function_type arg_dealloc = &base_t::deallocate)
561  : base_t(
562 #ifdef KOKKOS_ENABLE_DEBUG
563  &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record,
564 #endif
565  Impl::checked_allocation_with_header(arg_space, arg_label,
566  arg_alloc_size),
567  sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
568  arg_label),
569  m_tex_obj(0),
570  m_space(arg_space) {
571 
572  SharedAllocationHeader header;
573 
574  this->base_t::_fill_host_accessible_header_info(header, arg_label);
575 
576  // Copy to device memory
577  // workaround for issue with NVCC and MSVC
578  // https://github.com/kokkos/kokkos/issues/5258
579  deep_copy_header_no_exec(RecordBase::m_alloc_ptr, &header);
580  }
581 
582  SharedAllocationRecord(
583  const Kokkos::Cuda& exec_space, const Kokkos::CudaSpace& arg_space,
584  const std::string& arg_label, const size_t arg_alloc_size,
585  const RecordBase::function_type arg_dealloc = &base_t::deallocate);
586 
587  SharedAllocationRecord(
588  const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
589  const size_t arg_alloc_size,
590  const RecordBase::function_type arg_dealloc = &base_t::deallocate);
591 
592  // helper function to work around MSVC+NVCC issue
593  // https://github.com/kokkos/kokkos/issues/5258
594  static void deep_copy_header_no_exec(void*, const void*);
595 
596  public:
597  template <typename AliasType>
598  inline ::cudaTextureObject_t attach_texture_object() {
599  static_assert((std::is_same<AliasType, int>::value ||
600  std::is_same<AliasType, ::int2>::value ||
601  std::is_same<AliasType, ::int4>::value),
602  "Cuda texture fetch only supported for alias types of int, "
603  "::int2, or ::int4");
604 
605  if (m_tex_obj == 0) {
606  m_tex_obj = attach_texture_object(sizeof(AliasType),
607  (void*)RecordBase::m_alloc_ptr,
608  RecordBase::m_alloc_size);
609  }
610 
611  return m_tex_obj;
612  }
613 
614  template <typename AliasType>
615  inline int attach_texture_object_offset(const AliasType* const ptr) {
616  // Texture object is attached to the entire allocation range
617  return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
618  }
619 };
620 
621 template <>
622 class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
623  : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
624  private:
625  friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
626 
627  using base_t = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
628  using RecordBase = SharedAllocationRecord<void, void>;
629 
630  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
631  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
632 
633  static RecordBase s_root_record;
634 
635  ::cudaTextureObject_t m_tex_obj = 0;
636  const Kokkos::CudaUVMSpace m_space;
637 
638  protected:
639  ~SharedAllocationRecord();
640  SharedAllocationRecord() = default;
641 
642  // This constructor does not forward to the one without exec_space arg
643  // in order to work around https://github.com/kokkos/kokkos/issues/5258
644  // This constructor is templated so I can't just put it into the cpp file
645  // like the other constructor.
646  template <typename ExecutionSpace>
647  SharedAllocationRecord(
648  const ExecutionSpace& /*exec_space*/,
649  const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
650  const size_t arg_alloc_size,
651  const RecordBase::function_type arg_dealloc = &base_t::deallocate)
652  : base_t(
653 #ifdef KOKKOS_ENABLE_DEBUG
654  &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record,
655 #endif
656  Impl::checked_allocation_with_header(arg_space, arg_label,
657  arg_alloc_size),
658  sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
659  arg_label),
660  m_tex_obj(0),
661  m_space(arg_space) {
662  this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
663  arg_label);
664  }
665 
666  SharedAllocationRecord(
667  const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
668  const size_t arg_alloc_size,
669  const RecordBase::function_type arg_dealloc = &base_t::deallocate);
670 
671  public:
672  template <typename AliasType>
673  inline ::cudaTextureObject_t attach_texture_object() {
674  static_assert((std::is_same<AliasType, int>::value ||
675  std::is_same<AliasType, ::int2>::value ||
676  std::is_same<AliasType, ::int4>::value),
677  "Cuda texture fetch only supported for alias types of int, "
678  "::int2, or ::int4");
679 
680  if (m_tex_obj == 0) {
681  m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
682  attach_texture_object(sizeof(AliasType),
683  (void*)RecordBase::m_alloc_ptr,
684  RecordBase::m_alloc_size);
685  }
686 
687  return m_tex_obj;
688  }
689 
690  template <typename AliasType>
691  inline int attach_texture_object_offset(const AliasType* const ptr) {
692  // Texture object is attached to the entire allocation range
693  return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
694  }
695 };
696 
697 template <>
698 class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
699  : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
700  private:
701  friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
702 
703  using RecordBase = SharedAllocationRecord<void, void>;
704  using base_t = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
705 
706  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
707  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
708 
709  static RecordBase s_root_record;
710 
711  const Kokkos::CudaHostPinnedSpace m_space;
712 
713  protected:
714  ~SharedAllocationRecord();
715  SharedAllocationRecord() = default;
716 
717  // This constructor does not forward to the one without exec_space arg
718  // in order to work around https://github.com/kokkos/kokkos/issues/5258
719  // This constructor is templated so I can't just put it into the cpp file
720  // like the other constructor.
721  template <typename ExecutionSpace>
722  SharedAllocationRecord(
723  const ExecutionSpace& /*exec_space*/,
724  const Kokkos::CudaHostPinnedSpace& arg_space,
725  const std::string& arg_label, const size_t arg_alloc_size,
726  const RecordBase::function_type arg_dealloc = &base_t::deallocate)
727  : base_t(
728 #ifdef KOKKOS_ENABLE_DEBUG
729  &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
730  void>::s_root_record,
731 #endif
732  Impl::checked_allocation_with_header(arg_space, arg_label,
733  arg_alloc_size),
734  sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
735  arg_label),
736  m_space(arg_space) {
737  this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
738  arg_label);
739  }
740 
741  SharedAllocationRecord(
742  const Kokkos::CudaHostPinnedSpace& arg_space,
743  const std::string& arg_label, const size_t arg_alloc_size,
744  const RecordBase::function_type arg_dealloc = &base_t::deallocate);
745 };
746 
747 } // namespace Impl
748 } // namespace Kokkos
749 
750 //----------------------------------------------------------------------------
751 //----------------------------------------------------------------------------
752 
753 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
754 #endif /* #define KOKKOS_CUDASPACE_HPP */
static constexpr const char * name()
Memory management for host memory.
DefaultHostExecutionSpace execution_space
Default execution space for this memory space.
bool available()
Query if hwloc is available.
LogicalMemorySpace is a space that is identical to another space, but differentiable by name and temp...
Access relationship between DstMemorySpace and SrcMemorySpace.