#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ENABLE_CUDA)

#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_HostSpace.hpp>

#include <impl/Kokkos_Profiling_Interface.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
#endif
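/* Usage sketch (illustrative only, not part of this header): these debug
 * hooks let a test toggle whether subsequent CudaUVMSpace allocations are
 * pinned to the host.  The call order shown is an assumption for the example.
 *
 *   kokkos_impl_cuda_set_pin_uvm_to_host(true);  // pin future UVM allocations
 *   bool pinned = kokkos_impl_cuda_pin_uvm_to_host();
 */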
namespace Kokkos {

/** \brief  Cuda on-device memory management */
class CudaSpace {
 public:
  //! Tag this class as a kokkos memory space
  typedef CudaSpace memory_space;
  typedef Kokkos::Cuda execution_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;

  typedef unsigned int size_type;

  /*--------------------------------*/

  CudaSpace();
  CudaSpace(CudaSpace&& rhs)      = default;
  CudaSpace(const CudaSpace& rhs) = default;
  CudaSpace& operator=(CudaSpace&& rhs) = default;
  CudaSpace& operator=(const CudaSpace& rhs) = default;
  ~CudaSpace() = default;

  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;

  /**\brief  Return name of the memory space */
  static constexpr const char* name() { return m_name; }

  /*--------------------------------*/
  /** \brief  Error reporting for host-side attempts to access CudaSpace */
  static void access_error();
  static void access_error(const void* const);

 private:
  static constexpr const char* m_name = "Cuda";
  friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
};

}  // namespace Kokkos
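/* Usage sketch (illustrative only): CudaSpace::allocate returns raw,
 * untracked device memory; the caller owns it and must pass the same size
 * back to deallocate.  The byte count below is an arbitrary example value.
 *
 *   Kokkos::CudaSpace space;
 *   void* ptr = space.allocate(128 * sizeof(double));
 *   // ... use ptr in kernels ...
 *   space.deallocate(ptr, 128 * sizeof(double));
 */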
namespace Kokkos {
namespace Impl {

/// \brief Initialize the lock arrays used by CudaSpace.
void init_lock_arrays_cuda_space();

/// \brief Return the lock array used for arbitrary-size atomics;
/// if deallocate is true, free the array instead.
int* atomic_lock_array_cuda_space_ptr(bool deallocate = false);

/// \brief Return the scratch array for team and thread private
/// global memory; if deallocate is true, free the array instead.
int* scratch_lock_array_cuda_space_ptr(bool deallocate = false);

/// \brief Return the scratch array for unique identifiers;
/// if deallocate is true, free the array instead.
int* threadid_lock_array_cuda_space_ptr(bool deallocate = false);

}  // namespace Impl
}  // namespace Kokkos
namespace Kokkos {

/** \brief  Cuda memory that is accessible to both the Host and Cuda
 *          execution spaces through CUDA's unified virtual memory.
 */
class CudaUVMSpace {
 public:
  //! Tag this class as a kokkos memory space
  typedef CudaUVMSpace memory_space;
  typedef Cuda execution_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;
  typedef unsigned int size_type;

  /** \brief  CudaUVMSpace specific routine */
  KOKKOS_DEPRECATED static int number_of_allocations();

  /*--------------------------------*/

  CudaUVMSpace();
  CudaUVMSpace(CudaUVMSpace&& rhs)      = default;
  CudaUVMSpace(const CudaUVMSpace& rhs) = default;
  CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
  CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
  ~CudaUVMSpace() = default;

  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;

  /**\brief  Return name of the memory space */
  static constexpr const char* name() { return m_name; }

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool cuda_pin_uvm_to_host();
  static void cuda_set_pin_uvm_to_host(bool val);
#endif

 private:
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool kokkos_impl_cuda_pin_uvm_to_host_v;
#endif

  static constexpr const char* m_name = "CudaUVM";
};

}  // namespace Kokkos
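/* Usage sketch (illustrative only): memory in CudaUVMSpace is addressable
 * from both host and device, so a UVM view can be written on the host and
 * read in a kernel without an explicit deep copy (at a page-migration cost).
 *
 *   Kokkos::View<double*, Kokkos::CudaUVMSpace> v("v", 100);
 *   v(0) = 1.0;  // host-side write to device-visible memory
 */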
namespace Kokkos {

/** \brief  Host memory that is accessible to the Cuda execution space
 *          through CUDA's host-pinned memory allocation.
 */
class CudaHostPinnedSpace {
 public:
  /** \brief  Memory is in HostSpace so use the HostSpace::execution_space */
  typedef HostSpace::execution_space execution_space;
  typedef CudaHostPinnedSpace memory_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;
  typedef unsigned int size_type;

  /*--------------------------------*/

  CudaHostPinnedSpace();
  CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs)      = default;
  CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
  CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
  CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
  ~CudaHostPinnedSpace() = default;

  /**\brief  Allocate untracked memory in the space */
  void* allocate(const size_t arg_alloc_size) const;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;

  /**\brief  Return name of the memory space */
  static constexpr const char* name() { return m_name; }

 private:
  static constexpr const char* m_name = "CudaHostPinned";
};

}  // namespace Kokkos
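/* Usage sketch (illustrative only): CudaHostPinnedSpace memory lives on the
 * host but is pinned and mapped for Cuda access, which makes it a natural
 * staging buffer for asynchronous transfers.  The name and extent n are
 * example values.
 *
 *   Kokkos::View<double*, Kokkos::CudaHostPinnedSpace> staging("staging", n);
 */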
namespace Kokkos {
namespace Impl {

static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
                                              Kokkos::CudaSpace>::assignable,
              "");
static_assert(
    Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaUVMSpace,
                                    Kokkos::CudaUVMSpace>::assignable,
    "");
static_assert(
    Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
                                    Kokkos::CudaHostPinnedSpace>::assignable,
    "");

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };  // HostSpace::execution_space
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy = true };
};

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
  enum { assignable = false };
  enum { accessible = false };  // Cuda cannot access HostSpace
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
  // CudaSpace::execution_space == CudaUVMSpace::execution_space
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };  // CudaSpace::execution_space
  enum { deepcopy = true };
};

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
  enum { assignable = false };
  enum { accessible = false };  // Cuda cannot access HostSpace
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
  // CudaUVMSpace::execution_space == CudaSpace::execution_space,
  // but Host cannot access CudaSpace
  enum { assignable = false };
  // CudaUVMSpace::execution_space can access CudaSpace
  enum { accessible = true };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
  enum { assignable = false };  // different execution_space
  enum { accessible = true };   // same accessibility
  enum { deepcopy = true };
};

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
  enum { assignable = false };  // cannot access from Cuda
  enum { accessible = true };   // CudaHostPinnedSpace::execution_space
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
  enum { assignable = false };  // cannot access from Host
  enum { accessible = false };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
  enum { assignable = false };  // different execution_space
  enum { accessible = true };   // same accessibility
  enum { deepcopy = true };
};

}  // namespace Impl
}  // namespace Kokkos
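/* Usage sketch (illustrative only): the access traits are compile-time
 * constants, so generic code can assert on the relationship between two
 * memory spaces.
 *
 *   static_assert(Kokkos::Impl::MemorySpaceAccess<
 *                     Kokkos::CudaSpace, Kokkos::CudaUVMSpace>::accessible,
 *                 "CudaUVMSpace is accessible from the Cuda execution space");
 */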
namespace Kokkos {
namespace Impl {

void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);

template <>
struct DeepCopy<CudaSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<HostSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};
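/* Usage sketch (illustrative only): user code normally reaches these
 * specializations through Kokkos::deep_copy rather than constructing a
 * DeepCopy object directly.  The view names and extent n are example values.
 *
 *   Kokkos::View<double*, Kokkos::CudaSpace> dev("dev", n);
 *   auto host = Kokkos::create_mirror_view(dev);  // HostSpace mirror
 *   Kokkos::deep_copy(dev, host);  // dispatches a CudaSpace <- HostSpace copy
 */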
template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

}  // namespace Impl
}  // namespace Kokkos
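/* Usage sketch (illustrative only): when an execution-space instance is
 * supplied, the two-argument constructors above fence that instance and then
 * issue the copy through DeepCopyAsyncCuda, so the copy may overlap with
 * subsequent host work.  dev and host are the example views from the earlier
 * sketch.
 *
 *   Kokkos::Cuda exec;
 *   Kokkos::deep_copy(exec, dev, host);  // asynchronous with respect to host
 */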
namespace Kokkos {
namespace Impl {

/** Running in CudaSpace attempting to access HostSpace: error */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace,
                                           Kokkos::HostSpace> {
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify(void) {
    Kokkos::abort("Cuda code attempted to access HostSpace memory");
  }

  KOKKOS_INLINE_FUNCTION static void verify(const void*) {
    Kokkos::abort("Cuda code attempted to access HostSpace memory");
  }
};

/** Running in CudaSpace accessing CudaUVMSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace,
                                           Kokkos::CudaUVMSpace> {
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify(void) {}
  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
};

/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace,
                                           Kokkos::CudaHostPinnedSpace> {
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify(void) {}
  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
};

/** Running in CudaSpace attempting to access an unknown space: error */
template <class OtherSpace>
struct VerifyExecutionCanAccessMemorySpace<
    typename std::enable_if<!std::is_same<Kokkos::CudaSpace, OtherSpace>::value,
                            Kokkos::CudaSpace>::type,
    OtherSpace> {
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify(void) {
    Kokkos::abort("Cuda code attempted to access unknown Space memory");
  }

  KOKKOS_INLINE_FUNCTION static void verify(const void*) {
    Kokkos::abort("Cuda code attempted to access unknown Space memory");
  }
};

//----------------------------------------

/** Running in HostSpace attempting to access CudaSpace: error */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
                                           Kokkos::CudaSpace> {
  enum { value = false };
  inline static void verify(void) { CudaSpace::access_error(); }
  inline static void verify(const void* p) { CudaSpace::access_error(p); }
};

/** Running in HostSpace accessing CudaUVMSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
                                           Kokkos::CudaUVMSpace> {
  enum { value = true };
  inline static void verify(void) {}
  inline static void verify(const void*) {}
};

/** Running in HostSpace accessing CudaHostPinnedSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
                                           Kokkos::CudaHostPinnedSpace> {
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify(void) {}
  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
};

}  // namespace Impl
}  // namespace Kokkos
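/* Usage sketch (illustrative only): generic code calls verify() before
 * dereferencing a pointer; the specializations above make it a no-op when
 * the access is legal and an abort otherwise.
 *
 *   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
 *       Kokkos::CudaSpace, Kokkos::CudaUVMSpace>::verify();  // legal: no-op
 */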
namespace Kokkos {
namespace Impl {

template <>
class SharedAllocationRecord<Kokkos::CudaSpace, void>
    : public SharedAllocationRecord<void, void> {
 private:
  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;

  typedef SharedAllocationRecord<void, void> RecordBase;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static void deallocate(RecordBase*);

  static ::cudaTextureObject_t attach_texture_object(
      const unsigned sizeof_alias, void* const alloc_ptr,
      const size_t alloc_size);

  static RecordBase s_root_record;

  ::cudaTextureObject_t m_tex_obj;
  const Kokkos::CudaSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord(
      const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);

 public:
  std::string get_label() const;

  static SharedAllocationRecord* allocate(const Kokkos::CudaSpace& arg_space,
                                          const std::string& arg_label,
                                          const size_t arg_alloc_size);

  /**\brief  Allocate tracked memory in the space */
  static void* allocate_tracked(const Kokkos::CudaSpace& arg_space,
                                const std::string& arg_label,
                                const size_t arg_alloc_size);

  /**\brief  Reallocate tracked memory in the space */
  static void* reallocate_tracked(void* const arg_alloc_ptr,
                                  const size_t arg_alloc_size);

  /**\brief  Deallocate tracked memory in the space */
  static void deallocate_tracked(void* const arg_alloc_ptr);

  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);

  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = attach_texture_object(sizeof(AliasType),
                                        (void*)RecordBase::m_alloc_ptr,
                                        RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // the texture object covers the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }

  static void print_records(std::ostream&, const Kokkos::CudaSpace&,
                            bool detail = false);
};
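/* Usage sketch (illustrative only): tracked allocations are reference-counted
 * through the record, and the label appears in print_records output.  The
 * label and byte count below are example values.
 *
 *   using Record = Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
 *   void* p = Record::allocate_tracked(Kokkos::CudaSpace(), "my_buffer", bytes);
 *   Record::deallocate_tracked(p);
 */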
template <>
class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
    : public SharedAllocationRecord<void, void> {
 private:
  typedef SharedAllocationRecord<void, void> RecordBase;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static void deallocate(RecordBase*);

  static RecordBase s_root_record;

  ::cudaTextureObject_t m_tex_obj;
  const Kokkos::CudaUVMSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);

 public:
  std::string get_label() const;

  static SharedAllocationRecord* allocate(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size);

  /**\brief  Allocate tracked memory in the space */
  static void* allocate_tracked(const Kokkos::CudaUVMSpace& arg_space,
                                const std::string& arg_label,
                                const size_t arg_alloc_size);

  /**\brief  Reallocate tracked memory in the space */
  static void* reallocate_tracked(void* const arg_alloc_ptr,
                                  const size_t arg_alloc_size);

  /**\brief  Deallocate tracked memory in the space */
  static void deallocate_tracked(void* const arg_alloc_ptr);

  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);

  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
          attach_texture_object(sizeof(AliasType),
                                (void*)RecordBase::m_alloc_ptr,
                                RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // the texture object covers the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }

  static void print_records(std::ostream&, const Kokkos::CudaUVMSpace&,
                            bool detail = false);
};
template <>
class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
    : public SharedAllocationRecord<void, void> {
 private:
  typedef SharedAllocationRecord<void, void> RecordBase;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static void deallocate(RecordBase*);

  static RecordBase s_root_record;

  const Kokkos::CudaHostPinnedSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_space() {}

  SharedAllocationRecord(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);

 public:
  std::string get_label() const;

  static SharedAllocationRecord* allocate(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size);

  /**\brief  Allocate tracked memory in the space */
  static void* allocate_tracked(const Kokkos::CudaHostPinnedSpace& arg_space,
                                const std::string& arg_label,
                                const size_t arg_alloc_size);

  /**\brief  Reallocate tracked memory in the space */
  static void* reallocate_tracked(void* const arg_alloc_ptr,
                                  const size_t arg_alloc_size);

  /**\brief  Deallocate tracked memory in the space */
  static void deallocate_tracked(void* const arg_alloc_ptr);

  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);

  static void print_records(std::ostream&, const Kokkos::CudaHostPinnedSpace&,
                            bool detail = false);
};

}  // namespace Impl
}  // namespace Kokkos

#endif /* defined(KOKKOS_ENABLE_CUDA) */
#endif /* #ifndef KOKKOS_CUDASPACE_HPP */