44 #ifndef KOKKOS_CUDASPACE_HPP
45 #define KOKKOS_CUDASPACE_HPP
47 #include <Kokkos_Macros.hpp>
48 #if defined( KOKKOS_ENABLE_CUDA )
50 #include <Kokkos_Core_fwd.hpp>
56 #include <Kokkos_HostSpace.hpp>
58 #include <Cuda/Kokkos_Cuda_abort.hpp>
// ---------------------------------------------------------------------------
// Kokkos::CudaSpace -- memory space for CUDA device-resident allocations.
// NOTE(review): the enclosing `class CudaSpace {` header, access specifiers,
// and closing `};` are missing from this fragment -- confirm against the
// full Kokkos_CudaSpace.hpp.
// ---------------------------------------------------------------------------
// Identity aliases: this space, its preferred execution space, and the
// combined device type used by Views.
70 typedef CudaSpace memory_space ;
71 typedef Kokkos::Cuda execution_space ;
72 typedef Kokkos::Device<execution_space,memory_space> device_type;
74 typedef unsigned int size_type ;
// Trivially copyable/movable; all special members defaulted.
79 CudaSpace( CudaSpace && rhs ) = default ;
80 CudaSpace(
const CudaSpace & rhs ) = default ;
81 CudaSpace & operator = ( CudaSpace && rhs ) = default ;
82 CudaSpace & operator = (
const CudaSpace & rhs ) = default ;
83 ~CudaSpace() = default ;
// Allocate arg_alloc_size bytes in this space (definition not visible here;
// presumably cudaMalloc-backed -- confirm in the implementation file).
86 void * allocate(
const size_t arg_alloc_size )
const ;
// Release an allocation previously obtained from allocate(); the size is
// passed back alongside the pointer.
89 void deallocate(
void *
const arg_alloc_ptr
90 ,
const size_t arg_alloc_size )
const ;
// Human-readable space name; returns the literal "Cuda" (see m_name below).
93 static constexpr
const char* name() {
return m_name; }
// Error hooks invoked when host execution illegally touches CudaSpace memory
// (called by VerifyExecutionCanAccessMemorySpace<HostSpace,CudaSpace> later
// in this header); overload takes the offending pointer.
97 static void access_error();
98 static void access_error(
const void *
const );
104 static constexpr
const char* m_name =
"Cuda";
// Allocation-record machinery needs access to this space's internals.
105 friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
// Host-side helpers managing device-global lock/scratch/thread-id arrays
// used by the CUDA backend (e.g. for atomics on non-native types).
// NOTE(review): the `deallocate` flag presumably frees the array instead of
// (re)allocating it -- confirm against the definitions; only declarations
// are visible here.
115 void init_lock_arrays_cuda_space();
124 int* atomic_lock_array_cuda_space_ptr(
bool deallocate =
false);
132 int* scratch_lock_array_cuda_space_ptr(
bool deallocate =
false);
140 int* threadid_lock_array_cuda_space_ptr(
bool deallocate =
false);
// ---------------------------------------------------------------------------
// Kokkos::CudaUVMSpace -- memory space for CUDA unified (managed) memory,
// accessible from both host and device.
// NOTE(review): the enclosing `class CudaUVMSpace {` header and closing `};`
// are missing from this fragment -- confirm against the full header.
// ---------------------------------------------------------------------------
156 typedef CudaUVMSpace memory_space ;
157 typedef Cuda execution_space ;
158 typedef Kokkos::Device<execution_space,memory_space> device_type;
159 typedef unsigned int size_type ;
// Count of live allocations in this space (definition not visible here).
167 static int number_of_allocations();
// Trivially copyable/movable; all special members defaulted.
175 CudaUVMSpace( CudaUVMSpace && rhs ) = default ;
176 CudaUVMSpace(
const CudaUVMSpace & rhs ) = default ;
177 CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ;
178 CudaUVMSpace & operator = (
const CudaUVMSpace & rhs ) = default ;
179 ~CudaUVMSpace() = default ;
// Allocate/deallocate managed memory; presumably cudaMallocManaged-backed --
// confirm in the implementation file.
182 void * allocate(
const size_t arg_alloc_size )
const ;
185 void deallocate(
void *
const arg_alloc_ptr
186 ,
const size_t arg_alloc_size )
const ;
// Human-readable space name; returns the literal "CudaUVM".
189 static constexpr
const char* name() {
return m_name; }
196 static constexpr
const char* m_name =
"CudaUVM";
// ---------------------------------------------------------------------------
// Kokkos::CudaHostPinnedSpace -- page-locked (pinned) host memory registered
// with CUDA; executed on by the host execution space.
// NOTE(review): access specifiers and the closing `};` are missing from this
// fragment -- confirm against the full header.
// ---------------------------------------------------------------------------
210 class CudaHostPinnedSpace {
// Pinned memory lives on the host, so the host execution space is used.
215 typedef HostSpace::execution_space execution_space ;
216 typedef CudaHostPinnedSpace memory_space ;
217 typedef Kokkos::Device<execution_space,memory_space> device_type;
218 typedef unsigned int size_type ;
// Unlike CudaSpace/CudaUVMSpace above, the default constructor is declared
// (not defaulted) -- its definition is not visible here.
222 CudaHostPinnedSpace();
223 CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ;
224 CudaHostPinnedSpace(
const CudaHostPinnedSpace & rhs ) = default ;
225 CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ;
226 CudaHostPinnedSpace & operator = (
const CudaHostPinnedSpace & rhs ) = default ;
227 ~CudaHostPinnedSpace() = default ;
// Allocate/deallocate pinned host memory; presumably cudaHostAlloc-backed --
// confirm in the implementation file.
230 void * allocate(
const size_t arg_alloc_size )
const ;
233 void deallocate(
void *
const arg_alloc_ptr
234 ,
const size_t arg_alloc_size )
const ;
// Human-readable space name; returns the literal "CudaHostPinned".
237 static constexpr
const char* name() {
return m_name; }
241 static constexpr
const char* m_name =
"CudaHostPinned";
// ---------------------------------------------------------------------------
// MemorySpaceAccess<Dst,Src> traits: can a View in Dst be assigned from Src
// (assignable), can Dst's execution space read Src memory (accessible), and
// is a deep_copy between them defined (deepcopy)?
// NOTE(review): closing `};` lines of these specializations are missing from
// this fragment.
// ---------------------------------------------------------------------------
// HostSpace <- CudaSpace: device memory is not host-accessible; copy only.
261 struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > {
262 enum { assignable =
false };
263 enum { accessible =
false };
264 enum { deepcopy =
true };
// HostSpace <- CudaUVMSpace: managed memory is host-accessible, but not
// assignable (different handles/semantics).
268 struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
270 enum { assignable =
false };
271 enum { accessible =
true };
272 enum { deepcopy =
true };
// HostSpace <- CudaHostPinnedSpace: pinned memory is ordinary host memory,
// so it is both assignable and accessible.
276 struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
278 enum { assignable =
true };
279 enum { accessible =
true };
280 enum { deepcopy =
true };
// NOTE(review): the specialization header line for the following enum group
// is missing from this fragment; by position (original lines 287-289) it is
// presumably MemorySpaceAccess< CudaSpace , HostSpace > -- confirm upstream.
287 enum { assignable =
false };
288 enum { accessible =
false };
289 enum { deepcopy =
true };
// CudaSpace <- CudaUVMSpace: managed memory is fully usable from device.
293 struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
295 enum { assignable =
true };
296 enum { accessible =
true };
297 enum { deepcopy =
true };
// CudaSpace <- CudaHostPinnedSpace: pinned host memory is device-accessible
// (zero-copy) but not assignable.
301 struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
303 enum { assignable =
false };
304 enum { accessible =
true };
305 enum { deepcopy =
true };
// NOTE(review): the specialization header line for the following enum group
// is missing from this fragment; by position (original lines 314-316) it is
// presumably MemorySpaceAccess< CudaUVMSpace , HostSpace > -- confirm
// upstream.
314 enum { assignable =
false };
315 enum { accessible =
false };
316 enum { deepcopy =
true };
// CudaUVMSpace <- CudaSpace: device memory is readable by the UVM space's
// (device) execution space, but not assignable.
320 struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
323 enum { assignable =
false };
326 enum { accessible =
true };
327 enum { deepcopy =
true };
// CudaUVMSpace <- CudaHostPinnedSpace: device-accessible pinned memory.
331 struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
333 enum { assignable =
false };
334 enum { accessible =
true };
335 enum { deepcopy =
true };
// NOTE(review): the specialization header line for the following enum group
// is missing from this fragment; by position (original lines 345-347) it is
// presumably MemorySpaceAccess< CudaHostPinnedSpace , HostSpace > -- confirm
// upstream.
345 enum { assignable =
false };
346 enum { accessible =
true };
347 enum { deepcopy =
true };
// CudaHostPinnedSpace <- CudaSpace: device memory is not accessible from the
// host execution space that owns pinned memory; deep_copy only.
351 struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
352 enum { assignable =
false };
353 enum { accessible =
false };
354 enum { deepcopy =
true };
// CudaHostPinnedSpace <- CudaUVMSpace: managed memory is host-accessible.
358 struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
359 enum { assignable =
false };
360 enum { accessible =
true };
361 enum { deepcopy =
true };
// Asynchronous device copy of n bytes from src to dst, used by the
// execution-space-parameterized DeepCopy specializations below
// (definition not visible in this fragment).
374 void DeepCopyAsyncCuda(
void * dst ,
const void * src ,
size_t n);
// ---------------------------------------------------------------------------
// Fully-specialized DeepCopy functors for the Cuda execution space.  Each
// provides a synchronous constructor (dst, src, bytes) and a stream/instance
// constructor taking a Cuda execution-space argument.  Definitions are not
// visible here.
// NOTE(review): the `{ public: ... };` framing of these structs is missing
// from this fragment -- confirm against the full header.
// ---------------------------------------------------------------------------
// Device-to-device copy.
376 template<>
struct DeepCopy< CudaSpace , CudaSpace , Cuda>
378 DeepCopy(
void * dst ,
const void * src ,
size_t );
379 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
// Host-to-device copy.
382 template<>
struct DeepCopy< CudaSpace , HostSpace , Cuda >
384 DeepCopy(
void * dst ,
const void * src ,
size_t );
385 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
// Device-to-host copy.
388 template<>
struct DeepCopy< HostSpace , CudaSpace , Cuda >
390 DeepCopy(
void * dst ,
const void * src ,
size_t );
391 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
// ---------------------------------------------------------------------------
// DeepCopy specializations parameterized on an arbitrary ExecutionSpace.
// Pattern: the synchronous constructor forwards to the corresponding
// Cuda-execution-space DeepCopy above; the ExecutionSpace-instance
// constructor calls DeepCopyAsyncCuda.  UVM and pinned spaces reuse the
// CudaSpace/HostSpace copies appropriate to where their memory is reachable.
// NOTE(review): the `{ public: ... };` framing and (likely) an exec.fence()
// before the async call are missing from this fragment; the `exec` argument
// is unused in the visible body -- confirm against the full header.
// ---------------------------------------------------------------------------
// Device <- device.
394 template<
class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
397 DeepCopy(
void * dst ,
const void * src ,
size_t n )
398 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
401 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
404 DeepCopyAsyncCuda (dst,src,n);
// Device <- host.
408 template<
class ExecutionSpace>
struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
411 DeepCopy(
void * dst ,
const void * src ,
size_t n )
412 { (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
415 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
418 DeepCopyAsyncCuda (dst,src,n);
// Host <- device.
422 template<
class ExecutionSpace>
423 struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
426 DeepCopy(
void * dst ,
const void * src ,
size_t n )
427 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
430 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
433 DeepCopyAsyncCuda (dst,src,n);
// Device <- UVM: treated as a device-to-device copy.
437 template<
class ExecutionSpace>
438 struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
441 DeepCopy(
void * dst ,
const void * src ,
size_t n )
442 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
445 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
448 DeepCopyAsyncCuda (dst,src,n);
// Device <- pinned host: treated as a host-to-device copy.
452 template<
class ExecutionSpace>
453 struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
456 DeepCopy(
void * dst ,
const void * src ,
size_t n )
457 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
460 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
463 DeepCopyAsyncCuda (dst,src,n);
// UVM <- device: treated as a device-to-device copy.
468 template<
class ExecutionSpace>
469 struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
472 DeepCopy(
void * dst ,
const void * src ,
size_t n )
473 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
476 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
479 DeepCopyAsyncCuda (dst,src,n);
// Continuation of the ExecutionSpace-parameterized DeepCopy specializations
// (same pattern and same NOTE(review) caveats as the group above: missing
// `{ public: ... };` framing; `exec` unused in the visible body).
// UVM <- UVM: treated as a device-to-device copy.
483 template<
class ExecutionSpace>
484 struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
487 DeepCopy(
void * dst ,
const void * src ,
size_t n )
488 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
491 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
494 DeepCopyAsyncCuda (dst,src,n);
// UVM <- pinned host: treated as a host-to-device copy.
498 template<
class ExecutionSpace>
499 struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
502 DeepCopy(
void * dst ,
const void * src ,
size_t n )
503 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
506 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
509 DeepCopyAsyncCuda (dst,src,n);
// UVM <- host: treated as a host-to-device copy.
513 template<
class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
516 DeepCopy(
void * dst ,
const void * src ,
size_t n )
517 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
520 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
523 DeepCopyAsyncCuda (dst,src,n);
// Pinned host <- device: treated as a device-to-host copy.
528 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
531 DeepCopy(
void * dst ,
const void * src ,
size_t n )
532 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
535 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
538 DeepCopyAsyncCuda (dst,src,n);
// Pinned host <- UVM: treated as a device-to-host copy.
542 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
545 DeepCopy(
void * dst ,
const void * src ,
size_t n )
546 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
549 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
552 DeepCopyAsyncCuda (dst,src,n);
// Pinned host <- pinned host: treated as a host-to-host copy.
556 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
559 DeepCopy(
void * dst ,
const void * src ,
size_t n )
560 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
563 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
566 DeepCopyAsyncCuda (dst,src,n);
// Final group of ExecutionSpace-parameterized DeepCopy specializations
// (same pattern and same NOTE(review) caveats as the groups above).
// Pinned host <- host: treated as a host-to-host copy.
570 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
573 DeepCopy(
void * dst ,
const void * src ,
size_t n )
574 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
577 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
580 DeepCopyAsyncCuda (dst,src,n);
// Host <- UVM: treated as a device-to-host copy.
585 template<
class ExecutionSpace>
struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
588 DeepCopy(
void * dst ,
const void * src ,
size_t n )
589 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
592 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
595 DeepCopyAsyncCuda (dst,src,n);
// Host <- pinned host: treated as a host-to-host copy.
599 template<
class ExecutionSpace>
struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
602 DeepCopy(
void * dst ,
const void * src ,
size_t n )
603 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
606 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
609 DeepCopyAsyncCuda (dst,src,n);
// ---------------------------------------------------------------------------
// VerifyExecutionCanAccessMemorySpace<ExecSpace,MemSpace>: `value` records
// whether the access is legal; verify() aborts (or is a no-op) accordingly.
// These specializations cover CUDA device code accessing each memory space.
// NOTE(review): the `{ public: ... };` framing of these structs is missing
// from this fragment.
// ---------------------------------------------------------------------------
// Device code must never touch plain HostSpace memory: abort at runtime.
624 struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace ,
Kokkos::HostSpace >
626 enum { value =
false };
627 KOKKOS_INLINE_FUNCTION
static void verify(
void )
628 { Kokkos::abort(
"Cuda code attempted to access HostSpace memory"); }
630 KOKKOS_INLINE_FUNCTION
static void verify(
const void * )
631 { Kokkos::abort(
"Cuda code attempted to access HostSpace memory"); }
// Device code may access managed (UVM) memory: verify() is a no-op.
636 struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
638 enum { value =
true };
639 KOKKOS_INLINE_FUNCTION
static void verify(
void ) { }
640 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) { }
// Device code may access pinned host memory: verify() is a no-op.
645 struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
647 enum { value =
true };
648 KOKKOS_INLINE_FUNCTION
static void verify(
void ) { }
649 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) { }
// Catch-all: device code touching any other (non-CudaSpace) memory space
// aborts.  NOTE(review): the second template argument line of this partial
// specialization and the `{ public: ... };` framing are missing from this
// fragment -- confirm against the full header.
653 template<
class OtherSpace >
654 struct VerifyExecutionCanAccessMemorySpace<
655 typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
658 enum { value =
false };
659 KOKKOS_INLINE_FUNCTION
static void verify(
void )
660 { Kokkos::abort(
"Cuda code attempted to access unknown Space memory"); }
662 KOKKOS_INLINE_FUNCTION
static void verify(
const void * )
663 { Kokkos::abort(
"Cuda code attempted to access unknown Space memory"); }
// Host code touching CudaSpace memory: illegal; routes the diagnostic
// through CudaSpace::access_error() (declared earlier in this header).
669 struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
671 enum { value =
false };
672 inline static void verify(
void ) { CudaSpace::access_error(); }
673 inline static void verify(
const void * p ) { CudaSpace::access_error(p); }
// Host code may access managed (UVM) memory: verify() is a no-op.
678 struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
680 enum { value =
true };
681 inline static void verify(
void ) { }
682 inline static void verify(
const void * ) { }
// Host code may access pinned host memory: verify() is a no-op.
687 struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
689 enum { value =
true };
690 KOKKOS_INLINE_FUNCTION
static void verify(
void ) {}
691 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) {}
// ---------------------------------------------------------------------------
// SharedAllocationRecord<CudaSpace,void>: reference-counted allocation record
// for device memory, with optional CUDA texture-object binding for read-only
// fetches.  NOTE(review): access specifiers, several braces, and the closing
// `};` are missing from this fragment -- confirm against the full header.
// ---------------------------------------------------------------------------
704 class SharedAllocationRecord< Kokkos::CudaSpace , void >
705 :
public SharedAllocationRecord< void , void >
// The UVM record (below) reuses this record's texture-object machinery.
709 friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
711 typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are non-copyable.
713 SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
714 SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// Deallocation callback registered with the base record.
716 static void deallocate( RecordBase * );
// Create a texture object viewing [alloc_ptr, alloc_ptr+alloc_size) with the
// given element ("alias") size; definition not visible here.
718 static ::cudaTextureObject_t
719 attach_texture_object( const unsigned sizeof_alias
720 , void * const alloc_ptr
721 , const size_t alloc_size );
// Root of the record list for this space (used by print_records).
724 static RecordBase s_root_record ;
// Lazily-created texture object (0 until attached) and the owning space.
727 ::cudaTextureObject_t m_tex_obj ;
728 const Kokkos::CudaSpace m_space ;
732 ~SharedAllocationRecord();
733 SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
// Allocating constructor: space, label, byte count, and the deallocator to
// invoke when the reference count drops to zero.
735 SharedAllocationRecord(
const Kokkos::CudaSpace & arg_space
736 ,
const std::string & arg_label
737 ,
const size_t arg_alloc_size
738 ,
const RecordBase::function_type arg_dealloc = & deallocate
743 std::string get_label()
const ;
745 static SharedAllocationRecord * allocate(
const Kokkos::CudaSpace & arg_space
746 ,
const std::string & arg_label
747 ,
const size_t arg_alloc_size );
// Tracked allocation helpers: allocate/reallocate/deallocate raw memory with
// an associated record.
751 void * allocate_tracked(
const Kokkos::CudaSpace & arg_space
752 ,
const std::string & arg_label
753 ,
const size_t arg_alloc_size );
757 void * reallocate_tracked(
void *
const arg_alloc_ptr
758 ,
const size_t arg_alloc_size );
762 void deallocate_tracked(
void *
const arg_alloc_ptr );
// Map an allocation pointer back to its record.
764 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
// Lazily attach (once) and return the texture object for this allocation;
// alias type restricted to int/::int2/::int4 by the static_assert below.
766 template<
typename AliasType >
768 ::cudaTextureObject_t attach_texture_object()
770 static_assert( ( std::is_same< AliasType , int >::value ||
771 std::is_same< AliasType , ::int2 >::value ||
772 std::is_same< AliasType , ::int4 >::value )
773 ,
"Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
775 if ( m_tex_obj == 0 ) {
776 m_tex_obj = attach_texture_object(
sizeof(AliasType)
777 , (
void*) RecordBase::m_alloc_ptr
778 , RecordBase::m_alloc_size );
// Offset (in AliasType units) of ptr from the start of the allocation.
784 template<
typename AliasType >
786 int attach_texture_object_offset(
const AliasType *
const ptr )
789 return ptr -
reinterpret_cast<AliasType*
>( RecordBase::m_alloc_ptr );
// Debug dump of all records in this space.
792 static void print_records( std::ostream & ,
const Kokkos::CudaSpace & ,
bool detail =
false );
// ---------------------------------------------------------------------------
// SharedAllocationRecord<CudaUVMSpace,void>: allocation record for managed
// (UVM) memory; mirrors the CudaSpace record and delegates texture-object
// creation to it.  NOTE(review): access specifiers, several braces, and the
// closing `};` are missing from this fragment.
// ---------------------------------------------------------------------------
797 class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
798 :
public SharedAllocationRecord< void , void >
802 typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are non-copyable.
804 SharedAllocationRecord(
const SharedAllocationRecord & ) = delete ;
805 SharedAllocationRecord & operator = (
const SharedAllocationRecord & ) = delete ;
807 static void deallocate( RecordBase * );
// Root of the record list for this space (used by print_records).
809 static RecordBase s_root_record ;
// Lazily-created texture object (0 until attached) and the owning space.
811 ::cudaTextureObject_t m_tex_obj ;
812 const Kokkos::CudaUVMSpace m_space ;
816 ~SharedAllocationRecord();
817 SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
// Allocating constructor: space, label, byte count, deallocator callback.
819 SharedAllocationRecord(
const Kokkos::CudaUVMSpace & arg_space
820 ,
const std::string & arg_label
821 ,
const size_t arg_alloc_size
822 ,
const RecordBase::function_type arg_dealloc = & deallocate
827 std::string get_label()
const ;
829 static SharedAllocationRecord * allocate(
const Kokkos::CudaUVMSpace & arg_space
830 ,
const std::string & arg_label
831 ,
const size_t arg_alloc_size
// Tracked allocation helpers, as in the CudaSpace record.
836 void * allocate_tracked(
const Kokkos::CudaUVMSpace & arg_space
837 ,
const std::string & arg_label
838 ,
const size_t arg_alloc_size );
842 void * reallocate_tracked(
void *
const arg_alloc_ptr
843 ,
const size_t arg_alloc_size );
847 void deallocate_tracked(
void *
const arg_alloc_ptr );
849 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
// Lazily attach (once) and return the texture object; delegates to the
// CudaSpace record's static attach_texture_object.
852 template<
typename AliasType >
854 ::cudaTextureObject_t attach_texture_object()
856 static_assert( ( std::is_same< AliasType , int >::value ||
857 std::is_same< AliasType , ::int2 >::value ||
858 std::is_same< AliasType , ::int4 >::value )
859 ,
"Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
861 if ( m_tex_obj == 0 ) {
862 m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
863 attach_texture_object(
sizeof(AliasType)
864 , (
void*) RecordBase::m_alloc_ptr
865 , RecordBase::m_alloc_size );
// Offset (in AliasType units) of ptr from the start of the allocation.
871 template<
typename AliasType >
873 int attach_texture_object_offset(
const AliasType *
const ptr )
876 return ptr -
reinterpret_cast<AliasType*
>( RecordBase::m_alloc_ptr );
// Debug dump of all records in this space.
879 static void print_records( std::ostream & ,
const Kokkos::CudaUVMSpace & ,
bool detail =
false );
// ---------------------------------------------------------------------------
// SharedAllocationRecord<CudaHostPinnedSpace,void>: allocation record for
// pinned host memory; no texture-object member, since pinned memory is read
// through normal host/device loads.  NOTE(review): access specifiers,
// several braces, and the closing `};` are missing from this fragment.
// ---------------------------------------------------------------------------
883 class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
884 :
public SharedAllocationRecord< void , void >
888 typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are non-copyable.
890 SharedAllocationRecord(
const SharedAllocationRecord & ) = delete ;
891 SharedAllocationRecord & operator = (
const SharedAllocationRecord & ) = delete ;
893 static void deallocate( RecordBase * );
// Root of the record list for this space (used by print_records).
895 static RecordBase s_root_record ;
897 const Kokkos::CudaHostPinnedSpace m_space ;
901 ~SharedAllocationRecord();
902 SharedAllocationRecord() : RecordBase(), m_space() {}
// Allocating constructor: space, label, byte count, deallocator callback.
904 SharedAllocationRecord(
const Kokkos::CudaHostPinnedSpace & arg_space
905 ,
const std::string & arg_label
906 ,
const size_t arg_alloc_size
907 ,
const RecordBase::function_type arg_dealloc = & deallocate
912 std::string get_label()
const ;
914 static SharedAllocationRecord * allocate(
const Kokkos::CudaHostPinnedSpace & arg_space
915 ,
const std::string & arg_label
916 ,
const size_t arg_alloc_size
// Tracked allocation helpers, as in the CudaSpace record.
920 void * allocate_tracked(
const Kokkos::CudaHostPinnedSpace & arg_space
921 ,
const std::string & arg_label
922 ,
const size_t arg_alloc_size );
926 void * reallocate_tracked(
void *
const arg_alloc_ptr
927 ,
const size_t arg_alloc_size );
931 void deallocate_tracked(
void *
const arg_alloc_ptr );
934 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
// Debug dump of all records in this space.
936 static void print_records( std::ostream & ,
const Kokkos::CudaHostPinnedSpace & ,
bool detail =
false );
// NOTE(review): the following lines are stray documentation residue from an
// extraction pass (doc-comment fragments for host-memory management, the
// hwloc `bool available()` query, and the MemorySpaceAccess trait); kept
// here as comments so the header remains parseable.
// Memory management for host memory.
//   bool available() -- query if hwloc is available.
// Access relationship between DstMemorySpace and SrcMemorySpace.