45 #ifndef KOKKOS_ROCMSPACE_HPP 
   46 #define KOKKOS_ROCMSPACE_HPP 
   48 #include <Kokkos_Core_fwd.hpp> 
   50 #if defined(KOKKOS_ENABLE_ROCM) 
   56 #include <Kokkos_HostSpace.hpp> 
   61 namespace Experimental {
 
   // Type aliases published by this space: memory_space is ROCmSpace itself,
   // execution_space is Kokkos::Experimental::ROCm, and device_type pairs the
   // two in a Kokkos::Device.
   67   using memory_space    = ROCmSpace;
 
   68   using execution_space = Kokkos::Experimental::ROCm;
 
   69   using device_type     = Kokkos::Device<execution_space, memory_space>;
 
   // size_type is a plain unsigned int.
   71   using size_type = 
unsigned int;
 
   // Move construction, copy construction, move assignment, copy assignment
   // and destruction are all compiler-generated (= default).
   76   ROCmSpace(ROCmSpace&& rhs)      = 
default;
 
   77   ROCmSpace(
const ROCmSpace& rhs) = 
default;
 
   78   ROCmSpace& operator=(ROCmSpace&& rhs) = 
default;
 
   79   ROCmSpace& operator=(
const ROCmSpace& rhs) = 
default;
 
   80   ~ROCmSpace()                               = 
default;
 
   // allocate(): const member that takes the requested allocation size and
   // returns a void*. Only the declaration appears here.
   83   void* allocate(
const size_t arg_alloc_size) 
const;
 
   // deallocate(): const member that takes a pointer and an allocation size
   // and returns nothing. Presumably both describe a prior allocate() call --
   // TODO confirm against the definition.
   86   void deallocate(
void* 
const arg_alloc_ptr, 
const size_t arg_alloc_size) 
const;
 
   // name(): static constexpr accessor that returns the m_name string.
   89   static constexpr 
const char* name() { 
return m_name; };
 
   // access_error(): two static overloads, one without arguments and one
   // taking a const void*. Only declared here.
   // NOTE(review): by name these report an invalid access to this space --
   // confirm in the definition.
   93   static void access_error();
 
   94   static void access_error(
const void* 
const);
 
   // String constant returned by name(); its value is "ROCm".
   99   static constexpr 
const char* m_name = 
"ROCm";
 
  // Grants Kokkos::Impl::SharedAllocationRecord<ROCmSpace, void> access to
  // this class's non-public members.
  100   friend class Kokkos::Impl::SharedAllocationRecord<
 
  101       Kokkos::Experimental::ROCmSpace, void>;
 
  // Free-function allocation helpers; only declarations appear here.
  // rocm_device_allocate and rocm_hostpinned_allocate take an int and return
  // a void*; rocm_device_free takes a void* and returns nothing.
  // NOTE(review): the int argument is presumably the allocation size. The
  // space classes use size_t for sizes -- confirm large requests cannot be
  // truncated.
  108 void* rocm_device_allocate(
int);
 
  109 void* rocm_hostpinned_allocate(
int);
 
  110 void rocm_device_free(
void*);
 
  // init_lock_arrays_rocm_space(): takes no arguments and returns nothing.
  // Only the declaration appears here.
  118 void init_lock_arrays_rocm_space();
 
  // The three *_lock_array_rocm_space_ptr functions share one signature:
  // each returns an int* and takes a bool `deallocate` flag that defaults to
  // false.
  // NOTE(review): presumably passing true releases the array -- confirm in
  // the definitions.
  127 int* atomic_lock_array_rocm_space_ptr(
bool deallocate = 
false);
 
  136 int* scratch_lock_array_rocm_space_ptr(
bool deallocate = 
false);
 
  144 int* threadid_lock_array_rocm_space_ptr(
bool deallocate = 
false);
 
  152 namespace Experimental {
 
  // ROCmHostPinnedSpace: memory space class whose execution space is taken
  // from HostSpace (HostSpace::execution_space) rather than from ROCm.
  156 class ROCmHostPinnedSpace {
 
  // Type aliases: memory_space is this class, device_type pairs the host
  // execution space with it in a Kokkos::Device, and size_type is a plain
  // unsigned int.
  160   using execution_space = HostSpace::execution_space;
 
  161   using memory_space    = ROCmHostPinnedSpace;
 
  162   using device_type     = Kokkos::Device<execution_space, memory_space>;
 
  163   using size_type       = 
unsigned int;
 
  // The default constructor is user-declared (defined elsewhere). The copy
  // and move constructors, copy and move assignment, and the destructor are
  // compiler-generated (= default).
  167   ROCmHostPinnedSpace();
 
  168   ROCmHostPinnedSpace(ROCmHostPinnedSpace&& rhs)      = 
default;
 
  169   ROCmHostPinnedSpace(
const ROCmHostPinnedSpace& rhs) = 
default;
 
  170   ROCmHostPinnedSpace& operator=(ROCmHostPinnedSpace&& rhs) = 
default;
 
  171   ROCmHostPinnedSpace& operator=(
const ROCmHostPinnedSpace& rhs) = 
default;
 
  172   ~ROCmHostPinnedSpace()                                         = 
default;
 
  // allocate(): const member that takes the requested allocation size and
  // returns a void*. Only the declaration appears here.
  175   void* allocate(
const size_t arg_alloc_size) 
const;
 
  // deallocate(): const member that takes a pointer and an allocation size
  // and returns nothing. Presumably both describe a prior allocate() call --
  // TODO confirm against the definition.
  178   void deallocate(
void* 
const arg_alloc_ptr, 
const size_t arg_alloc_size) 
const;
 
  // name(): static constexpr accessor that returns the m_name string.
  181   static constexpr 
const char* name() { 
return m_name; };
 
  // String constant returned by name(); its value is "ROCmHostPinned".
  184   static constexpr 
const char* m_name = 
"ROCmHostPinned";
 
  198                   Kokkos::Experimental::ROCmSpace,
 
  199                   Kokkos::Experimental::ROCmSpace>::assignable,
 
  // Access flags for the <Kokkos::HostSpace, ROCmSpace> pair: assignable and
  // accessible are false, deepcopy is true.
  205 struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::Experimental::ROCmSpace> {
 
  206   enum { assignable = 
false };
 
  207   enum { accessible = 
false };
 
  208   enum { deepcopy = 
true };
 
  // Access flags for the <Kokkos::HostSpace, ROCmHostPinnedSpace> pair: all
  // three of assignable, accessible and deepcopy are true.
  212 struct MemorySpaceAccess<Kokkos::HostSpace,
 
  213                          Kokkos::Experimental::ROCmHostPinnedSpace> {
 
  215   enum { assignable = 
true };
 
  216   enum { accessible = 
true };
 
  217   enum { deepcopy = 
true };
 
  224   enum { assignable = 
false };
 
  225   enum { accessible = 
false };
 
  226   enum { deepcopy = 
true };
 
  // Access flags for the <ROCmSpace, ROCmHostPinnedSpace> pair: assignable is
  // false, while accessible and deepcopy are true.
  230 struct MemorySpaceAccess<Kokkos::Experimental::ROCmSpace,
 
  231                          Kokkos::Experimental::ROCmHostPinnedSpace> {
 
  233   enum { assignable = 
false };
 
  234   enum { accessible = 
true };  
 
  235   enum { deepcopy = 
true };
 
  // Access flags for a pair whose first space is ROCmHostPinnedSpace:
  // assignable is false, accessible and deepcopy are true.
  // NOTE(review): the second template argument is not shown next to this
  // header -- confirm which pair these flags describe.
  243 struct MemorySpaceAccess<Kokkos::Experimental::ROCmHostPinnedSpace,
 
  245   enum { assignable = 
false };  
 
  246   enum { accessible = 
true };   
 
  247   enum { deepcopy = 
true };
 
  // Access flags for the <ROCmHostPinnedSpace, ROCmSpace> pair: assignable and
  // accessible are false, deepcopy is true.
  251 struct MemorySpaceAccess<Kokkos::Experimental::ROCmHostPinnedSpace,
 
  252                          Kokkos::Experimental::ROCmSpace> {
 
  253   enum { assignable = 
false };  
 
  254   enum { accessible = 
false };
 
  255   enum { deepcopy = 
true };
 
  // DeepCopyAsyncROCm(): takes a destination pointer, a source pointer and a
  // size n, and returns an hc::completion_future. Only the declaration
  // appears here.
  // NOTE(review): presumably n is a byte count and the future signals
  // completion of the copy -- confirm in the definition.
  269 hc::completion_future DeepCopyAsyncROCm(
void* dst, 
const void* src, 
size_t n);
 
  // DeepCopy specialization for <ROCmSpace, ROCmSpace, ROCm>.
  // Two constructors are declared: one taking (dst, src, size) and one that
  // takes a ROCm execution-space instance ahead of dst and src.
  272 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
 
  273                 Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm> {
 
  274   DeepCopy(
void* dst, 
const void* src, 
size_t);
 
  275   DeepCopy(
const Kokkos::Experimental::ROCm&, 
void* dst, 
const void* src,
 
  // DeepCopy specialization for <ROCmSpace, HostSpace, ROCm>.
  // Two constructors are declared: one taking (dst, src, size) and one that
  // takes a ROCm execution-space instance ahead of dst and src.
  280 struct DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
 
  281                 Kokkos::Experimental::ROCm> {
 
  282   DeepCopy(
void* dst, 
const void* src, 
size_t);
 
  283   DeepCopy(
const Kokkos::Experimental::ROCm&, 
void* dst, 
const void* src,
 
  // DeepCopy specialization for <HostSpace, ROCmSpace, ROCm>.
  // Two constructors are declared: one taking (dst, src, size) and one that
  // takes a ROCm execution-space instance ahead of dst and src.
  288 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
 
  289                 Kokkos::Experimental::ROCm> {
 
  290   DeepCopy(
void* dst, 
const void* src, 
size_t);
 
  291   DeepCopy(
const Kokkos::Experimental::ROCm&, 
void* dst, 
const void* src,
 
  // DeepCopy specialization for <ROCmSpace, ROCmSpace, ExecutionSpace>, with
  // the execution space left as a template parameter.
  295 template <
class ExecutionSpace>
 
  296 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
 
  297                 Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
 
  // (dst, src, n) constructor: builds a temporary of the
  // <ROCmSpace, ROCmSpace, ROCm> specialization; the (void) cast discards it.
  298   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  299     (void)DeepCopy<Kokkos::Experimental::ROCmSpace,
 
  300                    Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm>(
 
  // Constructor that also receives an execution-space instance: it calls
  // DeepCopyAsyncROCm(dst, src, n) and stores the returned
  // hc::completion_future in `fut`.
  304   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  307     hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
 
  // DeepCopy specialization for <ROCmSpace, HostSpace, ExecutionSpace>, with
  // the execution space left as a template parameter.
  313 template <
class ExecutionSpace>
 
  314 struct DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace, ExecutionSpace> {
 
  // (dst, src, n) constructor: builds a temporary of the
  // <ROCmSpace, HostSpace, ROCm> specialization with the same arguments; the
  // (void) cast discards it.
  315   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  316     (void)DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
 
  317                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it constructs
  // a temporary through the three-argument constructor of this same
  // specialization.
  320   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  323     DeepCopy(dst, src, n);
 
  // DeepCopy specialization for <HostSpace, ROCmSpace, ExecutionSpace>, with
  // the execution space left as a template parameter.
  327 template <
class ExecutionSpace>
 
  328 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
 
  // (dst, src, n) constructor: builds a temporary of the
  // <HostSpace, ROCmSpace, ROCm> specialization with the same arguments; the
  // (void) cast discards it.
  329   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  330     (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
 
  331                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it constructs
  // a temporary through the three-argument constructor of this same
  // specialization.
  334   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  337     DeepCopy(dst, src, n);
 
  // DeepCopy specialization for
  // <ROCmHostPinnedSpace, ROCmHostPinnedSpace, ROCm>.
  // Two constructors are declared: one taking (dst, src, size) and one that
  // takes a ROCm execution-space instance ahead of dst and src.
  342 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
 
  343                 Kokkos::Experimental::ROCmHostPinnedSpace,
 
  344                 Kokkos::Experimental::ROCm> {
 
  345   DeepCopy(
void* dst, 
const void* src, 
size_t);
 
  346   DeepCopy(
const Kokkos::Experimental::ROCm&, 
void* dst, 
const void* src,
 
  // DeepCopy specialization for <ROCmHostPinnedSpace, HostSpace, ROCm>.
  // Two constructors are declared: one taking (dst, src, size) and one that
  // takes a ROCm execution-space instance ahead of dst and src.
  351 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
 
  352                 Kokkos::Experimental::ROCm> {
 
  353   DeepCopy(
void* dst, 
const void* src, 
size_t);
 
  354   DeepCopy(
const Kokkos::Experimental::ROCm&, 
void* dst, 
const void* src,
 
  // DeepCopy specialization for <HostSpace, ROCmHostPinnedSpace, ROCm>.
  // Two constructors are declared: one taking (dst, src, size) and one that
  // takes a ROCm execution-space instance ahead of dst and src.
  359 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
 
  360                 Kokkos::Experimental::ROCm> {
 
  361   DeepCopy(
void* dst, 
const void* src, 
size_t);
 
  362   DeepCopy(
const Kokkos::Experimental::ROCm&, 
void* dst, 
const void* src,
 
  // DeepCopy specialization for
  // <ROCmSpace, ROCmHostPinnedSpace, ExecutionSpace>, with the execution space
  // left as a template parameter.
  366 template <
class ExecutionSpace>
 
  367 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
 
  368                 Kokkos::Experimental::ROCmHostPinnedSpace, ExecutionSpace> {
 
  // (dst, src, n) constructor: builds a temporary of the
  // <ROCmSpace, HostSpace, ROCm> specialization, so the host-pinned side is
  // handled through the plain HostSpace specialization. The (void) cast
  // discards the temporary.
  369   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  370     (void)DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
 
  371                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it calls
  // DeepCopyAsyncROCm(dst, src, n) and stores the returned
  // hc::completion_future in `fut`.
  374   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  377     hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
 
  // DeepCopy specialization for
  // <ROCmHostPinnedSpace, ROCmSpace, ExecutionSpace>, with the execution space
  // left as a template parameter.
  383 template <
class ExecutionSpace>
 
  384 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
 
  385                 Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
 
  // (dst, src, n) constructor: builds a temporary of the
  // <HostSpace, ROCmSpace, ROCm> specialization, so the host-pinned side is
  // handled through the plain HostSpace specialization. The (void) cast
  // discards the temporary.
  386   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  387     (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
 
  388                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it calls
  // DeepCopyAsyncROCm(dst, src, n) and stores the returned
  // hc::completion_future in `fut`.
  391   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  394     hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
 
  // DeepCopy specialization for
  // <ROCmHostPinnedSpace, ROCmHostPinnedSpace, ExecutionSpace>, with the
  // execution space left as a template parameter.
  400 template <
class ExecutionSpace>
 
  401 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
 
  402                 Kokkos::Experimental::ROCmHostPinnedSpace, ExecutionSpace> {
 
  // (dst, src, n) constructor: builds a temporary of the
  // <ROCmHostPinnedSpace, ROCmHostPinnedSpace, ROCm> specialization with the
  // same arguments; the (void) cast discards it.
  403   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  404     (void)DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
 
  405                    Kokkos::Experimental::ROCmHostPinnedSpace,
 
  406                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it constructs
  // a temporary through the three-argument constructor of this same
  // specialization.
  409   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  415     DeepCopy(dst, src, n);
 
  // DeepCopy specialization whose first two arguments are
  // ROCmHostPinnedSpace and HostSpace.
  // NOTE(review): the remaining template argument is not shown next to this
  // header; presumably it is ExecutionSpace, as in the sibling
  // specializations -- confirm.
  419 template <
class ExecutionSpace>
 
  420 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
 
  // (dst, src, n) constructor: builds a temporary of the
  // <ROCmHostPinnedSpace, HostSpace, ROCm> specialization with the same
  // arguments; the (void) cast discards it.
  422   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  423     (void)DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
 
  424                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it constructs
  // a temporary through the three-argument constructor of this same
  // specialization.
  427   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  430     DeepCopy(dst, src, n);
 
  // DeepCopy specialization whose first two arguments are HostSpace and
  // ROCmHostPinnedSpace.
  // NOTE(review): the remaining template argument is not shown next to this
  // header; presumably it is ExecutionSpace, as in the sibling
  // specializations -- confirm.
  434 template <
class ExecutionSpace>
 
  435 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
 
  // (dst, src, n) constructor: builds a temporary of the
  // <HostSpace, ROCmHostPinnedSpace, ROCm> specialization with the same
  // arguments; the (void) cast discards it.
  437   inline DeepCopy(
void* dst, 
const void* src, 
size_t n) {
 
  438     (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
 
  439                    Kokkos::Experimental::ROCm>(dst, src, n);
 
  // Constructor that also receives an execution-space instance: it constructs
  // a temporary through the three-argument constructor of this same
  // specialization.
  442   inline DeepCopy(
const ExecutionSpace& exec, 
void* dst, 
const void* src,
 
  445     DeepCopy(dst, src, n);
 
  // Specialization whose first argument is ROCmSpace. value is false, and both
  // verify() overloads call Kokkos::abort with a message about ROCm code
  // touching HostSpace memory.
  // NOTE(review): the second template argument is not shown next to this
  // header; the abort message suggests Kokkos::HostSpace -- confirm.
  459 struct VerifyExecutionCanAccessMemorySpace<Kokkos::Experimental::ROCmSpace,
 
  461   enum { value = 
false };
 
  462   KOKKOS_INLINE_FUNCTION 
static void verify(
void) {
 
  463     Kokkos::abort(
"ROCm code attempted to access HostSpace memory");
 
  466   KOKKOS_INLINE_FUNCTION 
static void verify(
const void*) {
 
  467     Kokkos::abort(
"ROCm code attempted to access HostSpace memory");
 
  // Specialization for <ROCmSpace, ROCmHostPinnedSpace>: value is true and
  // both verify() overloads have empty bodies.
  473 struct VerifyExecutionCanAccessMemorySpace<
 
  474     Kokkos::Experimental::ROCmSpace,
 
  475     Kokkos::Experimental::ROCmHostPinnedSpace> {
 
  476   enum { value = 
true };
 
  477   KOKKOS_INLINE_FUNCTION 
static void verify(
void) {}
 
  478   KOKKOS_INLINE_FUNCTION 
static void verify(
const void*) {}
 
  // Partial specialization parameterized on OtherSpace. Its first argument is
  // an enable_if that resolves to ROCmSpace only when OtherSpace is not
  // ROCmSpace. value is false, and both verify() overloads call Kokkos::abort
  // with a message about an unknown space.
  // NOTE(review): the second template argument is not shown next to this
  // header; presumably it is OtherSpace -- confirm.
  482 template <
class OtherSpace>
 
  483 struct VerifyExecutionCanAccessMemorySpace<
 
  484     typename std::enable_if<
 
  485         !is_same<Kokkos::Experimental::ROCmSpace, OtherSpace>::value,
 
  486         Kokkos::Experimental::ROCmSpace>::type,
 
  488   enum { value = 
false };
 
  489   KOKKOS_INLINE_FUNCTION 
static void verify(
void) {
 
  490     Kokkos::abort(
"ROCm code attempted to access unknown Space memory");
 
  493   KOKKOS_INLINE_FUNCTION 
static void verify(
const void*) {
 
  494     Kokkos::abort(
"ROCm code attempted to access unknown Space memory");
 
  // Specialization for <Kokkos::HostSpace, ROCmSpace>: value is false. The
  // verify() overloads forward to ROCmSpace::access_error(), and the pointer
  // overload passes its argument along.
  501 struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
 
  502                                            Kokkos::Experimental::ROCmSpace> {
 
  503   enum { value = 
false };
 
  504   inline static void verify(
void) {
 
  505     Kokkos::Experimental::ROCmSpace::access_error();
 
  507   inline static void verify(
const void* p) {
 
  508     Kokkos::Experimental::ROCmSpace::access_error(p);
 
  // Specialization for <Kokkos::HostSpace, ROCmHostPinnedSpace>: value is true
  // and both verify() overloads have empty bodies.
  514 struct VerifyExecutionCanAccessMemorySpace<
 
  515     Kokkos::HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace> {
 
  516   enum { value = 
true };
 
  517   KOKKOS_INLINE_FUNCTION 
static void verify(
void) {}
 
  518   KOKKOS_INLINE_FUNCTION 
static void verify(
const void*) {}
 
  // SharedAllocationRecord specialization for <ROCmSpace, void>; derives
  // publicly from SharedAllocationRecord<void, void>.
  530 class SharedAllocationRecord<Kokkos::Experimental::ROCmSpace, void>
 
  531     : 
public SharedAllocationRecord<void, void> {
 
  // Shorthand for the base record type.
  533   using RecordBase = SharedAllocationRecord<void, void>;
 
  // Copy construction and copy assignment are deleted.
  535   SharedAllocationRecord(
const SharedAllocationRecord&) = 
delete;
 
  536   SharedAllocationRecord& operator=(
const SharedAllocationRecord&) = 
delete;
 
  // Static function taking a RecordBase*; it is the default value of the
  // constructor's arg_dealloc parameter below.
  538   static void deallocate(RecordBase*);
 
  // Static RecordBase object (declaration only).
  541   static RecordBase s_root_record;
 
  // ROCmSpace instance stored by value as a const member.
  544   const Kokkos::Experimental::ROCmSpace m_space;
 
  // Destructor is user-declared; its definition is elsewhere.
  547   ~SharedAllocationRecord();
 
  // Constructor taking the space, a label, an allocation size and an optional
  // deallocation function that defaults to &deallocate.
  549   SharedAllocationRecord(
 
  550       const Kokkos::Experimental::ROCmSpace& arg_space,
 
  551       const std::string& arg_label, 
const size_t arg_alloc_size,
 
  552       const RecordBase::function_type arg_dealloc = &deallocate);
 
  // Returns a label as a std::string; presumably the one supplied at
  // construction -- TODO confirm in the definition.
  555   std::string get_label() 
const;
 
  // Static factory taking a space, label and size, and returning a pointer to
  // a SharedAllocationRecord.
  557   static SharedAllocationRecord* allocate(
 
  558       const Kokkos::Experimental::ROCmSpace& arg_space,
 
  559       const std::string& arg_label, 
const size_t arg_alloc_size);
 
  // Static helpers working on raw pointers. allocate_tracked takes a space,
  // label and size and returns a void*; reallocate_tracked takes a pointer
  // and a size and returns a void*; deallocate_tracked takes a pointer and
  // returns nothing. Only declarations appear here.
  562   static void* allocate_tracked(
 
  563       const Kokkos::Experimental::ROCmSpace& arg_space,
 
  564       const std::string& arg_label, 
const size_t arg_alloc_size);
 
  567   static void* reallocate_tracked(
void* 
const arg_alloc_ptr,
 
  568                                   const size_t arg_alloc_size);
 
  571   static void deallocate_tracked(
void* 
const arg_alloc_ptr);
 
  // Static lookup from a raw pointer to a SharedAllocationRecord pointer.
  573   static SharedAllocationRecord* get_record(
void* arg_alloc_ptr);
 
  // Static function taking an output stream, a ROCmSpace and a `detail` flag
  // that defaults to false.
  575   static void print_records(std::ostream&,
 
  576                             const Kokkos::Experimental::ROCmSpace&,
 
  577                             bool detail = 
false);
 
  // SharedAllocationRecord specialization for <ROCmHostPinnedSpace, void>;
  // derives publicly from SharedAllocationRecord<void, void>.
  581 class SharedAllocationRecord<Kokkos::Experimental::ROCmHostPinnedSpace, void>
 
  582     : 
public SharedAllocationRecord<void, void> {
 
  // Shorthand for the base record type.
  584   using RecordBase = SharedAllocationRecord<void, void>;
 
  // Copy construction and copy assignment are deleted.
  586   SharedAllocationRecord(
const SharedAllocationRecord&) = 
delete;
 
  587   SharedAllocationRecord& operator=(
const SharedAllocationRecord&) = 
delete;
 
  // Static function taking a RecordBase*; it is the default value of the
  // constructor's arg_dealloc parameter below.
  589   static void deallocate(RecordBase*);
 
  // Static RecordBase object (declaration only).
  592   static RecordBase s_root_record;
 
  // ROCmHostPinnedSpace instance stored by value as a const member.
  595   const Kokkos::Experimental::ROCmHostPinnedSpace m_space;
 
  // Destructor is user-declared; its definition is elsewhere.
  598   ~SharedAllocationRecord();
 
  // Default constructor: default-constructs the base record and m_space.
  599   SharedAllocationRecord() : RecordBase(), m_space() {}
 
  // Constructor taking the space, a label, an allocation size and an optional
  // deallocation function that defaults to &deallocate.
  601   SharedAllocationRecord(
 
  602       const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
 
  603       const std::string& arg_label, 
const size_t arg_alloc_size,
 
  604       const RecordBase::function_type arg_dealloc = &deallocate);
 
  // Returns a label as a std::string; presumably the one supplied at
  // construction -- TODO confirm in the definition.
  607   std::string get_label() 
const;
 
  // Static factory taking a space, label and size, and returning a pointer to
  // a SharedAllocationRecord.
  609   static SharedAllocationRecord* allocate(
 
  610       const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
 
  611       const std::string& arg_label, 
const size_t arg_alloc_size);
 
  // Static helpers working on raw pointers. allocate_tracked takes a space,
  // label and size and returns a void*; reallocate_tracked takes a pointer
  // and a size and returns a void*; deallocate_tracked takes a pointer and
  // returns nothing. Only declarations appear here.
  613   static void* allocate_tracked(
 
  614       const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
 
  615       const std::string& arg_label, 
const size_t arg_alloc_size);
 
  618   static void* reallocate_tracked(
void* 
const arg_alloc_ptr,
 
  619                                   const size_t arg_alloc_size);
 
  622   static void deallocate_tracked(
void* 
const arg_alloc_ptr);
 
  // Static lookup from a raw pointer to a SharedAllocationRecord pointer.
  624   static SharedAllocationRecord* get_record(
void* arg_alloc_ptr);
 
  // Static function taking an output stream, a ROCmHostPinnedSpace and a
  // `detail` flag that defaults to false.
  626   static void print_records(std::ostream&,
 
  627                             const Kokkos::Experimental::ROCmHostPinnedSpace&,
 
  628                             bool detail = 
false);
 
Memory management for host memory. 
Access relationship between DstMemorySpace and SrcMemorySpace.