45 #ifndef KOKKOS_ROCMSPACE_HPP
46 #define KOKKOS_ROCMSPACE_HPP
48 #include <Kokkos_Core_fwd.hpp>
50 #if defined(KOKKOS_ENABLE_ROCM)
56 #include <Kokkos_HostSpace.hpp>
61 namespace Experimental {
// Type aliases describing this memory space: the space itself, the
// Kokkos::Experimental::ROCm execution space, the Kokkos::Device built from
// those two, and `unsigned int` as the size type.
67 typedef ROCmSpace memory_space;
68 typedef Kokkos::Experimental::ROCm execution_space;
69 typedef Kokkos::Device<execution_space, memory_space> device_type;
71 typedef unsigned int size_type;
// All five special member functions (move/copy construction, move/copy
// assignment, destruction) are explicitly defaulted.
76 ROCmSpace(ROCmSpace&& rhs) =
default;
77 ROCmSpace(
const ROCmSpace& rhs) =
default;
78 ROCmSpace& operator=(ROCmSpace&& rhs) =
default;
79 ROCmSpace& operator=(
const ROCmSpace& rhs) =
default;
80 ~ROCmSpace() =
default;
// Declared here, defined out of line. Const member that takes the requested
// size and returns an untyped pointer.
// NOTE(review): presumably arg_alloc_size is a byte count and the result is
// ROCm device memory -- confirm against the definition.
83 void* allocate(
const size_t arg_alloc_size)
const;
// Declared here, defined out of line. Const member taking a pointer and a
// size.
// NOTE(review): presumably both must match an earlier allocate() call --
// confirm against the definition.
86 void deallocate(
void*
const arg_alloc_ptr,
const size_t arg_alloc_size)
const;
// Returns the static label m_name; constexpr, so usable in constant
// expressions.
89 static constexpr
const char* name() {
return m_name; };
// Error hooks declared here and defined out of line. In this header they are
// called from VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
// Kokkos::Experimental::ROCmSpace>::verify(), the specialization whose
// `value` is false. The second overload receives the pointer that was passed
// to verify(const void*).
93 static void access_error();
94 static void access_error(
const void*
const);
// Label returned by name().
99 static constexpr
const char* m_name =
"ROCm";
// Gives the ROCmSpace allocation-record specialization access to this
// class's non-public members.
100 friend class Kokkos::Impl::SharedAllocationRecord<
101 Kokkos::Experimental::ROCmSpace, void>;
// Free-function declarations (defined elsewhere). The two allocators take an
// unnamed int and return void*; rocm_device_free takes a void*.
// NOTE(review): the int is presumably a byte count. ROCmSpace::allocate takes
// a size_t, so confirm how requests larger than INT_MAX are handled.
108 void* rocm_device_allocate(
int);
109 void* rocm_hostpinned_allocate(
int);
110 void rocm_device_free(
void*);
// Set-up routine for the lock arrays; declared here, defined elsewhere.
118 void init_lock_arrays_rocm_space();
// The three accessors below share one shape: each returns an int* and takes a
// `deallocate` flag that defaults to false.
// NOTE(review): presumably passing true releases the corresponding array --
// confirm against the definitions.
127 int* atomic_lock_array_rocm_space_ptr(
bool deallocate =
false);
136 int* scratch_lock_array_rocm_space_ptr(
bool deallocate =
false);
144 int* threadid_lock_array_rocm_space_ptr(
bool deallocate =
false);
152 namespace Experimental {
// Memory space whose associated execution space is the one used by HostSpace
// (see the execution_space alias below).
// NOTE(review): by its name this is host memory pinned for ROCm device
// access -- confirm against the allocate() definition.
156 class ROCmHostPinnedSpace {
// Type aliases: HostSpace's execution space, this class as the memory space,
// the Kokkos::Device built from those two, and `unsigned int` as size type.
160 typedef HostSpace::execution_space execution_space;
161 typedef ROCmHostPinnedSpace memory_space;
162 typedef Kokkos::Device<execution_space, memory_space> device_type;
163 typedef unsigned int size_type;
// The default constructor is declared here and defined out of line; the
// remaining special member functions are explicitly defaulted.
167 ROCmHostPinnedSpace();
168 ROCmHostPinnedSpace(ROCmHostPinnedSpace&& rhs) =
default;
169 ROCmHostPinnedSpace(
const ROCmHostPinnedSpace& rhs) =
default;
170 ROCmHostPinnedSpace& operator=(ROCmHostPinnedSpace&& rhs) =
default;
171 ROCmHostPinnedSpace& operator=(
const ROCmHostPinnedSpace& rhs) =
default;
172 ~ROCmHostPinnedSpace() =
default;
// Declared here, defined out of line: takes the requested size and returns an
// untyped pointer.
175 void* allocate(
const size_t arg_alloc_size)
const;
// Declared here, defined out of line: takes a pointer and a size.
// NOTE(review): presumably both must match an earlier allocate() call.
178 void deallocate(
void*
const arg_alloc_ptr,
const size_t arg_alloc_size)
const;
// Returns the static label m_name.
181 static constexpr
const char* name() {
return m_name; };
// Label returned by name().
184 static constexpr
const char* m_name =
"ROCmHostPinned";
198 Kokkos::Experimental::ROCmSpace,
199 Kokkos::Experimental::ROCmSpace>::assignable,
// Access traits for the pair <Kokkos::HostSpace, ROCmSpace>: not assignable,
// not accessible, deep copy enabled.
// NOTE(review): the template arguments are presumably <destination/accessing
// space, source space> -- confirm against the primary template.
205 struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::Experimental::ROCmSpace> {
206 enum { assignable =
false };
207 enum { accessible =
false };
208 enum { deepcopy =
true };
// Access traits for the pair <Kokkos::HostSpace, ROCmHostPinnedSpace>: all
// three traits (assignable, accessible, deepcopy) are true.
212 struct MemorySpaceAccess<Kokkos::HostSpace,
213 Kokkos::Experimental::ROCmHostPinnedSpace> {
215 enum { assignable =
true };
216 enum { accessible =
true };
217 enum { deepcopy =
true };
// Trait values: not assignable, not accessible, deep copy enabled.
224 enum { assignable =
false };
225 enum { accessible =
false };
226 enum { deepcopy =
true };
// Access traits for the pair <ROCmSpace, ROCmHostPinnedSpace>: not
// assignable, but accessible, and deep copy enabled.
230 struct MemorySpaceAccess<Kokkos::Experimental::ROCmSpace,
231 Kokkos::Experimental::ROCmHostPinnedSpace> {
233 enum { assignable =
false };
234 enum { accessible =
true };
235 enum { deepcopy =
true };
// Access traits with ROCmHostPinnedSpace as the first template argument: not
// assignable, but accessible, and deep copy enabled.
243 struct MemorySpaceAccess<Kokkos::Experimental::ROCmHostPinnedSpace,
245 enum { assignable =
false };
246 enum { accessible =
true };
247 enum { deepcopy =
true };
// Access traits for the pair <ROCmHostPinnedSpace, ROCmSpace>: not
// assignable, not accessible, deep copy enabled.
251 struct MemorySpaceAccess<Kokkos::Experimental::ROCmHostPinnedSpace,
252 Kokkos::Experimental::ROCmSpace> {
253 enum { assignable =
false };
254 enum { accessible =
false };
255 enum { deepcopy =
true };
// Declaration only (defined elsewhere). Takes a destination pointer, a source
// pointer and a size n, and returns an hc::completion_future. The
// execution-space constructors of several DeepCopy specializations in this
// header call it and store the result in a local `fut`.
// NOTE(review): presumably n is a byte count and the copy is asynchronous
// until the future is waited on -- confirm against the definition.
269 hc::completion_future DeepCopyAsyncROCm(
void* dst,
const void* src,
size_t n);
// DeepCopy specialization for <ROCmSpace, ROCmSpace> with the
// Kokkos::Experimental::ROCm execution space. The constructors are declared
// here, not defined: one takes (dst, src, size), the other additionally takes
// a ROCm execution-space instance as its first argument.
272 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
273 Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm> {
274 DeepCopy(
void* dst,
const void* src,
size_t);
275 DeepCopy(
const Kokkos::Experimental::ROCm&,
void* dst,
const void* src,
// DeepCopy specialization for <ROCmSpace, HostSpace> with the
// Kokkos::Experimental::ROCm execution space. The constructors are declared
// here, not defined: one takes (dst, src, size), the other additionally takes
// a ROCm execution-space instance as its first argument.
// NOTE(review): the first two template arguments are presumably
// <destination space, source space> -- confirm against the primary template.
280 struct DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
281 Kokkos::Experimental::ROCm> {
282 DeepCopy(
void* dst,
const void* src,
size_t);
283 DeepCopy(
const Kokkos::Experimental::ROCm&,
void* dst,
const void* src,
// DeepCopy specialization for <HostSpace, ROCmSpace> with the
// Kokkos::Experimental::ROCm execution space. The constructors are declared
// here, not defined: one takes (dst, src, size), the other additionally takes
// a ROCm execution-space instance as its first argument.
288 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
289 Kokkos::Experimental::ROCm> {
290 DeepCopy(
void* dst,
const void* src,
size_t);
291 DeepCopy(
const Kokkos::Experimental::ROCm&,
void* dst,
const void* src,
// DeepCopy for <ROCmSpace, ROCmSpace> with an arbitrary ExecutionSpace.
295 template <
class ExecutionSpace>
296 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
297 Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
// Forwards to the Kokkos::Experimental::ROCm specialization for the same
// pair of spaces; the temporary is discarded via the (void) cast.
298 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
299 (void)DeepCopy<Kokkos::Experimental::ROCmSpace,
300 Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm>(
// Execution-space overload: starts the copy through DeepCopyAsyncROCm and
// keeps the returned completion_future in `fut`.
// NOTE(review): confirm how `exec` and `fut` are used after this call.
304 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
307 hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
// DeepCopy for <ROCmSpace, HostSpace> with an arbitrary ExecutionSpace.
313 template <
class ExecutionSpace>
314 struct DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace, ExecutionSpace> {
// Forwards to the Kokkos::Experimental::ROCm specialization for the same
// pair of spaces; the temporary is discarded via the (void) cast.
315 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
316 (void)DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
317 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: constructs a temporary DeepCopy from
// (dst, src, n), i.e. it reuses the three-argument constructor above.
// NOTE(review): confirm how `exec` is used before this call.
320 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
323 DeepCopy(dst, src, n);
// DeepCopy for <HostSpace, ROCmSpace> with an arbitrary ExecutionSpace.
327 template <
class ExecutionSpace>
328 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
// Forwards to the Kokkos::Experimental::ROCm specialization for the same
// pair of spaces; the temporary is discarded via the (void) cast.
329 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
330 (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
331 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: constructs a temporary DeepCopy from
// (dst, src, n), i.e. it reuses the three-argument constructor above.
// NOTE(review): confirm how `exec` is used before this call.
334 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
337 DeepCopy(dst, src, n);
// DeepCopy specialization for <ROCmHostPinnedSpace, ROCmHostPinnedSpace> with
// the Kokkos::Experimental::ROCm execution space. The constructors are
// declared here, not defined: one takes (dst, src, size), the other
// additionally takes a ROCm execution-space instance as its first argument.
342 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
343 Kokkos::Experimental::ROCmHostPinnedSpace,
344 Kokkos::Experimental::ROCm> {
345 DeepCopy(
void* dst,
const void* src,
size_t);
346 DeepCopy(
const Kokkos::Experimental::ROCm&,
void* dst,
const void* src,
// DeepCopy specialization for <ROCmHostPinnedSpace, HostSpace> with the
// Kokkos::Experimental::ROCm execution space. The constructors are declared
// here, not defined: one takes (dst, src, size), the other additionally takes
// a ROCm execution-space instance as its first argument.
351 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
352 Kokkos::Experimental::ROCm> {
353 DeepCopy(
void* dst,
const void* src,
size_t);
354 DeepCopy(
const Kokkos::Experimental::ROCm&,
void* dst,
const void* src,
// DeepCopy specialization for <HostSpace, ROCmHostPinnedSpace> with the
// Kokkos::Experimental::ROCm execution space. The constructors are declared
// here, not defined: one takes (dst, src, size), the other additionally takes
// a ROCm execution-space instance as its first argument.
359 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
360 Kokkos::Experimental::ROCm> {
361 DeepCopy(
void* dst,
const void* src,
size_t);
362 DeepCopy(
const Kokkos::Experimental::ROCm&,
void* dst,
const void* src,
// DeepCopy for <ROCmSpace, ROCmHostPinnedSpace> with an arbitrary
// ExecutionSpace.
366 template <
class ExecutionSpace>
367 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
368 Kokkos::Experimental::ROCmHostPinnedSpace, ExecutionSpace> {
// Delegates to the <ROCmSpace, HostSpace, ROCm> specialization, i.e. the
// host-pinned source is handled through the HostSpace code path.
369 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
370 (void)DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
371 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: starts the copy through DeepCopyAsyncROCm and
// keeps the returned completion_future in `fut`.
// NOTE(review): confirm how `exec` and `fut` are used after this call.
374 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
377 hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
// DeepCopy for <ROCmHostPinnedSpace, ROCmSpace> with an arbitrary
// ExecutionSpace.
383 template <
class ExecutionSpace>
384 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
385 Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
// Delegates to the <HostSpace, ROCmSpace, ROCm> specialization, i.e. the
// host-pinned side is handled through the HostSpace code path.
386 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
387 (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
388 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: starts the copy through DeepCopyAsyncROCm and
// keeps the returned completion_future in `fut`.
// NOTE(review): confirm how `exec` and `fut` are used after this call.
391 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
394 hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
// DeepCopy for <ROCmHostPinnedSpace, ROCmHostPinnedSpace> with an arbitrary
// ExecutionSpace.
400 template <
class ExecutionSpace>
401 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
402 Kokkos::Experimental::ROCmHostPinnedSpace, ExecutionSpace> {
// Forwards to the Kokkos::Experimental::ROCm specialization for the same
// pair of spaces; the temporary is discarded via the (void) cast.
403 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
404 (void)DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
405 Kokkos::Experimental::ROCmHostPinnedSpace,
406 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: constructs a temporary DeepCopy from
// (dst, src, n), i.e. it reuses the three-argument constructor above.
// NOTE(review): confirm how `exec` is used before this call.
409 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
415 DeepCopy(dst, src, n);
// DeepCopy with ROCmHostPinnedSpace and HostSpace as the first two template
// arguments, templated on ExecutionSpace.
419 template <
class ExecutionSpace>
420 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
// Forwards to the <ROCmHostPinnedSpace, HostSpace, ROCm> specialization; the
// temporary is discarded via the (void) cast.
422 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
423 (void)DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
424 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: constructs a temporary DeepCopy from
// (dst, src, n), i.e. it reuses the three-argument constructor above.
// NOTE(review): confirm how `exec` is used before this call.
427 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
430 DeepCopy(dst, src, n);
// DeepCopy with HostSpace and ROCmHostPinnedSpace as the first two template
// arguments, templated on ExecutionSpace.
434 template <
class ExecutionSpace>
435 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
// Forwards to the <HostSpace, ROCmHostPinnedSpace, ROCm> specialization; the
// temporary is discarded via the (void) cast.
437 inline DeepCopy(
void* dst,
const void* src,
size_t n) {
438 (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
439 Kokkos::Experimental::ROCm>(dst, src, n);
// Execution-space overload: constructs a temporary DeepCopy from
// (dst, src, n), i.e. it reuses the three-argument constructor above.
// NOTE(review): confirm how `exec` is used before this call.
442 inline DeepCopy(
const ExecutionSpace& exec,
void* dst,
const void* src,
445 DeepCopy(dst, src, n);
// Access check with ROCmSpace as the first template argument. `value` is
// false, and both verify() overloads abort with a message saying that ROCm
// code tried to access HostSpace memory.
459 struct VerifyExecutionCanAccessMemorySpace<Kokkos::Experimental::ROCmSpace,
461 enum { value =
false };
462 KOKKOS_INLINE_FUNCTION
static void verify(
void) {
463 Kokkos::abort(
"ROCm code attempted to access HostSpace memory");
// Pointer overload: the pointer is unnamed and unused; same abort message.
466 KOKKOS_INLINE_FUNCTION
static void verify(
const void*) {
467 Kokkos::abort(
"ROCm code attempted to access HostSpace memory");
// Access check for <ROCmSpace, ROCmHostPinnedSpace>: `value` is true and both
// verify() overloads are empty, so the check never reports an error.
473 struct VerifyExecutionCanAccessMemorySpace<
474 Kokkos::Experimental::ROCmSpace,
475 Kokkos::Experimental::ROCmHostPinnedSpace> {
476 enum { value =
true };
477 KOKKOS_INLINE_FUNCTION
static void verify(
void) {}
478 KOKKOS_INLINE_FUNCTION
static void verify(
const void*) {}
// Catch-all access check templated on OtherSpace. The enable_if makes the
// first template argument ROCmSpace only when OtherSpace is not ROCmSpace
// itself. `value` is false and both verify() overloads abort with an
// "unknown Space" message.
482 template <
class OtherSpace>
483 struct VerifyExecutionCanAccessMemorySpace<
484 typename std::enable_if<
485 !is_same<Kokkos::Experimental::ROCmSpace, OtherSpace>::value,
486 Kokkos::Experimental::ROCmSpace>::type,
488 enum { value =
false };
489 KOKKOS_INLINE_FUNCTION
static void verify(
void) {
490 Kokkos::abort(
"ROCm code attempted to access unknown Space memory");
// Pointer overload: the pointer is unnamed and unused; same abort message.
493 KOKKOS_INLINE_FUNCTION
static void verify(
const void*) {
494 Kokkos::abort(
"ROCm code attempted to access unknown Space memory");
// Access check for <Kokkos::HostSpace, ROCmSpace>: `value` is false and both
// verify() overloads report through ROCmSpace::access_error. These overloads
// are plain `inline static`, not KOKKOS_INLINE_FUNCTION.
501 struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
502 Kokkos::Experimental::ROCmSpace> {
503 enum { value =
false };
504 inline static void verify(
void) {
505 Kokkos::Experimental::ROCmSpace::access_error();
// Pointer overload: passes the pointer p on to access_error.
507 inline static void verify(
const void* p) {
508 Kokkos::Experimental::ROCmSpace::access_error(p);
// Access check for <Kokkos::HostSpace, ROCmHostPinnedSpace>: `value` is true
// and both verify() overloads are empty, so the check never reports an error.
514 struct VerifyExecutionCanAccessMemorySpace<
515 Kokkos::HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace> {
516 enum { value =
true };
517 KOKKOS_INLINE_FUNCTION
static void verify(
void) {}
518 KOKKOS_INLINE_FUNCTION
static void verify(
const void*) {}
// Allocation record specialization for ROCmSpace. Derives publicly from
// SharedAllocationRecord<void, void>, aliased below as RecordBase.
530 class SharedAllocationRecord<Kokkos::Experimental::ROCmSpace, void>
531 :
public SharedAllocationRecord<void, void> {
533 typedef SharedAllocationRecord<void, void> RecordBase;
// Records cannot be copied: copy construction and copy assignment are deleted.
535 SharedAllocationRecord(
const SharedAllocationRecord&) =
delete;
536 SharedAllocationRecord& operator=(
const SharedAllocationRecord&) =
delete;
// Static deallocation routine; also the default value of the constructor's
// arg_dealloc parameter.
538 static void deallocate(RecordBase*);
// NOTE(review): presumably the root of the list of records for this memory
// space -- confirm against the definition.
541 static RecordBase s_root_record;
// Copy of the memory space this record was created with (const member).
544 const Kokkos::Experimental::ROCmSpace m_space;
547 ~SharedAllocationRecord();
// Constructor taking the memory space, a label, the allocation size and the
// deallocation function (defaults to &deallocate). Defined out of line.
549 SharedAllocationRecord(
550 const Kokkos::Experimental::ROCmSpace& arg_space,
551 const std::string& arg_label,
const size_t arg_alloc_size,
552 const RecordBase::function_type arg_dealloc = &deallocate);
// Returns the record's label by value.
555 std::string get_label()
const;
// Static factory: takes space, label and size, and returns a pointer to a
// record.
// NOTE(review): ownership of the returned record should be confirmed.
557 static SharedAllocationRecord* allocate(
558 const Kokkos::Experimental::ROCmSpace& arg_space,
559 const std::string& arg_label,
const size_t arg_alloc_size);
// Pointer-based interface: allocate_tracked returns an untyped pointer for a
// labelled allocation, reallocate_tracked takes an existing pointer and a new
// size, and deallocate_tracked takes the pointer to release.
562 static void* allocate_tracked(
563 const Kokkos::Experimental::ROCmSpace& arg_space,
564 const std::string& arg_label,
const size_t arg_alloc_size);
567 static void* reallocate_tracked(
void*
const arg_alloc_ptr,
568 const size_t arg_alloc_size);
571 static void deallocate_tracked(
void*
const arg_alloc_ptr);
// Maps an allocation pointer to a record pointer.
573 static SharedAllocationRecord* get_record(
void* arg_alloc_ptr);
// Writes records to the given stream; `detail` defaults to false.
575 static void print_records(std::ostream&,
576 const Kokkos::Experimental::ROCmSpace&,
577 bool detail =
false);
// Allocation record specialization for ROCmHostPinnedSpace. Derives publicly
// from SharedAllocationRecord<void, void>, aliased below as RecordBase.
581 class SharedAllocationRecord<Kokkos::Experimental::ROCmHostPinnedSpace, void>
582 :
public SharedAllocationRecord<void, void> {
584 typedef SharedAllocationRecord<void, void> RecordBase;
// Records cannot be copied: copy construction and copy assignment are deleted.
586 SharedAllocationRecord(
const SharedAllocationRecord&) =
delete;
587 SharedAllocationRecord& operator=(
const SharedAllocationRecord&) =
delete;
// Static deallocation routine; also the default value of the constructor's
// arg_dealloc parameter.
589 static void deallocate(RecordBase*);
// NOTE(review): presumably the root of the list of records for this memory
// space -- confirm against the definition.
592 static RecordBase s_root_record;
// Copy of the memory space this record was created with (const member).
595 const Kokkos::Experimental::ROCmHostPinnedSpace m_space;
598 ~SharedAllocationRecord();
// Default constructor: default-constructs the base record and the space.
599 SharedAllocationRecord() : RecordBase(), m_space() {}
// Constructor taking the memory space, a label, the allocation size and the
// deallocation function (defaults to &deallocate). Defined out of line.
601 SharedAllocationRecord(
602 const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
603 const std::string& arg_label,
const size_t arg_alloc_size,
604 const RecordBase::function_type arg_dealloc = &deallocate);
// Returns the record's label by value.
607 std::string get_label()
const;
// Static factory: takes space, label and size, and returns a pointer to a
// record.
// NOTE(review): ownership of the returned record should be confirmed.
609 static SharedAllocationRecord* allocate(
610 const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
611 const std::string& arg_label,
const size_t arg_alloc_size);
// Pointer-based interface: allocate_tracked returns an untyped pointer for a
// labelled allocation, reallocate_tracked takes an existing pointer and a new
// size, and deallocate_tracked takes the pointer to release.
613 static void* allocate_tracked(
614 const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
615 const std::string& arg_label,
const size_t arg_alloc_size);
618 static void* reallocate_tracked(
void*
const arg_alloc_ptr,
619 const size_t arg_alloc_size);
622 static void deallocate_tracked(
void*
const arg_alloc_ptr);
// Maps an allocation pointer to a record pointer.
624 static SharedAllocationRecord* get_record(
void* arg_alloc_ptr);
// Writes records to the given stream; `detail` defaults to false.
626 static void print_records(std::ostream&,
627 const Kokkos::Experimental::ROCmHostPinnedSpace&,
628 bool detail =
false);
Memory management for host memory.
Access relationship between DstMemorySpace and SrcMemorySpace.