Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_ROCmSpace.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_ROCMSPACE_HPP
46 #define KOKKOS_ROCMSPACE_HPP
47 
48 #include <Kokkos_Core_fwd.hpp>
49 
50 #if defined(KOKKOS_ENABLE_ROCM)
51 
52 #include <iosfwd>
53 #include <typeinfo>
54 #include <string>
55 
56 #include <Kokkos_HostSpace.hpp>
57 
58 /*--------------------------------------------------------------------------*/
59 
60 namespace Kokkos {
61 namespace Experimental {
// Memory space type for the ROCm backend (name() reports "ROCm").
// Only declarations are visible in this header; the default constructor,
// allocate/deallocate and access_error are defined elsewhere.
64 class ROCmSpace {
65  public:
// Tag typedefs: the class is its own memory_space, is paired with the
// Kokkos::Experimental::ROCm execution space, and uses unsigned int as
// size_type.
67  typedef ROCmSpace memory_space;
68  typedef Kokkos::Experimental::ROCm execution_space;
69  typedef Kokkos::Device<execution_space, memory_space> device_type;
70 
71  typedef unsigned int size_type;
72 
73  /*--------------------------------*/
74 
// The default constructor is user-declared (defined out of line); copy, move
// and destruction are compiler-generated.
75  ROCmSpace();
76  ROCmSpace(ROCmSpace&& rhs) = default;
77  ROCmSpace(const ROCmSpace& rhs) = default;
78  ROCmSpace& operator=(ROCmSpace&& rhs) = default;
79  ROCmSpace& operator=(const ROCmSpace& rhs) = default;
80  ~ROCmSpace() = default;
81 
// Returns a pointer to an allocation of arg_alloc_size (presumably a byte
// count of device memory -- confirm against the definition).
83  void* allocate(const size_t arg_alloc_size) const;
84 
// Releases arg_alloc_ptr; arg_alloc_size is presumably the size given to the
// matching allocate() call -- TODO confirm.
86  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
87 
// Compile-time label of this memory space: returns m_name.
89  static constexpr const char* name() { return m_name; };
90 
91  /*--------------------------------*/
// Error hooks called by
// VerifyExecutionCanAccessMemorySpace<HostSpace, ROCmSpace>::verify; the
// pointer overload receives the pointer that was passed to verify().
// What they do on error is defined elsewhere.
93  static void access_error();
94  static void access_error(const void* const);
95 
96  private:
// NOTE(review): presumably the ROCm device ordinal; nothing visible in this
// header initializes it -- confirm in the constructor definition.
97  int m_device;
98 
99  static constexpr const char* m_name = "ROCm";
// Grants the allocation-record specialization for this space access to the
// private members above.
100  friend class Kokkos::Impl::SharedAllocationRecord<
101  Kokkos::Experimental::ROCmSpace, void>;
102 };
103 
104 } // namespace Experimental
105 
106 namespace Impl {
107 
// Low-level ROCm allocation helpers (declarations only; defined elsewhere).
// NOTE(review): both allocators take an `int`, presumably the byte count,
// whereas ROCmSpace::allocate takes size_t -- confirm large sizes are not
// truncated on the way here.
108 void* rocm_device_allocate(int);
109 void* rocm_hostpinned_allocate(int);
110 void rocm_device_free(void*);
111 
// Presumably sets up the lock arrays returned by the accessors below --
// confirm against the definition.
118 void init_lock_arrays_rocm_space();
119 
// Accessors for the atomic / scratch / thread-id lock arrays; each returns an
// int*. The `deallocate` flag (default false) presumably requests teardown of
// the array rather than plain access -- TODO confirm.
127 int* atomic_lock_array_rocm_space_ptr(bool deallocate = false);
128 
136 int* scratch_lock_array_rocm_space_ptr(bool deallocate = false);
137 
144 int* threadid_lock_array_rocm_space_ptr(bool deallocate = false);
145 } // namespace Impl
146 } // namespace Kokkos
147 
148 /*--------------------------------------------------------------------------*/
149 /*--------------------------------------------------------------------------*/
150 
151 namespace Kokkos {
152 namespace Experimental {
// Memory space type labelled "ROCmHostPinned". Its execution_space is the
// one HostSpace uses, and the MemorySpaceAccess specializations in this file
// mark it accessible from both HostSpace and ROCmSpace.
// Only declarations are visible here; definitions live elsewhere.
156 class ROCmHostPinnedSpace {
157  public:
159 
// Tag typedefs: executes on HostSpace's execution space; the class is its
// own memory_space.
160  typedef HostSpace::execution_space execution_space;
161  typedef ROCmHostPinnedSpace memory_space;
162  typedef Kokkos::Device<execution_space, memory_space> device_type;
163  typedef unsigned int size_type;
164 
165  /*--------------------------------*/
166 
// The default constructor is user-declared (defined out of line); copy, move
// and destruction are compiler-generated.
167  ROCmHostPinnedSpace();
168  ROCmHostPinnedSpace(ROCmHostPinnedSpace&& rhs) = default;
169  ROCmHostPinnedSpace(const ROCmHostPinnedSpace& rhs) = default;
170  ROCmHostPinnedSpace& operator=(ROCmHostPinnedSpace&& rhs) = default;
171  ROCmHostPinnedSpace& operator=(const ROCmHostPinnedSpace& rhs) = default;
172  ~ROCmHostPinnedSpace() = default;
173 
// Returns a pointer to an allocation of arg_alloc_size (presumably bytes of
// pinned host memory -- confirm against the definition).
175  void* allocate(const size_t arg_alloc_size) const;
176 
// Releases arg_alloc_ptr; arg_alloc_size is presumably the size given to the
// matching allocate() call -- TODO confirm.
178  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
179 
// Compile-time label of this memory space: returns m_name.
181  static constexpr const char* name() { return m_name; };
182 
183  private:
184  static constexpr const char* m_name = "ROCmHostPinned";
185 
186  /*--------------------------------*/
187 };
188 } // namespace Experimental
189 } // namespace Kokkos
190 
191 /*--------------------------------------------------------------------------*/
192 /*--------------------------------------------------------------------------*/
193 
194 namespace Kokkos {
195 namespace Impl {
196 
// Compile-time check that MemorySpaceAccess reports ROCmSpace as assignable
// from itself. The message string is empty, so a failure only reports the
// failing expression. No ROCmSpace/ROCmSpace specialization is declared in
// this header, so the value comes from a definition outside it.
197 static_assert(Kokkos::Impl::MemorySpaceAccess<
198  Kokkos::Experimental::ROCmSpace,
199  Kokkos::Experimental::ROCmSpace>::assignable,
200  "");
201 
202 //----------------------------------------
203 
// Access traits for the pair <HostSpace, ROCmSpace> (presumably destination
// and source space, in that order): neither assignable nor accessible, but
// deep copy is supported -- matching DeepCopy specializations are declared
// further down in this header.
204 template <>
205 struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::Experimental::ROCmSpace> {
206  enum { assignable = false };
207  enum { accessible = false };
208  enum { deepcopy = true };
209 };
210 
// Access traits for the pair <HostSpace, ROCmHostPinnedSpace>: all three
// relations hold. Both spaces share the same execution space
// (ROCmHostPinnedSpace::execution_space is HostSpace::execution_space).
211 template <>
212 struct MemorySpaceAccess<Kokkos::HostSpace,
213  Kokkos::Experimental::ROCmHostPinnedSpace> {
214  // HostSpace::execution_space == ROCmHostPinnedSpace::execution_space
215  enum { assignable = true };
216  enum { accessible = true };
217  enum { deepcopy = true };
218 };
219 
220 //----------------------------------------
221 
// Access traits for the pair <ROCmSpace, HostSpace>: neither assignable nor
// accessible (the VerifyExecutionCanAccessMemorySpace specialization for this
// pair aborts), but deep copy is supported.
222 template <>
223 struct MemorySpaceAccess<Kokkos::Experimental::ROCmSpace, Kokkos::HostSpace> {
224  enum { assignable = false };
225  enum { accessible = false };
226  enum { deepcopy = true };
227 };
228 
// Access traits for the pair <ROCmSpace, ROCmHostPinnedSpace>: accessible and
// deep-copyable, but not assignable because the two spaces declare different
// execution spaces.
229 template <>
230 struct MemorySpaceAccess<Kokkos::Experimental::ROCmSpace,
231  Kokkos::Experimental::ROCmHostPinnedSpace> {
232  // ROCmSpace::execution_space != ROCmHostPinnedSpace::execution_space
233  enum { assignable = false };
234  enum { accessible = true }; // ROCmSpace::execution_space
235  enum { deepcopy = true };
236 };
237 
238 //----------------------------------------
239 // ROCmHostPinnedSpace::execution_space == HostSpace::execution_space
240 // ROCmHostPinnedSpace accessible to both ROCm and Host
241 
// Access traits for the pair <ROCmHostPinnedSpace, Kokkos::HostSpace>.
// Both spaces use the host execution space, so HostSpace memory is
// accessible; it is not assignable because HostSpace memory cannot be
// reached from ROCm, which a ROCmHostPinnedSpace allocation must allow.
// Deep copy is supported.
242 template <>
243 struct MemorySpaceAccess<Kokkos::Experimental::ROCmHostPinnedSpace,
244  Kokkos::HostSpace> {
245  enum { assignable = false }; // Cannot access from ROCm
246  enum { accessible = true }; // ROCmHostPinnedSpace::execution_space
247  enum { deepcopy = true };
248 };
249 
// Access traits for the pair <ROCmHostPinnedSpace, ROCmSpace>: ROCmSpace
// memory is neither assignable nor accessible here because it cannot be
// reached from the host; deep copy is supported.
250 template <>
251 struct MemorySpaceAccess<Kokkos::Experimental::ROCmHostPinnedSpace,
252  Kokkos::Experimental::ROCmSpace> {
253  enum { assignable = false }; // Cannot access from Host
254  enum { accessible = false };
255  enum { deepcopy = true };
256 };
257 
258 }; // namespace Impl
259 //----------------------------------------
260 
261 } // namespace Kokkos
262 
263 /*--------------------------------------------------------------------------*/
264 /*--------------------------------------------------------------------------*/
265 
266 namespace Kokkos {
267 namespace Impl {
268 
// Starts a copy of n (presumably bytes) from src to dst and returns a future
// that the callers in this header wait() on. Defined elsewhere.
// NOTE(review): hc::completion_future is used without an hc header being
// included directly in this file -- presumably pulled in by
// Kokkos_Core_fwd.hpp; confirm.
269 hc::completion_future DeepCopyAsyncROCm(void* dst, const void* src, size_t n);
270 
// DeepCopy for <ROCmSpace, ROCmSpace> on the ROCm execution space
// (presumably <destination, source, execution space>). Constructors are only
// declared here; the second overload additionally takes a ROCm execution
// space instance.
271 template <>
272 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
273  Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm> {
274  DeepCopy(void* dst, const void* src, size_t);
275  DeepCopy(const Kokkos::Experimental::ROCm&, void* dst, const void* src,
276  size_t);
277 };
278 
// DeepCopy for <ROCmSpace, HostSpace> on the ROCm execution space.
// Constructors are only declared here; the second overload additionally
// takes a ROCm execution space instance.
279 template <>
280 struct DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
281  Kokkos::Experimental::ROCm> {
282  DeepCopy(void* dst, const void* src, size_t);
283  DeepCopy(const Kokkos::Experimental::ROCm&, void* dst, const void* src,
284  size_t);
285 };
286 
// DeepCopy for <HostSpace, ROCmSpace> on the ROCm execution space.
// Constructors are only declared here; the second overload additionally
// takes a ROCm execution space instance.
287 template <>
288 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
289  Kokkos::Experimental::ROCm> {
290  DeepCopy(void* dst, const void* src, size_t);
291  DeepCopy(const Kokkos::Experimental::ROCm&, void* dst, const void* src,
292  size_t);
293 };
294 
// DeepCopy for <ROCmSpace, ROCmSpace> with any other execution space.
295 template <class ExecutionSpace>
296 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
297  Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
// Forwards to the ROCm-execution-space specialization by constructing a
// temporary of it; the temporary itself is discarded (hence the void cast).
298  inline DeepCopy(void* dst, const void* src, size_t n) {
299  (void)DeepCopy<Kokkos::Experimental::ROCmSpace,
300  Kokkos::Experimental::ROCmSpace, Kokkos::Experimental::ROCm>(
301  dst, src, n);
302  }
303 
// Fences `exec` first, then starts DeepCopyAsyncROCm and waits on the
// returned future before returning.
304  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
305  size_t n) {
306  exec.fence();
307  hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
308  fut.wait();
309  // DeepCopy (dst,src,n);
310  }
311 };
312 
// DeepCopy for <ROCmSpace, HostSpace> with any other execution space.
313 template <class ExecutionSpace>
314 struct DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace, ExecutionSpace> {
// Forwards to the ROCm-execution-space specialization for the same pair of
// spaces; the temporary is discarded.
315  inline DeepCopy(void* dst, const void* src, size_t n) {
316  (void)DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
317  Kokkos::Experimental::ROCm>(dst, src, n);
318  }
319 
// Fences `exec`, then performs the copy by constructing a temporary of this
// same specialization through the three-argument constructor above.
320  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
321  size_t n) {
322  exec.fence();
323  DeepCopy(dst, src, n);
324  }
325 };
326 
// DeepCopy for <HostSpace, ROCmSpace> with any other execution space.
327 template <class ExecutionSpace>
328 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
// Forwards to the ROCm-execution-space specialization for the same pair of
// spaces; the temporary is discarded.
329  inline DeepCopy(void* dst, const void* src, size_t n) {
330  (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
331  Kokkos::Experimental::ROCm>(dst, src, n);
332  }
333 
// Fences `exec`, then performs the copy by constructing a temporary of this
// same specialization through the three-argument constructor above.
334  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
335  size_t n) {
336  exec.fence();
337  DeepCopy(dst, src, n);
338  }
339 };
340 
// DeepCopy for <ROCmHostPinnedSpace, ROCmHostPinnedSpace> on the ROCm
// execution space. Constructors are only declared here; the second overload
// additionally takes a ROCm execution space instance.
341 template <>
342 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
343  Kokkos::Experimental::ROCmHostPinnedSpace,
344  Kokkos::Experimental::ROCm> {
345  DeepCopy(void* dst, const void* src, size_t);
346  DeepCopy(const Kokkos::Experimental::ROCm&, void* dst, const void* src,
347  size_t);
348 };
349 
// DeepCopy for <ROCmHostPinnedSpace, HostSpace> on the ROCm execution space.
// Constructors are only declared here; the second overload additionally
// takes a ROCm execution space instance.
350 template <>
351 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
352  Kokkos::Experimental::ROCm> {
353  DeepCopy(void* dst, const void* src, size_t);
354  DeepCopy(const Kokkos::Experimental::ROCm&, void* dst, const void* src,
355  size_t);
356 };
357 
// DeepCopy for <HostSpace, ROCmHostPinnedSpace> on the ROCm execution space.
// Constructors are only declared here; the second overload additionally
// takes a ROCm execution space instance.
358 template <>
359 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
360  Kokkos::Experimental::ROCm> {
361  DeepCopy(void* dst, const void* src, size_t);
362  DeepCopy(const Kokkos::Experimental::ROCm&, void* dst, const void* src,
363  size_t);
364 };
365 
// DeepCopy for <ROCmSpace, ROCmHostPinnedSpace> with any execution space.
366 template <class ExecutionSpace>
367 struct DeepCopy<Kokkos::Experimental::ROCmSpace,
368  Kokkos::Experimental::ROCmHostPinnedSpace, ExecutionSpace> {
// Reuses the <ROCmSpace, HostSpace, ROCm> specialization, i.e. the pinned
// buffer is handled the same way as a HostSpace buffer.
369  inline DeepCopy(void* dst, const void* src, size_t n) {
370  (void)DeepCopy<Kokkos::Experimental::ROCmSpace, HostSpace,
371  Kokkos::Experimental::ROCm>(dst, src, n);
372  }
373 
// Fences `exec` first, then starts DeepCopyAsyncROCm and waits on the
// returned future before returning.
374  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
375  size_t n) {
376  exec.fence();
377  hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
378  fut.wait();
379  // DeepCopyROCm (dst,src,n);
380  }
381 };
382 
// DeepCopy for <ROCmHostPinnedSpace, ROCmSpace> with any execution space.
383 template <class ExecutionSpace>
384 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
385  Kokkos::Experimental::ROCmSpace, ExecutionSpace> {
// Reuses the <HostSpace, ROCmSpace, ROCm> specialization, i.e. the pinned
// buffer is handled the same way as a HostSpace buffer.
386  inline DeepCopy(void* dst, const void* src, size_t n) {
387  (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmSpace,
388  Kokkos::Experimental::ROCm>(dst, src, n);
389  }
390 
// Fences `exec` first, then starts DeepCopyAsyncROCm and waits on the
// returned future before returning.
391  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
392  size_t n) {
393  exec.fence();
394  hc::completion_future fut = DeepCopyAsyncROCm(dst, src, n);
395  fut.wait();
396  // DeepCopyROCm (dst,src,n);
397  }
398 };
399 
// DeepCopy for <ROCmHostPinnedSpace, ROCmHostPinnedSpace> with any other
// execution space.
400 template <class ExecutionSpace>
401 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
402  Kokkos::Experimental::ROCmHostPinnedSpace, ExecutionSpace> {
// Forwards to the ROCm-execution-space specialization for the same pair of
// spaces; the temporary is discarded.
403  inline DeepCopy(void* dst, const void* src, size_t n) {
404  (void)DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace,
405  Kokkos::Experimental::ROCmHostPinnedSpace,
406  Kokkos::Experimental::ROCm>(dst, src, n);
407  }
408 
// Fences `exec`, then copies through the three-argument constructor above.
// The asynchronous variant is left commented out in the source.
409  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
410  size_t n) {
411  exec.fence();
412  // hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
413  // fut.wait();
414  // DeepCopyAsyncROCm (dst,src,n);
415  DeepCopy(dst, src, n);
416  }
417 };
418 
// DeepCopy for <ROCmHostPinnedSpace, HostSpace> with any other execution
// space.
419 template <class ExecutionSpace>
420 struct DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
421  ExecutionSpace> {
// Forwards to the ROCm-execution-space specialization for the same pair of
// spaces; the temporary is discarded.
422  inline DeepCopy(void* dst, const void* src, size_t n) {
423  (void)DeepCopy<Kokkos::Experimental::ROCmHostPinnedSpace, HostSpace,
424  Kokkos::Experimental::ROCm>(dst, src, n);
425  }
426 
// Fences `exec`, then copies through the three-argument constructor above.
427  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
428  size_t n) {
429  exec.fence();
430  DeepCopy(dst, src, n);
431  }
432 };
433 
// DeepCopy for <HostSpace, ROCmHostPinnedSpace> with any other execution
// space.
434 template <class ExecutionSpace>
435 struct DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
436  ExecutionSpace> {
// Forwards to the ROCm-execution-space specialization for the same pair of
// spaces; the temporary is discarded.
437  inline DeepCopy(void* dst, const void* src, size_t n) {
438  (void)DeepCopy<HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace,
439  Kokkos::Experimental::ROCm>(dst, src, n);
440  }
441 
// Fences `exec`, then copies through the three-argument constructor above.
442  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
443  size_t n) {
444  exec.fence();
445  DeepCopy(dst, src, n);
446  }
447 };
448 } // namespace Impl
449 } // namespace Kokkos
450 
451 //----------------------------------------------------------------------------
452 //----------------------------------------------------------------------------
453 
454 namespace Kokkos {
455 namespace Impl {
456 
// Code running in ROCmSpace attempting to access Kokkos::HostSpace memory:
// not permitted. `value` is false and both verify() overloads abort with a
// diagnostic; the pointer overload ignores its argument.
458 template <>
459 struct VerifyExecutionCanAccessMemorySpace<Kokkos::Experimental::ROCmSpace,
460  Kokkos::HostSpace> {
461  enum { value = false };
462  KOKKOS_INLINE_FUNCTION static void verify(void) {
463  Kokkos::abort("ROCm code attempted to access HostSpace memory");
464  }
465 
466  KOKKOS_INLINE_FUNCTION static void verify(const void*) {
467  Kokkos::abort("ROCm code attempted to access HostSpace memory");
468  }
469 };
470 
// Code running in ROCmSpace accessing ROCmHostPinnedSpace memory: permitted.
// `value` is true and both verify() overloads are empty.
472 template <>
473 struct VerifyExecutionCanAccessMemorySpace<
474  Kokkos::Experimental::ROCmSpace,
475  Kokkos::Experimental::ROCmHostPinnedSpace> {
476  enum { value = true };
477  KOKKOS_INLINE_FUNCTION static void verify(void) {}
478  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
479 };
480 
// Fallback for code running in ROCmSpace accessing any OtherSpace that is not
// ROCmSpace itself. The enable_if yields ROCmSpace as the first template
// argument only when OtherSpace differs from ROCmSpace, so this partial
// specialization never matches the <ROCmSpace, ROCmSpace> pair.
// `value` is false and both verify() overloads abort.
482 template <class OtherSpace>
483 struct VerifyExecutionCanAccessMemorySpace<
484  typename std::enable_if<
485  !is_same<Kokkos::Experimental::ROCmSpace, OtherSpace>::value,
486  Kokkos::Experimental::ROCmSpace>::type,
487  OtherSpace> {
488  enum { value = false };
489  KOKKOS_INLINE_FUNCTION static void verify(void) {
490  Kokkos::abort("ROCm code attempted to access unknown Space memory");
491  }
492 
493  KOKKOS_INLINE_FUNCTION static void verify(const void*) {
494  Kokkos::abort("ROCm code attempted to access unknown Space memory");
495  }
496 };
497 
498 //----------------------------------------------------------------------------
// Code running in HostSpace attempting to access ROCmSpace memory: not
// permitted. `value` is false; verify() reports through
// ROCmSpace::access_error, forwarding the pointer when one is given. These
// overloads are plain `inline`, unlike the KOKKOS_INLINE_FUNCTION ones in the
// neighbouring specializations.
500 template <>
501 struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
502  Kokkos::Experimental::ROCmSpace> {
503  enum { value = false };
504  inline static void verify(void) {
505  Kokkos::Experimental::ROCmSpace::access_error();
506  }
507  inline static void verify(const void* p) {
508  Kokkos::Experimental::ROCmSpace::access_error(p);
509  }
510 };
511 
// Code running in HostSpace accessing ROCmHostPinnedSpace memory: permitted.
// `value` is true and both verify() overloads are empty.
513 template <>
514 struct VerifyExecutionCanAccessMemorySpace<
515  Kokkos::HostSpace, Kokkos::Experimental::ROCmHostPinnedSpace> {
516  enum { value = true };
517  KOKKOS_INLINE_FUNCTION static void verify(void) {}
518  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
519 };
520 } // namespace Impl
521 } // namespace Kokkos
522 
523 //----------------------------------------------------------------------------
524 //----------------------------------------------------------------------------
525 
526 namespace Kokkos {
527 namespace Impl {
528 
// Allocation record specialization for ROCmSpace, derived from the untyped
// SharedAllocationRecord<void, void> base. Non-copyable; instances are
// created through the static allocate() function because the constructor and
// destructor are protected. Only declarations are visible here.
// NOTE(review): unlike the ROCmHostPinnedSpace record below, no default
// constructor is declared for this specialization.
529 template <>
530 class SharedAllocationRecord<Kokkos::Experimental::ROCmSpace, void>
531  : public SharedAllocationRecord<void, void> {
532  private:
533  typedef SharedAllocationRecord<void, void> RecordBase;
534 
// Copying a record is disabled.
535  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
536  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
537 
// Default deallocation callback handed to the base (see the constructor's
// arg_dealloc default).
538  static void deallocate(RecordBase*);
539 
// Only present in KOKKOS_DEBUG builds; presumably the root of the list that
// print_records walks -- confirm.
540 #ifdef KOKKOS_DEBUG
541  static RecordBase s_root_record;
542 #endif
543 
// Copy of the memory space instance this record was created with.
544  const Kokkos::Experimental::ROCmSpace m_space;
545 
546  protected:
547  ~SharedAllocationRecord();
548 
// Creates a record for arg_alloc_size (presumably bytes) in arg_space with
// the given label; arg_dealloc defaults to this class's deallocate().
549  SharedAllocationRecord(
550  const Kokkos::Experimental::ROCmSpace& arg_space,
551  const std::string& arg_label, const size_t arg_alloc_size,
552  const RecordBase::function_type arg_dealloc = &deallocate);
553 
554  public:
// Returns the record's label as a std::string.
555  std::string get_label() const;
556 
// Factory returning a pointer to a new record for the given space, label and
// size.
557  static SharedAllocationRecord* allocate(
558  const Kokkos::Experimental::ROCmSpace& arg_space,
559  const std::string& arg_label, const size_t arg_alloc_size);
560 
// Tracked allocation interface working on raw pointers: allocate, resize and
// release. Presumably each call goes through a record as above -- confirm in
// the definitions.
562  static void* allocate_tracked(
563  const Kokkos::Experimental::ROCmSpace& arg_space,
564  const std::string& arg_label, const size_t arg_alloc_size);
565 
567  static void* reallocate_tracked(void* const arg_alloc_ptr,
568  const size_t arg_alloc_size);
569 
571  static void deallocate_tracked(void* const arg_alloc_ptr);
572 
// Returns the record associated with a pointer (presumably one obtained from
// this class's allocation functions -- TODO confirm).
573  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);
574 
// Writes records to the given stream; `detail` (default false) selects a
// more verbose form.
575  static void print_records(std::ostream&,
576  const Kokkos::Experimental::ROCmSpace&,
577  bool detail = false);
578 };
579 
// Allocation record specialization for ROCmHostPinnedSpace, derived from the
// untyped SharedAllocationRecord<void, void> base. Non-copyable; instances
// are created through the static allocate() function because the
// constructors and destructor are protected.
580 template <>
581 class SharedAllocationRecord<Kokkos::Experimental::ROCmHostPinnedSpace, void>
582  : public SharedAllocationRecord<void, void> {
583  private:
584  typedef SharedAllocationRecord<void, void> RecordBase;
585 
// Copying a record is disabled.
586  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
587  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
588 
// Default deallocation callback handed to the base (see the constructor's
// arg_dealloc default).
589  static void deallocate(RecordBase*);
590 
// Only present in KOKKOS_DEBUG builds; presumably the root of the list that
// print_records walks -- confirm.
591 #ifdef KOKKOS_DEBUG
592  static RecordBase s_root_record;
593 #endif
594 
// Copy of the memory space instance this record was created with.
595  const Kokkos::Experimental::ROCmHostPinnedSpace m_space;
596 
597  protected:
598  ~SharedAllocationRecord();
// Default constructor: default-constructs the base record and m_space.
599  SharedAllocationRecord() : RecordBase(), m_space() {}
600 
// Creates a record for arg_alloc_size (presumably bytes) in arg_space with
// the given label; arg_dealloc defaults to this class's deallocate().
601  SharedAllocationRecord(
602  const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
603  const std::string& arg_label, const size_t arg_alloc_size,
604  const RecordBase::function_type arg_dealloc = &deallocate);
605 
606  public:
// Returns the record's label as a std::string.
607  std::string get_label() const;
608 
// Factory returning a pointer to a new record for the given space, label and
// size.
609  static SharedAllocationRecord* allocate(
610  const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
611  const std::string& arg_label, const size_t arg_alloc_size);
// Tracked allocation interface working on raw pointers: allocate, resize and
// release. Presumably each call goes through a record as above -- confirm in
// the definitions.
613  static void* allocate_tracked(
614  const Kokkos::Experimental::ROCmHostPinnedSpace& arg_space,
615  const std::string& arg_label, const size_t arg_alloc_size);
616 
618  static void* reallocate_tracked(void* const arg_alloc_ptr,
619  const size_t arg_alloc_size);
620 
622  static void deallocate_tracked(void* const arg_alloc_ptr);
623 
// Returns the record associated with a pointer (presumably one obtained from
// this class's allocation functions -- TODO confirm).
624  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);
625 
// Writes records to the given stream; `detail` (default false) selects a
// more verbose form.
626  static void print_records(std::ostream&,
627  const Kokkos::Experimental::ROCmHostPinnedSpace&,
628  bool detail = false);
629 };
630 } // namespace Impl
631 } // namespace Kokkos
632 
633 //----------------------------------------------------------------------------
634 //----------------------------------------------------------------------------
635 
636 #endif /* #if defined( KOKKOS_ENABLE_ROCM ) */
637 #endif /* #define KOKKOS_ROCMSPACE_HPP */
Memory management for host memory.
Access relationship between DstMemorySpace and SrcMemorySpace.