Kokkos_CudaSpace.hpp
/*
//@HEADER
// ************************************************************************
//
//                         Kokkos v. 3.0
//       Copyright (2020) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ENABLE_CUDA)

#include <Kokkos_Core_fwd.hpp>

#include <iosfwd>
#include <typeinfo>
#include <string>

#include <Kokkos_HostSpace.hpp>

#include <impl/Kokkos_Profiling_Interface.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
#endif

/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda on-device memory management */
class CudaSpace {
 public:
  //! Tag this class as a kokkos memory space
  typedef CudaSpace memory_space;
  typedef Kokkos::Cuda execution_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;

  typedef unsigned int size_type;

  /*--------------------------------*/

  CudaSpace();
  CudaSpace(CudaSpace&& rhs) = default;
  CudaSpace(const CudaSpace& rhs) = default;
  CudaSpace& operator=(CudaSpace&& rhs) = default;
  CudaSpace& operator=(const CudaSpace& rhs) = default;
  ~CudaSpace() = default;

  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;

  /**\brief  Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

  /*--------------------------------*/
  /**\brief  Error reporting for HostSpace attempt to access CudaSpace */
  static void access_error();
  static void access_error(const void* const);

 private:
  int m_device;  ///< Which Cuda device

  static constexpr const char* m_name = "Cuda";
  friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
};
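
/* Usage sketch (illustrative, not part of this header; the byte count and
   pointer name are hypothetical). CudaSpace performs raw, untracked
   allocation of CUDA device global memory:

   \code
   Kokkos::CudaSpace space;
   void* ptr = space.allocate(1024);  // 1024 bytes of device memory
   // ... pass ptr to device kernels ...
   space.deallocate(ptr, 1024);  // size should match the allocation
   \endcode
*/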

namespace Impl {

/// \brief Initialize lock array maps for arbitrary-size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks, where the
/// hash value is derived from the address of the object being updated.
void init_lock_arrays_cuda_space();

/// \brief Retrieve the pointer to the lock array for arbitrary-size atomics,
/// allocating it on first use; pass deallocate = true to release it.
int* atomic_lock_array_cuda_space_ptr(bool deallocate = false);

/// \brief Retrieve the pointer to the lock array used for team- and
/// thread-private scratch allocations in global memory.
int* scratch_lock_array_cuda_space_ptr(bool deallocate = false);

/// \brief Retrieve the pointer to the lock array used to provide unique
/// thread identifiers.
int* threadid_lock_array_cuda_space_ptr(bool deallocate = false);

}  // namespace Impl
}  // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda memory that is accessible to the Host execution space
 *          through Cuda's unified virtual memory.
 */
class CudaUVMSpace {
 public:
  //! Tag this class as a kokkos memory space
  typedef CudaUVMSpace memory_space;
  typedef Cuda execution_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;
  typedef unsigned int size_type;

  /** \brief  If UVM capability is available */
  static bool available();

  /*--------------------------------*/
  /** \brief  CudaUVMSpace specific routine */
  KOKKOS_DEPRECATED static int number_of_allocations();

  /*--------------------------------*/

  /*--------------------------------*/

  CudaUVMSpace();
  CudaUVMSpace(CudaUVMSpace&& rhs) = default;
  CudaUVMSpace(const CudaUVMSpace& rhs) = default;
  CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
  CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
  ~CudaUVMSpace() = default;

  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;

  /**\brief  Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool cuda_pin_uvm_to_host();
  static void cuda_set_pin_uvm_to_host(bool val);
#endif
  /*--------------------------------*/

 private:
  int m_device;  ///< Which Cuda device

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool kokkos_impl_cuda_pin_uvm_to_host_v;
#endif
  static constexpr const char* m_name = "CudaUVM";
};
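
/* Usage sketch (illustrative, not part of this header; the array length is
   hypothetical). CudaUVMSpace allocations use CUDA managed (unified) memory,
   so the same pointer is dereferenceable on host and device, subject to the
   usual UVM synchronization rules:

   \code
   Kokkos::CudaUVMSpace uvm;
   double* x = static_cast<double*>(uvm.allocate(100 * sizeof(double)));
   x[0] = 1.0;  // legal host access; fence around concurrent device use
   // ... device kernels may read and write x as well ...
   uvm.deallocate(x, 100 * sizeof(double));
   \endcode
*/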

}  // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Host memory that is accessible to the Cuda execution space
 *          through Cuda's host-pinned memory allocation.
 */
class CudaHostPinnedSpace {
 public:
  //! Tag this class as a kokkos memory space
  typedef HostSpace::execution_space execution_space;
  typedef CudaHostPinnedSpace memory_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;
  typedef unsigned int size_type;

  /*--------------------------------*/

  CudaHostPinnedSpace();
  CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) = default;
  CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
  CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
  CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
  ~CudaHostPinnedSpace() = default;

  /**\brief  Allocate untracked memory in the space */
  void* allocate(const size_t arg_alloc_size) const;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;

  /**\brief  Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

 private:
  static constexpr const char* m_name = "CudaHostPinned";

  /*--------------------------------*/
};
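
/* Usage sketch (illustrative; the View name and extent are hypothetical).
   CudaHostPinnedSpace memory lives on the host but is page-locked (pinned),
   which lets the CUDA runtime perform faster, asynchronous transfers:

   \code
   const size_t n = 1000;
   Kokkos::View<double*, Kokkos::CudaHostPinnedSpace> staging("staging", n);
   // Fill staging on the host, then deep_copy it into a device-resident View.
   \endcode
*/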

}  // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
                                              Kokkos::CudaSpace>::assignable,
              "");
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaUVMSpace,
                                              Kokkos::CudaUVMSpace>::assignable,
              "");
static_assert(
    Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
                                    Kokkos::CudaHostPinnedSpace>::assignable,
    "");

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy = true };
};

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
  // CudaSpace::execution_space == CudaUVMSpace::execution_space
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };  // CudaSpace::execution_space
  enum { deepcopy = true };
};

//----------------------------------------
// CudaUVMSpace::execution_space == Cuda
// CudaUVMSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
  enum { assignable = false };
  enum { accessible = false };  // Cuda cannot access HostSpace
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
  // CudaUVMSpace::execution_space == CudaSpace::execution_space
  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
  enum { assignable = false };

  // CudaUVMSpace::execution_space can access CudaSpace
  enum { accessible = true };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };  // CudaUVMSpace::execution_space
  enum { deepcopy = true };
};

//----------------------------------------
// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
// CudaHostPinnedSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
  enum { assignable = false };  // Cannot access from Cuda
  enum { accessible = true };   // CudaHostPinnedSpace::execution_space
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
  enum { assignable = false };  // Cannot access from Host
  enum { accessible = false };
  enum { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
  enum { assignable = false };  // different execution_space
  enum { accessible = true };   // same accessibility
  enum { deepcopy = true };
};
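
/* The specializations above are compile-time traits consumed by View
   assignment and deep_copy. A hedged, illustrative query (the alias name is
   hypothetical; the asserted values match the HostSpace/CudaUVMSpace
   specialization above):

   \code
   using HostToUVM =
       Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace>;
   static_assert(HostToUVM::accessible, "host code can dereference UVM");
   static_assert(!HostToUVM::assignable, "but Views are not assignable");
   static_assert(HostToUVM::deepcopy, "deep_copy between the spaces works");
   \endcode
*/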

//----------------------------------------

}  // namespace Impl
}  // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);

template <>
struct DeepCopy<CudaSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<HostSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
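
/* These Impl::DeepCopy specializations are the backend that Kokkos::deep_copy
   dispatches to for the CUDA-related memory spaces; UVM and host-pinned
   copies are routed through the CudaSpace/HostSpace implementations. A
   hedged user-level sketch (View names and extent are hypothetical):

   \code
   const size_t n = 100;
   Kokkos::View<double*, Kokkos::CudaSpace> dev("dev", n);
   auto host = Kokkos::create_mirror_view(dev);  // HostSpace mirror
   Kokkos::deep_copy(dev, host);  // DeepCopy<CudaSpace, HostSpace, ...>
   \endcode
*/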

}  // namespace Impl
}  // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** Running in CudaSpace attempting to access HostSpace: error */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace,
                                           Kokkos::HostSpace> {
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify(void) {
    Kokkos::abort("Cuda code attempted to access HostSpace memory");
  }

  KOKKOS_INLINE_FUNCTION static void verify(const void*) {
    Kokkos::abort("Cuda code attempted to access HostSpace memory");
  }
};

/** Running in CudaSpace accessing CudaUVMSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace,
                                           Kokkos::CudaUVMSpace> {
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify(void) {}
  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
};

/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace,
                                           Kokkos::CudaHostPinnedSpace> {
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify(void) {}
  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
};

/** Running in CudaSpace attempting to access an unknown space: error */
template <class OtherSpace>
struct VerifyExecutionCanAccessMemorySpace<
    typename std::enable_if<!std::is_same<Kokkos::CudaSpace, OtherSpace>::value,
                            Kokkos::CudaSpace>::type,
    OtherSpace> {
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify(void) {
    Kokkos::abort("Cuda code attempted to access unknown Space memory");
  }

  KOKKOS_INLINE_FUNCTION static void verify(const void*) {
    Kokkos::abort("Cuda code attempted to access unknown Space memory");
  }
};

//----------------------------------------------------------------------------
/** Running in HostSpace attempting to access CudaSpace: error */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
                                           Kokkos::CudaSpace> {
  enum { value = false };
  inline static void verify(void) { CudaSpace::access_error(); }
  inline static void verify(const void* p) { CudaSpace::access_error(p); }
};

/** Running in HostSpace accessing CudaUVMSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
                                           Kokkos::CudaUVMSpace> {
  enum { value = true };
  inline static void verify(void) {}
  inline static void verify(const void*) {}
};

/** Running in HostSpace accessing CudaHostPinnedSpace: ok */
template <>
struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,
                                           Kokkos::CudaHostPinnedSpace> {
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify(void) {}
  KOKKOS_INLINE_FUNCTION static void verify(const void*) {}
};

}  // namespace Impl
}  // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

template <>
class SharedAllocationRecord<Kokkos::CudaSpace, void>
    : public SharedAllocationRecord<void, void> {
 private:
  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;

  typedef SharedAllocationRecord<void, void> RecordBase;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static void deallocate(RecordBase*);

  static ::cudaTextureObject_t attach_texture_object(
      const unsigned sizeof_alias, void* const alloc_ptr,
      const size_t alloc_size);

#ifdef KOKKOS_DEBUG
  static RecordBase s_root_record;
#endif

  ::cudaTextureObject_t m_tex_obj;
  const Kokkos::CudaSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord(
      const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);

 public:
  std::string get_label() const;

  static SharedAllocationRecord* allocate(const Kokkos::CudaSpace& arg_space,
                                          const std::string& arg_label,
                                          const size_t arg_alloc_size);

  /**\brief  Allocate tracked memory in the space */
  static void* allocate_tracked(const Kokkos::CudaSpace& arg_space,
                                const std::string& arg_label,
                                const size_t arg_alloc_size);

  /**\brief  Reallocate tracked memory in the space */
  static void* reallocate_tracked(void* const arg_alloc_ptr,
                                  const size_t arg_alloc_size);

  /**\brief  Deallocate tracked memory in the space */
  static void deallocate_tracked(void* const arg_alloc_ptr);

  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);

  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = attach_texture_object(sizeof(AliasType),
                                        (void*)RecordBase::m_alloc_ptr,
                                        RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }

  static void print_records(std::ostream&, const Kokkos::CudaSpace&,
                            bool detail = false);
};
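
/* Illustrative sketch (hypothetical pointer and alias type; not part of this
   header): attaching a texture object to an existing tracked CudaSpace
   allocation so device loads can go through the texture-fetch path:

   \code
   using Record = Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
   Record* rec = Record::get_record(ptr);  // ptr from allocate_tracked()
   ::cudaTextureObject_t tex = rec->attach_texture_object<int>();
   int off = rec->attach_texture_object_offset(static_cast<const int*>(ptr));
   \endcode
*/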

template <>
class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
    : public SharedAllocationRecord<void, void> {
 private:
  typedef SharedAllocationRecord<void, void> RecordBase;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static void deallocate(RecordBase*);

  static RecordBase s_root_record;

  ::cudaTextureObject_t m_tex_obj;
  const Kokkos::CudaUVMSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);

 public:
  std::string get_label() const;

  static SharedAllocationRecord* allocate(const Kokkos::CudaUVMSpace& arg_space,
                                          const std::string& arg_label,
                                          const size_t arg_alloc_size);

  /**\brief  Allocate tracked memory in the space */
  static void* allocate_tracked(const Kokkos::CudaUVMSpace& arg_space,
                                const std::string& arg_label,
                                const size_t arg_alloc_size);

  /**\brief  Reallocate tracked memory in the space */
  static void* reallocate_tracked(void* const arg_alloc_ptr,
                                  const size_t arg_alloc_size);

  /**\brief  Deallocate tracked memory in the space */
  static void deallocate_tracked(void* const arg_alloc_ptr);

  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);

  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
          attach_texture_object(sizeof(AliasType),
                                (void*)RecordBase::m_alloc_ptr,
                                RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }

  static void print_records(std::ostream&, const Kokkos::CudaUVMSpace&,
                            bool detail = false);
};

template <>
class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
    : public SharedAllocationRecord<void, void> {
 private:
  typedef SharedAllocationRecord<void, void> RecordBase;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static void deallocate(RecordBase*);

  static RecordBase s_root_record;

  const Kokkos::CudaHostPinnedSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_space() {}

  SharedAllocationRecord(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);

 public:
  std::string get_label() const;

  static SharedAllocationRecord* allocate(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size);

  /**\brief  Allocate tracked memory in the space */
  static void* allocate_tracked(const Kokkos::CudaHostPinnedSpace& arg_space,
                                const std::string& arg_label,
                                const size_t arg_alloc_size);

  /**\brief  Reallocate tracked memory in the space */
  static void* reallocate_tracked(void* const arg_alloc_ptr,
                                  const size_t arg_alloc_size);

  /**\brief  Deallocate tracked memory in the space */
  static void deallocate_tracked(void* const arg_alloc_ptr);

  static SharedAllocationRecord* get_record(void* arg_alloc_ptr);

  static void print_records(std::ostream&, const Kokkos::CudaHostPinnedSpace&,
                            bool detail = false);
};

}  // namespace Impl
}  // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */