// Kokkos Core Kernels Package — Version of the Day
// (Doxygen navigation residue removed: "All Classes Namespaces Files Functions
//  Variables Typedefs Enumerations Enumerator Friends Groups Pages")
// Kokkos_CudaSpace.hpp
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
43 
#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_CUDA )

#include <Kokkos_Core_fwd.hpp>

#include <cstddef>
#include <iosfwd>
#include <string>
#include <type_traits>
#include <typeinfo>

#include <Kokkos_HostSpace.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>
60 /*--------------------------------------------------------------------------*/
61 
62 namespace Kokkos {
63 
66 class CudaSpace {
67 public:
68 
70  typedef CudaSpace memory_space ;
71  typedef Kokkos::Cuda execution_space ;
72  typedef Kokkos::Device<execution_space,memory_space> device_type;
73 
74  typedef unsigned int size_type ;
75 
76  /*--------------------------------*/
77 
78  CudaSpace();
79  CudaSpace( CudaSpace && rhs ) = default ;
80  CudaSpace( const CudaSpace & rhs ) = default ;
81  CudaSpace & operator = ( CudaSpace && rhs ) = default ;
82  CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
83  ~CudaSpace() = default ;
84 
86  void * allocate( const size_t arg_alloc_size ) const ;
87 
89  void deallocate( void * const arg_alloc_ptr
90  , const size_t arg_alloc_size ) const ;
91 
93  static constexpr const char* name() { return m_name; }
94 
95  /*--------------------------------*/
97  static void access_error();
98  static void access_error( const void * const );
99 
100 private:
101 
102  int m_device ;
103 
104  static constexpr const char* m_name = "Cuda";
105  friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
106 };
107 
108 namespace Impl {
115 void init_lock_arrays_cuda_space();
116 
124 int* atomic_lock_array_cuda_space_ptr(bool deallocate = false);
125 
132 int* scratch_lock_array_cuda_space_ptr(bool deallocate = false);
133 
140 int* threadid_lock_array_cuda_space_ptr(bool deallocate = false);
141 }
142 } // namespace Kokkos
143 
144 /*--------------------------------------------------------------------------*/
145 /*--------------------------------------------------------------------------*/
146 
147 namespace Kokkos {
148 
152 class CudaUVMSpace {
153 public:
154 
156  typedef CudaUVMSpace memory_space ;
157  typedef Cuda execution_space ;
158  typedef Kokkos::Device<execution_space,memory_space> device_type;
159  typedef unsigned int size_type ;
160 
162  static bool available();
163 
164 
165  /*--------------------------------*/
167  static int number_of_allocations();
168 
169  /*--------------------------------*/
170 
171 
172  /*--------------------------------*/
173 
174  CudaUVMSpace();
175  CudaUVMSpace( CudaUVMSpace && rhs ) = default ;
176  CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
177  CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ;
178  CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
179  ~CudaUVMSpace() = default ;
180 
182  void * allocate( const size_t arg_alloc_size ) const ;
183 
185  void deallocate( void * const arg_alloc_ptr
186  , const size_t arg_alloc_size ) const ;
187 
189  static constexpr const char* name() { return m_name; }
190 
191  /*--------------------------------*/
192 
193 private:
194  int m_device ;
195 
196  static constexpr const char* m_name = "CudaUVM";
197 
198 };
199 
200 } // namespace Kokkos
201 
202 /*--------------------------------------------------------------------------*/
203 /*--------------------------------------------------------------------------*/
204 
205 namespace Kokkos {
206 
210 class CudaHostPinnedSpace {
211 public:
212 
214 
215  typedef HostSpace::execution_space execution_space ;
216  typedef CudaHostPinnedSpace memory_space ;
217  typedef Kokkos::Device<execution_space,memory_space> device_type;
218  typedef unsigned int size_type ;
219 
220  /*--------------------------------*/
221 
222  CudaHostPinnedSpace();
223  CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ;
224  CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
225  CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ;
226  CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
227  ~CudaHostPinnedSpace() = default ;
228 
230  void * allocate( const size_t arg_alloc_size ) const ;
231 
233  void deallocate( void * const arg_alloc_ptr
234  , const size_t arg_alloc_size ) const ;
235 
237  static constexpr const char* name() { return m_name; }
238 
239 private:
240 
241  static constexpr const char* m_name = "CudaHostPinned";
242 
243  /*--------------------------------*/
244 };
245 
246 } // namespace Kokkos
247 
248 /*--------------------------------------------------------------------------*/
249 /*--------------------------------------------------------------------------*/
250 
251 namespace Kokkos {
252 namespace Impl {
253 
257 
258 //----------------------------------------
259 
260 template<>
261 struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > {
262  enum { assignable = false };
263  enum { accessible = false };
264  enum { deepcopy = true };
265 };
266 
267 template<>
268 struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
269  // HostSpace::execution_space != CudaUVMSpace::execution_space
270  enum { assignable = false };
271  enum { accessible = true };
272  enum { deepcopy = true };
273 };
274 
275 template<>
276 struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
277  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
278  enum { assignable = true };
279  enum { accessible = true };
280  enum { deepcopy = true };
281 };
282 
283 //----------------------------------------
284 
285 template<>
286 struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace > {
287  enum { assignable = false };
288  enum { accessible = false };
289  enum { deepcopy = true };
290 };
291 
292 template<>
293 struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
294  // CudaSpace::execution_space == CudaUVMSpace::execution_space
295  enum { assignable = true };
296  enum { accessible = true };
297  enum { deepcopy = true };
298 };
299 
300 template<>
301 struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
302  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
303  enum { assignable = false };
304  enum { accessible = true }; // CudaSpace::execution_space
305  enum { deepcopy = true };
306 };
307 
308 //----------------------------------------
309 // CudaUVMSpace::execution_space == Cuda
310 // CudaUVMSpace accessible to both Cuda and Host
311 
312 template<>
313 struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace > {
314  enum { assignable = false };
315  enum { accessible = false }; // Cuda cannot access HostSpace
316  enum { deepcopy = true };
317 };
318 
319 template<>
320 struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
321  // CudaUVMSpace::execution_space == CudaSpace::execution_space
322  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
323  enum { assignable = false };
324 
325  // CudaUVMSpace::execution_space can access CudaSpace
326  enum { accessible = true };
327  enum { deepcopy = true };
328 };
329 
330 template<>
331 struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
332  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
333  enum { assignable = false };
334  enum { accessible = true }; // CudaUVMSpace::execution_space
335  enum { deepcopy = true };
336 };
337 
338 
339 //----------------------------------------
340 // CudaHostPinnedSpace::execution_space == HostSpace::execution_space
341 // CudaHostPinnedSpace accessible to both Cuda and Host
342 
343 template<>
344 struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace > {
345  enum { assignable = false }; // Cannot access from Cuda
346  enum { accessible = true }; // CudaHostPinnedSpace::execution_space
347  enum { deepcopy = true };
348 };
349 
350 template<>
351 struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
352  enum { assignable = false }; // Cannot access from Host
353  enum { accessible = false };
354  enum { deepcopy = true };
355 };
356 
357 template<>
358 struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
359  enum { assignable = false }; // different execution_space
360  enum { accessible = true }; // same accessibility
361  enum { deepcopy = true };
362 };
363 
364 //----------------------------------------
365 
366 }} // namespace Kokkos::Impl
367 
368 /*--------------------------------------------------------------------------*/
369 /*--------------------------------------------------------------------------*/
370 
371 namespace Kokkos {
372 namespace Impl {
373 
374 void DeepCopyAsyncCuda( void * dst , const void * src , size_t n);
375 
376 template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda>
377 {
378  DeepCopy( void * dst , const void * src , size_t );
379  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
380 };
381 
382 template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
383 {
384  DeepCopy( void * dst , const void * src , size_t );
385  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
386 };
387 
388 template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
389 {
390  DeepCopy( void * dst , const void * src , size_t );
391  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
392 };
393 
394 template<class ExecutionSpace> struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
395 {
396  inline
397  DeepCopy( void * dst , const void * src , size_t n )
398  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
399 
400  inline
401  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
402  {
403  exec.fence();
404  DeepCopyAsyncCuda (dst,src,n);
405  }
406 };
407 
408 template<class ExecutionSpace> struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
409 {
410  inline
411  DeepCopy( void * dst , const void * src , size_t n )
412  { (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
413 
414  inline
415  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
416  {
417  exec.fence();
418  DeepCopyAsyncCuda (dst,src,n);
419  }
420 };
421 
422 template<class ExecutionSpace>
423 struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
424 {
425  inline
426  DeepCopy( void * dst , const void * src , size_t n )
427  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
428 
429  inline
430  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
431  {
432  exec.fence();
433  DeepCopyAsyncCuda (dst,src,n);
434  }
435 };
436 
437 template<class ExecutionSpace>
438 struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
439 {
440  inline
441  DeepCopy( void * dst , const void * src , size_t n )
442  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
443 
444  inline
445  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
446  {
447  exec.fence();
448  DeepCopyAsyncCuda (dst,src,n);
449  }
450 };
451 
452 template<class ExecutionSpace>
453 struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
454 {
455  inline
456  DeepCopy( void * dst , const void * src , size_t n )
457  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
458 
459  inline
460  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
461  {
462  exec.fence();
463  DeepCopyAsyncCuda (dst,src,n);
464  }
465 };
466 
467 
468 template<class ExecutionSpace>
469 struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
470 {
471  inline
472  DeepCopy( void * dst , const void * src , size_t n )
473  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
474 
475  inline
476  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
477  {
478  exec.fence();
479  DeepCopyAsyncCuda (dst,src,n);
480  }
481 };
482 
483 template<class ExecutionSpace>
484 struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
485 {
486  inline
487  DeepCopy( void * dst , const void * src , size_t n )
488  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
489 
490  inline
491  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
492  {
493  exec.fence();
494  DeepCopyAsyncCuda (dst,src,n);
495  }
496 };
497 
498 template<class ExecutionSpace>
499 struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
500 {
501  inline
502  DeepCopy( void * dst , const void * src , size_t n )
503  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
504 
505  inline
506  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
507  {
508  exec.fence();
509  DeepCopyAsyncCuda (dst,src,n);
510  }
511 };
512 
513 template<class ExecutionSpace> struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
514 {
515  inline
516  DeepCopy( void * dst , const void * src , size_t n )
517  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
518 
519  inline
520  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
521  {
522  exec.fence();
523  DeepCopyAsyncCuda (dst,src,n);
524  }
525 };
526 
527 
528 template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
529 {
530  inline
531  DeepCopy( void * dst , const void * src , size_t n )
532  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
533 
534  inline
535  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
536  {
537  exec.fence();
538  DeepCopyAsyncCuda (dst,src,n);
539  }
540 };
541 
542 template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
543 {
544  inline
545  DeepCopy( void * dst , const void * src , size_t n )
546  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
547 
548  inline
549  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
550  {
551  exec.fence();
552  DeepCopyAsyncCuda (dst,src,n);
553  }
554 };
555 
556 template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
557 {
558  inline
559  DeepCopy( void * dst , const void * src , size_t n )
560  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
561 
562  inline
563  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
564  {
565  exec.fence();
566  DeepCopyAsyncCuda (dst,src,n);
567  }
568 };
569 
570 template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
571 {
572  inline
573  DeepCopy( void * dst , const void * src , size_t n )
574  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
575 
576  inline
577  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
578  {
579  exec.fence();
580  DeepCopyAsyncCuda (dst,src,n);
581  }
582 };
583 
584 
585 template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
586 {
587  inline
588  DeepCopy( void * dst , const void * src , size_t n )
589  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
590 
591  inline
592  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
593  {
594  exec.fence();
595  DeepCopyAsyncCuda (dst,src,n);
596  }
597 };
598 
599 template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
600 {
601  inline
602  DeepCopy( void * dst , const void * src , size_t n )
603  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
604 
605  inline
606  DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
607  {
608  exec.fence();
609  DeepCopyAsyncCuda (dst,src,n);
610  }
611 };
612 
613 } // namespace Impl
614 } // namespace Kokkos
615 
616 //----------------------------------------------------------------------------
617 //----------------------------------------------------------------------------
618 
619 namespace Kokkos {
620 namespace Impl {
621 
623 template<>
624 struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
625 {
626  enum { value = false };
627  KOKKOS_INLINE_FUNCTION static void verify( void )
628  { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
629 
630  KOKKOS_INLINE_FUNCTION static void verify( const void * )
631  { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
632 };
633 
635 template<>
636 struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
637 {
638  enum { value = true };
639  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
640  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
641 };
642 
644 template<>
645 struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
646 {
647  enum { value = true };
648  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
649  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
650 };
651 
653 template< class OtherSpace >
654 struct VerifyExecutionCanAccessMemorySpace<
655  typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
656  OtherSpace >
657 {
658  enum { value = false };
659  KOKKOS_INLINE_FUNCTION static void verify( void )
660  { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
661 
662  KOKKOS_INLINE_FUNCTION static void verify( const void * )
663  { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
664 };
665 
666 //----------------------------------------------------------------------------
668 template<>
669 struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
670 {
671  enum { value = false };
672  inline static void verify( void ) { CudaSpace::access_error(); }
673  inline static void verify( const void * p ) { CudaSpace::access_error(p); }
674 };
675 
677 template<>
678 struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
679 {
680  enum { value = true };
681  inline static void verify( void ) { }
682  inline static void verify( const void * ) { }
683 };
684 
686 template<>
687 struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
688 {
689  enum { value = true };
690  KOKKOS_INLINE_FUNCTION static void verify( void ) {}
691  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
692 };
693 
694 } // namespace Impl
695 } // namespace Kokkos
696 
697 //----------------------------------------------------------------------------
698 //----------------------------------------------------------------------------
699 
700 namespace Kokkos {
701 namespace Impl {
702 
703 template<>
704 class SharedAllocationRecord< Kokkos::CudaSpace , void >
705  : public SharedAllocationRecord< void , void >
706 {
707 private:
708 
709  friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
710 
711  typedef SharedAllocationRecord< void , void > RecordBase ;
712 
713  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
714  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
715 
716  static void deallocate( RecordBase * );
717 
718  static ::cudaTextureObject_t
719  attach_texture_object( const unsigned sizeof_alias
720  , void * const alloc_ptr
721  , const size_t alloc_size );
722 
723 #ifdef KOKKOS_DEBUG
724  static RecordBase s_root_record ;
725 #endif
726 
727  ::cudaTextureObject_t m_tex_obj ;
728  const Kokkos::CudaSpace m_space ;
729 
730 protected:
731 
732  ~SharedAllocationRecord();
733  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
734 
735  SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
736  , const std::string & arg_label
737  , const size_t arg_alloc_size
738  , const RecordBase::function_type arg_dealloc = & deallocate
739  );
740 
741 public:
742 
743  std::string get_label() const ;
744 
745  static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
746  , const std::string & arg_label
747  , const size_t arg_alloc_size );
748 
750  static
751  void * allocate_tracked( const Kokkos::CudaSpace & arg_space
752  , const std::string & arg_label
753  , const size_t arg_alloc_size );
754 
756  static
757  void * reallocate_tracked( void * const arg_alloc_ptr
758  , const size_t arg_alloc_size );
759 
761  static
762  void deallocate_tracked( void * const arg_alloc_ptr );
763 
764  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
765 
766  template< typename AliasType >
767  inline
768  ::cudaTextureObject_t attach_texture_object()
769  {
770  static_assert( ( std::is_same< AliasType , int >::value ||
771  std::is_same< AliasType , ::int2 >::value ||
772  std::is_same< AliasType , ::int4 >::value )
773  , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
774 
775  if ( m_tex_obj == 0 ) {
776  m_tex_obj = attach_texture_object( sizeof(AliasType)
777  , (void*) RecordBase::m_alloc_ptr
778  , RecordBase::m_alloc_size );
779  }
780 
781  return m_tex_obj ;
782  }
783 
784  template< typename AliasType >
785  inline
786  int attach_texture_object_offset( const AliasType * const ptr )
787  {
788  // Texture object is attached to the entire allocation range
789  return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
790  }
791 
792  static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
793 };
794 
795 
796 template<>
797 class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
798  : public SharedAllocationRecord< void , void >
799 {
800 private:
801 
802  typedef SharedAllocationRecord< void , void > RecordBase ;
803 
804  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
805  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
806 
807  static void deallocate( RecordBase * );
808 
809  static RecordBase s_root_record ;
810 
811  ::cudaTextureObject_t m_tex_obj ;
812  const Kokkos::CudaUVMSpace m_space ;
813 
814 protected:
815 
816  ~SharedAllocationRecord();
817  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
818 
819  SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
820  , const std::string & arg_label
821  , const size_t arg_alloc_size
822  , const RecordBase::function_type arg_dealloc = & deallocate
823  );
824 
825 public:
826 
827  std::string get_label() const ;
828 
829  static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
830  , const std::string & arg_label
831  , const size_t arg_alloc_size
832  );
833 
835  static
836  void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
837  , const std::string & arg_label
838  , const size_t arg_alloc_size );
839 
841  static
842  void * reallocate_tracked( void * const arg_alloc_ptr
843  , const size_t arg_alloc_size );
844 
846  static
847  void deallocate_tracked( void * const arg_alloc_ptr );
848 
849  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
850 
851 
852  template< typename AliasType >
853  inline
854  ::cudaTextureObject_t attach_texture_object()
855  {
856  static_assert( ( std::is_same< AliasType , int >::value ||
857  std::is_same< AliasType , ::int2 >::value ||
858  std::is_same< AliasType , ::int4 >::value )
859  , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
860 
861  if ( m_tex_obj == 0 ) {
862  m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
863  attach_texture_object( sizeof(AliasType)
864  , (void*) RecordBase::m_alloc_ptr
865  , RecordBase::m_alloc_size );
866  }
867 
868  return m_tex_obj ;
869  }
870 
871  template< typename AliasType >
872  inline
873  int attach_texture_object_offset( const AliasType * const ptr )
874  {
875  // Texture object is attached to the entire allocation range
876  return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
877  }
878 
879  static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
880 };
881 
882 template<>
883 class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
884  : public SharedAllocationRecord< void , void >
885 {
886 private:
887 
888  typedef SharedAllocationRecord< void , void > RecordBase ;
889 
890  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
891  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
892 
893  static void deallocate( RecordBase * );
894 
895  static RecordBase s_root_record ;
896 
897  const Kokkos::CudaHostPinnedSpace m_space ;
898 
899 protected:
900 
901  ~SharedAllocationRecord();
902  SharedAllocationRecord() : RecordBase(), m_space() {}
903 
904  SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
905  , const std::string & arg_label
906  , const size_t arg_alloc_size
907  , const RecordBase::function_type arg_dealloc = & deallocate
908  );
909 
910 public:
911 
912  std::string get_label() const ;
913 
914  static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
915  , const std::string & arg_label
916  , const size_t arg_alloc_size
917  );
919  static
920  void * allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
921  , const std::string & arg_label
922  , const size_t arg_alloc_size );
923 
925  static
926  void * reallocate_tracked( void * const arg_alloc_ptr
927  , const size_t arg_alloc_size );
928 
930  static
931  void deallocate_tracked( void * const arg_alloc_ptr );
932 
933 
934  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
935 
936  static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
937 };
938 
939 } // namespace Impl
940 } // namespace Kokkos
941 
942 //----------------------------------------------------------------------------
943 //----------------------------------------------------------------------------
944 
945 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
946 #endif /* #define KOKKOS_CUDASPACE_HPP */
947 
// Doxygen tooltip residue from the rendered page, preserved as comments:
//   Memory management for host memory.
//   bool available() — Query if hwloc is available.
//   Access relationship between DstMemorySpace and SrcMemorySpace.