doc/html/Sacado__DynamicArrayTraits_8hpp_source.html

 // @HEADER

 // ***********************************************************************

 //

 //                           Sacado Package

 //                 Copyright (2006) Sandia Corporation

 //

 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,

 // the U.S. Government retains certain rights in this software.

 //

 // This library is free software; you can redistribute it and/or modify

 // it under the terms of the GNU Lesser General Public License as

 // published by the Free Software Foundation; either version 2.1 of the

 // License, or (at your option) any later version.

 //

 // This library is distributed in the hope that it will be useful, but

 // WITHOUT ANY WARRANTY; without even the implied warranty of

 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 // Lesser General Public License for more details.

 //

 // You should have received a copy of the GNU Lesser General Public

 // License along with this library; if not, write to the Free Software

 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301

 // USA

 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps

 // (etphipp@sandia.gov).

 //

 // ***********************************************************************

 // @HEADER


 #ifndef SACADO_DYNAMICARRAYTRAITS_HPP

 #define SACADO_DYNAMICARRAYTRAITS_HPP


 #include <new>

 #include <cstring>

 #include <stdint.h>


 #include "Sacado_Traits.hpp"

 #if defined(HAVE_SACADO_KOKKOSCORE)

 #include "Kokkos_Core.hpp"

 #if defined(KOKKOS_ENABLE_CUDA)

 #include "Cuda/Kokkos_Cuda_Vectorization.hpp"

 #endif

 #if !defined(SACADO_DISABLE_CUDA_IN_KOKKOS)

 #include "Kokkos_MemoryPool.hpp"

 #endif

 #endif


 namespace Sacado {


   //! Generic global-memory-pool constructor: accepts its arguments and does
   //! nothing.
   /*!
    * This is the catch-all template; the body is empty, so none of the
    * arguments is read.
    */
   template <typename ExecSpace>
   void createGlobalMemoryPool(const ExecSpace& space,
                               const size_t min_total_alloc_size,
                               const uint32_t min_block_alloc_size,
                               const uint32_t max_block_alloc_size,
                               const uint32_t min_superblock_size)
   {
     // Intentionally empty.
   }


   //! Generic global-memory-pool destructor: accepts its argument and does
   //! nothing.
   template <typename ExecSpace>
   void destroyGlobalMemoryPool(const ExecSpace& space)
   {
     // Intentionally empty.
   }


 #if 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(KOKKOS_ENABLE_OPENMP)

   namespace Impl {

     // Declaration (extern) of the pointer to the OpenMP memory pool that the
     // OpenMP create/destroy functions below assign and delete.  NOTE: this
     // whole region is compiled out by the enclosing `#if 0 && ...`.
     extern const Kokkos::MemoryPool<Kokkos::OpenMP>* global_sacado_openmp_memory_pool;

   }


   inline void

   createGlobalMemoryPool(const ExecSpace& space

             , const size_t min_total_alloc_size

             , const uint32_t min_block_alloc_size

             , const uint32_t max_block_alloc_size

             , const uint32_t min_superblock_size

             )

   {

     typedef Kokkos::MemoryPool<Kokkos::OpenMP> pool_t;

     Impl::global_sacado_openmp_memory_pool =

       new pool_t(typename Kokkos::OpenMP::memory_space(),

           min_total_alloc_size,

           min_block_alloc_size,

           max_block_alloc_size,

           min_superblock_size);

   }


   //! Destroy the global Sacado memory pool for the OpenMP execution space.
   /*!
    * Deletes the pool referenced by Impl::global_sacado_openmp_memory_pool and
    * resets that pointer to null.  Resetting matters because the OpenMP
    * allocation paths in this header test the pointer against 0 to decide
    * whether to use the pool; a dangling value would route them to freed
    * memory.  It also makes a repeated call harmless (deleting null is a no-op).
    *
    * \param space  Execution-space tag; it is not read, it only selects this
    *               overload.
    *
    * NOTE: this region is currently compiled out by the enclosing `#if 0`.
    */
   inline void destroyGlobalMemoryPool(const Kokkos::OpenMP& space)
   {
     delete Impl::global_sacado_openmp_memory_pool;
     Impl::global_sacado_openmp_memory_pool = 0;
   }

 #endif


 #if defined(HAVE_SACADO_KOKKOSCORE) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)


   namespace Impl {


     // Host-side pool object; createGlobalMemoryPool(const Kokkos::Cuda&, ...)
     // stores the pointer it obtained from `new` here and
     // destroyGlobalMemoryPool(const Kokkos::Cuda&) deletes it.
     extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_host;

     // Address of the cudaMalloc'ed buffer holding a byte copy of the pool
     // object; released with cudaFree in destroyGlobalMemoryPool.
     extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_device;

     // __device__ variable through which device code reaches the pool; it is
     // written by SetMemoryPoolPtr::operator().  With relocatable device code
     // it is only declared here; otherwise it is defined (initialised to 0) in
     // this header.
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE

     extern __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device;

 #else

     __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device = 0;

 #endif


     //! Functor that publishes a device-resident pool address to device code.
     /*!
      * Invoking it on the device copies \c pool_device into the __device__
      * variable global_sacado_cuda_memory_pool_on_device.  The integer index
      * argument is ignored.
      */
     struct SetMemoryPoolPtr {
       //! Pool address to publish.
       Kokkos::MemoryPool<Kokkos::Cuda>* pool_device;

       __device__ inline
       void operator()(int /* unused index */) const
       {
         global_sacado_cuda_memory_pool_on_device = pool_device;
       }
     };


   }


   // For some reason we get memory errors if these functions are defined in
   // Sacado_DynamicArrayTraits.cpp

   //! Create the global Sacado memory pool for the Cuda execution space.
   /*!
    * Steps, in order:
    *  -# build a Kokkos::MemoryPool<Kokkos::Cuda> on the host heap from the
    *     four size arguments;
    *  -# cudaMalloc a buffer of sizeof(pool) bytes and cudaMemcpy the host
    *     object into it;
    *  -# run a one-iteration Kokkos::parallel_for of Impl::SetMemoryPoolPtr so
    *     the device-side global pointer refers to that buffer;
    *  -# record the host object and the device buffer in the Impl globals.
    *
    * \param space  Execution-space tag; it is not read, it only selects this
    *               overload.
    */
   inline void
   createGlobalMemoryPool(const Kokkos::Cuda& space,
                          const size_t min_total_alloc_size,
                          const uint32_t min_block_alloc_size,
                          const uint32_t max_block_alloc_size,
                          const uint32_t min_superblock_size)
   {
     typedef Kokkos::MemoryPool<Kokkos::Cuda> pool_t;
     typedef Kokkos::Cuda::memory_space pool_memory_space;

     pool_t* host_pool = new pool_t(pool_memory_space(),
                                    min_total_alloc_size,
                                    min_block_alloc_size,
                                    max_block_alloc_size,
                                    min_superblock_size);

     // The functor carries the device address into the kernel.
     Impl::SetMemoryPoolPtr publish;
     CUDA_SAFE_CALL( cudaMalloc( &publish.pool_device, sizeof(pool_t) ) );
     CUDA_SAFE_CALL( cudaMemcpy( publish.pool_device, host_pool,
                                 sizeof(pool_t),
                                 cudaMemcpyHostToDevice ) );
     Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,1),publish);

     Impl::global_sacado_cuda_memory_pool_host = host_pool;
     Impl::global_sacado_cuda_memory_pool_device = publish.pool_device;
   }


   //! Destroy the global Sacado memory pool for the Cuda execution space.
   /*!
    * Releases the device buffer with cudaFree and deletes the host-side pool
    * object, then resets both host-visible pointers to null.  Without the
    * reset the globals keep referring to freed memory, so a second call would
    * free the same buffer and delete the same object again; with it, a
    * repeated call only sees null pointers (cudaFree(0) and `delete` of null
    * perform no operation).
    *
    * The __device__ pointer global_sacado_cuda_memory_pool_on_device is NOT
    * touched here.
    *
    * \param space  Execution-space tag; it is not read, it only selects this
    *               overload.
    */
   inline void destroyGlobalMemoryPool(const Kokkos::Cuda& space)
   {
     CUDA_SAFE_CALL( cudaFree( (void*) Impl::global_sacado_cuda_memory_pool_device ) );
     delete Impl::global_sacado_cuda_memory_pool_host;

     // Do not leave dangling pointers behind.
     Impl::global_sacado_cuda_memory_pool_device = 0;
     Impl::global_sacado_cuda_memory_pool_host = 0;
   }


 #endif


 #if !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)


   namespace Impl {


     // Compute warp lane/thread index: the linearised (x fastest, then y,
     // then z) thread index within the block, modulo warp_size.
     __device__ inline int warpLane(const int warp_size = 32) {
       const unsigned int linear_tid =
         threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
       return linear_tid % warp_size;
     }


     // Reduce y across the warp and broadcast to all lanes.
     //
     // Adds in the value shuffled down from offsets 1, 2, 4, ... (< warp_size),
     // then returns the value held by lane 0 of the warp_size-wide group.
     template <typename T>
     __device__ inline T warpReduce(T y, const int warp_size = 32) {
       int offset = 1;
       while (offset < warp_size) {
         y += Kokkos::shfl_down(y, offset, warp_size);
         offset *= 2;
       }
       return Kokkos::shfl(y, 0, warp_size);
     }


     // Non-inclusive plus-scan up the warp, replacing the first entry with 0.
     //
     // Each lane first takes its lower neighbour's value (lane 0 takes T(0)),
     // then the values are accumulated with shuffles at offsets 1, 2, 4, ...
     // The test `lane > i` (rather than `>=`) is sufficient because the value
     // that lane i would pull in comes from lane 0, which always holds zero.
     //
     // Returns the scanned value as T.  (This used to return int, silently
     // truncating any non-int T; for T = int nothing changes.)  The lane index
     // is computed with the caller's warp_size so that it matches the width
     // passed to the shuffles; with the default of 32 this is the same value
     // as before.
     template <typename T>
     __device__ inline T warpScan(T y, const int warp_size = 32) {
       const int lane = warpLane(warp_size);
       y = Kokkos::shfl_up(y, 1, warp_size);
       if (lane == 0)
         y = T(0);
       for (int i=1; i<warp_size; i*=2) {
         T t = Kokkos::shfl_up(y, i, warp_size);
         if (lane > i)
           y += t;
       }
       return y;
     }


     // Return the value of y held by lane `id` of the warp_size-wide group
     // (a thin wrapper around Kokkos::shfl).
     template <typename T>
     __device__ inline T warpBcast(T y, int id, const int warp_size = 32) {
       const T from_lane = Kokkos::shfl(y, id, warp_size);
       return from_lane;
     }


   }


 #endif


   namespace Impl {


     // Allocate raw (unconstructed) storage for sz objects of type T and
     // return a pointer to it.  Exactly one of the bodies below is compiled:
     //
     //  1. Host code, CUDA >= 6.0 with KOKKOS_ENABLE_CUDA_UVM:
     //     cudaMallocManaged with cudaMemAttachGlobal.
     //  2. Device code with SACADO_KOKKOS_USE_MEMORY_POOL: one pooled
     //     allocation per warp, carved up between the lanes.
     //  3. OpenMP memory pool: permanently disabled by the leading `0 &&`.
     //  4. Otherwise: plain `operator new`.
     //
     // No guard on sz is applied in branches 1 and 4; the ds_array callers in
     // this header only call ds_alloc when sz > 0.
     template <typename T>

     KOKKOS_INLINE_FUNCTION

     static T* ds_alloc(const int sz) {

 #if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )

       T* m;

       CUDA_SAFE_CALL( cudaMallocManaged( (void**) &m, sz*sizeof(T), cudaMemAttachGlobal ) );

 #elif defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)

       // Warp-collective path: uses warp shuffles, so presumably every lane of
       // the warp is expected to reach this call -- TODO confirm with callers.
       T* m = 0;

       // Sum of the sizes requested by all lanes.
       const int total_sz = warpReduce(sz);

       const int lane = warpLane();

       // Only lane 0 talks to the pool, asking for the whole warp's storage.
       if (total_sz > 0 && lane == 0) {

         m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));

         if (m == 0)

           Kokkos::abort("Allocation failed.  Kokkos memory pool is out of memory");

       }

       // Hand lane 0's base pointer to every lane ...
       m = warpBcast(m,0);

       // ... and advance each lane by the total size requested by the lanes
       // below it (exclusive prefix sum of sz).
       m += warpScan(sz);

 #elif 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)

       // Disabled branch: use the OpenMP pool when it exists, otherwise fall
       // back to operator new.
       T* m = 0;

       if (sz > 0) {

         if (global_sacado_openmp_memory_pool != 0) {

           m = static_cast<T*>(global_sacado_openmp_memory_pool->allocate(sz*sizeof(T)));

           if (m == 0)

             Kokkos::abort("Allocation failed.  Kokkos memory pool is out of memory");

         }

         else

           m = static_cast<T* >(operator new(sz*sizeof(T)));

       }

 #else

       T* m = static_cast<T* >(operator new(sz*sizeof(T)));

 #if defined(HAVE_SACADO_KOKKOSCORE)

       // With Kokkos available, a null result aborts instead of being returned.
       if (m == 0)

         Kokkos::abort("Allocation failed.");

 #endif

 #endif

       return m;

     }


     // Release storage previously obtained from ds_alloc<T>.  The preprocessor
     // conditions mirror those of ds_alloc, so each body undoes the matching
     // allocation:
     //
     //  1. Host + UVM: cudaFree (only when sz > 0).
     //  2. Device + memory pool: lane 0 returns the whole warp's block.
     //  3. OpenMP pool: permanently disabled by the leading `0 &&`.
     //  4. Otherwise: operator delete (only when sz > 0).
     //
     // m  : pointer returned by ds_alloc.
     // sz : element count that was passed to ds_alloc.
     template <typename T>

     KOKKOS_INLINE_FUNCTION

     static void ds_free(T* m, int sz) {

 #if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )

       if (sz > 0)

         CUDA_SAFE_CALL( cudaFree(m) );

 #elif defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)

       // Warp-collective path: recompute the warp-wide total so lane 0 can
       // give back the block it allocated.  Lane 0's m is the block base,
       // because its prefix-sum offset in ds_alloc is zero.
       const int total_sz = warpReduce(sz);

       const int lane = warpLane();

       if (total_sz > 0 && lane == 0) {

         global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));

       }

 #elif 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)

       // Disabled branch: return to the OpenMP pool when it exists, otherwise
       // use operator delete.
       if (sz > 0) {

         if (global_sacado_openmp_memory_pool != 0)

           global_sacado_openmp_memory_pool->deallocate((void*) m, sz*sizeof(T));

         else

           operator delete((void*) m);

       }

 #else

       if (sz > 0)

         operator delete((void*) m);

 #endif

     }


   }


   //! Dynamic array allocation class that works for any type.
   /*!
    * Storage is obtained from Impl::ds_alloc and returned through
    * Impl::ds_free.  Elements are created with placement new and destroyed
    * with explicit destructor calls, so T may have non-trivial constructors
    * and destructors.
    */
   template <typename T, bool isScalar = IsScalarType<T>::value>
   struct ds_array {

     //! Get memory for new array of length \c sz; every entry is
     //! value-initialised with \c T().  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get(int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
       for (int k = 0; k < sz; ++k)
         new (base + k) T();
       return base;
     }

     //! Get memory for new array of length \c sz and fill with zeros
     //! (each entry is constructed as \c T(0.0)).  Returns NULL when
     //! \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
       for (int k = 0; k < sz; ++k)
         new (base + k) T(0.0);
       return base;
     }

     //! Get memory for new array of length \c sz and fill with entries from
     //! \c src (copy-constructed one by one).  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(const T* src, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
       for (int k = 0; k < sz; ++k)
         new (base + k) T(src[k]);
       return base;
     }

     //! Get memory for new array of length \c sz and fill with entries from
     //! \c src, reading every \c stride-th element.  Returns NULL when
     //! \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* strided_get_and_fill(const T* src, int stride, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
       const T* from = src;
       for (int k = 0; k < sz; ++k, from += stride)
         new (base + k) T(*from);
       return base;
     }

     //! Copy array from \c src to \c dest of length \c sz (element-wise
     //! assignment).
     KOKKOS_INLINE_FUNCTION
     static void copy(const T* src, T*  dest, int sz) {
       for (int k = 0; k < sz; ++k)
         dest[k] = src[k];
     }

     //! Copy \c sz entries from \c src to \c dest, stepping the two pointers
     //! by \c src_stride and \c dest_stride respectively.
     KOKKOS_INLINE_FUNCTION
     static void strided_copy(const T* src, int src_stride,
                              T* dest, int dest_stride, int sz) {
       const T* from = src;
       T* to = dest;
       for (int k = 0; k < sz; ++k, from += src_stride, to += dest_stride)
         *to = *from;
     }

     //! Zero out array \c dest of length \c sz (assigns \c T(0.) to each
     //! entry).
     KOKKOS_INLINE_FUNCTION
     static void zero(T* dest, int sz) {
       for (int k = 0; k < sz; ++k)
         dest[k] = T(0.);
     }

     //! Zero out \c sz entries of \c dest, \c stride elements apart.
     KOKKOS_INLINE_FUNCTION
     static void strided_zero(T* dest, int stride, int sz) {
       T* to = dest;
       for (int k = 0; k < sz; ++k, to += stride)
         *to = T(0.);
     }

     //! Destroy array elements and release memory.
     KOKKOS_INLINE_FUNCTION
     static void destroy_and_release(T* m, int sz) {
       T* const last = m + sz;
       T* cur = m;
       while (cur != last) {
         cur->~T();
         ++cur;
       }
       Impl::ds_free(m, sz);
     }
   };


 #if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)


   namespace Impl {


     // Allocate storage for sz objects of type T (hierarchical-parallelism
     // variant, device code only).
     //
     // When blockDim.x == 32, only the thread with threadIdx.x == 0 allocates
     // (from the Kokkos memory pool if SACADO_KOKKOS_USE_MEMORY_POOL is
     // defined, otherwise with operator new) and the resulting pointer is
     // broadcast with warpBcast, so every thread along x returns the same
     // pointer.  For any other blockDim.x each thread allocates its own
     // array with operator new.  Returns 0 when nothing was allocated.
     template <typename T>

     KOKKOS_INLINE_FUNCTION

     static T* ds_strided_alloc(const int sz) {

       T* m = 0;

       // Only do strided memory allocations when we are doing hierarchical

       // parallelism with a vector dimension of 32.  The limitation on the

       // memory pool allowing only a single thread in a warp to allocate

       // makes it too difficult to do otherwise.

       if (blockDim.x == 32) {

         //const int lane = warpLane();

         // With blockDim.x == 32, threadIdx.x is used directly as the lane id.
         const int lane = threadIdx.x;

         if (sz > 0 && lane == 0) {

 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)

           m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(sz*sizeof(T)));

           if (m == 0)

             Kokkos::abort("Allocation failed.  Kokkos memory pool is out of memory");

 #else

           m = static_cast<T* >(operator new(sz*sizeof(T)));

 #if defined(HAVE_SACADO_KOKKOSCORE)

           if (m == 0)

             Kokkos::abort("Allocation failed.");

 #endif

 #endif

         }

         // Share lane 0's pointer with the other lanes.
         m = warpBcast(m,0,blockDim.x);

       }

       else {

         // Fallback: independent per-thread allocation.
         if (sz > 0) {

           m = static_cast<T* >(operator new(sz*sizeof(T)));

 #if defined(HAVE_SACADO_KOKKOSCORE)

           if (m == 0)

             Kokkos::abort("Allocation failed.");

 #endif

         }

       }


       return m;

     }


     // Release storage obtained from ds_strided_alloc<T> (hierarchical
     // variant).  Mirrors the allocation logic: with blockDim.x == 32 only
     // the thread with threadIdx.x == 0 frees the shared array (through the
     // memory pool or operator delete, matching the allocation path);
     // otherwise every thread frees its own array.  Nothing is done when
     // sz <= 0.
     template <typename T>

     KOKKOS_INLINE_FUNCTION

     static void ds_strided_free(T* m, int sz) {

       if (blockDim.x == 32) {

         // const int lane = warpLane();

         const int lane = threadIdx.x;

         if (sz > 0 && lane == 0) {

 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)

           global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, sz*sizeof(T));

 #else

           operator delete((void*) m);

 #endif

         }

       }

       else {

         if (sz > 0)

           operator delete((void*) m);

       }


     }


   }


   //! Scalar specialisation of ds_array for hierarchical CUDA parallelism.
   /*!
    * Storage comes from Impl::ds_strided_alloc / Impl::ds_strided_free.  No
    * constructors or destructors are run.  The fill, copy and zero loops are
    * partitioned over threadIdx.x: each thread starts at entry threadIdx.x
    * and advances by blockDim.x.
    */
   template <typename T>
   struct ds_array<T,true> {

     //! Get memory for new array of length \c sz (contents are not
     //! initialised).  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get(int sz) {
       if (sz <= 0)
         return NULL;
       return Impl::ds_strided_alloc<T>(sz);
     }

     //! Get memory for new array of length \c sz and fill with zeros.
     //! Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_strided_alloc<T>(sz);
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         base[k] = 0.0;
       return base;
     }

     //! Get memory for new array of length \c sz and fill with entries from
     //! \c src.  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(const T* src, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_strided_alloc<T>(sz);
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         base[k] = src[k];
       return base;
     }

     //! Get memory for new array of length \c sz and fill entry \c i with
     //! \c src[i*stride].  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* strided_get_and_fill(const T* src, int stride, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_strided_alloc<T>(sz);
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         base[k] = src[k*stride];
       return base;
     }

     //! Copy array from \c src to \c dest of length \c sz.
     KOKKOS_INLINE_FUNCTION
     static void copy(const T* src, T* dest, int sz) {
       if (sz <= 0)
         return;
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         dest[k] = src[k];
     }

     //! Copy \c sz entries: \c dest[i*dest_stride] = \c src[i*src_stride].
     KOKKOS_INLINE_FUNCTION
     static void strided_copy(const T* src, int src_stride,
                              T* dest, int dest_stride, int sz) {
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         dest[k*dest_stride] = src[k*src_stride];
     }

     //! Zero out array \c dest of length \c sz.
     KOKKOS_INLINE_FUNCTION
     static void zero(T* dest, int sz) {
       if (sz <= 0)
         return;
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         dest[k] = T(0.);
     }

     //! Zero out \c sz entries of \c dest, \c stride elements apart.
     KOKKOS_INLINE_FUNCTION
     static void strided_zero(T* dest, int stride, int sz) {
       const unsigned int step = blockDim.x;
       for (int k = threadIdx.x; k < sz; k += step)
         dest[k*stride] = T(0.);
     }

     //! Release memory (no destructors are run for scalar types).
     KOKKOS_INLINE_FUNCTION
     static void destroy_and_release(T* m, int sz) {
       Impl::ds_strided_free(m, sz);
     }
   };


 #elif defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD_STRIDED) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)


   namespace Impl {


     // Allocate storage for sz objects of type T (strided-layout variant,
     // device code only).
     //
     // When blockDim.x == 32, the sizes requested by the blockDim.x threads
     // are summed with warpReduce, the thread with threadIdx.x == 0 performs
     // one allocation of that total (memory pool if
     // SACADO_KOKKOS_USE_MEMORY_POOL is defined, else operator new), the base
     // pointer is broadcast, and each thread returns base + threadIdx.x.  The
     // ds_array<T,true> specialisation that follows then addresses its i-th
     // entry as m[i*blockDim.x].  For any other blockDim.x each thread
     // allocates sz contiguous elements with operator new.
     template <typename T>

     KOKKOS_INLINE_FUNCTION

     static T* ds_strided_alloc(const int sz) {

       T* m = 0;

       // Only do strided memory allocations when we are doing hierarchical

       // parallelism with a vector dimension of 32.  The limitation on the

       // memory pool allowing only a single thread in a warp to allocate

       // makes it too difficult to do otherwise.

       if (blockDim.x == 32) {

         // const int total_sz = warpReduce(sz);

         // const int lane = warpLane();

         // Total number of elements requested across the blockDim.x threads.
         const int total_sz = warpReduce(sz, blockDim.x);

         const int lane = threadIdx.x;

         if (total_sz > 0 && lane == 0) {

 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)

           m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));

           if (m == 0)

             Kokkos::abort("Allocation failed.  Kokkos memory pool is out of memory");

 #else

           m = static_cast<T* >(operator new(total_sz*sizeof(T)));

 #if defined(HAVE_SACADO_KOKKOSCORE)

           if (m == 0)

             Kokkos::abort("Allocation failed.");

 #endif

 #endif

         }

         // Every thread receives the base pointer ...
         m = warpBcast(m,0,blockDim.x);

         // ... and starts at its own lane offset within the shared block.
         m += lane;

       }

       else {

         // Fallback: independent per-thread allocation.
         if (sz > 0) {

           m = static_cast<T* >(operator new(sz*sizeof(T)));

 #if defined(HAVE_SACADO_KOKKOSCORE)

           if (m == 0)

             Kokkos::abort("Allocation failed.");

 #endif

         }

       }


       return m;

     }


     // Release storage obtained from ds_strided_alloc<T> (strided-layout
     // variant).  With blockDim.x == 32 the warp-wide total size is
     // recomputed and the thread with threadIdx.x == 0 -- whose pointer is the
     // block base, since its lane offset is zero -- returns the whole block
     // (memory pool or operator delete, matching the allocation path).
     // Otherwise every thread deletes its own array when sz > 0.
     template <typename T>

     KOKKOS_INLINE_FUNCTION

     static void ds_strided_free(T* m, int sz) {

       if (blockDim.x == 32) {

         // const int total_sz = warpReduce(sz);

         // const int lane = warpLane();

         const int total_sz = warpReduce(sz, blockDim.x);

         const int lane = threadIdx.x;

         if (total_sz > 0 && lane == 0) {

 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)

           global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));

 #else

           operator delete((void*) m);

 #endif

         }

       }

       else {

         if (sz > 0)

           operator delete((void*) m);

       }

     }

   }


   //! Scalar specialisation of ds_array for the strided hierarchical CUDA
   //! layout.
   /*!
    * Storage comes from Impl::ds_strided_alloc / Impl::ds_strided_free.  No
    * constructors or destructors are run.  In get_and_fill(), copy() and
    * zero() the i-th logical entry of an array is addressed as
    * \c ptr[i*blockDim.x]; the explicitly strided functions use the strides
    * supplied by the caller instead.
    */
   template <typename T>
   struct ds_array<T,true> {

     //! Get memory for new array of length \c sz (contents are not
     //! initialised).  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get(int sz) {
       if (sz <= 0)
         return NULL;
       return Impl::ds_strided_alloc<T>(sz);
     }

     //! Get memory for new array of length \c sz and fill with zeros.
     //! Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_strided_alloc<T>(sz);
       const unsigned int width = blockDim.x;
       for (int k = 0; k < sz; ++k)
         base[k*width] = 0.0;
       return base;
     }

     //! Get memory for new array of length \c sz and fill with entries from
     //! \c src; both arrays are indexed with a stride of blockDim.x.
     //! Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(const T* src, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_strided_alloc<T>(sz);
       const unsigned int width = blockDim.x;
       for (int k = 0; k < sz; ++k)
         base[k*width] = src[k*width];
       return base;
     }

     //! Get memory for new array of length \c sz and fill entry \c i (stored
     //! at offset \c i*blockDim.x) with \c src[i*stride].  Returns NULL when
     //! \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* strided_get_and_fill(const T* src, int stride, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_strided_alloc<T>(sz);
       const unsigned int width = blockDim.x;
       for (int k = 0; k < sz; ++k)
         base[k*width] = src[k*stride];
       return base;
     }

     //! Copy array from \c src to \c dest of length \c sz; both are indexed
     //! with a stride of blockDim.x.
     KOKKOS_INLINE_FUNCTION
     static void copy(const T* src, T* dest, int sz) {
       if (sz <= 0)
         return;
       const unsigned int width = blockDim.x;
       for (int k = 0; k < sz; ++k)
         dest[k*width] = src[k*width];
     }

     //! Copy \c sz entries from \c src to \c dest, stepping the two pointers
     //! by \c src_stride and \c dest_stride respectively.
     KOKKOS_INLINE_FUNCTION
     static void strided_copy(const T* src, int src_stride,
                              T* dest, int dest_stride, int sz) {
       const T* from = src;
       T* to = dest;
       for (int k = 0; k < sz; ++k, from += src_stride, to += dest_stride)
         *to = *from;
     }

     //! Zero out array \c dest of length \c sz (stride blockDim.x).
     KOKKOS_INLINE_FUNCTION
     static void zero(T* dest, int sz) {
       if (sz <= 0)
         return;
       const unsigned int width = blockDim.x;
       for (int k = 0; k < sz; ++k)
         dest[k*width] = T(0.);
     }

     //! Zero out \c sz entries of \c dest, \c stride elements apart.
     KOKKOS_INLINE_FUNCTION
     static void strided_zero(T* dest, int stride, int sz) {
       T* to = dest;
       for (int k = 0; k < sz; ++k, to += stride)
         *to = T(0.);
     }

     //! Release memory (no destructors are run for scalar types).
     KOKKOS_INLINE_FUNCTION
     static void destroy_and_release(T* m, int sz) {
       Impl::ds_strided_free(m, sz);
     }
   };


 #else


   //! Scalar specialisation of ds_array (default, non-hierarchical layout).
   /*!
    * Storage comes from Impl::ds_alloc / Impl::ds_free.  No constructors or
    * destructors are run.  When the translation unit is not compiled with
    * __CUDACC__ defined, bulk zeroing and copying go through std::memset and
    * std::memcpy; otherwise explicit element loops are used.
    */
   template <typename T>
   struct ds_array<T,true> {

     //! Get memory for new array of length \c sz (contents are not
     //! initialised).  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get(int sz) {
       if (sz <= 0)
         return NULL;
       return Impl::ds_alloc<T>(sz);
     }

     //! Get memory for new array of length \c sz and fill with zeros.
     //! Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
 #ifdef __CUDACC__
       for (int k = 0; k < sz; ++k)
         base[k] = 0.0;
 #else
       std::memset(base,0,sz*sizeof(T));
 #endif
       return base;
     }

     //! Get memory for new array of length \c sz and fill with entries from
     //! \c src.  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* get_and_fill(const T* src, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
       for (int k = 0; k < sz; ++k)
         base[k] = src[k];
       return base;
     }

     //! Get memory for new array of length \c sz and fill entry \c i with
     //! \c src[i*stride].  Returns NULL when \c sz <= 0.
     KOKKOS_INLINE_FUNCTION
     static T* strided_get_and_fill(const T* src, int stride, int sz) {
       if (sz <= 0)
         return NULL;
       T* const base = Impl::ds_alloc<T>(sz);
       for (int k = 0; k < sz; ++k)
         base[k] = src[k*stride];
       return base;
     }

     //! Copy array from \c src to \c dest of length \c sz.  Does nothing when
     //! \c sz <= 0 or either pointer is NULL.
     KOKKOS_INLINE_FUNCTION
     static void copy(const T* src, T* dest, int sz) {
       if (sz <= 0 || dest == NULL || src == NULL)
         return;
 #ifdef __CUDACC__
       for (int k = 0; k < sz; ++k)
         dest[k] = src[k];
 #else
       std::memcpy(dest,src,sz*sizeof(T));
 #endif
     }

     //! Copy \c sz entries from \c src to \c dest, stepping the two pointers
     //! by \c src_stride and \c dest_stride respectively.
     KOKKOS_INLINE_FUNCTION
     static void strided_copy(const T* src, int src_stride,
                              T* dest, int dest_stride, int sz) {
       const T* from = src;
       T* to = dest;
       for (int k = 0; k < sz; ++k, from += src_stride, to += dest_stride)
         *to = *from;
     }

     //! Zero out array \c dest of length \c sz.  Does nothing when \c sz <= 0
     //! or \c dest is NULL.
     KOKKOS_INLINE_FUNCTION
     static void zero(T* dest, int sz) {
       if (sz <= 0 || dest == NULL)
         return;
 #ifdef __CUDACC__
       for (int k = 0; k < sz; ++k)
         dest[k] = T(0.);
 #else
       std::memset(dest,0,sz*sizeof(T));
 #endif
     }

     //! Zero out \c sz entries of \c dest, \c stride elements apart.
     KOKKOS_INLINE_FUNCTION
     static void strided_zero(T* dest, int stride, int sz) {
       T* to = dest;
       for (int k = 0; k < sz; ++k, to += stride)
         *to = T(0.);
     }

     //! Release memory (no destructors are run for scalar types).
     KOKKOS_INLINE_FUNCTION
     static void destroy_and_release(T* m, int sz) {
       Impl::ds_free(m, sz);
     }
   };


 #endif


 } // namespace Sacado


 #endif // SACADO_DYNAMICARRAYTRAITS_HPP

Sacado::ds_array::strided_get_and_fill
static KOKKOS_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
Definition: Sacado_DynamicArrayTraits.hpp:308

Sacado::ds_array< T, true >::destroy_and_release
static KOKKOS_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
Definition: Sacado_DynamicArrayTraits.hpp:812

Sacado::ds_array::copy
static KOKKOS_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:323

f
void f()

Sacado::createGlobalMemoryPool
void createGlobalMemoryPool(const ExecSpace &space, const size_t min_total_alloc_size, const uint32_t min_block_alloc_size, const uint32_t max_block_alloc_size, const uint32_t min_superblock_size)
Definition: Sacado_DynamicArrayTraits.hpp:51

Sacado::ds_array::strided_zero
static KOKKOS_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:348

true
expr true
Definition: Sacado_ELRFad_Ops.hpp:182

Sacado::ds_array::destroy_and_release
static KOKKOS_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
Definition: Sacado_DynamicArrayTraits.hpp:357

Sacado::ds_array< T, true >::strided_copy
static KOKKOS_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:780

KOKKOS_INLINE_FUNCTION
#define KOKKOS_INLINE_FUNCTION
Definition: Sacado_ConfigDefs.h:94

T
#define T
Definition: Sacado_rad.hpp:573

Sacado::Impl::ds_alloc
static KOKKOS_INLINE_FUNCTION T * ds_alloc(const int sz)
Definition: Sacado_DynamicArrayTraits.hpp:192

Sacado::ds_array::get_and_fill
static KOKKOS_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
Definition: Sacado_DynamicArrayTraits.hpp:276

Sacado::ds_array::zero
static KOKKOS_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:341

Sacado::ds_array::strided_copy
static KOKKOS_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:330

Sacado::ds_array< T, true >::copy
static KOKKOS_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:768

Sacado::ds_array< T, true >::strided_get_and_fill
static KOKKOS_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
Definition: Sacado_DynamicArrayTraits.hpp:756

Sacado::ds_array< T, true >::strided_zero
static KOKKOS_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:803

Sacado::ds_array< T, true >::get_and_fill
static KOKKOS_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
Definition: Sacado_DynamicArrayTraits.hpp:741

Sacado::Impl::ds_free
static KOKKOS_INLINE_FUNCTION void ds_free(T *m, int sz)
Definition: Sacado_DynamicArrayTraits.hpp:230

Sacado::ds_array< T, true >::get_and_fill
static KOKKOS_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
Definition: Sacado_DynamicArrayTraits.hpp:722

Sacado::destroyGlobalMemoryPool
void destroyGlobalMemoryPool(const ExecSpace &space)
Definition: Sacado_DynamicArrayTraits.hpp:59

Sacado_Traits.hpp

Sacado::ds_array::get_and_fill
static KOKKOS_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
Definition: Sacado_DynamicArrayTraits.hpp:292

Sacado::ds_array
Dynamic array allocation class that works for any type.
Definition: Sacado_DynamicArrayTraits.hpp:259

Sacado::ds_array< T, true >::zero
static KOKKOS_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
Definition: Sacado_DynamicArrayTraits.hpp:791