Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Sacado_DynamicArrayTraits.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 
30 #ifndef SACADO_DYNAMICARRAYTRAITS_HPP
31 #define SACADO_DYNAMICARRAYTRAITS_HPP
32 
33 #include <new>
34 #include <cstring>
35 #include <stdint.h>
36 
37 #include "Sacado_Traits.hpp"
38 #if defined(HAVE_SACADO_KOKKOS)
39 #include "Kokkos_Core.hpp"
40 #endif
41 
42 namespace Sacado {
43 
44  template <typename ExecSpace>
45  void createGlobalMemoryPool(const ExecSpace& space
46  , const size_t min_total_alloc_size
47  , const uint32_t min_block_alloc_size
48  , const uint32_t max_block_alloc_size
49  , const uint32_t min_superblock_size
50  ) {}
51 
52  template <typename ExecSpace>
53  void destroyGlobalMemoryPool(const ExecSpace& space) {}
54 
55 #if 0 && defined(HAVE_SACADO_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
56  namespace Impl {
57  extern const Kokkos::MemoryPool<Kokkos::OpenMP>* global_sacado_openmp_memory_pool;
58  }
59 
60  inline void
61  createGlobalMemoryPool(const ExecSpace& space
62  , const size_t min_total_alloc_size
63  , const uint32_t min_block_alloc_size
64  , const uint32_t max_block_alloc_size
65  , const uint32_t min_superblock_size
66  )
67  {
68  typedef Kokkos::MemoryPool<Kokkos::OpenMP> pool_t;
69  Impl::global_sacado_openmp_memory_pool =
70  new pool_t(typename Kokkos::OpenMP::memory_space(),
71  min_total_alloc_size,
72  min_block_alloc_size,
73  max_block_alloc_size,
74  min_superblock_size);
75  }
76 
77  inline void destroyGlobalMemoryPool(const Kokkos::OpenMP& space)
78  {
79  delete Impl::global_sacado_openmp_memory_pool;
80  }
81 #endif
82 
83 #if defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
84 
85  namespace Impl {
86 
87  extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_host;
88  extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_device;
89 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
90  extern __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device;
91 #else
92  __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device = 0;
93 #endif
94 
95  struct SetMemoryPoolPtr {
96  Kokkos::MemoryPool<Kokkos::Cuda>* pool_device;
97  __device__ inline void operator()(int) const {
98  global_sacado_cuda_memory_pool_on_device = pool_device;
99  };
100  };
101 
102  }
103 
104  // For some reason we get memory errors if these functions are defined in
105  // Sacado_DynamicArrayTraits.cpp
106  inline void
107  createGlobalMemoryPool(const Kokkos::Cuda& space
108  , const size_t min_total_alloc_size
109  , const uint32_t min_block_alloc_size
110  , const uint32_t max_block_alloc_size
111  , const uint32_t min_superblock_size
112  )
113  {
114  typedef Kokkos::MemoryPool<Kokkos::Cuda> pool_t;
115  pool_t* pool =
116  new pool_t(typename Kokkos::Cuda::memory_space(),
117  min_total_alloc_size,
118  min_block_alloc_size,
119  max_block_alloc_size,
120  min_superblock_size);
121  Impl::SetMemoryPoolPtr f;
122  KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc( &f.pool_device, sizeof(pool_t) ) );
123  KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMemcpy( f.pool_device, pool,
124  sizeof(pool_t),
125  cudaMemcpyHostToDevice ) );
126  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,1),f);
127  Impl::global_sacado_cuda_memory_pool_host = pool;
128  Impl::global_sacado_cuda_memory_pool_device = f.pool_device;
129  }
130 
131  inline void destroyGlobalMemoryPool(const Kokkos::Cuda& space)
132  {
133  KOKKOS_IMPL_CUDA_SAFE_CALL( cudaFree( (void*) Impl::global_sacado_cuda_memory_pool_device ) );
134  delete Impl::global_sacado_cuda_memory_pool_host;
135  }
136 
137 #endif
138 
139 #if !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
140 
141  namespace Impl {
142 
143  // Compute warp lane/thread index
144  __device__ inline int warpLane(const int warp_size = 32) {
145  return ( threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x ) % warp_size;
146  }
147 
148  // Reduce y across the warp and broadcast to all lanes
149  template <typename T>
150  __device__ inline T warpReduce(T y, const int warp_size = 32) {
151  for (int i=1; i<warp_size; i*=2) {
152  y += Kokkos::shfl_down(y, i, warp_size);
153  }
154  y = Kokkos::shfl(y, 0, warp_size);
155  return y;
156  }
157 
158  // Non-inclusive plus-scan up the warp, replacing the first entry with 0
159  template <typename T>
160  __device__ inline int warpScan(T y, const int warp_size = 32) {
161  const int lane = warpLane();
162  y = Kokkos::shfl_up(y, 1, warp_size);
163  if (lane == 0)
164  y = T(0);
165  for (int i=1; i<warp_size; i*=2) {
166  T t = Kokkos::shfl_up(y, i, warp_size);
167  if (lane > i)
168  y += t;
169  }
170  return y;
171  }
172 
173  template <typename T>
174  __device__ inline T warpBcast(T y, int id, const int warp_size = 32) {
175  return Kokkos::shfl(y, id, warp_size);
176  }
177 
178  }
179 
180 #endif
181 
182  namespace Impl {
183 
184  template <typename T>
186  static T* ds_alloc(const int sz) {
187 #if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )
188  T* m = 0;
189  if (sz > 0)
190  KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMallocManaged( (void**) &m, sz*sizeof(T), cudaMemAttachGlobal ) );
191 #elif defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
192  // This code assumes all threads enter ds_alloc, even those with sz == 0
193  T* m = 0;
194  const int total_sz = warpReduce(sz);
195  const int lane = warpLane();
196  if (total_sz > 0 && lane == 0) {
197  m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));
198  if (m == 0)
199  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
200  }
201  m = warpBcast(m,0);
202  m += warpScan(sz);
203 #elif 0 && defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)
204  T* m = 0;
205  if (sz > 0) {
206  if (global_sacado_openmp_memory_pool != 0) {
207  m = static_cast<T*>(global_sacado_openmp_memory_pool->allocate(sz*sizeof(T)));
208  if (m == 0)
209  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
210  }
211  else
212  m = static_cast<T* >(operator new(sz*sizeof(T)));
213  }
214 #else
215  T* m = 0;
216  if (sz > 0) {
217  m = static_cast<T* >(operator new(sz*sizeof(T)));
218 #if defined(HAVE_SACADO_KOKKOS)
219  if (m == 0)
220  Kokkos::abort("Allocation failed.");
221 #endif
222  }
223 #endif
224  return m;
225  }
226 
227  template <typename T>
229  static void ds_free(T* m, int sz) {
230 #if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )
231  if (sz > 0)
232  KOKKOS_IMPL_CUDA_SAFE_CALL( cudaFree(m) );
233 #elif defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
234  const int total_sz = warpReduce(sz);
235  const int lane = warpLane();
236  if (total_sz > 0 && lane == 0) {
237  global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));
238  }
239 #elif 0 && defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)
240  if (sz > 0) {
241  if (global_sacado_openmp_memory_pool != 0)
242  global_sacado_openmp_memory_pool->deallocate((void*) m, sz*sizeof(T));
243  else
244  operator delete((void*) m);
245  }
246 #else
247  if (sz > 0)
248  operator delete((void*) m);
249 #endif
250  }
251 
252  }
253 
258  struct ds_array {
259 
262  static T* get(int sz) {
263  T* m = Impl::ds_alloc<T>(sz);
264  T* p = m;
265  for (int i=0; i<sz; ++i)
266  new (p++) T();
267  return m;
268  }
269 
272  static T* get_and_fill(int sz) {
273  T* m = Impl::ds_alloc<T>(sz);
274  T* p = m;
275  for (int i=0; i<sz; ++i)
276  new (p++) T(0.0);
277  return m;
278  }
279 
285  static T* get_and_fill(const T* src, int sz) {
286  T* m = Impl::ds_alloc<T>(sz);
287  T* p = m;
288  for (int i=0; i<sz; ++i)
289  new (p++) T(*(src++));
290  return m;
291  }
292 
298  static T* strided_get_and_fill(const T* src, int stride, int sz) {
299  T* m = Impl::ds_alloc<T>(sz);
300  T* p = m;
301  for (int i=0; i<sz; ++i) {
302  new (p++) T(*(src));
303  src += stride;
304  }
305  return m;
306  }
307 
310  static void copy(const T* src, T* dest, int sz) {
311  for (int i=0; i<sz; ++i)
312  *(dest++) = *(src++);
313  }
314 
317  static void strided_copy(const T* src, int src_stride,
318  T* dest, int dest_stride, int sz) {
319  for (int i=0; i<sz; ++i) {
320  *(dest) = *(src);
321  dest += dest_stride;
322  src += src_stride;
323  }
324  }
325 
328  static void zero(T* dest, int sz) {
329  for (int i=0; i<sz; ++i)
330  *(dest++) = T(0.);
331  }
332 
335  static void strided_zero(T* dest, int stride, int sz) {
336  for (int i=0; i<sz; ++i) {
337  *(dest) = T(0.);
338  dest += stride;
339  }
340  }
341 
344  static void destroy_and_release(T* m, int sz) {
345  T* e = m+sz;
346  for (T* b = m; b!=e; b++)
347  b->~T();
348  Impl::ds_free(m, sz);
349  }
350  };
351 
352 #if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
353 
354  namespace Impl {
355 
356  template <typename T>
358  static T* ds_strided_alloc(const int sz) {
359  T* m = 0;
360  // Only do strided memory allocations when we are doing hierarchical
361  // parallelism with a vector dimension of 32. The limitation on the
362  // memory pool allowing only a single thread in a warp to allocate
363  // makes it too difficult to do otherwise.
364  if (blockDim.x == 32) {
365  //const int lane = warpLane();
366  const int lane = threadIdx.x;
367  if (sz > 0 && lane == 0) {
368 #if defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
369  m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(sz*sizeof(T)));
370  if (m == 0)
371  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
372 #else
373  m = static_cast<T* >(operator new(sz*sizeof(T)));
374 #if defined(HAVE_SACADO_KOKKOS)
375  if (m == 0)
376  Kokkos::abort("Allocation failed.");
377 #endif
378 #endif
379  }
380  m = warpBcast(m,0,blockDim.x);
381  }
382  else {
383  if (sz > 0) {
384  m = static_cast<T* >(operator new(sz*sizeof(T)));
385 #if defined(HAVE_SACADO_KOKKOS)
386  if (m == 0)
387  Kokkos::abort("Allocation failed.");
388 #endif
389  }
390  }
391 
392  return m;
393  }
394 
395  template <typename T>
397  static void ds_strided_free(T* m, int sz) {
398  if (blockDim.x == 32) {
399  // const int lane = warpLane();
400  const int lane = threadIdx.x;
401  if (sz > 0 && lane == 0) {
402 #if defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
403  global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, sz*sizeof(T));
404 #else
405  operator delete((void*) m);
406 #endif
407  }
408  }
409  else {
410  if (sz > 0)
411  operator delete((void*) m);
412  }
413 
414  }
415 
416  }
417 
422  template <typename T>
423  struct ds_array<T,true> {
424 
427  static T* get(int sz) {
428  T* m = Impl::ds_strided_alloc<T>(sz);
429  return m;
430  }
431 
434  static T* get_and_fill(int sz) {
435  T* m = Impl::ds_strided_alloc<T>(sz);
436  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
437  m[i] = 0.0;
438  return m;
439  }
440 
446  static T* get_and_fill(const T* src, int sz) {
447  T* m = Impl::ds_strided_alloc<T>(sz);
448  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
449  m[i] = src[i];
450  return m;
451  }
452 
458  static T* strided_get_and_fill(const T* src, int stride, int sz) {
459  T* m = Impl::ds_strided_alloc<T>(sz);
460  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
461  m[i] = src[i*stride];
462  return m;
463  }
464 
467  static void copy(const T* src, T* dest, int sz) {
468  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
469  dest[i] = src[i];
470  }
471 
474  static void strided_copy(const T* src, int src_stride,
475  T* dest, int dest_stride, int sz) {
476  for (int i=threadIdx.x; i<sz; i+=blockDim.x) {
477  dest[i*dest_stride] = src[i*src_stride];
478  }
479  }
480 
483  static void zero(T* dest, int sz) {
484  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
485  dest[i] = T(0.);
486  }
487 
490  static void strided_zero(T* dest, int stride, int sz) {
491  for (int i=threadIdx.x; i<sz; i+=blockDim.x) {
492  dest[i*stride] = T(0.);
493  }
494  }
495 
498  static void destroy_and_release(T* m, int sz) {
499  Impl::ds_strided_free(m, sz);
500  }
501  };
502 
503 #elif defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD_STRIDED) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
504 
505  namespace Impl {
506 
507  template <typename T>
509  static T* ds_strided_alloc(const int sz) {
510  T* m = 0;
511  // Only do strided memory allocations when we are doing hierarchical
512  // parallelism with a vector dimension of 32. The limitation on the
513  // memory pool allowing only a single thread in a warp to allocate
514  // makes it too difficult to do otherwise.
515  if (blockDim.x == 32) {
516  // const int total_sz = warpReduce(sz);
517  // const int lane = warpLane();
518  const int total_sz = warpReduce(sz, blockDim.x);
519  const int lane = threadIdx.x;
520  if (total_sz > 0 && lane == 0) {
521 #if defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
522  m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));
523  if (m == 0)
524  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
525 #else
526  m = static_cast<T* >(operator new(total_sz*sizeof(T)));
527 #if defined(HAVE_SACADO_KOKKOS)
528  if (m == 0)
529  Kokkos::abort("Allocation failed.");
530 #endif
531 #endif
532  }
533  m = warpBcast(m,0,blockDim.x);
534  m += lane;
535  }
536  else {
537  if (sz > 0) {
538  m = static_cast<T* >(operator new(sz*sizeof(T)));
539 #if defined(HAVE_SACADO_KOKKOS)
540  if (m == 0)
541  Kokkos::abort("Allocation failed.");
542 #endif
543  }
544  }
545 
546  return m;
547  }
548 
549  template <typename T>
551  static void ds_strided_free(T* m, int sz) {
552  if (blockDim.x == 32) {
553  // const int total_sz = warpReduce(sz);
554  // const int lane = warpLane();
555  const int total_sz = warpReduce(sz, blockDim.x);
556  const int lane = threadIdx.x;
557  if (total_sz > 0 && lane == 0) {
558 #if defined(HAVE_SACADO_KOKKOS) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
559  global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));
560 #else
561  operator delete((void*) m);
562 #endif
563  }
564  }
565  else {
566  if (sz > 0)
567  operator delete((void*) m);
568  }
569  }
570  }
571 
576  template <typename T>
577  struct ds_array<T,true> {
578 
581  static T* get(int sz) {
582  T* m = Impl::ds_strided_alloc<T>(sz);
583  return m;
584  }
585 
588  static T* get_and_fill(int sz) {
589  T* m = Impl::ds_strided_alloc<T>(sz);
590  for (int i=0; i<sz; ++i)
591  m[i*blockDim.x] = 0.0;
592  return m;
593  }
594 
600  static T* get_and_fill(const T* src, int sz) {
601  T* m = Impl::ds_strided_alloc<T>(sz);
602  for (int i=0; i<sz; ++i)
603  m[i*blockDim.x] = src[i*blockDim.x];
604  return m;
605  }
606 
612  static T* strided_get_and_fill(const T* src, int stride, int sz) {
613  T* m = Impl::ds_strided_alloc<T>(sz);
614  for (int i=0; i<sz; ++i)
615  m[i*blockDim.x] = src[i*stride];
616  return m;
617  }
618 
621  static void copy(const T* src, T* dest, int sz) {
622  for (int i=0; i<sz; ++i)
623  dest[i*blockDim.x] = src[i*blockDim.x];
624  }
625 
628  static void strided_copy(const T* src, int src_stride,
629  T* dest, int dest_stride, int sz) {
630  for (int i=0; i<sz; ++i) {
631  *(dest) = *(src);
632  dest += dest_stride;
633  src += src_stride;
634  }
635  }
636 
639  static void zero(T* dest, int sz) {
640  for (int i=0; i<sz; ++i)
641  dest[i*blockDim.x] = T(0.);
642  }
643 
646  static void strided_zero(T* dest, int stride, int sz) {
647  for (int i=0; i<sz; ++i) {
648  *(dest) = T(0.);
649  dest += stride;
650  }
651  }
652 
655  static void destroy_and_release(T* m, int sz) {
656  Impl::ds_strided_free(m, sz);
657  }
658  };
659 
660 #else
661 
666  template <typename T>
667  struct ds_array<T,true> {
668 
671  static T* get(int sz) {
672  T* m = Impl::ds_alloc<T>(sz);
673  return m;
674  }
675 
678  static T* get_and_fill(int sz) {
679  T* m = Impl::ds_alloc<T>(sz);
680 #if defined(__CUDACC__ ) || defined(__HIPCC__ )
681  for (int i=0; i<sz; ++i)
682  m[i] = 0.0;
683 #else
684  if (sz > 0)
685  std::memset(m,0,sz*sizeof(T));
686 #endif
687  return m;
688  }
689 
695  static T* get_and_fill(const T* src, int sz) {
696  T* m = Impl::ds_alloc<T>(sz);
697  for (int i=0; i<sz; ++i)
698  m[i] = src[i];
699  return m;
700  }
701 
707  static T* strided_get_and_fill(const T* src, int stride, int sz) {
708  T* m = Impl::ds_alloc<T>(sz);
709  for (int i=0; i<sz; ++i)
710  m[i] = src[i*stride];
711  return m;
712  }
713 
716  static void copy(const T* src, T* dest, int sz) {
717  if (sz > 0 && dest != NULL && src != NULL)
718 #if defined( __CUDACC__) || defined(__HIPCC__ )
719  for (int i=0; i<sz; ++i)
720  dest[i] = src[i];
721 #else
722  std::memcpy(dest,src,sz*sizeof(T));
723 #endif
724  }
725 
728  static void strided_copy(const T* src, int src_stride,
729  T* dest, int dest_stride, int sz) {
730  for (int i=0; i<sz; ++i) {
731  *(dest) = *(src);
732  dest += dest_stride;
733  src += src_stride;
734  }
735  }
736 
739  static void zero(T* dest, int sz) {
740  if (sz > 0 && dest != NULL)
741 #if defined(__CUDACC__ ) || defined(__HIPCC__ )
742  for (int i=0; i<sz; ++i)
743  dest[i] = T(0.);
744 #else
745  std::memset(dest,0,sz*sizeof(T));
746 #endif
747  }
748 
751  static void strided_zero(T* dest, int stride, int sz) {
752  for (int i=0; i<sz; ++i) {
753  *(dest) = T(0.);
754  dest += stride;
755  }
756  }
757 
760  static void destroy_and_release(T* m, int sz) {
761  Impl::ds_free(m, sz);
762  }
763  };
764 
765 #endif
766 
767 } // namespace Sacado
768 
769 #endif // SACADO_DYNAMICARRAY_HPP
const char * p
static SACADO_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
void f()
static SACADO_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
void createGlobalMemoryPool(const ExecSpace &space, const size_t min_total_alloc_size, const uint32_t min_block_alloc_size, const uint32_t max_block_alloc_size, const uint32_t min_superblock_size)
static SACADO_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
static SACADO_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
static SACADO_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
expr true
static SACADO_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
#define T
Definition: Sacado_rad.hpp:573
static SACADO_INLINE_FUNCTION void ds_free(T *m, int sz)
static SACADO_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
int value
static SACADO_INLINE_FUNCTION T * ds_alloc(const int sz)
static SACADO_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
static SACADO_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
void destroyGlobalMemoryPool(const ExecSpace &space)
#define SACADO_INLINE_FUNCTION
static SACADO_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
static SACADO_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
Dynamic array allocation class that works for any type.
static SACADO_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
const double y