10 #if defined( __CUDA_ARCH__ )
14 template <
typename ordinal_t,
typename value_t>
15 class DynamicThreadedStorage<ordinal_t, value_t, Kokkos::Cuda> {
// Compile-time traits exposed by this storage class.
// is_static is false and static_size is 0; the constructors visible in this
// file take the size as an argument (sz_ is initialized from `sz`).
18 static const bool is_static =
false;
19 static const int static_size = 0;
// supports_reset is true.
// NOTE(review): presumably this advertises the reset members whose bodies are
// only partly visible in this listing -- confirm against the full header.
20 static const bool supports_reset =
true;
// Volatile-qualified aliases built on value_type (value_type itself is
// declared on a line not shown in this listing).
// volatile_reference: reference to a volatile value_type.
26 typedef volatile value_type& volatile_reference;
// const_volatile_reference: reference to a const volatile value_type.
28 typedef const volatile value_type& const_volatile_reference;
// const_volatile_pointer: pointer to a const volatile value_type.
32 typedef const volatile value_type* const_volatile_pointer;
36 template <
typename ord_t,
typename val_t = value_t ,
typename dev_t = Kokkos::Cuda >
38 typedef DynamicThreadedStorage<ord_t,val_t,dev_t> type;
45 sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
46 allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
52 sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
53 allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
58 DynamicThreadedStorage(
const ordinal_type& sz, pointer v,
bool owned) :
59 coeff_(v), sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_),
64 DynamicThreadedStorage(
const DynamicThreadedStorage& s) :
65 sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
66 allocate_coeff_array(coeff_, is_owned_, total_sz_);
68 coeff_[i] = s.coeff_[i];
73 DynamicThreadedStorage(
const volatile DynamicThreadedStorage& s) :
74 sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
75 allocate_coeff_array(coeff_, is_owned_, total_sz_);
77 coeff_[i] = s.coeff_[i];
82 ~DynamicThreadedStorage() {
83 destroy_coeff_array(coeff_, is_owned_, total_sz_);
88 DynamicThreadedStorage& operator=(
const DynamicThreadedStorage& s) {
91 destroy_coeff_array(coeff_, is_owned_, total_sz_);
94 total_sz_ = sz_*stride_;
95 allocate_coeff_array(coeff_, is_owned_, total_sz_);
97 coeff_[i] = s.coeff_[i];
101 coeff_[i] = s.coeff_[i];
109 DynamicThreadedStorage&
110 operator=(
const volatile DynamicThreadedStorage& s) {
113 destroy_coeff_array(coeff_, is_owned_, total_sz_);
116 total_sz_ = sz_*stride_;
117 allocate_coeff_array(coeff_, is_owned_, total_sz_);
119 coeff_[i] = s.coeff_[i];
123 coeff_[i] = s.coeff_[i];
131 volatile DynamicThreadedStorage&
132 operator=(
const DynamicThreadedStorage& s)
volatile {
135 destroy_coeff_array(coeff_, is_owned_, total_sz_);
138 total_sz_ = sz_*stride_;
139 allocate_coeff_array(coeff_, is_owned_, total_sz_);
141 coeff_[i] = s.coeff_[i];
145 coeff_[i] = s.coeff_[i];
153 volatile DynamicThreadedStorage&
154 operator=(
const volatile DynamicThreadedStorage& s)
volatile {
157 destroy_coeff_array(coeff_, is_owned_, total_sz_);
160 total_sz_ = sz_*stride_;
161 allocate_coeff_array(coeff_, is_owned_, total_sz_);
163 coeff_[i] = s.coeff_[i];
167 coeff_[i] = s.coeff_[i];
175 void init(const_reference v) {
182 void init(const_reference v)
volatile {
189 void init(const_pointer v,
const ordinal_type& sz = 0) {
199 void init(const_pointer v,
const ordinal_type& sz = 0)
volatile {
209 void load(pointer v) {
216 void load(pointer v)
volatile {
228 allocate_coeff_array(coeff_new, owned_new, total_sz_new);
230 if (total_sz_ > total_sz_new)
231 my_tsz = total_sz_new;
233 coeff_new[i] = coeff_[i];
234 destroy_coeff_array(coeff_, is_owned_, total_sz_);
237 total_sz_ = total_sz_new;
238 is_owned_ = owned_new;
249 allocate_coeff_array(coeff_new, owned_new, total_sz_new);
251 if (total_sz_ > total_sz_new)
252 my_tsz = total_sz_new;
254 coeff_new[i] = coeff_[i];
255 destroy_coeff_array(coeff_, is_owned_, total_sz_);
258 total_sz_ = total_sz_new;
259 is_owned_ = owned_new;
267 destroy_coeff_array(coeff_, is_owned_, total_sz_);
271 total_sz_ = sz_*stride_;
279 destroy_coeff_array(coeff_, is_owned_, total_sz_);
283 total_sz_ = sz_*stride_;
296 KOKKOS_INLINE_FUNCTION
297 const_reference operator[] (
const ordinal_type& i)
const {
298 return coeff_[i*stride_];
302 KOKKOS_INLINE_FUNCTION
303 const_volatile_reference operator[] (
const ordinal_type& i)
const volatile {
304 return coeff_[i*stride_];
308 KOKKOS_INLINE_FUNCTION
310 return coeff_[i*stride_];
314 KOKKOS_INLINE_FUNCTION
315 volatile_reference operator[] (
const ordinal_type& i)
volatile {
316 return coeff_[i*stride_];
// Mutable accessor: returns a reference to coeff_[i*stride_], i.e. the entry
// at index i scaled by the stored stride_ (stride_ is initialized from
// num_threads() in the constructors).
// NOTE(review): `i` is not declared in the lines visible here; presumably it
// is a compile-time index from a template header on an omitted line -- confirm.
320 KOKKOS_INLINE_FUNCTION
321 reference getCoeff() {
return coeff_[i*stride_]; }
// Overload for volatile objects: returns a volatile_reference to
// coeff_[i*stride_].
// NOTE(review): `i` is not declared in the lines visible here; presumably it
// is a compile-time index from a template header on an omitted line -- confirm.
324 KOKKOS_INLINE_FUNCTION
325 volatile_reference getCoeff()
volatile {
return coeff_[i*stride_]; }
// Overload for const objects: returns a const_reference to coeff_[i*stride_],
// so the entry cannot be modified through the result.
// NOTE(review): `i` is not declared in the lines visible here; presumably it
// is a compile-time index from a template header on an omitted line -- confirm.
328 KOKKOS_INLINE_FUNCTION
329 const_reference getCoeff()
const {
return coeff_[i*stride_]; }
// Overload for const volatile objects: returns a const_volatile_reference to
// coeff_[i*stride_].
// NOTE(review): `i` is not declared in the lines visible here; presumably it
// is a compile-time index from a template header on an omitted line -- confirm.
332 KOKKOS_INLINE_FUNCTION
333 const_volatile_reference getCoeff()
const volatile {
return coeff_[i*stride_]; }
// Raw-pointer accessor for const volatile objects: returns the stored
// coefficient pointer coeff_ as a const_volatile_pointer. No stride or
// per-thread offset is applied here; the member is returned as-is.
336 KOKKOS_INLINE_FUNCTION
337 const_volatile_pointer coeff()
const volatile {
return coeff_; }
// Raw-pointer accessor for const objects: returns the stored coefficient
// pointer coeff_ as a const_pointer. No stride or per-thread offset is
// applied here; the member is returned as-is.
340 KOKKOS_INLINE_FUNCTION
341 const_pointer coeff()
const {
return coeff_; }
// Raw-pointer accessor for volatile objects: returns the stored coefficient
// pointer coeff_ as a volatile_pointer. No stride or per-thread offset is
// applied here; the member is returned as-is.
344 KOKKOS_INLINE_FUNCTION
345 volatile_pointer coeff()
volatile {
return coeff_; }
// Raw-pointer accessor for non-const objects: returns the stored coefficient
// pointer coeff_, through which the caller may modify the entries. No stride
// or per-thread offset is applied here; the member is returned as-is.
348 KOKKOS_INLINE_FUNCTION
349 pointer coeff() {
return coeff_; }
356 return blockDim.x*blockDim.y*blockDim.z;
362 return blockDim.x*blockDim.y*blockDim.z;
368 return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
374 return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
379 void allocate_coeff_array(pointer& c,
bool& owned,
384 __shared__ pointer ptr;
387 ptr = ds::get_and_fill(total_size,x);
400 void allocate_coeff_array(pointer& c,
bool& owned,
405 __shared__ pointer ptr;
408 ptr = ds::get_and_fill(total_size,x);
421 void allocate_coeff_array(pointer& c,
bool& owned,
426 __shared__ pointer ptr;
429 ptr = ds::get_and_fill(x, total_size);
442 void destroy_coeff_array(pointer c,
bool owned,
ordinal_type total_size) {
445 ds::destroy_and_release(c, total_size);
450 void destroy_coeff_array(pointer c,
bool owned,
ordinal_type total_size)
volatile {
453 ds::destroy_and_release(c, total_size);
Kokkos::DefaultExecutionSpace execution_space
Dynamic array allocation class that is specialized for scalar i.e., fundamental or built-in types (fl...