#if defined( __CUDA_ARCH__ )
//! Dynamically allocated storage class with striding (Kokkos::Cuda specialization)
template <typename ordinal_t, typename value_t>
class DynamicThreadedStorage<ordinal_t, value_t, Kokkos::Cuda> {
public:
  static const bool is_static = false;
  static const int static_size = 0;
  static const bool supports_reset = true;

  typedef ordinal_t ordinal_type;
  typedef value_t value_type;
  typedef value_type& reference;
  typedef volatile value_type& volatile_reference;
  typedef const value_type& const_reference;
  typedef const volatile value_type& const_volatile_reference;
  typedef value_type* pointer;
  typedef volatile value_type* volatile_pointer;
  typedef const value_type* const_pointer;
  typedef const volatile value_type* const_volatile_pointer;
  typedef Stokhos::DynArrayTraits<value_type,Kokkos::Cuda> ds; // assumed: the allocation traits behind the ds:: calls below
  //! Turn DynamicThreadedStorage into a meta-function class usable with mpl::apply
  template <typename ord_t, typename val_t = value_t, typename dev_t = Kokkos::Cuda>
  struct apply {
    typedef DynamicThreadedStorage<ord_t,val_t,dev_t> type;
  };
  //! Constructor from size and fill value (signature assumed)
  __device__
  DynamicThreadedStorage(const ordinal_type& sz, const value_type& x) :
    sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
    allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
  }

  //! Constructor from size and coefficient array (signature assumed)
  __device__
  DynamicThreadedStorage(const ordinal_type& sz, const value_type* x) :
    sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
    allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
  }
  //! Constructor for creating a view of an existing array
  __device__
  DynamicThreadedStorage(const ordinal_type& sz, pointer v, bool owned) :
    coeff_(v), sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_),
    is_owned_(owned) {}
  //! Copy constructor
  __device__
  DynamicThreadedStorage(const DynamicThreadedStorage& s) :
    sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
    allocate_coeff_array(coeff_, is_owned_, total_sz_);
    for (ordinal_type i=0; i<total_sz_; i+=stride_)
      coeff_[i] = s.coeff_[i];
  }
  //! Copy constructor from a volatile source
  __device__
  DynamicThreadedStorage(const volatile DynamicThreadedStorage& s) :
    sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
    allocate_coeff_array(coeff_, is_owned_, total_sz_);
    for (ordinal_type i=0; i<total_sz_; i+=stride_)
      coeff_[i] = s.coeff_[i];
  }
  //! Destructor
  __device__
  ~DynamicThreadedStorage() {
    destroy_coeff_array(coeff_, is_owned_, total_sz_);
  }
  //! Assignment operator
  __device__
  DynamicThreadedStorage& operator=(const DynamicThreadedStorage& s) {
    if (&s != this) {
      if (s.sz_ != sz_) {
        // Sizes differ:  reallocate, then copy this thread's coefficients
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        sz_ = s.sz_;
        stride_ = s.stride_;
        total_sz_ = sz_*stride_;
        allocate_coeff_array(coeff_, is_owned_, total_sz_);
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
      }
      else
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
    }
    return *this;
  }
  //! Assignment operator from a volatile source
  __device__
  DynamicThreadedStorage&
  operator=(const volatile DynamicThreadedStorage& s) {
    if (&s != this) {
      if (s.sz_ != sz_) {
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        sz_ = s.sz_;
        stride_ = s.stride_;
        total_sz_ = sz_*stride_;
        allocate_coeff_array(coeff_, is_owned_, total_sz_);
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
      }
      else
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
    }
    return *this;
  }
  //! Assignment operator with volatile destination
  __device__
  volatile DynamicThreadedStorage&
  operator=(const DynamicThreadedStorage& s) volatile {
    if (&s != this) {
      if (s.sz_ != sz_) {
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        sz_ = s.sz_;
        stride_ = s.stride_;
        total_sz_ = sz_*stride_;
        allocate_coeff_array(coeff_, is_owned_, total_sz_);
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
      }
      else
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
    }
    return *this;
  }
  //! Assignment operator with volatile source and destination
  __device__
  volatile DynamicThreadedStorage&
  operator=(const volatile DynamicThreadedStorage& s) volatile {
    if (&s != this) {
      if (s.sz_ != sz_) {
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        sz_ = s.sz_;
        stride_ = s.stride_;
        total_sz_ = sz_*stride_;
        allocate_coeff_array(coeff_, is_owned_, total_sz_);
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
      }
      else
        for (ordinal_type i=0; i<total_sz_; i+=stride_)
          coeff_[i] = s.coeff_[i];
    }
    return *this;
  }
  //! Initialize all of this thread's coefficients to the value v
  __device__
  void init(const_reference v) {
    for (ordinal_type i=0; i<total_sz_; i+=stride_)
      coeff_[i] = v;
  }

  __device__
  void init(const_reference v) volatile {
    for (ordinal_type i=0; i<total_sz_; i+=stride_)
      coeff_[i] = v;
  }

  //! Initialize coefficients from the array v
  __device__
  void init(const_pointer v, const ordinal_type& sz = 0) { /* ... */ }

  __device__
  void init(const_pointer v, const ordinal_type& sz = 0) volatile { /* ... */ }

  //! Load this thread's coefficients into the array v
  __device__
  void load(pointer v) { /* ... */ }

  __device__
  void load(pointer v) volatile { /* ... */ }
  //! Resize to a new size sz, preserving existing coefficients
  //! (name and signature assumed)
  __device__
  void resize(const ordinal_type& sz) {
    if (sz != sz_) {
      pointer coeff_new;
      bool owned_new;
      ordinal_type total_sz_new = sz*stride_;
      allocate_coeff_array(coeff_new, owned_new, total_sz_new);
      ordinal_type my_tsz = total_sz_;
      if (total_sz_ > total_sz_new)
        my_tsz = total_sz_new;
      for (ordinal_type i=0; i<my_tsz; i+=stride_)
        coeff_new[i] = coeff_[i];
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = coeff_new;
      sz_ = sz;
      total_sz_ = total_sz_new;
      is_owned_ = owned_new;
    }
  }
  __device__
  void resize(const ordinal_type& sz) volatile {
    if (sz != sz_) {
      pointer coeff_new;
      bool owned_new;
      ordinal_type total_sz_new = sz*stride_;
      allocate_coeff_array(coeff_new, owned_new, total_sz_new);
      ordinal_type my_tsz = total_sz_;
      if (total_sz_ > total_sz_new)
        my_tsz = total_sz_new;
      for (ordinal_type i=0; i<my_tsz; i+=stride_)
        coeff_new[i] = coeff_[i];
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = coeff_new;
      sz_ = sz;
      total_sz_ = total_sz_new;
      is_owned_ = owned_new;
    }
  }
  //! Reset to the given array, size, and stride without copying
  //! (name and signature assumed)
  __device__
  void shallowReset(pointer v, const ordinal_type& sz,
                    const ordinal_type& stride, bool owned) {
    destroy_coeff_array(coeff_, is_owned_, total_sz_);
    coeff_ = v; sz_ = sz; stride_ = stride;
    total_sz_ = sz_*stride_;
    is_owned_ = owned;
  }

  __device__
  void shallowReset(pointer v, const ordinal_type& sz,
                    const ordinal_type& stride, bool owned) volatile {
    destroy_coeff_array(coeff_, is_owned_, total_sz_);
    coeff_ = v; sz_ = sz; stride_ = stride;
    total_sz_ = sz_*stride_;
    is_owned_ = owned;
  }
  //! Coefficient access
  KOKKOS_INLINE_FUNCTION
  const_reference operator[] (const ordinal_type& i) const {
    return coeff_[i*stride_];
  }

  KOKKOS_INLINE_FUNCTION
  const_volatile_reference operator[] (const ordinal_type& i) const volatile {
    return coeff_[i*stride_];
  }

  KOKKOS_INLINE_FUNCTION
  reference operator[] (const ordinal_type& i) {
    return coeff_[i*stride_];
  }

  KOKKOS_INLINE_FUNCTION
  volatile_reference operator[] (const ordinal_type& i) volatile {
    return coeff_[i*stride_];
  }
  //! Get coefficient term i (compile-time index)
  template <int i>
  KOKKOS_INLINE_FUNCTION
  reference getCoeff() { return coeff_[i*stride_]; }

  template <int i>
  KOKKOS_INLINE_FUNCTION
  volatile_reference getCoeff() volatile { return coeff_[i*stride_]; }

  template <int i>
  KOKKOS_INLINE_FUNCTION
  const_reference getCoeff() const { return coeff_[i*stride_]; }

  template <int i>
  KOKKOS_INLINE_FUNCTION
  const_volatile_reference getCoeff() const volatile { return coeff_[i*stride_]; }
  KOKKOS_INLINE_FUNCTION
  const_volatile_pointer coeff() const volatile { return coeff_; }

  KOKKOS_INLINE_FUNCTION
  const_pointer coeff() const { return coeff_; }

  KOKKOS_INLINE_FUNCTION
  volatile_pointer coeff() volatile { return coeff_; }

  KOKKOS_INLINE_FUNCTION
  pointer coeff() { return coeff_; }
  //! Number of threads in the launch block
  __device__ ordinal_type num_threads() const {
    return blockDim.x*blockDim.y*blockDim.z;
  }
  __device__ ordinal_type num_threads() const volatile {
    return blockDim.x*blockDim.y*blockDim.z;
  }

  //! This thread's linear index within the block (helper name assumed)
  __device__ ordinal_type thread_id() const {
    return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
  }
  __device__ ordinal_type thread_id() const volatile {
    return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
  }
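  // Assumed storage layout: one block-wide array of total_sz_ = sz_*stride_
  // values is shared by all threads of the block, with stride_ equal to
  // num_threads().  Each thread's coeff_ points into that array offset by its
  // own thread index, so coefficient i of a thread lives at coeff_[i*stride_],
  // which is the indexing used by operator[] and getCoeff() above.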
  //! Allocate the coefficient array, filling it with the value x.
  //! Assumed pattern: thread 0 allocates via ds::get_and_fill and the
  //! resulting pointer is shared with, and offset by, every thread
  //! (the default fill value is also assumed).
  __device__
  void allocate_coeff_array(pointer& c, bool& owned,
                            ordinal_type total_size,
                            const value_type& x = value_type(0.0)) {
    __shared__ pointer ptr;
    if (thread_id() == 0)
      ptr = ds::get_and_fill(total_size,x);
    __syncthreads();
    c = ptr + thread_id();
    owned = true;
  }

  __device__
  void allocate_coeff_array(volatile pointer& c, volatile bool& owned,
                            ordinal_type total_size,
                            const value_type& x = value_type(0.0)) volatile {
    __shared__ pointer ptr;
    if (thread_id() == 0)
      ptr = ds::get_and_fill(total_size,x);
    __syncthreads();
    c = ptr + thread_id();
    owned = true;
  }

  //! Allocate the coefficient array, copying initial values from the array x
  __device__
  void allocate_coeff_array(pointer& c, bool& owned,
                            ordinal_type total_size,
                            const_pointer x) {
    __shared__ pointer ptr;
    if (thread_id() == 0)
      ptr = ds::get_and_fill(x, total_size);
    __syncthreads();
    c = ptr + thread_id();
    owned = true;
  }
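  // Sketch of the allocator interface implied by the ds:: calls above
  // (hypothetical declarations for illustration; not the actual
  // Stokhos::DynArrayTraits definition):
  //
  //   static __device__ value_type* get_and_fill(ordinal_type total_size,
  //                                              const value_type& x);
  //   static __device__ value_type* get_and_fill(const value_type* v,
  //                                              ordinal_type total_size);
  //   static __device__ void destroy_and_release(value_type* ptr,
  //                                              ordinal_type total_size);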
  //! Destroy the coefficient array (assumed: synchronize, then let thread 0
  //! release the allocation if it is owned)
  __device__
  void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) {
    __syncthreads();
    if (owned && thread_id() == 0)
      ds::destroy_and_release(c, total_size);
  }

  __device__
  void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) volatile {
    __syncthreads();
    if (owned && thread_id() == 0)
      ds::destroy_and_release(c, total_size);
  }

  pointer coeff_;          //!< This thread's slice of the block-wide coefficient array
  ordinal_type sz_;        //!< Size of the coefficient array (per thread)
  ordinal_type stride_;    //!< Stride between coefficients (threads per block)
  ordinal_type total_sz_;  //!< Total allocated size, sz_*stride_
  bool is_owned_;          //!< Whether the coefficient array is owned
};
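// A minimal usage sketch (not part of this header; the kernel, types, and
// indexing are hypothetical).  Every thread of a block constructs the storage
// cooperatively, then reads and writes only its own coefficients:
//
//   __global__ void example_kernel(double* out, int sz) {
//     typedef DynamicThreadedStorage<int,double,Kokkos::Cuda> storage_type;
//     storage_type s(sz, 1.0);               // one shared allocation per block
//     for (int i=0; i<sz; ++i)
//       s[i] = i * (threadIdx.x + 1);        // per-thread, stride-interleaved values
//     out[blockIdx.x*blockDim.x + threadIdx.x] = s[0] + s[sz-1];
//   }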
//! Dynamic array allocation class that is specialized for scalar
//! (i.e., fundamental or built-in) types.