Stokhos_DynamicThreadedStorage_cuda.hpp
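This header provides the specialization of Stokhos::DynamicThreadedStorage for the Kokkos::Cuda execution space. Rather than one allocation per thread, it allocates a single coefficient array of length sz*(threads per block) on thread 0 of each CUDA block and interleaves it across the block's threads: thread t sees coefficients t, t+stride, t+2*stride, ..., so logical element i of each thread's array is addressed as coeff_[i*stride_].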
// @HEADER
// ***********************************************************************
//
// Stokhos Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
//
// ***********************************************************************
// @HEADER

#if defined( __CUDA_ARCH__ )

namespace Stokhos {

  template <typename ordinal_t, typename value_t>
  class DynamicThreadedStorage<ordinal_t, value_t, Kokkos::Cuda> {
  public:

    static const bool is_static = false;
    static const int static_size = 0;
    static const bool supports_reset = true;

    typedef ordinal_t ordinal_type;
    typedef value_t value_type;
    typedef Kokkos::Cuda execution_space;
    typedef value_type& reference;
    typedef volatile value_type& volatile_reference;
    typedef const value_type& const_reference;
    typedef const volatile value_type& const_volatile_reference;
    typedef value_type* pointer;
    typedef volatile value_type* volatile_pointer;
    typedef const value_type* const_pointer;
    typedef const volatile value_type* const_volatile_pointer;
    typedef Stokhos::DynArrayTraits<value_type,execution_space> ds;

    //! Turn DynamicThreadedStorage into a meta-function class usable with mpl::apply
    template <typename ord_t, typename val_t = value_t, typename dev_t = Kokkos::Cuda>
    struct apply {
      typedef DynamicThreadedStorage<ord_t,val_t,dev_t> type;
    };

    //! Constructor
    __device__
    DynamicThreadedStorage(const ordinal_type& sz = 1,
                           const value_type& x = value_type(0.0)) :
      sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
    }

    //! Constructor from array
    __device__
    DynamicThreadedStorage(const ordinal_type& sz, const value_type* x) :
      sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
    }

    //! Constructor for creating a view
    __device__
    DynamicThreadedStorage(const ordinal_type& sz, pointer v, bool owned) :
      coeff_(v), sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_),
      is_owned_(owned) {}

    //! Copy constructor
    __device__
    DynamicThreadedStorage(const DynamicThreadedStorage& s) :
      sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_);
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = s.coeff_[i];
    }

    //! Copy constructor from a volatile source
    __device__
    DynamicThreadedStorage(const volatile DynamicThreadedStorage& s) :
      sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_);
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = s.coeff_[i];
    }

    //! Destructor
    __device__
    ~DynamicThreadedStorage() {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
    }

    //! Assignment operator
    __device__
    DynamicThreadedStorage& operator=(const DynamicThreadedStorage& s) {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Assignment operator from a volatile source
    __device__
    DynamicThreadedStorage&
    operator=(const volatile DynamicThreadedStorage& s) {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Assignment operator on a volatile destination
    __device__
    volatile DynamicThreadedStorage&
    operator=(const DynamicThreadedStorage& s) volatile {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Assignment operator, volatile source and destination
    __device__
    volatile DynamicThreadedStorage&
    operator=(const volatile DynamicThreadedStorage& s) volatile {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }

    //! Initialize values to a constant value
    __device__
    void init(const_reference v) {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v;
    }

    //! Initialize values to a constant value
    __device__
    void init(const_reference v) volatile {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v;
    }

    //! Initialize values to an array of values
    __device__
    void init(const_pointer v, const ordinal_type& sz = 0) {
      ordinal_type my_sz = stride_*sz;
      if (sz == 0)
        my_sz = total_sz_;
      for (ordinal_type i=0; i<my_sz; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Initialize values to an array of values
    __device__
    void init(const_pointer v, const ordinal_type& sz = 0) volatile {
      ordinal_type my_sz = stride_*sz;
      if (sz == 0)
        my_sz = total_sz_;
      for (ordinal_type i=0; i<my_sz; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Load values from an array of values
    __device__
    void load(pointer v) {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Load values from an array of values
    __device__
    void load(pointer v) volatile {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Resize to new size (values are preserved)
    __device__
    void resize(const ordinal_type& sz) {
      if (sz != sz_) {
        value_type *coeff_new;
        bool owned_new;
        ordinal_type total_sz_new = sz*stride_;
        allocate_coeff_array(coeff_new, owned_new, total_sz_new);
        ordinal_type my_tsz = total_sz_;
        if (total_sz_ > total_sz_new)
          my_tsz = total_sz_new;
        for (ordinal_type i=0; i<my_tsz; i+=stride_)
          coeff_new[i] = coeff_[i];
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        coeff_ = coeff_new;
        sz_ = sz;
        total_sz_ = total_sz_new;
        is_owned_ = owned_new;
      }
    }

    //! Resize to new size (values are preserved)
    __device__
    void resize(const ordinal_type& sz) volatile {
      if (sz != sz_) {
        value_type *coeff_new;
        bool owned_new;
        ordinal_type total_sz_new = sz*stride_;
        allocate_coeff_array(coeff_new, owned_new, total_sz_new);
        ordinal_type my_tsz = total_sz_;
        if (total_sz_ > total_sz_new)
          my_tsz = total_sz_new;
        for (ordinal_type i=0; i<my_tsz; i+=stride_)
          coeff_new[i] = coeff_[i];
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        coeff_ = coeff_new;
        sz_ = sz;
        total_sz_ = total_sz_new;
        is_owned_ = owned_new;
      }
    }

    //! Reset storage to given array, size, and stride
    __device__
    void shallowReset(pointer v, const ordinal_type& sz,
                      const ordinal_type& stride, bool owned) {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = v;
      sz_ = sz;
      stride_ = stride;
      total_sz_ = sz_*stride_;
      is_owned_ = owned;
    }

    //! Reset storage to given array, size, and stride
    __device__
    void shallowReset(pointer v, const ordinal_type& sz,
                      const ordinal_type& stride, bool owned) volatile {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = v;
      sz_ = sz;
      stride_ = stride;
      total_sz_ = sz_*stride_;
      is_owned_ = owned;
    }

    //! Return size
    __device__
    ordinal_type size() const { return sz_; }

    //! Return size
    __device__
    ordinal_type size() const volatile { return sz_; }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    const_reference operator[] (const ordinal_type& i) const {
      return coeff_[i*stride_];
    }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    const_volatile_reference operator[] (const ordinal_type& i) const volatile {
      return coeff_[i*stride_];
    }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    reference operator[] (const ordinal_type& i) {
      return coeff_[i*stride_];
    }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    volatile_reference operator[] (const ordinal_type& i) volatile {
      return coeff_[i*stride_];
    }

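    // Note on indexing: coeff_ already includes this thread's offset
    // (base + thread_index()), so logical coefficient i lives at
    // coeff_[i*stride_]. With stride_ equal to the block size, the threads
    // of a warp touch consecutive addresses for a given i, which yields
    // coalesced global-memory access.
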
    template <int i>
    KOKKOS_INLINE_FUNCTION
    reference getCoeff() { return coeff_[i*stride_]; }

    template <int i>
    KOKKOS_INLINE_FUNCTION
    volatile_reference getCoeff() volatile { return coeff_[i*stride_]; }

    template <int i>
    KOKKOS_INLINE_FUNCTION
    const_reference getCoeff() const { return coeff_[i*stride_]; }

    template <int i>
    KOKKOS_INLINE_FUNCTION
    const_volatile_reference getCoeff() const volatile { return coeff_[i*stride_]; }

    //! Get coefficients
    KOKKOS_INLINE_FUNCTION
    const_volatile_pointer coeff() const volatile { return coeff_; }

    //! Get coefficients
    KOKKOS_INLINE_FUNCTION
    const_pointer coeff() const { return coeff_; }

    //! Get coefficients
    KOKKOS_INLINE_FUNCTION
    volatile_pointer coeff() volatile { return coeff_; }

    //! Get coefficients
    KOKKOS_INLINE_FUNCTION
    pointer coeff() { return coeff_; }

  protected:

    //! Return number of threads in each block
    __device__
    ordinal_type num_threads() const {
      return blockDim.x*blockDim.y*blockDim.z;
    }

    //! Return number of threads in each block
    __device__
    ordinal_type num_threads() const volatile {
      return blockDim.x*blockDim.y*blockDim.z;
    }

    //! Return the index of this thread within its block
    __device__
    ordinal_type thread_index() const {
      return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
    }

    //! Return the index of this thread within its block
    __device__
    ordinal_type thread_index() const volatile {
      return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
    }

    //! Allocate coefficient array
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type& x = value_type(0.0)) {

      // Allocate coefficient array on thread 0 and broadcast the pointer to
      // the rest of the block through shared memory; only thread 0 owns
      // (and later frees) the allocation
      __shared__ pointer ptr;
      ordinal_type tidx = thread_index();
      if (tidx == 0) {
        ptr = ds::get_and_fill(total_size,x);
        owned = true;
      }
      else
        owned = false;
      __syncthreads();

      // Give each thread its portion of the array
      c = ptr + tidx;
    }

    //! Allocate coefficient array
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type& x = value_type(0.0)) volatile {

      // Allocate coefficient array on thread 0
      __shared__ pointer ptr;
      ordinal_type tidx = thread_index();
      if (tidx == 0) {
        ptr = ds::get_and_fill(total_size,x);
        owned = true;
      }
      else
        owned = false;
      __syncthreads();

      // Give each thread its portion of the array
      c = ptr + tidx;
    }

    //! Allocate coefficient array, initialized from an array of values
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type* x) {

      // Allocate coefficient array on thread 0
      __shared__ pointer ptr;
      ordinal_type tidx = thread_index();
      if (tidx == 0) {
        ptr = ds::get_and_fill(x, total_size);
        owned = true;
      }
      else
        owned = false;
      __syncthreads();

      // Give each thread its portion of the array
      c = ptr + tidx;
    }

    //! Destroy coefficient array
    __device__
    void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) {
      // Wait until every thread is done with the array before the owning
      // thread (thread 0) releases it
      __syncthreads();
      if (owned)
        ds::destroy_and_release(c, total_size);
    }

    //! Destroy coefficient array
    __device__
    void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) volatile {
      __syncthreads();
      if (owned)
        ds::destroy_and_release(c, total_size);
    }

  private:

    //! Coefficient values
    pointer coeff_;

    //! Size of array used
    ordinal_type sz_;

    //! Stride of array (number of threads in the block)
    ordinal_type stride_;

    //! Total size of array (sz_*stride_)
    ordinal_type total_sz_;

    //! Do we own the coefficient array
    bool is_owned_;

  };

}

#endif // defined( __CUDA_ARCH__ )
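For orientation, here is a minimal usage sketch, not part of the header above. The kernel name, output array, and one-dimensional launch configuration are invented for illustration; in practice this class is typically reached through higher-level Stokhos/Sacado scalar types rather than instantiated directly. Note that every thread of the block must construct and destroy the storage collectively, since both paths broadcast through shared memory and call __syncthreads().

  // Hypothetical illustration only -- not part of the Stokhos sources.
  // Assumes a one-dimensional block and grid.
  __global__ void example_kernel(double* out, int sz)
  {
    typedef Stokhos::DynamicThreadedStorage<int, double, Kokkos::Cuda>
      storage_type;

    // One block-wide buffer of sz*blockDim.x values is allocated on
    // thread 0; each thread sees its own logical array of sz coefficients.
    storage_type s(sz, 1.0);               // every coefficient set to 1.0

    for (int i = 0; i < s.size(); ++i)     // strided store: coeff_[i*stride_]
      s[i] = static_cast<double>(i + 1);

    double sum = 0.0;
    for (int i = 0; i < s.size(); ++i)     // strided load, coalesced per warp
      sum += s[i];

    out[blockIdx.x * blockDim.x + threadIdx.x] = sum;
  }  // destructor synchronizes the block, then thread 0 frees the buffer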