Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_ScatterView.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
50 
51 #ifndef KOKKOS_SCATTER_VIEW_HPP
52 #define KOKKOS_SCATTER_VIEW_HPP
53 
54 #include <Kokkos_Core.hpp>
55 #include <utility>
56 
57 namespace Kokkos {
58 namespace Experimental {
59 
60 /*
61  * Reduction Type list
62  * - These corresponds to subset of the reducers in parallel_reduce
63  * - See Implementations of ScatterValue for details.
64  */
65 enum : int {
66  ScatterSum,
67  ScatterProd,
68  ScatterMax,
69  ScatterMin,
70 };
71 
72 enum : int { ScatterNonDuplicated = 0, ScatterDuplicated = 1 };
73 
74 enum : int { ScatterNonAtomic = 0, ScatterAtomic = 1 };
75 
76 } // namespace Experimental
77 } // namespace Kokkos
78 
79 namespace Kokkos {
80 namespace Impl {
81 namespace Experimental {
82 
83 template <typename ExecSpace>
84 struct DefaultDuplication;
85 
86 template <typename ExecSpace, int duplication>
87 struct DefaultContribution;
88 
89 #ifdef KOKKOS_ENABLE_SERIAL
90 template <>
91 struct DefaultDuplication<Kokkos::Serial> {
92  enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
93 };
94 template <>
95 struct DefaultContribution<Kokkos::Serial,
96  Kokkos::Experimental::ScatterNonDuplicated> {
97  enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
98 };
99 template <>
100 struct DefaultContribution<Kokkos::Serial,
101  Kokkos::Experimental::ScatterDuplicated> {
102  enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
103 };
104 #endif
105 
106 #ifdef KOKKOS_ENABLE_OPENMP
107 template <>
108 struct DefaultDuplication<Kokkos::OpenMP> {
109  enum : int { value = Kokkos::Experimental::ScatterDuplicated };
110 };
111 template <>
112 struct DefaultContribution<Kokkos::OpenMP,
113  Kokkos::Experimental::ScatterNonDuplicated> {
114  enum : int { value = Kokkos::Experimental::ScatterAtomic };
115 };
116 template <>
117 struct DefaultContribution<Kokkos::OpenMP,
118  Kokkos::Experimental::ScatterDuplicated> {
119  enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
120 };
121 #endif
122 
123 #ifdef KOKKOS_ENABLE_HPX
124 template <>
125 struct DefaultDuplication<Kokkos::Experimental::HPX> {
126  enum : int { value = Kokkos::Experimental::ScatterDuplicated };
127 };
128 template <>
129 struct DefaultContribution<Kokkos::Experimental::HPX,
130  Kokkos::Experimental::ScatterNonDuplicated> {
131  enum : int { value = Kokkos::Experimental::ScatterAtomic };
132 };
133 template <>
134 struct DefaultContribution<Kokkos::Experimental::HPX,
135  Kokkos::Experimental::ScatterDuplicated> {
136  enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
137 };
138 #endif
139 
140 #ifdef KOKKOS_ENABLE_THREADS
141 template <>
142 struct DefaultDuplication<Kokkos::Threads> {
143  enum : int { value = Kokkos::Experimental::ScatterDuplicated };
144 };
145 template <>
146 struct DefaultContribution<Kokkos::Threads,
147  Kokkos::Experimental::ScatterNonDuplicated> {
148  enum : int { value = Kokkos::Experimental::ScatterAtomic };
149 };
150 template <>
151 struct DefaultContribution<Kokkos::Threads,
152  Kokkos::Experimental::ScatterDuplicated> {
153  enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
154 };
155 #endif
156 
157 #ifdef KOKKOS_ENABLE_CUDA
158 template <>
159 struct DefaultDuplication<Kokkos::Cuda> {
160  enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
161 };
162 template <>
163 struct DefaultContribution<Kokkos::Cuda,
164  Kokkos::Experimental::ScatterNonDuplicated> {
165  enum : int { value = Kokkos::Experimental::ScatterAtomic };
166 };
167 template <>
168 struct DefaultContribution<Kokkos::Cuda,
169  Kokkos::Experimental::ScatterDuplicated> {
170  enum : int { value = Kokkos::Experimental::ScatterAtomic };
171 };
172 #endif
173 
174 #ifdef KOKKOS_ENABLE_HIP
175 template <>
176 struct DefaultDuplication<Kokkos::Experimental::HIP> {
177  enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
178 };
179 template <>
180 struct DefaultContribution<Kokkos::Experimental::HIP,
181  Kokkos::Experimental::ScatterNonDuplicated> {
182  enum : int { value = Kokkos::Experimental::ScatterAtomic };
183 };
184 template <>
185 struct DefaultContribution<Kokkos::Experimental::HIP,
186  Kokkos::Experimental::ScatterDuplicated> {
187  enum : int { value = Kokkos::Experimental::ScatterAtomic };
188 };
189 #endif
190 
191 /* ScatterValue <Op=ScatterSum, contribution=ScatterNonAtomic> is the object
192  returned by the access operator() of ScatterAccess, This class inherits from
193  the Sum<> reducer and it wraps join(dest, src) with convenient operator+=,
194  etc. Note the addition of update(ValueType const& rhs) and reset() so that
195  all reducers can have common functions See ReduceDuplicates and
196  ResetDuplicates ) */
197 template <typename ValueType, int Op, typename DeviceType, int contribution>
198 struct ScatterValue;
199 
200 template <typename ValueType, typename DeviceType>
201 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
202  Kokkos::Experimental::ScatterNonAtomic>
203  : Sum<ValueType, DeviceType> {
204  public:
205  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
206  : Sum<ValueType, DeviceType>(value_in) {}
207  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
208  : Sum<ValueType, DeviceType>(other.reference()) {}
209  KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
210  this->join(this->reference(), rhs);
211  }
212  KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
213  this->join(this->reference(), -rhs);
214  }
215  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
216  this->join(this->reference(), rhs);
217  }
218  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
219 };
220 
221 /* ScatterValue <Op=ScatterSum, contribution=ScatterAtomic> is the object
222  returned by the access operator()
223  * of ScatterAccess, similar to that returned by an Atomic View, it wraps
224  Kokkos::atomic_add with convenient operator+=, etc. This version also has the
225  update(rhs) and reset() functions. */
226 template <typename ValueType, typename DeviceType>
227 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
228  Kokkos::Experimental::ScatterAtomic>
229  : Sum<ValueType, DeviceType> {
230  public:
231  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
232  : Sum<ValueType, DeviceType>(value_in) {}
233 
234  KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
235  this->join(this->reference(), rhs);
236  }
237  KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
238  this->join(this->reference(), -rhs);
239  }
240 
241  KOKKOS_INLINE_FUNCTION
242  void join(ValueType& dest, const ValueType& src) const {
243  Kokkos::atomic_add(&dest, src);
244  }
245 
246  KOKKOS_INLINE_FUNCTION
247  void join(volatile ValueType& dest, const volatile ValueType& src) const {
248  Kokkos::atomic_add(&dest, src);
249  }
250 
251  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
252  this->join(this->reference(), rhs);
253  }
254 
255  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
256 };
257 
258 /* ScatterValue <Op=ScatterProd, contribution=ScatterNonAtomic> is the object
259  returned by the access operator() of ScatterAccess, This class inherits from
260  the Prod<> reducer and it wraps join(dest, src) with convenient operator*=,
261  etc. Note the addition of update(ValueType const& rhs) and reset() so that
262  all reducers can have common functions See ReduceDuplicates and
263  ResetDuplicates ) */
264 template <typename ValueType, typename DeviceType>
265 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
266  Kokkos::Experimental::ScatterNonAtomic>
267  : Prod<ValueType, DeviceType> {
268  public:
269  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
270  : Prod<ValueType, DeviceType>(value_in) {}
271  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
272  : Prod<ValueType, DeviceType>(other.reference()) {}
273  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
274  this->join(this->reference(), rhs);
275  }
276  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
277  this->join(this->reference(), static_cast<ValueType>(1) / rhs);
278  }
279  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
280  this->join(this->reference(), rhs);
281  }
282  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
283 };
284 
285 /* ScatterValue <Op=ScatterProd, contribution=ScatterAtomic> is the object
286  returned by the access operator()
287  * of ScatterAccess, similar to that returned by an Atomic View, it wraps and
288  atomic_prod with convenient operator*=, etc. atomic_prod uses the
289  atomic_compare_exchange. This version also has the update(rhs) and reset()
290  functions. */
291 template <typename ValueType, typename DeviceType>
292 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
293  Kokkos::Experimental::ScatterAtomic>
294  : Prod<ValueType, DeviceType> {
295  public:
296  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
297  : Prod<ValueType, DeviceType>(value_in) {}
298 
299  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
300  this->join(this->reference(), rhs);
301  }
302  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
303  this->join(this->reference(), static_cast<ValueType>(1) / rhs);
304  }
305 
306  KOKKOS_FORCEINLINE_FUNCTION
307  void atomic_prod(ValueType& dest, const ValueType& src) const {
308  bool success = false;
309  while (!success) {
310  ValueType dest_old = dest;
311  ValueType dest_new = dest_old * src;
312  dest_new =
313  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
314  success = ((dest_new - dest_old) / dest_old <= 1e-15);
315  }
316  }
317 
318  KOKKOS_INLINE_FUNCTION
319  void join(ValueType& dest, const ValueType& src) const {
320  atomic_prod(dest, src);
321  }
322 
323  KOKKOS_INLINE_FUNCTION
324  void join(volatile ValueType& dest, const volatile ValueType& src) const {
325  atomic_prod(dest, src);
326  }
327 
328  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
329  this->join(this->reference(), rhs);
330  }
331  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
332 };
333 
334 /* ScatterValue <Op=ScatterMin, contribution=ScatterNonAtomic> is the object
335  returned by the access operator() of ScatterAccess, This class inherits from
336  the Min<> reducer and it wraps join(dest, src) with convenient update(rhs).
337  Note the addition of update(ValueType const& rhs) and reset() are so that all
338  reducers can have a common update function See ReduceDuplicates and
339  ResetDuplicates ) */
340 template <typename ValueType, typename DeviceType>
341 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
342  Kokkos::Experimental::ScatterNonAtomic>
343  : Min<ValueType, DeviceType> {
344  public:
345  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
346  : Min<ValueType, DeviceType>(value_in) {}
347  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
348  : Min<ValueType, DeviceType>(other.reference()) {}
349  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
350  this->join(this->reference(), rhs);
351  }
352  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
353 };
354 
355 /* ScatterValue <Op=ScatterMin, contribution=ScatterAtomic> is the object
356  returned by the access operator()
357  * of ScatterAccess, similar to that returned by an Atomic View, it wraps and
358  atomic_min with the update(rhs) function. atomic_min uses the
359  atomic_compare_exchange. This version also has the reset() function */
360 template <typename ValueType, typename DeviceType>
361 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
362  Kokkos::Experimental::ScatterAtomic>
363  : Min<ValueType, DeviceType> {
364  public:
365  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
366  : Min<ValueType, DeviceType>(value_in) {}
367 
368  KOKKOS_FORCEINLINE_FUNCTION
369  void atomic_min(ValueType& dest, const ValueType& src) const {
370  bool success = false;
371  while (!success) {
372  ValueType dest_old = dest;
373  ValueType dest_new = (dest_old > src) ? src : dest_old;
374  dest_new =
375  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
376  success = ((dest_new - dest_old) / dest_old <= 1e-15);
377  }
378  }
379 
380  KOKKOS_INLINE_FUNCTION
381  void join(ValueType& dest, const ValueType& src) const {
382  atomic_min(dest, src);
383  }
384 
385  KOKKOS_INLINE_FUNCTION
386  void join(volatile ValueType& dest, const volatile ValueType& src) const {
387  atomic_min(dest, src);
388  }
389 
390  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
391  this->join(this->reference(), rhs);
392  }
393  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
394 };
395 
396 /* ScatterValue <Op=ScatterMax, contribution=ScatterNonAtomic> is the object
397  returned by the access operataor() of ScatterAccess, This class inherits from
398  the Max<> reducer and it wraps join(dest, src) with convenient update(rhs).
399  Note the addition of update(ValueType const& rhs) and reset() are so that all
400  reducers can have a common update function See ReduceDuplicates and
401  ResetDuplicates ) */
402 template <typename ValueType, typename DeviceType>
403 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
404  Kokkos::Experimental::ScatterNonAtomic>
405  : Max<ValueType, DeviceType> {
406  public:
407  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
408  : Max<ValueType, DeviceType>(value_in) {}
409  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
410  : Max<ValueType, DeviceType>(other.reference()) {}
411  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
412  this->join(this->reference(), rhs);
413  }
414  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
415 };
416 
417 /* ScatterValue <Op=ScatterMax, contribution=ScatterAtomic> is the object
418  returned by the access operator()
419  * of ScatterAccess, similar to that returned by an Atomic View, it wraps and
420  atomic_max with the update(rhs) function. atomic_max uses the
421  atomic_compare_exchange. This version also has the reset() function */
422 template <typename ValueType, typename DeviceType>
423 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
424  Kokkos::Experimental::ScatterAtomic>
425  : Max<ValueType, DeviceType> {
426  public:
427  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
428  : Max<ValueType, DeviceType>(value_in) {}
429 
430  KOKKOS_FORCEINLINE_FUNCTION
431  void atomic_max(ValueType& dest, const ValueType& src) const {
432  bool success = false;
433  while (!success) {
434  ValueType dest_old = dest;
435  ValueType dest_new = (dest_old < src) ? src : dest_old;
436  dest_new =
437  Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
438  success = ((dest_new - dest_old) / dest_old <= 1e-15);
439  }
440  }
441 
442  KOKKOS_INLINE_FUNCTION
443  void join(ValueType& dest, const ValueType& src) const {
444  atomic_max(dest, src);
445  }
446 
447  KOKKOS_INLINE_FUNCTION
448  void join(volatile ValueType& dest, const volatile ValueType& src) const {
449  atomic_max(dest, src);
450  }
451 
452  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
453  this->join(this->reference(), rhs);
454  }
455  KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init(this->reference()); }
456 };
457 
458 /* DuplicatedDataType, given a View DataType, will create a new DataType
459  that has a new runtime dimension which becomes the largest-stride dimension.
460  In the case of LayoutLeft, due to the limitation induced by the design of
461  DataType itself, it must convert any existing compile-time dimensions into
462  runtime dimensions. */
463 template <typename T, typename Layout>
464 struct DuplicatedDataType;
465 
466 template <typename T>
467 struct DuplicatedDataType<T, Kokkos::LayoutRight> {
468  typedef T* value_type; // For LayoutRight, add a star all the way on the left
469 };
470 
471 template <typename T, size_t N>
472 struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
473  typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type
474  value_type[N];
475 };
476 
477 template <typename T>
478 struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
479  typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type
480  value_type[];
481 };
482 
483 template <typename T>
484 struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
485  typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type*
486  value_type;
487 };
488 
489 template <typename T>
490 struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
491  typedef T* value_type;
492 };
493 
494 template <typename T, size_t N>
495 struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
496  typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*
497  value_type;
498 };
499 
500 template <typename T>
501 struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
502  typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*
503  value_type;
504 };
505 
506 template <typename T>
507 struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
508  typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*
509  value_type;
510 };
511 
512 /* Insert integer argument pack into array */
513 
514 template <class T>
515 void args_to_array(size_t* array, int pos, T dim0) {
516  array[pos] = dim0;
517 }
518 template <class T, class... Dims>
519 void args_to_array(size_t* array, int pos, T dim0, Dims... dims) {
520  array[pos] = dim0;
521  args_to_array(array, pos + 1, dims...);
522 }
523 
524 /* Slice is just responsible for stuffing the correct number of Kokkos::ALL
525  arguments on the correct side of the index in a call to subview() to get a
526  subview where the index specified is the largest-stride one. */
527 template <typename Layout, int rank, typename V, typename... Args>
528 struct Slice {
529  typedef Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...> next;
530  typedef typename next::value_type value_type;
531 
532  static value_type get(V const& src, const size_t i, Args... args) {
533  return next::get(src, i, Kokkos::ALL, args...);
534  }
535 };
536 
537 template <typename V, typename... Args>
538 struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
539  typedef
540  typename Kokkos::Impl::ViewMapping<void, V, const size_t, Args...>::type
541  value_type;
542  static value_type get(V const& src, const size_t i, Args... args) {
543  return Kokkos::subview(src, i, args...);
544  }
545 };
546 
547 template <typename V, typename... Args>
548 struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
549  typedef
550  typename Kokkos::Impl::ViewMapping<void, V, Args..., const size_t>::type
551  value_type;
552  static value_type get(V const& src, const size_t i, Args... args) {
553  return Kokkos::subview(src, args..., i);
554  }
555 };
556 
557 template <typename ExecSpace, typename ValueType, int Op>
558 struct ReduceDuplicates;
559 
560 template <typename ExecSpace, typename ValueType, int Op>
561 struct ReduceDuplicatesBase {
562  typedef ReduceDuplicates<ExecSpace, ValueType, Op> Derived;
563  ValueType const* src;
564  ValueType* dst;
565  size_t stride;
566  size_t start;
567  size_t n;
568  ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in,
569  size_t stride_in, size_t start_in, size_t n_in,
570  std::string const& name)
571  : src(src_in), dst(dest_in), stride(stride_in), start(start_in), n(n_in) {
572 #if defined(KOKKOS_ENABLE_PROFILING)
573  uint64_t kpID = 0;
574  if (Kokkos::Profiling::profileLibraryLoaded()) {
575  Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0,
576  &kpID);
577  }
578 #else
579  (void)name;
580 #endif
581  typedef RangePolicy<ExecSpace, size_t> policy_type;
583  const closure_type closure(*(static_cast<Derived*>(this)),
584  policy_type(0, stride));
585  closure.execute();
586 #if defined(KOKKOS_ENABLE_PROFILING)
587  if (Kokkos::Profiling::profileLibraryLoaded()) {
588  Kokkos::Profiling::endParallelFor(kpID);
589  }
590 #endif
591  }
592 };
593 
594 /* ReduceDuplicates -- Perform reduction on destination array using strided
595  * source Use ScatterValue<> specific to operation to wrap destination array so
596  * that the reduction operation can be accessed via the update(rhs) function */
597 template <typename ExecSpace, typename ValueType, int Op>
598 struct ReduceDuplicates
599  : public ReduceDuplicatesBase<ExecSpace, ValueType, Op> {
600  typedef ReduceDuplicatesBase<ExecSpace, ValueType, Op> Base;
601  ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in,
602  size_t start_in, size_t n_in, std::string const& name)
603  : Base(src_in, dst_in, stride_in, start_in, n_in, name) {}
604  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
605  for (size_t j = Base::start; j < Base::n; ++j) {
606  ScatterValue<ValueType, Op, ExecSpace,
607  Kokkos::Experimental::ScatterNonAtomic>
608  sv(Base::dst[i]);
609  sv.update(Base::src[i + Base::stride * j]);
610  }
611  }
612 };
613 
614 template <typename ExecSpace, typename ValueType, int Op>
615 struct ResetDuplicates;
616 
617 template <typename ExecSpace, typename ValueType, int Op>
618 struct ResetDuplicatesBase {
619  typedef ResetDuplicates<ExecSpace, ValueType, Op> Derived;
620  ValueType* data;
621  ResetDuplicatesBase(ValueType* data_in, size_t size_in,
622  std::string const& name)
623  : data(data_in) {
624 #if defined(KOKKOS_ENABLE_PROFILING)
625  uint64_t kpID = 0;
626  if (Kokkos::Profiling::profileLibraryLoaded()) {
627  Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0,
628  &kpID);
629  }
630 #else
631  (void)name;
632 #endif
633  typedef RangePolicy<ExecSpace, size_t> policy_type;
635  const closure_type closure(*(static_cast<Derived*>(this)),
636  policy_type(0, size_in));
637  closure.execute();
638 #if defined(KOKKOS_ENABLE_PROFILING)
639  if (Kokkos::Profiling::profileLibraryLoaded()) {
640  Kokkos::Profiling::endParallelFor(kpID);
641  }
642 #endif
643  }
644 };
645 
646 /* ResetDuplicates -- Perform reset on destination array
647  * Use ScatterValue<> specific to operation to wrap destination array so that
648  * the reset operation can be accessed via the reset() function */
649 template <typename ExecSpace, typename ValueType, int Op>
650 struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
651  typedef ResetDuplicatesBase<ExecSpace, ValueType, Op> Base;
652  ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name)
653  : Base(data_in, size_in, name) {}
654  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
655  ScatterValue<ValueType, Op, ExecSpace,
656  Kokkos::Experimental::ScatterNonAtomic>
657  sv(Base::data[i]);
658  sv.reset();
659  }
660 };
661 
662 } // namespace Experimental
663 } // namespace Impl
664 } // namespace Kokkos
665 
666 namespace Kokkos {
667 namespace Experimental {
668 
669 template <typename DataType,
670  typename Layout = Kokkos::DefaultExecutionSpace::array_layout,
671  typename DeviceType = Kokkos::DefaultExecutionSpace,
672  int Op = ScatterSum,
673  int duplication = Kokkos::Impl::Experimental::DefaultDuplication<
674  typename DeviceType::execution_space>::value,
675  int contribution = Kokkos::Impl::Experimental::DefaultContribution<
676  typename DeviceType::execution_space, duplication>::value>
677 class ScatterView;
678 
679 template <typename DataType, int Op, typename DeviceType, typename Layout,
680  int duplication, int contribution, int override_contribution>
681 class ScatterAccess;
682 
683 // non-duplicated implementation
684 template <typename DataType, int Op, typename DeviceType, typename Layout,
685  int contribution>
686 class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
687  contribution> {
688  public:
689  using execution_space = typename DeviceType::execution_space;
690  using memory_space = typename DeviceType::memory_space;
691  using device_type = Kokkos::Device<execution_space, memory_space>;
692  typedef Kokkos::View<DataType, Layout, device_type> original_view_type;
693  typedef typename original_view_type::value_type original_value_type;
694  typedef typename original_view_type::reference_type original_reference_type;
695  friend class ScatterAccess<DataType, Op, DeviceType, Layout,
696  ScatterNonDuplicated, contribution,
697  ScatterNonAtomic>;
698  friend class ScatterAccess<DataType, Op, DeviceType, Layout,
699  ScatterNonDuplicated, contribution, ScatterAtomic>;
700  template <class, class, class, int, int, int>
701  friend class ScatterView;
702 
703  ScatterView() = default;
704 
705  template <typename RT, typename... RP>
706  ScatterView(View<RT, RP...> const& original_view)
707  : internal_view(original_view) {}
708 
709  template <typename... Dims>
710  ScatterView(std::string const& name, Dims... dims)
711  : internal_view(name, dims...) {}
712 
713  template <typename OtherDataType, typename OtherDeviceType>
714  KOKKOS_FUNCTION ScatterView(
715  const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
716  ScatterNonDuplicated, contribution>& other_view)
717  : internal_view(other_view.internal_view) {}
718 
719  template <typename OtherDataType, typename OtherDeviceType>
720  KOKKOS_FUNCTION void operator=(
721  const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
722  ScatterNonDuplicated, contribution>& other_view) {
723  internal_view = other_view.internal_view;
724  }
725 
726  template <int override_contrib = contribution>
727  KOKKOS_FORCEINLINE_FUNCTION
728  ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
729  contribution, override_contrib>
730  access() const {
731  return ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
732  contribution, override_contrib>(*this);
733  }
734 
735  original_view_type subview() const { return internal_view; }
736 
737  template <typename DT, typename... RP>
738  void contribute_into(View<DT, RP...> const& dest) const {
739  typedef View<DT, RP...> dest_type;
740  static_assert(std::is_same<typename dest_type::array_layout, Layout>::value,
741  "ScatterView contribute destination has different layout");
742  static_assert(
743  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
744  memory_space, typename dest_type::memory_space>::value,
745  "ScatterView contribute destination memory space not accessible");
746  if (dest.data() == internal_view.data()) return;
747  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
748  original_value_type, Op>(
749  internal_view.data(), dest.data(), 0, 0, 1, internal_view.label());
750  }
751 
752  void reset() {
753  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
754  original_value_type, Op>(
755  internal_view.data(), internal_view.size(), internal_view.label());
756  }
757  template <typename DT, typename... RP>
758  void reset_except(View<DT, RP...> const& view) {
759  if (view.data() != internal_view.data()) reset();
760  }
761 
762  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
763  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
764  const size_t n6 = 0, const size_t n7 = 0) {
765  ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
766  }
767 
768  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
769  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
770  const size_t n6 = 0, const size_t n7 = 0) {
771  ::Kokkos::realloc(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
772  }
773 
774  protected:
775  template <typename... Args>
776  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args... args) const {
777  return internal_view(args...);
778  }
779 
780  private:
781  typedef original_view_type internal_view_type;
782  internal_view_type internal_view;
783 };
784 
785 template <typename DataType, int Op, typename DeviceType, typename Layout,
786  int contribution, int override_contribution>
787 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
788  contribution, override_contribution> {
789  public:
790  typedef ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
791  contribution>
792  view_type;
793  typedef typename view_type::original_value_type original_value_type;
794  typedef Kokkos::Impl::Experimental::ScatterValue<
795  original_value_type, Op, DeviceType, override_contribution>
796  value_type;
797 
798  KOKKOS_INLINE_FUNCTION
799  ScatterAccess() : view(view_type()) {}
800 
801  KOKKOS_INLINE_FUNCTION
802  ScatterAccess(view_type const& view_in) : view(view_in) {}
803  KOKKOS_DEFAULTED_FUNCTION
804  ~ScatterAccess() = default;
805 
806  template <typename... Args>
807  KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
808  return view.at(args...);
809  }
810 
811  template <typename Arg>
812  KOKKOS_FORCEINLINE_FUNCTION
813  typename std::enable_if<view_type::original_view_type::rank == 1 &&
814  std::is_integral<Arg>::value,
815  value_type>::type
816  operator[](Arg arg) const {
817  return view.at(arg);
818  }
819 
820  private:
821  view_type const& view;
822 };
823 
824 // duplicated implementation
825 // LayoutLeft and LayoutRight are different enough that we'll just specialize
826 // each
827 
828 template <typename DataType, int Op, typename DeviceType, int contribution>
829 class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
830  ScatterDuplicated, contribution> {
831  public:
832  using execution_space = typename DeviceType::execution_space;
833  using memory_space = typename DeviceType::memory_space;
834  using device_type = Kokkos::Device<execution_space, memory_space>;
836  original_view_type;
837  typedef typename original_view_type::value_type original_value_type;
838  typedef typename original_view_type::reference_type original_reference_type;
839  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
840  ScatterDuplicated, contribution, ScatterNonAtomic>;
841  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
842  ScatterDuplicated, contribution, ScatterAtomic>;
843  template <class, class, class, int, int, int>
844  friend class ScatterView;
845 
846  typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<
847  DataType, Kokkos::LayoutRight>
848  data_type_info;
849  typedef typename data_type_info::value_type internal_data_type;
850  typedef Kokkos::View<internal_data_type, Kokkos::LayoutRight, device_type>
851  internal_view_type;
852 
853  ScatterView() = default;
854 
855  template <typename OtherDataType, typename OtherDeviceType>
856  KOKKOS_FUNCTION ScatterView(
857  const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
858  ScatterDuplicated, contribution>& other_view)
859  : unique_token(other_view.unique_token),
860  internal_view(other_view.internal_view) {}
861 
862  template <typename OtherDataType, typename OtherDeviceType>
863  KOKKOS_FUNCTION void operator=(
864  const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
865  ScatterDuplicated, contribution>& other_view) {
866  unique_token = other_view.unique_token;
867  internal_view = other_view.internal_view;
868  }
869 
870  template <typename RT, typename... RP>
871  ScatterView(View<RT, RP...> const& original_view)
872  : unique_token(),
873  internal_view(Kokkos::ViewAllocateWithoutInitializing(
874  std::string("duplicated_") + original_view.label()),
875  unique_token.size(),
876 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
877  original_view.extent(0), original_view.extent(1),
878  original_view.extent(2), original_view.extent(3),
879  original_view.extent(4), original_view.extent(5),
880  original_view.extent(6))
881 #else
882  original_view.rank_dynamic > 0
883  ? original_view.extent(0)
884  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
885  original_view.rank_dynamic > 1
886  ? original_view.extent(1)
887  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
888  original_view.rank_dynamic > 2
889  ? original_view.extent(2)
890  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
891  original_view.rank_dynamic > 3
892  ? original_view.extent(3)
893  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
894  original_view.rank_dynamic > 4
895  ? original_view.extent(4)
896  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
897  original_view.rank_dynamic > 5
898  ? original_view.extent(5)
899  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
900  original_view.rank_dynamic > 6
901  ? original_view.extent(6)
902  : KOKKOS_IMPL_CTOR_DEFAULT_ARG)
903 
904 #endif
905  {
906  reset();
907  }
908 
909  template <typename... Dims>
910  ScatterView(std::string const& name, Dims... dims)
911  : internal_view(Kokkos::ViewAllocateWithoutInitializing(name),
912  unique_token.size(), dims...) {
913  reset();
914  }
915 
916  template <int override_contribution = contribution>
917  KOKKOS_FORCEINLINE_FUNCTION
918  ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
919  ScatterDuplicated, contribution, override_contribution>
920  access() const {
921  return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
922  ScatterDuplicated, contribution,
923  override_contribution>(*this);
924  }
925 
926  typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutRight,
927  internal_view_type::rank,
928  internal_view_type>::value_type
929  subview() const {
930  return Kokkos::Impl::Experimental::Slice<
931  Kokkos::LayoutRight, internal_view_type::Rank,
932  internal_view_type>::get(internal_view, 0);
933  }
934 
935  template <typename DT, typename... RP>
936  void contribute_into(View<DT, RP...> const& dest) const {
937  typedef View<DT, RP...> dest_type;
938  static_assert(std::is_same<typename dest_type::array_layout,
939  Kokkos::LayoutRight>::value,
940  "ScatterView deep_copy destination has different layout");
941  static_assert(
942  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
943  memory_space, typename dest_type::memory_space>::value,
944  "ScatterView deep_copy destination memory space not accessible");
945  bool is_equal = (dest.data() == internal_view.data());
946  size_t start = is_equal ? 1 : 0;
947  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
948  original_value_type, Op>(
949  internal_view.data(), dest.data(), internal_view.stride(0), start,
950  internal_view.extent(0), internal_view.label());
951  }
952 
953  void reset() {
954  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
955  original_value_type, Op>(
956  internal_view.data(), internal_view.size(), internal_view.label());
957  }
958  template <typename DT, typename... RP>
959  void reset_except(View<DT, RP...> const& view) {
960  if (view.data() != internal_view.data()) {
961  reset();
962  return;
963  }
964  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
965  original_value_type, Op>(
966  internal_view.data() + view.size(), internal_view.size() - view.size(),
967  internal_view.label());
968  }
969 
970  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
971  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
972  const size_t n6 = 0) {
973  ::Kokkos::resize(internal_view, unique_token.size(), n0, n1, n2, n3, n4, n5,
974  n6);
975  }
976 
977  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
978  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
979  const size_t n6 = 0) {
980  ::Kokkos::realloc(internal_view, unique_token.size(), n0, n1, n2, n3, n4,
981  n5, n6);
982  }
983 
984  protected:
985  template <typename... Args>
986  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int rank,
987  Args... args) const {
988  return internal_view(rank, args...);
989  }
990 
991  protected:
993  execution_space, Kokkos::Experimental::UniqueTokenScope::Global>
994  unique_token_type;
995 
996  unique_token_type unique_token;
997  internal_view_type internal_view;
998 };
999 
1000 template <typename DataType, int Op, typename DeviceType, int contribution>
1001 class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
1002  ScatterDuplicated, contribution> {
1003  public:
1004  using execution_space = typename DeviceType::execution_space;
1005  using memory_space = typename DeviceType::memory_space;
1006  using device_type = Kokkos::Device<execution_space, memory_space>;
1008  original_view_type;
1009  typedef typename original_view_type::value_type original_value_type;
1010  typedef typename original_view_type::reference_type original_reference_type;
1011  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1012  ScatterDuplicated, contribution, ScatterNonAtomic>;
1013  friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1014  ScatterDuplicated, contribution, ScatterAtomic>;
1015  template <class, class, class, int, int, int>
1016  friend class ScatterView;
1017 
1018  typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<
1019  DataType, Kokkos::LayoutLeft>
1020  data_type_info;
1021  typedef typename data_type_info::value_type internal_data_type;
1022  typedef Kokkos::View<internal_data_type, Kokkos::LayoutLeft, device_type>
1023  internal_view_type;
1024 
1025  ScatterView() = default;
1026 
1027  template <typename RT, typename... RP>
1028  ScatterView(View<RT, RP...> const& original_view) : unique_token() {
1029  size_t arg_N[8] = {original_view.rank > 0 ? original_view.extent(0)
1030  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1031  original_view.rank > 1 ? original_view.extent(1)
1032  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1033  original_view.rank > 2 ? original_view.extent(2)
1034  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1035  original_view.rank > 3 ? original_view.extent(3)
1036  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1037  original_view.rank > 4 ? original_view.extent(4)
1038  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1039  original_view.rank > 5 ? original_view.extent(5)
1040  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1041  original_view.rank > 6 ? original_view.extent(6)
1042  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1043  KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1044  arg_N[internal_view_type::rank - 1] = unique_token.size();
1045  internal_view = internal_view_type(
1046  Kokkos::ViewAllocateWithoutInitializing(std::string("duplicated_") +
1047  original_view.label()),
1048  arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6],
1049  arg_N[7]);
1050  reset();
1051  }
1052 
1053  template <typename... Dims>
1054  ScatterView(std::string const& name, Dims... dims) {
1055  original_view_type original_view;
1056  size_t arg_N[8] = {original_view.rank > 0 ? original_view.static_extent(0)
1057  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1058  original_view.rank > 1 ? original_view.static_extent(1)
1059  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1060  original_view.rank > 2 ? original_view.static_extent(2)
1061  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1062  original_view.rank > 3 ? original_view.static_extent(3)
1063  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1064  original_view.rank > 4 ? original_view.static_extent(4)
1065  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1066  original_view.rank > 5 ? original_view.static_extent(5)
1067  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1068  original_view.rank > 6 ? original_view.static_extent(6)
1069  : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1070  KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1071  Kokkos::Impl::Experimental::args_to_array(arg_N, 0, dims...);
1072  arg_N[internal_view_type::rank - 1] = unique_token.size();
1073  internal_view = internal_view_type(
1074  Kokkos::ViewAllocateWithoutInitializing(name), arg_N[0], arg_N[1],
1075  arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1076  reset();
1077  }
1078 
1079  template <typename OtherDataType, typename OtherDeviceType>
1080  KOKKOS_FUNCTION ScatterView(
1081  const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1082  ScatterDuplicated, contribution>& other_view)
1083  : unique_token(other_view.unique_token),
1084  internal_view(other_view.internal_view) {}
1085 
1086  template <typename OtherDataType, typename OtherDeviceType>
1087  KOKKOS_FUNCTION void operator=(
1088  const ScatterView<OtherDataType, Kokkos::LayoutLeft, OtherDeviceType, Op,
1089  ScatterDuplicated, contribution>& other_view) {
1090  unique_token = other_view.unique_token;
1091  internal_view = other_view.internal_view;
1092  }
1093 
1094  template <int override_contribution = contribution>
1095  KOKKOS_FORCEINLINE_FUNCTION
1096  ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1097  ScatterDuplicated, contribution, override_contribution>
1098  access() const {
1099  return ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1100  ScatterDuplicated, contribution,
1101  override_contribution>(*this);
1102  }
1103 
1104  typename Kokkos::Impl::Experimental::Slice<Kokkos::LayoutLeft,
1105  internal_view_type::rank,
1106  internal_view_type>::value_type
1107  subview() const {
1108  return Kokkos::Impl::Experimental::Slice<
1109  Kokkos::LayoutLeft, internal_view_type::rank,
1110  internal_view_type>::get(internal_view, 0);
1111  }
1112 
1113  template <typename... RP>
1114  void contribute_into(View<RP...> const& dest) const {
1115  typedef View<RP...> dest_type;
1116  static_assert(
1117  std::is_same<typename dest_type::value_type,
1118  typename original_view_type::non_const_value_type>::value,
1119  "ScatterView deep_copy destination has wrong value_type");
1120  static_assert(std::is_same<typename dest_type::array_layout,
1121  Kokkos::LayoutLeft>::value,
1122  "ScatterView deep_copy destination has different layout");
1123  static_assert(
1124  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
1125  memory_space, typename dest_type::memory_space>::value,
1126  "ScatterView deep_copy destination memory space not accessible");
1127  auto extent = internal_view.extent(internal_view_type::rank - 1);
1128  bool is_equal = (dest.data() == internal_view.data());
1129  size_t start = is_equal ? 1 : 0;
1130  Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1131  original_value_type, Op>(
1132  internal_view.data(), dest.data(),
1133  internal_view.stride(internal_view_type::rank - 1), start, extent,
1134  internal_view.label());
1135  }
1136 
1137  void reset() {
1138  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1139  original_value_type, Op>(
1140  internal_view.data(), internal_view.size(), internal_view.label());
1141  }
1142  template <typename DT, typename... RP>
1143  void reset_except(View<DT, RP...> const& view) {
1144  if (view.data() != internal_view.data()) {
1145  reset();
1146  return;
1147  }
1148  Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1149  original_value_type, Op>(
1150  internal_view.data() + view.size(), internal_view.size() - view.size(),
1151  internal_view.label());
1152  }
1153 
1154  void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1155  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1156  const size_t n6 = 0) {
1157  size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1158  const int i = internal_view.rank - 1;
1159  arg_N[i] = unique_token.size();
1160 
1161  ::Kokkos::resize(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1162  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1163  }
1164 
1165  void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0,
1166  const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0,
1167  const size_t n6 = 0) {
1168  size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1169  const int i = internal_view.rank - 1;
1170  arg_N[i] = unique_token.size();
1171 
1172  ::Kokkos::realloc(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1173  arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1174  }
1175 
1176  protected:
1177  template <typename... Args>
1178  KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int thread_id,
1179  Args... args) const {
1180  return internal_view(args..., thread_id);
1181  }
1182 
1183  protected:
1185  execution_space, Kokkos::Experimental::UniqueTokenScope::Global>
1186  unique_token_type;
1187 
1188  unique_token_type unique_token;
1189  internal_view_type internal_view;
1190 };
1191 
1192 /* This object has to be separate in order to store the thread ID, which cannot
1193  be obtained until one is inside a parallel construct, and may be relatively
1194  expensive to obtain at every contribution
1195  (calls a non-inlined function, looks up a thread-local variable).
1196  Due to the expense, it is sensible to query it at most once per parallel
1197  iterate (ideally once per thread, but parallel_for doesn't expose that) and
1198  then store it in a stack variable.
1199  ScatterAccess serves as a non-const object on the stack which can store the
1200  thread ID */
1201 
1202 template <typename DataType, int Op, typename DeviceType, typename Layout,
1203  int contribution, int override_contribution>
1204 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
1205  contribution, override_contribution> {
1206  public:
1207  typedef ScatterView<DataType, Layout, DeviceType, Op, ScatterDuplicated,
1208  contribution>
1209  view_type;
1210  typedef typename view_type::original_value_type original_value_type;
1211  typedef Kokkos::Impl::Experimental::ScatterValue<
1212  original_value_type, Op, DeviceType, override_contribution>
1213  value_type;
1214 
1215  KOKKOS_FORCEINLINE_FUNCTION
1216  ScatterAccess(view_type const& view_in)
1217  : view(view_in), thread_id(view_in.unique_token.acquire()) {}
1218 
1219  KOKKOS_FORCEINLINE_FUNCTION
1220  ~ScatterAccess() {
1221  if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
1222  }
1223 
1224  template <typename... Args>
1225  KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
1226  return view.at(thread_id, args...);
1227  }
1228 
1229  template <typename Arg>
1230  KOKKOS_FORCEINLINE_FUNCTION
1231  typename std::enable_if<view_type::original_view_type::rank == 1 &&
1232  std::is_integral<Arg>::value,
1233  value_type>::type
1234  operator[](Arg arg) const {
1235  return view.at(thread_id, arg);
1236  }
1237 
1238  private:
1239  view_type const& view;
1240 
1241  // simplify RAII by disallowing copies
1242  ScatterAccess(ScatterAccess const& other) = delete;
1243  ScatterAccess& operator=(ScatterAccess const& other) = delete;
1244  ScatterAccess& operator=(ScatterAccess&& other) = delete;
1245 
1246  public:
1247  // do need to allow moves though, for the common
1248  // auto b = a.access();
1249  // that assignments turns into a move constructor call
1250  KOKKOS_FORCEINLINE_FUNCTION
1251  ScatterAccess(ScatterAccess&& other)
1252  : view(other.view), thread_id(other.thread_id) {
1253  other.thread_id = ~thread_id_type(0);
1254  }
1255 
1256  private:
1257  typedef typename view_type::unique_token_type unique_token_type;
1258  typedef typename unique_token_type::size_type thread_id_type;
1259  thread_id_type thread_id;
1260 };
1261 
1262 template <int Op = Kokkos::Experimental::ScatterSum, int duplication = -1,
1263  int contribution = -1, typename RT, typename... RP>
1264 ScatterView<
1265  RT, typename ViewTraits<RT, RP...>::array_layout,
1266  typename ViewTraits<RT, RP...>::device_type,
1267  Op
1268  /* just setting defaults if not specified... things got messy because the
1269  view type does not come before the duplication/contribution settings in
1270  the template parameter list */
1271  ,
1272  duplication == -1
1273  ? Kokkos::Impl::Experimental::DefaultDuplication<
1274  typename ViewTraits<RT, RP...>::execution_space>::value
1275  : duplication,
1276  contribution == -1
1277  ? Kokkos::Impl::Experimental::DefaultContribution<
1278  typename ViewTraits<RT, RP...>::execution_space,
1279  (duplication == -1
1280  ? Kokkos::Impl::Experimental::DefaultDuplication<
1281  typename ViewTraits<RT, RP...>::execution_space>::value
1282  : duplication)>::value
1283  : contribution>
1284 create_scatter_view(View<RT, RP...> const& original_view) {
1285  return original_view; // implicit ScatterView constructor call
1286 }
1287 
1288 } // namespace Experimental
1289 } // namespace Kokkos
1290 
1291 namespace Kokkos {
1292 namespace Experimental {
1293 
1294 template <typename DT1, typename DT2, typename LY, typename ES, int OP, int CT,
1295  int DP, typename... VP>
1296 void contribute(
1297  View<DT1, VP...>& dest,
1298  Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) {
1299  src.contribute_into(dest);
1300 }
1301 
1302 } // namespace Experimental
1303 } // namespace Kokkos
1304 
1305 namespace Kokkos {
1306 
1307 template <typename DT, typename LY, typename ES, int OP, int CT, int DP,
1308  typename... IS>
1309 void realloc(
1310  Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1311  IS... is) {
1312  scatter_view.realloc(is...);
1313 }
1314 
1315 template <typename DT, typename LY, typename ES, int OP, int CT, int DP,
1316  typename... IS>
1317 void resize(
1318  Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1319  IS... is) {
1320  scatter_view.resize(is...);
1321 }
1322 
1323 } // namespace Kokkos
1324 
1325 #endif
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
class to generate unique ids base on the required amount of concurrency
View to an array of data.
Memory layout tag indicating right-to-left (C or lexigraphical scheme) striding of multi-indices...
std::enable_if< std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutLeft >::value||std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutRight >::value >::type resize(Kokkos::View< T, P...> &v, const size_t n0=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n3=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n4=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n5=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7=KOKKOS_IMPL_CTOR_DEFAULT_ARG)
Resize a view with copying old data to new data at the corresponding indices.
Implementation of the ParallelFor operator that has a partial specialization for the device...
std::enable_if< std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutLeft >::value||std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutRight >::value >::type realloc(Kokkos::View< T, P...> &v, const size_t n0=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n3=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n4=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n5=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7=KOKKOS_IMPL_CTOR_DEFAULT_ARG)
Resize a view with discarding old data.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P...> &V)
Temporary free function rank() until rank() is implemented in the View.