51 #ifndef KOKKOS_SCATTER_VIEW_HPP
52 #define KOKKOS_SCATTER_VIEW_HPP
54 #include <Kokkos_Core.hpp>
58 namespace Experimental {
72 enum :
int { ScatterNonDuplicated = 0, ScatterDuplicated = 1 };
74 enum :
int { ScatterNonAtomic = 0, ScatterAtomic = 1 };
81 namespace Experimental {
83 template <
typename ExecSpace>
84 struct DefaultDuplication;
86 template <
typename ExecSpace,
int duplication>
87 struct DefaultContribution;
89 #ifdef KOKKOS_ENABLE_SERIAL
91 struct DefaultDuplication<Kokkos::Serial> {
92 enum :
int { value = Kokkos::Experimental::ScatterNonDuplicated };
95 struct DefaultContribution<Kokkos::Serial,
96 Kokkos::Experimental::ScatterNonDuplicated> {
97 enum :
int { value = Kokkos::Experimental::ScatterNonAtomic };
100 struct DefaultContribution<Kokkos::Serial,
101 Kokkos::Experimental::ScatterDuplicated> {
102 enum :
int { value = Kokkos::Experimental::ScatterNonAtomic };
106 #ifdef KOKKOS_ENABLE_OPENMP
108 struct DefaultDuplication<Kokkos::OpenMP> {
109 enum :
int { value = Kokkos::Experimental::ScatterDuplicated };
112 struct DefaultContribution<Kokkos::OpenMP,
113 Kokkos::Experimental::ScatterNonDuplicated> {
114 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
117 struct DefaultContribution<Kokkos::OpenMP,
118 Kokkos::Experimental::ScatterDuplicated> {
119 enum :
int { value = Kokkos::Experimental::ScatterNonAtomic };
123 #ifdef KOKKOS_ENABLE_HPX
125 struct DefaultDuplication<Kokkos::Experimental::HPX> {
126 enum :
int { value = Kokkos::Experimental::ScatterDuplicated };
129 struct DefaultContribution<Kokkos::Experimental::HPX,
130 Kokkos::Experimental::ScatterNonDuplicated> {
131 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
134 struct DefaultContribution<Kokkos::Experimental::HPX,
135 Kokkos::Experimental::ScatterDuplicated> {
136 enum :
int { value = Kokkos::Experimental::ScatterNonAtomic };
140 #ifdef KOKKOS_ENABLE_THREADS
142 struct DefaultDuplication<Kokkos::Threads> {
143 enum :
int { value = Kokkos::Experimental::ScatterDuplicated };
146 struct DefaultContribution<Kokkos::Threads,
147 Kokkos::Experimental::ScatterNonDuplicated> {
148 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
151 struct DefaultContribution<Kokkos::Threads,
152 Kokkos::Experimental::ScatterDuplicated> {
153 enum :
int { value = Kokkos::Experimental::ScatterNonAtomic };
157 #ifdef KOKKOS_ENABLE_CUDA
159 struct DefaultDuplication<Kokkos::Cuda> {
160 enum :
int { value = Kokkos::Experimental::ScatterNonDuplicated };
163 struct DefaultContribution<Kokkos::Cuda,
164 Kokkos::Experimental::ScatterNonDuplicated> {
165 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
168 struct DefaultContribution<Kokkos::Cuda,
169 Kokkos::Experimental::ScatterDuplicated> {
170 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
174 #ifdef KOKKOS_ENABLE_HIP
176 struct DefaultDuplication<Kokkos::Experimental::HIP> {
177 enum :
int { value = Kokkos::Experimental::ScatterNonDuplicated };
180 struct DefaultContribution<Kokkos::Experimental::HIP,
181 Kokkos::Experimental::ScatterNonDuplicated> {
182 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
185 struct DefaultContribution<Kokkos::Experimental::HIP,
186 Kokkos::Experimental::ScatterDuplicated> {
187 enum :
int { value = Kokkos::Experimental::ScatterAtomic };
197 template <
typename ValueType,
int Op,
typename DeviceType,
int contribution>
200 template <
typename ValueType,
typename DeviceType>
201 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
202 Kokkos::Experimental::ScatterNonAtomic>
203 : Sum<ValueType, DeviceType> {
205 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
206 : Sum<ValueType, DeviceType>(value_in) {}
207 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
208 : Sum<ValueType, DeviceType>(other.reference()) {}
209 KOKKOS_FORCEINLINE_FUNCTION
void operator+=(ValueType
const& rhs) {
210 this->join(this->reference(), rhs);
212 KOKKOS_FORCEINLINE_FUNCTION
void operator-=(ValueType
const& rhs) {
213 this->join(this->reference(), -rhs);
215 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
216 this->join(this->reference(), rhs);
218 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
226 template <
typename ValueType,
typename DeviceType>
227 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
228 Kokkos::Experimental::ScatterAtomic>
229 : Sum<ValueType, DeviceType> {
231 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
232 : Sum<ValueType, DeviceType>(value_in) {}
234 KOKKOS_FORCEINLINE_FUNCTION
void operator+=(ValueType
const& rhs) {
235 this->join(this->reference(), rhs);
237 KOKKOS_FORCEINLINE_FUNCTION
void operator-=(ValueType
const& rhs) {
238 this->join(this->reference(), -rhs);
241 KOKKOS_INLINE_FUNCTION
242 void join(ValueType& dest,
const ValueType& src)
const {
243 Kokkos::atomic_add(&dest, src);
246 KOKKOS_INLINE_FUNCTION
247 void join(
volatile ValueType& dest,
const volatile ValueType& src)
const {
248 Kokkos::atomic_add(&dest, src);
251 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
252 this->join(this->reference(), rhs);
255 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
264 template <
typename ValueType,
typename DeviceType>
265 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
266 Kokkos::Experimental::ScatterNonAtomic>
267 : Prod<ValueType, DeviceType> {
269 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
270 : Prod<ValueType, DeviceType>(value_in) {}
271 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
272 : Prod<ValueType, DeviceType>(other.reference()) {}
273 KOKKOS_FORCEINLINE_FUNCTION
void operator*=(ValueType
const& rhs) {
274 this->join(this->reference(), rhs);
276 KOKKOS_FORCEINLINE_FUNCTION
void operator/=(ValueType
const& rhs) {
277 this->join(this->reference(), static_cast<ValueType>(1) / rhs);
279 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
280 this->join(this->reference(), rhs);
282 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
291 template <
typename ValueType,
typename DeviceType>
292 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
293 Kokkos::Experimental::ScatterAtomic>
294 : Prod<ValueType, DeviceType> {
296 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
297 : Prod<ValueType, DeviceType>(value_in) {}
299 KOKKOS_FORCEINLINE_FUNCTION
void operator*=(ValueType
const& rhs) {
300 this->join(this->reference(), rhs);
302 KOKKOS_FORCEINLINE_FUNCTION
void operator/=(ValueType
const& rhs) {
303 this->join(this->reference(), static_cast<ValueType>(1) / rhs);
306 KOKKOS_FORCEINLINE_FUNCTION
307 void atomic_prod(ValueType& dest,
const ValueType& src)
const {
308 bool success =
false;
310 ValueType dest_old = dest;
311 ValueType dest_new = dest_old * src;
313 Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
314 success = ((dest_new - dest_old) / dest_old <= 1e-15);
318 KOKKOS_INLINE_FUNCTION
319 void join(ValueType& dest,
const ValueType& src)
const {
320 atomic_prod(dest, src);
323 KOKKOS_INLINE_FUNCTION
324 void join(
volatile ValueType& dest,
const volatile ValueType& src)
const {
325 atomic_prod(dest, src);
328 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
329 this->join(this->reference(), rhs);
331 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
340 template <
typename ValueType,
typename DeviceType>
341 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
342 Kokkos::Experimental::ScatterNonAtomic>
343 : Min<ValueType, DeviceType> {
345 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
346 : Min<ValueType, DeviceType>(value_in) {}
347 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
348 : Min<ValueType, DeviceType>(other.reference()) {}
349 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
350 this->join(this->reference(), rhs);
352 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
360 template <
typename ValueType,
typename DeviceType>
361 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
362 Kokkos::Experimental::ScatterAtomic>
363 : Min<ValueType, DeviceType> {
365 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
366 : Min<ValueType, DeviceType>(value_in) {}
368 KOKKOS_FORCEINLINE_FUNCTION
369 void atomic_min(ValueType& dest,
const ValueType& src)
const {
370 bool success =
false;
372 ValueType dest_old = dest;
373 ValueType dest_new = (dest_old > src) ? src : dest_old;
375 Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
376 success = ((dest_new - dest_old) / dest_old <= 1e-15);
380 KOKKOS_INLINE_FUNCTION
381 void join(ValueType& dest,
const ValueType& src)
const {
382 atomic_min(dest, src);
385 KOKKOS_INLINE_FUNCTION
386 void join(
volatile ValueType& dest,
const volatile ValueType& src)
const {
387 atomic_min(dest, src);
390 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
391 this->join(this->reference(), rhs);
393 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
402 template <
typename ValueType,
typename DeviceType>
403 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
404 Kokkos::Experimental::ScatterNonAtomic>
405 : Max<ValueType, DeviceType> {
407 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
408 : Max<ValueType, DeviceType>(value_in) {}
409 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
410 : Max<ValueType, DeviceType>(other.reference()) {}
411 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
412 this->join(this->reference(), rhs);
414 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
422 template <
typename ValueType,
typename DeviceType>
423 struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
424 Kokkos::Experimental::ScatterAtomic>
425 : Max<ValueType, DeviceType> {
427 KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
428 : Max<ValueType, DeviceType>(value_in) {}
430 KOKKOS_FORCEINLINE_FUNCTION
431 void atomic_max(ValueType& dest,
const ValueType& src)
const {
432 bool success =
false;
434 ValueType dest_old = dest;
435 ValueType dest_new = (dest_old < src) ? src : dest_old;
437 Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
438 success = ((dest_new - dest_old) / dest_old <= 1e-15);
442 KOKKOS_INLINE_FUNCTION
443 void join(ValueType& dest,
const ValueType& src)
const {
444 atomic_max(dest, src);
447 KOKKOS_INLINE_FUNCTION
448 void join(
volatile ValueType& dest,
const volatile ValueType& src)
const {
449 atomic_max(dest, src);
452 KOKKOS_FORCEINLINE_FUNCTION
void update(ValueType
const& rhs) {
453 this->join(this->reference(), rhs);
455 KOKKOS_FORCEINLINE_FUNCTION
void reset() { this->init(this->reference()); }
463 template <
typename T,
typename Layout>
464 struct DuplicatedDataType;
466 template <
typename T>
467 struct DuplicatedDataType<T, Kokkos::LayoutRight> {
468 typedef T* value_type;
471 template <
typename T,
size_t N>
472 struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
473 typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type
477 template <
typename T>
478 struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
479 typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type
483 template <
typename T>
485 typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type*
489 template <
typename T>
490 struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
491 typedef T* value_type;
494 template <
typename T,
size_t N>
495 struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
496 typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*
500 template <
typename T>
501 struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
502 typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*
506 template <
typename T>
508 typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*
515 void args_to_array(
size_t* array,
int pos, T dim0) {
518 template <
class T,
class... Dims>
519 void args_to_array(
size_t* array,
int pos, T dim0, Dims... dims) {
521 args_to_array(array, pos + 1, dims...);
527 template <
typename Layout,
int rank,
typename V,
typename... Args>
529 typedef Slice<Layout,
rank - 1, V, Kokkos::Impl::ALL_t, Args...> next;
530 typedef typename next::value_type value_type;
532 static value_type
get(V
const& src,
const size_t i, Args... args) {
533 return next::get(src, i, Kokkos::ALL, args...);
537 template <
typename V,
typename... Args>
538 struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
540 typename Kokkos::Impl::ViewMapping<void, V,
const size_t, Args...>::type
542 static value_type
get(V
const& src,
const size_t i, Args... args) {
543 return Kokkos::subview(src, i, args...);
547 template <
typename V,
typename... Args>
548 struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
550 typename Kokkos::Impl::ViewMapping<void, V, Args...,
const size_t>::type
552 static value_type
get(V
const& src,
const size_t i, Args... args) {
553 return Kokkos::subview(src, args..., i);
557 template <
typename ExecSpace,
typename ValueType,
int Op>
558 struct ReduceDuplicates;
560 template <
typename ExecSpace,
typename ValueType,
int Op>
561 struct ReduceDuplicatesBase {
562 typedef ReduceDuplicates<ExecSpace, ValueType, Op> Derived;
563 ValueType
const* src;
568 ReduceDuplicatesBase(ValueType
const* src_in, ValueType* dest_in,
569 size_t stride_in,
size_t start_in,
size_t n_in,
570 std::string
const& name)
571 : src(src_in), dst(dest_in), stride(stride_in), start(start_in), n(n_in) {
572 #if defined(KOKKOS_ENABLE_PROFILING)
574 if (Kokkos::Profiling::profileLibraryLoaded()) {
575 Kokkos::Profiling::beginParallelFor(std::string(
"reduce_") + name, 0,
581 typedef RangePolicy<ExecSpace, size_t> policy_type;
583 const closure_type closure(*(static_cast<Derived*>(
this)),
584 policy_type(0, stride));
586 #if defined(KOKKOS_ENABLE_PROFILING)
587 if (Kokkos::Profiling::profileLibraryLoaded()) {
588 Kokkos::Profiling::endParallelFor(kpID);
597 template <
typename ExecSpace,
typename ValueType,
int Op>
598 struct ReduceDuplicates
599 :
public ReduceDuplicatesBase<ExecSpace, ValueType, Op> {
600 typedef ReduceDuplicatesBase<ExecSpace, ValueType, Op> Base;
601 ReduceDuplicates(ValueType
const* src_in, ValueType* dst_in,
size_t stride_in,
602 size_t start_in,
size_t n_in, std::string
const& name)
603 : Base(src_in, dst_in, stride_in, start_in, n_in, name) {}
604 KOKKOS_FORCEINLINE_FUNCTION
void operator()(
size_t i)
const {
605 for (
size_t j = Base::start; j < Base::n; ++j) {
606 ScatterValue<ValueType, Op, ExecSpace,
607 Kokkos::Experimental::ScatterNonAtomic>
609 sv.update(Base::src[i + Base::stride * j]);
614 template <
typename ExecSpace,
typename ValueType,
int Op>
615 struct ResetDuplicates;
617 template <
typename ExecSpace,
typename ValueType,
int Op>
618 struct ResetDuplicatesBase {
619 typedef ResetDuplicates<ExecSpace, ValueType, Op> Derived;
621 ResetDuplicatesBase(ValueType* data_in,
size_t size_in,
622 std::string
const& name)
624 #if defined(KOKKOS_ENABLE_PROFILING)
626 if (Kokkos::Profiling::profileLibraryLoaded()) {
627 Kokkos::Profiling::beginParallelFor(std::string(
"reduce_") + name, 0,
633 typedef RangePolicy<ExecSpace, size_t> policy_type;
635 const closure_type closure(*(static_cast<Derived*>(
this)),
636 policy_type(0, size_in));
638 #if defined(KOKKOS_ENABLE_PROFILING)
639 if (Kokkos::Profiling::profileLibraryLoaded()) {
640 Kokkos::Profiling::endParallelFor(kpID);
649 template <
typename ExecSpace,
typename ValueType,
int Op>
650 struct ResetDuplicates :
public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
651 typedef ResetDuplicatesBase<ExecSpace, ValueType, Op> Base;
652 ResetDuplicates(ValueType* data_in,
size_t size_in, std::string
const& name)
653 : Base(data_in, size_in, name) {}
654 KOKKOS_FORCEINLINE_FUNCTION
void operator()(
size_t i)
const {
655 ScatterValue<ValueType, Op, ExecSpace,
656 Kokkos::Experimental::ScatterNonAtomic>
667 namespace Experimental {
669 template <
typename DataType,
670 typename Layout = Kokkos::DefaultExecutionSpace::array_layout,
671 typename DeviceType = Kokkos::DefaultExecutionSpace,
673 int duplication = Kokkos::Impl::Experimental::DefaultDuplication<
674 typename DeviceType::execution_space>::value,
675 int contribution = Kokkos::Impl::Experimental::DefaultContribution<
676 typename DeviceType::execution_space, duplication>::value>
679 template <
typename DataType,
int Op,
typename DeviceType,
typename Layout,
680 int duplication,
int contribution,
int override_contribution>
684 template <
typename DataType,
int Op,
typename DeviceType,
typename Layout,
686 class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
689 using execution_space =
typename DeviceType::execution_space;
690 using memory_space =
typename DeviceType::memory_space;
691 using device_type = Kokkos::Device<execution_space, memory_space>;
693 typedef typename original_view_type::value_type original_value_type;
694 typedef typename original_view_type::reference_type original_reference_type;
695 friend class ScatterAccess<DataType, Op, DeviceType, Layout,
696 ScatterNonDuplicated, contribution,
698 friend class ScatterAccess<DataType, Op, DeviceType, Layout,
699 ScatterNonDuplicated, contribution, ScatterAtomic>;
700 template <
class,
class,
class,
int,
int,
int>
701 friend class ScatterView;
703 ScatterView() =
default;
705 template <
typename RT,
typename... RP>
706 ScatterView(View<RT, RP...>
const& original_view)
707 : internal_view(original_view) {}
709 template <
typename... Dims>
710 ScatterView(std::string
const& name, Dims... dims)
711 : internal_view(name, dims...) {}
713 template <
typename OtherDataType,
typename OtherDeviceType>
714 KOKKOS_FUNCTION ScatterView(
715 const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
716 ScatterNonDuplicated, contribution>& other_view)
717 : internal_view(other_view.internal_view) {}
719 template <
typename OtherDataType,
typename OtherDeviceType>
720 KOKKOS_FUNCTION
void operator=(
721 const ScatterView<OtherDataType, Layout, OtherDeviceType, Op,
722 ScatterNonDuplicated, contribution>& other_view) {
723 internal_view = other_view.internal_view;
726 template <
int overr
ide_contrib = contribution>
727 KOKKOS_FORCEINLINE_FUNCTION
728 ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
729 contribution, override_contrib>
731 return ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
732 contribution, override_contrib>(*this);
735 original_view_type subview()
const {
return internal_view; }
737 template <
typename DT,
typename... RP>
738 void contribute_into(View<DT, RP...>
const& dest)
const {
739 typedef View<DT, RP...> dest_type;
740 static_assert(std::is_same<typename dest_type::array_layout, Layout>::value,
741 "ScatterView contribute destination has different layout");
743 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
744 memory_space,
typename dest_type::memory_space>::value,
745 "ScatterView contribute destination memory space not accessible");
746 if (dest.data() == internal_view.data())
return;
747 Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
748 original_value_type, Op>(
749 internal_view.data(), dest.data(), 0, 0, 1, internal_view.label());
753 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
754 original_value_type, Op>(
755 internal_view.data(), internal_view.size(), internal_view.label());
757 template <
typename DT,
typename... RP>
758 void reset_except(View<DT, RP...>
const& view) {
759 if (view.data() != internal_view.data()) reset();
762 void resize(
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
763 const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
764 const size_t n6 = 0,
const size_t n7 = 0) {
765 ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
768 void realloc(
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
769 const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
770 const size_t n6 = 0,
const size_t n7 = 0) {
771 ::Kokkos::realloc(internal_view, n0, n1, n2, n3, n4, n5, n6, n7);
775 template <
typename... Args>
776 KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args... args)
const {
777 return internal_view(args...);
781 typedef original_view_type internal_view_type;
782 internal_view_type internal_view;
785 template <
typename DataType,
int Op,
typename DeviceType,
typename Layout,
786 int contribution,
int override_contribution>
787 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
788 contribution, override_contribution> {
790 typedef ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
793 typedef typename view_type::original_value_type original_value_type;
794 typedef Kokkos::Impl::Experimental::ScatterValue<
795 original_value_type, Op, DeviceType, override_contribution>
798 KOKKOS_INLINE_FUNCTION
799 ScatterAccess() : view(view_type()) {}
801 KOKKOS_INLINE_FUNCTION
802 ScatterAccess(view_type
const& view_in) : view(view_in) {}
803 KOKKOS_DEFAULTED_FUNCTION
804 ~ScatterAccess() =
default;
806 template <
typename... Args>
807 KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args)
const {
808 return view.at(args...);
811 template <
typename Arg>
812 KOKKOS_FORCEINLINE_FUNCTION
813 typename std::enable_if<view_type::original_view_type::rank == 1 &&
814 std::is_integral<Arg>::value,
816 operator[](Arg arg)
const {
821 view_type
const& view;
828 template <
typename DataType,
int Op,
typename DeviceType,
int contribution>
829 class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
830 ScatterDuplicated, contribution> {
832 using execution_space =
typename DeviceType::execution_space;
833 using memory_space =
typename DeviceType::memory_space;
834 using device_type = Kokkos::Device<execution_space, memory_space>;
837 typedef typename original_view_type::value_type original_value_type;
838 typedef typename original_view_type::reference_type original_reference_type;
839 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
840 ScatterDuplicated, contribution, ScatterNonAtomic>;
841 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutRight,
842 ScatterDuplicated, contribution, ScatterAtomic>;
843 template <class, class, class, int, int, int>
844 friend class ScatterView;
846 typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<
847 DataType, Kokkos::LayoutRight>
849 typedef typename data_type_info::value_type internal_data_type;
850 typedef
Kokkos::View<internal_data_type, Kokkos::LayoutRight, device_type>
853 ScatterView() = default;
855 template <typename OtherDataType, typename OtherDeviceType>
856 KOKKOS_FUNCTION ScatterView(
857 const ScatterView<OtherDataType, Kokkos::LayoutRight, OtherDeviceType, Op,
858 ScatterDuplicated, contribution>& other_view)
859 : unique_token(other_view.unique_token),
860 internal_view(other_view.internal_view) {}
862 template <
typename OtherDataType,
typename OtherDeviceType>
863 KOKKOS_FUNCTION
void operator=(
865 ScatterDuplicated, contribution>& other_view) {
866 unique_token = other_view.unique_token;
867 internal_view = other_view.internal_view;
870 template <
typename RT,
typename... RP>
871 ScatterView(View<RT, RP...>
const& original_view)
873 internal_view(Kokkos::ViewAllocateWithoutInitializing(
874 std::string(
"duplicated_") + original_view.label()),
876 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
877 original_view.extent(0), original_view.extent(1),
878 original_view.extent(2), original_view.extent(3),
879 original_view.extent(4), original_view.extent(5),
880 original_view.extent(6))
882 original_view.rank_dynamic > 0
883 ? original_view.extent(0)
884 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
885 original_view.rank_dynamic > 1
886 ? original_view.extent(1)
887 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
888 original_view.rank_dynamic > 2
889 ? original_view.extent(2)
890 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
891 original_view.rank_dynamic > 3
892 ? original_view.extent(3)
893 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
894 original_view.rank_dynamic > 4
895 ? original_view.extent(4)
896 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
897 original_view.rank_dynamic > 5
898 ? original_view.extent(5)
899 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
900 original_view.rank_dynamic > 6
901 ? original_view.extent(6)
902 : KOKKOS_IMPL_CTOR_DEFAULT_ARG)
909 template <
typename... Dims>
910 ScatterView(std::string
const& name, Dims... dims)
911 : internal_view(Kokkos::ViewAllocateWithoutInitializing(name),
912 unique_token.size(), dims...) {
916 template <
int overr
ide_contribution = contribution>
917 KOKKOS_FORCEINLINE_FUNCTION
919 ScatterDuplicated, contribution, override_contribution>
922 ScatterDuplicated, contribution,
923 override_contribution>(*this);
927 internal_view_type::rank,
928 internal_view_type>::value_type
930 return Kokkos::Impl::Experimental::Slice<
932 internal_view_type>::get(internal_view, 0);
935 template <
typename DT,
typename... RP>
936 void contribute_into(View<DT, RP...>
const& dest)
const {
937 typedef View<DT, RP...> dest_type;
938 static_assert(std::is_same<
typename dest_type::array_layout,
940 "ScatterView deep_copy destination has different layout");
942 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
943 memory_space,
typename dest_type::memory_space>::value,
944 "ScatterView deep_copy destination memory space not accessible");
945 bool is_equal = (dest.data() == internal_view.data());
946 size_t start = is_equal ? 1 : 0;
947 Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
948 original_value_type, Op>(
949 internal_view.data(), dest.data(), internal_view.stride(0), start,
950 internal_view.extent(0), internal_view.label());
954 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
955 original_value_type, Op>(
956 internal_view.data(), internal_view.size(), internal_view.label());
958 template <
typename DT,
typename... RP>
959 void reset_except(View<DT, RP...>
const& view) {
960 if (view.data() != internal_view.data()) {
964 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
965 original_value_type, Op>(
966 internal_view.data() + view.size(), internal_view.size() - view.size(),
967 internal_view.label());
970 void resize(
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
971 const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
972 const size_t n6 = 0) {
973 ::Kokkos::resize(internal_view, unique_token.size(), n0, n1, n2, n3, n4, n5,
977 void realloc(
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
978 const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
979 const size_t n6 = 0) {
980 ::Kokkos::realloc(internal_view, unique_token.size(), n0, n1, n2, n3, n4,
985 template <
typename... Args>
986 KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(
int rank,
987 Args... args)
const {
988 return internal_view(rank, args...);
993 execution_space, Kokkos::Experimental::UniqueTokenScope::Global>
996 unique_token_type unique_token;
997 internal_view_type internal_view;
1000 template <
typename DataType,
int Op,
typename DeviceType,
int contribution>
1001 class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
1002 ScatterDuplicated, contribution> {
1004 using execution_space =
typename DeviceType::execution_space;
1005 using memory_space =
typename DeviceType::memory_space;
1006 using device_type = Kokkos::Device<execution_space, memory_space>;
1009 typedef typename original_view_type::value_type original_value_type;
1010 typedef typename original_view_type::reference_type original_reference_type;
1011 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1012 ScatterDuplicated, contribution, ScatterNonAtomic>;
1013 friend class ScatterAccess<DataType, Op, DeviceType, Kokkos::LayoutLeft,
1014 ScatterDuplicated, contribution, ScatterAtomic>;
1015 template <class, class, class, int, int, int>
1016 friend class ScatterView;
1018 typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<
1019 DataType, Kokkos::LayoutLeft>
1021 typedef typename data_type_info::value_type internal_data_type;
1022 typedef
Kokkos::View<internal_data_type, Kokkos::LayoutLeft, device_type>
1025 ScatterView() = default;
1027 template <typename RT, typename... RP>
1028 ScatterView(View<RT, RP...> const& original_view) : unique_token() {
1029 size_t arg_N[8] = {original_view.rank > 0 ? original_view.extent(0)
1030 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1031 original_view.rank > 1 ? original_view.extent(1)
1032 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1033 original_view.rank > 2 ? original_view.extent(2)
1034 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1035 original_view.rank > 3 ? original_view.extent(3)
1036 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1037 original_view.rank > 4 ? original_view.extent(4)
1038 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1039 original_view.rank > 5 ? original_view.extent(5)
1040 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1041 original_view.rank > 6 ? original_view.extent(6)
1042 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1043 KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1044 arg_N[internal_view_type::rank - 1] = unique_token.size();
1045 internal_view = internal_view_type(
1046 Kokkos::ViewAllocateWithoutInitializing(std::string(
"duplicated_") +
1047 original_view.label()),
1048 arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6],
1053 template <
typename... Dims>
1054 ScatterView(std::string
const& name, Dims... dims) {
1055 original_view_type original_view;
1056 size_t arg_N[8] = {original_view.rank > 0 ? original_view.static_extent(0)
1057 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1058 original_view.rank > 1 ? original_view.static_extent(1)
1059 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1060 original_view.rank > 2 ? original_view.static_extent(2)
1061 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1062 original_view.rank > 3 ? original_view.static_extent(3)
1063 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1064 original_view.rank > 4 ? original_view.static_extent(4)
1065 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1066 original_view.rank > 5 ? original_view.static_extent(5)
1067 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1068 original_view.rank > 6 ? original_view.static_extent(6)
1069 : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
1070 KOKKOS_IMPL_CTOR_DEFAULT_ARG};
1071 Kokkos::Impl::Experimental::args_to_array(arg_N, 0, dims...);
1072 arg_N[internal_view_type::rank - 1] = unique_token.size();
1073 internal_view = internal_view_type(
1074 Kokkos::ViewAllocateWithoutInitializing(name), arg_N[0], arg_N[1],
1075 arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1079 template <
typename OtherDataType,
typename OtherDeviceType>
1080 KOKKOS_FUNCTION ScatterView(
1082 ScatterDuplicated, contribution>& other_view)
1083 : unique_token(other_view.unique_token),
1084 internal_view(other_view.internal_view) {}
1086 template <
typename OtherDataType,
typename OtherDeviceType>
1087 KOKKOS_FUNCTION
void operator=(
1089 ScatterDuplicated, contribution>& other_view) {
1090 unique_token = other_view.unique_token;
1091 internal_view = other_view.internal_view;
1094 template <
int overr
ide_contribution = contribution>
1095 KOKKOS_FORCEINLINE_FUNCTION
1097 ScatterDuplicated, contribution, override_contribution>
1100 ScatterDuplicated, contribution,
1101 override_contribution>(*this);
1105 internal_view_type::rank,
1106 internal_view_type>::value_type
1108 return Kokkos::Impl::Experimental::Slice<
1110 internal_view_type>::get(internal_view, 0);
1113 template <
typename... RP>
1114 void contribute_into(View<RP...>
const& dest)
const {
1115 typedef View<RP...> dest_type;
1117 std::is_same<
typename dest_type::value_type,
1118 typename original_view_type::non_const_value_type>::value,
1119 "ScatterView deep_copy destination has wrong value_type");
1120 static_assert(std::is_same<
typename dest_type::array_layout,
1122 "ScatterView deep_copy destination has different layout");
1124 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
1125 memory_space,
typename dest_type::memory_space>::value,
1126 "ScatterView deep_copy destination memory space not accessible");
1127 auto extent = internal_view.extent(internal_view_type::rank - 1);
1128 bool is_equal = (dest.data() == internal_view.data());
1129 size_t start = is_equal ? 1 : 0;
1130 Kokkos::Impl::Experimental::ReduceDuplicates<execution_space,
1131 original_value_type, Op>(
1132 internal_view.data(), dest.data(),
1133 internal_view.stride(internal_view_type::rank - 1), start, extent,
1134 internal_view.label());
1138 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1139 original_value_type, Op>(
1140 internal_view.data(), internal_view.size(), internal_view.label());
1142 template <
typename DT,
typename... RP>
1143 void reset_except(View<DT, RP...>
const& view) {
1144 if (view.data() != internal_view.data()) {
1148 Kokkos::Impl::Experimental::ResetDuplicates<execution_space,
1149 original_value_type, Op>(
1150 internal_view.data() + view.size(), internal_view.size() - view.size(),
1151 internal_view.label());
1154 void resize(
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
1155 const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
1156 const size_t n6 = 0) {
1157 size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1158 const int i = internal_view.rank - 1;
1159 arg_N[i] = unique_token.size();
1161 ::Kokkos::resize(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1162 arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1165 void realloc(
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
1166 const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
1167 const size_t n6 = 0) {
1168 size_t arg_N[8] = {n0, n1, n2, n3, n4, n5, n6, 0};
1169 const int i = internal_view.rank - 1;
1170 arg_N[i] = unique_token.size();
1172 ::Kokkos::realloc(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3],
1173 arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
1177 template <
typename... Args>
1178 KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(
int thread_id,
1179 Args... args)
const {
1180 return internal_view(args..., thread_id);
1185 execution_space, Kokkos::Experimental::UniqueTokenScope::Global>
1188 unique_token_type unique_token;
1189 internal_view_type internal_view;
1202 template <
typename DataType,
int Op,
typename DeviceType,
typename Layout,
1203 int contribution,
int override_contribution>
1204 class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
1205 contribution, override_contribution> {
1207 typedef ScatterView<DataType, Layout, DeviceType, Op, ScatterDuplicated,
1210 typedef typename view_type::original_value_type original_value_type;
1211 typedef Kokkos::Impl::Experimental::ScatterValue<
1212 original_value_type, Op, DeviceType, override_contribution>
1215 KOKKOS_FORCEINLINE_FUNCTION
1216 ScatterAccess(view_type
const& view_in)
1217 : view(view_in), thread_id(view_in.unique_token.acquire()) {}
1219 KOKKOS_FORCEINLINE_FUNCTION
1221 if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
1224 template <
typename... Args>
1225 KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args)
const {
1226 return view.at(thread_id, args...);
1229 template <
typename Arg>
1230 KOKKOS_FORCEINLINE_FUNCTION
1231 typename std::enable_if<view_type::original_view_type::rank == 1 &&
1232 std::is_integral<Arg>::value,
1234 operator[](Arg arg)
const {
1235 return view.at(thread_id, arg);
1239 view_type
const& view;
1242 ScatterAccess(ScatterAccess
const& other) =
delete;
1243 ScatterAccess& operator=(ScatterAccess
const& other) =
delete;
1244 ScatterAccess& operator=(ScatterAccess&& other) =
delete;
1250 KOKKOS_FORCEINLINE_FUNCTION
1251 ScatterAccess(ScatterAccess&& other)
1252 : view(other.view), thread_id(other.thread_id) {
1253 other.thread_id = ~thread_id_type(0);
1257 typedef typename view_type::unique_token_type unique_token_type;
1258 typedef typename unique_token_type::size_type thread_id_type;
1259 thread_id_type thread_id;
1262 template <
int Op = Kokkos::Experimental::ScatterSum,
int duplication = -1,
1263 int contribution = -1,
typename RT,
typename... RP>
1265 RT,
typename ViewTraits<RT, RP...>::array_layout,
1266 typename ViewTraits<RT, RP...>::device_type,
1273 ? Kokkos::Impl::Experimental::DefaultDuplication<
1274 typename ViewTraits<RT, RP...>::execution_space>::value
1277 ? Kokkos::Impl::Experimental::DefaultContribution<
1278 typename ViewTraits<RT, RP...>::execution_space,
1280 ? Kokkos::Impl::Experimental::DefaultDuplication<
1281 typename ViewTraits<RT, RP...>::execution_space>::value
1282 : duplication)>::value
1284 create_scatter_view(View<RT, RP...>
const& original_view) {
1285 return original_view;
1292 namespace Experimental {
1294 template <
typename DT1,
typename DT2,
typename LY,
typename ES,
int OP,
int CT,
1295 int DP,
typename... VP>
1297 View<DT1, VP...>& dest,
1298 Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP>
const& src) {
1299 src.contribute_into(dest);
1307 template <
typename DT,
typename LY,
typename ES,
int OP,
int CT,
int DP,
1310 Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1312 scatter_view.realloc(is...);
1315 template <
typename DT,
typename LY,
typename ES,
int OP,
int CT,
int DP,
1318 Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view,
1320 scatter_view.resize(is...);
Memory layout tag indicating left-to-right (Fortran scheme) striding of multi-indices.
class to generate unique ids base on the required amount of concurrency
View to an array of data.
Memory layout tag indicating right-to-left (C or lexigraphical scheme) striding of multi-indices...
std::enable_if< std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutLeft >::value||std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutRight >::value >::type resize(Kokkos::View< T, P...> &v, const size_t n0=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n3=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n4=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n5=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7=KOKKOS_IMPL_CTOR_DEFAULT_ARG)
Resize a view with copying old data to new data at the corresponding indices.
Implementation of the ParallelFor operator that has a partial specialization for the device...
std::enable_if< std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutLeft >::value||std::is_same< typename Kokkos::View< T, P...>::array_layout, Kokkos::LayoutRight >::value >::type realloc(Kokkos::View< T, P...> &v, const size_t n0=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n3=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n4=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n5=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6=KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7=KOKKOS_IMPL_CTOR_DEFAULT_ARG)
Resize a view with discarding old data.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P...> &V)
Temporary free function rank() until rank() is implemented in the View.