49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 #include "impl/Kokkos_Atomic_Generic.hpp"
59 namespace KokkosRefactor {
75 template<
class IntegerType,
76 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
78 static KOKKOS_INLINE_FUNCTION
bool
79 test (
const IntegerType x,
80 const IntegerType exclusiveUpperBound);
84 template<
class IntegerType>
86 static KOKKOS_INLINE_FUNCTION
bool
87 test (
const IntegerType x,
88 const IntegerType exclusiveUpperBound)
90 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
95 template<
class IntegerType>
96 struct OutOfBounds<IntegerType, false> {
97 static KOKKOS_INLINE_FUNCTION
bool
98 test (
const IntegerType x,
99 const IntegerType exclusiveUpperBound)
101 return x >= exclusiveUpperBound;
107 template<
class IntegerType>
108 KOKKOS_INLINE_FUNCTION
bool
109 outOfBounds (
const IntegerType x,
const IntegerType exclusiveUpperBound)
119 template <
typename DstView,
typename SrcView,
typename IdxView>
120 struct PackArraySingleColumn {
121 typedef typename DstView::execution_space execution_space;
122 typedef typename execution_space::size_type size_type;
129 PackArraySingleColumn (
const DstView& dst_,
133 dst(dst_), src(src_), idx(idx_), col(col_) {}
135 KOKKOS_INLINE_FUNCTION
void
136 operator() (
const size_type k)
const {
137 dst(k) = src(idx(k), col);
141 pack (
const DstView& dst,
146 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
148 (
"Tpetra::MultiVector pack one col",
149 range_type (0, idx.size ()),
150 PackArraySingleColumn (dst, src, idx, col));
154 template <
typename DstView,
157 typename SizeType =
typename DstView::execution_space::size_type>
158 class PackArraySingleColumnWithBoundsCheck {
160 static_assert (Kokkos::Impl::is_view<DstView>::value,
161 "DstView must be a Kokkos::View.");
162 static_assert (Kokkos::Impl::is_view<SrcView>::value,
163 "SrcView must be a Kokkos::View.");
164 static_assert (Kokkos::Impl::is_view<IdxView>::value,
165 "IdxView must be a Kokkos::View.");
166 static_assert (static_cast<int> (DstView::rank) == 1,
167 "DstView must be a rank-1 Kokkos::View.");
168 static_assert (static_cast<int> (SrcView::rank) == 2,
169 "SrcView must be a rank-2 Kokkos::View.");
170 static_assert (static_cast<int> (IdxView::rank) == 1,
171 "IdxView must be a rank-1 Kokkos::View.");
172 static_assert (std::is_integral<SizeType>::value,
173 "SizeType must be a built-in integer type.");
175 typedef SizeType size_type;
176 using value_type = size_t;
185 PackArraySingleColumnWithBoundsCheck (
const DstView& dst_,
188 const size_type col_) :
189 dst (dst_), src (src_), idx (idx_), col (col_) {}
191 KOKKOS_INLINE_FUNCTION
void
192 operator() (
const size_type k, value_type& lclErrCount)
const {
193 using index_type =
typename IdxView::non_const_value_type;
195 const index_type lclRow = idx(k);
196 if (lclRow < static_cast<index_type> (0) ||
197 lclRow >= static_cast<index_type> (src.extent (0))) {
201 dst(k) = src(lclRow, col);
205 KOKKOS_INLINE_FUNCTION
206 void init (value_type& initialErrorCount)
const {
207 initialErrorCount = 0;
210 KOKKOS_INLINE_FUNCTION
void
211 join (
volatile value_type& dstErrorCount,
212 const volatile value_type& srcErrorCount)
const
214 dstErrorCount += srcErrorCount;
218 pack (
const DstView& dst,
223 typedef typename DstView::execution_space execution_space;
224 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
225 typedef typename IdxView::non_const_value_type index_type;
227 size_t errorCount = 0;
228 Kokkos::parallel_reduce
229 (
"Tpetra::MultiVector pack one col debug only",
230 range_type (0, idx.size ()),
231 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
233 if (errorCount != 0) {
237 auto idx_h = Kokkos::create_mirror_view (idx);
240 std::vector<index_type> badIndices;
241 const size_type numInds = idx_h.extent (0);
242 for (size_type k = 0; k < numInds; ++k) {
243 if (idx_h(k) < static_cast<index_type> (0) ||
244 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
245 badIndices.push_back (idx_h(k));
249 TEUCHOS_TEST_FOR_EXCEPTION
250 (errorCount != badIndices.size (), std::logic_error,
251 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
252 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
253 "uld never happen. Please report this to the Tpetra developers.");
255 std::ostringstream os;
256 os <<
"MultiVector single-column pack kernel had "
257 << badIndices.size () <<
" out-of bounds index/ices. "
259 for (
size_t k = 0; k < badIndices.size (); ++k) {
261 if (k + 1 < badIndices.size ()) {
266 throw std::runtime_error (os.str ());
272 template <
typename DstView,
typename SrcView,
typename IdxView>
274 pack_array_single_column (
const DstView& dst,
278 const bool debug =
true)
280 static_assert (Kokkos::Impl::is_view<DstView>::value,
281 "DstView must be a Kokkos::View.");
282 static_assert (Kokkos::Impl::is_view<SrcView>::value,
283 "SrcView must be a Kokkos::View.");
284 static_assert (Kokkos::Impl::is_view<IdxView>::value,
285 "IdxView must be a Kokkos::View.");
286 static_assert (static_cast<int> (DstView::rank) == 1,
287 "DstView must be a rank-1 Kokkos::View.");
288 static_assert (static_cast<int> (SrcView::rank) == 2,
289 "SrcView must be a rank-2 Kokkos::View.");
290 static_assert (static_cast<int> (IdxView::rank) == 1,
291 "IdxView must be a rank-1 Kokkos::View.");
294 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
295 impl_type::pack (dst, src, idx, col);
298 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
299 impl_type::pack (dst, src, idx, col);
303 template <
typename DstView,
typename SrcView,
typename IdxView>
304 struct PackArrayMultiColumn {
305 typedef typename DstView::execution_space execution_space;
306 typedef typename execution_space::size_type size_type;
313 PackArrayMultiColumn (
const DstView& dst_,
316 const size_t numCols_) :
317 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
319 KOKKOS_INLINE_FUNCTION
void
320 operator() (
const size_type k)
const {
321 const typename IdxView::value_type localRow = idx(k);
322 const size_t offset = k*numCols;
323 for (
size_t j = 0; j < numCols; ++j) {
324 dst(offset + j) = src(localRow, j);
328 static void pack(
const DstView& dst,
332 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
334 (
"Tpetra::MultiVector pack multicol const stride",
335 range_type (0, idx.size ()),
336 PackArrayMultiColumn (dst, src, idx, numCols));
340 template <
typename DstView,
343 typename SizeType =
typename DstView::execution_space::size_type>
344 class PackArrayMultiColumnWithBoundsCheck {
346 using size_type = SizeType;
347 using value_type = size_t;
356 PackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
359 const size_type numCols_) :
360 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
362 KOKKOS_INLINE_FUNCTION
void
363 operator() (
const size_type k, value_type& lclErrorCount)
const {
364 typedef typename IdxView::non_const_value_type index_type;
366 const index_type lclRow = idx(k);
367 if (lclRow < static_cast<index_type> (0) ||
368 lclRow >= static_cast<index_type> (src.extent (0))) {
372 const size_type offset = k*numCols;
373 for (size_type j = 0; j < numCols; ++j) {
374 dst(offset + j) = src(lclRow, j);
379 KOKKOS_INLINE_FUNCTION
380 void init (value_type& initialErrorCount)
const {
381 initialErrorCount = 0;
384 KOKKOS_INLINE_FUNCTION
void
385 join (
volatile value_type& dstErrorCount,
386 const volatile value_type& srcErrorCount)
const
388 dstErrorCount += srcErrorCount;
392 pack (
const DstView& dst,
395 const size_type numCols)
397 typedef typename DstView::execution_space execution_space;
398 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
399 typedef typename IdxView::non_const_value_type index_type;
401 size_t errorCount = 0;
402 Kokkos::parallel_reduce
403 (
"Tpetra::MultiVector pack multicol const stride debug only",
404 range_type (0, idx.size ()),
405 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
407 if (errorCount != 0) {
411 auto idx_h = Kokkos::create_mirror_view (idx);
414 std::vector<index_type> badIndices;
415 const size_type numInds = idx_h.extent (0);
416 for (size_type k = 0; k < numInds; ++k) {
417 if (idx_h(k) < static_cast<index_type> (0) ||
418 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
419 badIndices.push_back (idx_h(k));
423 TEUCHOS_TEST_FOR_EXCEPTION
424 (errorCount != badIndices.size (), std::logic_error,
425 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
426 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
427 "uld never happen. Please report this to the Tpetra developers.");
429 std::ostringstream os;
430 os <<
"Tpetra::MultiVector multiple-column pack kernel had "
431 << badIndices.size () <<
" out-of bounds index/ices (errorCount = "
432 << errorCount <<
"): [";
433 for (
size_t k = 0; k < badIndices.size (); ++k) {
435 if (k + 1 < badIndices.size ()) {
440 throw std::runtime_error (os.str ());
446 template <
typename DstView,
450 pack_array_multi_column (
const DstView& dst,
453 const size_t numCols,
454 const bool debug =
true)
456 static_assert (Kokkos::Impl::is_view<DstView>::value,
457 "DstView must be a Kokkos::View.");
458 static_assert (Kokkos::Impl::is_view<SrcView>::value,
459 "SrcView must be a Kokkos::View.");
460 static_assert (Kokkos::Impl::is_view<IdxView>::value,
461 "IdxView must be a Kokkos::View.");
462 static_assert (static_cast<int> (DstView::rank) == 1,
463 "DstView must be a rank-1 Kokkos::View.");
464 static_assert (static_cast<int> (SrcView::rank) == 2,
465 "SrcView must be a rank-2 Kokkos::View.");
466 static_assert (static_cast<int> (IdxView::rank) == 1,
467 "IdxView must be a rank-1 Kokkos::View.");
470 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
471 SrcView, IdxView> impl_type;
472 impl_type::pack (dst, src, idx, numCols);
475 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
476 impl_type::pack (dst, src, idx, numCols);
480 template <
typename DstView,
typename SrcView,
typename IdxView,
482 struct PackArrayMultiColumnVariableStride {
483 typedef typename DstView::execution_space execution_space;
484 typedef typename execution_space::size_type size_type;
492 PackArrayMultiColumnVariableStride (
const DstView& dst_,
496 const size_t numCols_) :
497 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
499 KOKKOS_INLINE_FUNCTION
500 void operator() (
const size_type k)
const {
501 const typename IdxView::value_type localRow = idx(k);
502 const size_t offset = k*numCols;
503 for (
size_t j = 0; j < numCols; ++j) {
504 dst(offset + j) = src(localRow, col(j));
508 static void pack(
const DstView& dst,
513 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
515 (
"Tpetra::MultiVector pack multicol var stride",
516 range_type (0, idx.size ()),
517 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
521 template <
typename DstView,
525 typename SizeType =
typename DstView::execution_space::size_type>
526 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
528 using size_type = SizeType;
529 using value_type = size_t;
539 PackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
543 const size_type numCols_) :
544 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
546 KOKKOS_INLINE_FUNCTION
void
547 operator() (
const size_type k, value_type& lclErrorCount)
const {
548 typedef typename IdxView::non_const_value_type row_index_type;
549 typedef typename ColView::non_const_value_type col_index_type;
551 const row_index_type lclRow = idx(k);
552 if (lclRow < static_cast<row_index_type> (0) ||
553 lclRow >= static_cast<row_index_type> (src.extent (0))) {
557 const size_type offset = k*numCols;
558 for (size_type j = 0; j < numCols; ++j) {
559 const col_index_type lclCol = col(j);
560 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
564 dst(offset + j) = src(lclRow, lclCol);
570 KOKKOS_INLINE_FUNCTION
void
571 init (value_type& initialErrorCount)
const {
572 initialErrorCount = 0;
575 KOKKOS_INLINE_FUNCTION
void
576 join (
volatile value_type& dstErrorCount,
577 const volatile value_type& srcErrorCount)
const
579 dstErrorCount += srcErrorCount;
583 pack (
const DstView& dst,
587 const size_type numCols)
589 using execution_space =
typename DstView::execution_space;
590 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
591 using row_index_type =
typename IdxView::non_const_value_type;
592 using col_index_type =
typename ColView::non_const_value_type;
594 size_t errorCount = 0;
595 Kokkos::parallel_reduce
596 (
"Tpetra::MultiVector pack multicol var stride debug only",
597 range_type (0, idx.size ()),
598 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
601 if (errorCount != 0) {
602 constexpr
size_t maxNumBadIndicesToPrint = 100;
604 std::ostringstream os;
605 os <<
"Tpetra::MultiVector multicolumn variable stride pack kernel "
606 "found " << errorCount
607 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
612 auto idx_h = Kokkos::create_mirror_view (idx);
615 std::vector<row_index_type> badRows;
616 const size_type numRowInds = idx_h.extent (0);
617 for (size_type k = 0; k < numRowInds; ++k) {
618 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
619 badRows.push_back (idx_h(k));
623 if (badRows.size () != 0) {
624 os << badRows.size () <<
" out-of-bounds row ind"
625 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
626 if (badRows.size () <= maxNumBadIndicesToPrint) {
628 for (
size_t k = 0; k < badRows.size (); ++k) {
630 if (k + 1 < badRows.size ()) {
641 os <<
"No out-of-bounds row indices. ";
646 auto col_h = Kokkos::create_mirror_view (col);
649 std::vector<col_index_type> badCols;
650 const size_type numColInds = col_h.extent (0);
651 for (size_type k = 0; k < numColInds; ++k) {
652 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
653 badCols.push_back (col_h(k));
657 if (badCols.size () != 0) {
658 os << badCols.size () <<
" out-of-bounds column ind"
659 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
660 if (badCols.size () <= maxNumBadIndicesToPrint) {
662 for (
size_t k = 0; k < badCols.size (); ++k) {
664 if (k + 1 < badCols.size ()) {
675 os <<
"No out-of-bounds column indices. ";
678 TEUCHOS_TEST_FOR_EXCEPTION
679 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
680 std::logic_error,
"Tpetra::MultiVector variable stride pack "
681 "kernel reports errorCount=" << errorCount <<
", but we failed "
682 "to find any bad rows or columns. This should never happen. "
683 "Please report this to the Tpetra developers.");
685 throw std::runtime_error (os.str ());
690 template <
typename DstView,
695 pack_array_multi_column_variable_stride (
const DstView& dst,
699 const size_t numCols,
700 const bool debug =
true)
702 static_assert (Kokkos::Impl::is_view<DstView>::value,
703 "DstView must be a Kokkos::View.");
704 static_assert (Kokkos::Impl::is_view<SrcView>::value,
705 "SrcView must be a Kokkos::View.");
706 static_assert (Kokkos::Impl::is_view<IdxView>::value,
707 "IdxView must be a Kokkos::View.");
708 static_assert (Kokkos::Impl::is_view<ColView>::value,
709 "ColView must be a Kokkos::View.");
710 static_assert (static_cast<int> (DstView::rank) == 1,
711 "DstView must be a rank-1 Kokkos::View.");
712 static_assert (static_cast<int> (SrcView::rank) == 2,
713 "SrcView must be a rank-2 Kokkos::View.");
714 static_assert (static_cast<int> (IdxView::rank) == 1,
715 "IdxView must be a rank-1 Kokkos::View.");
716 static_assert (static_cast<int> (ColView::rank) == 1,
717 "ColView must be a rank-1 Kokkos::View.");
720 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
721 SrcView, IdxView, ColView> impl_type;
722 impl_type::pack (dst, src, idx, col, numCols);
725 typedef PackArrayMultiColumnVariableStride<DstView,
726 SrcView, IdxView, ColView> impl_type;
727 impl_type::pack (dst, src, idx, col, numCols);
733 struct atomic_tag {};
734 struct nonatomic_tag {};
738 KOKKOS_INLINE_FUNCTION
739 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
740 Kokkos::atomic_add (&dest, src);
743 KOKKOS_INLINE_FUNCTION
744 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
755 KOKKOS_INLINE_FUNCTION
756 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
760 KOKKOS_INLINE_FUNCTION
761 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
767 template<
class Scalar1,
class Scalar2>
769 KOKKOS_INLINE_FUNCTION
770 static Scalar1 apply(
const Scalar1& val1,
const Scalar2& val2) {
771 const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
772 const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
773 return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
777 template <
typename SC>
779 KOKKOS_INLINE_FUNCTION
780 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
781 Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
784 KOKKOS_INLINE_FUNCTION
785 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
786 dest = AbsMaxOper<SC, SC> ().apply (dest, src);
790 template <
typename ExecutionSpace,
795 class UnpackArrayMultiColumn {
797 static_assert (Kokkos::Impl::is_view<DstView>::value,
798 "DstView must be a Kokkos::View.");
799 static_assert (Kokkos::Impl::is_view<SrcView>::value,
800 "SrcView must be a Kokkos::View.");
801 static_assert (Kokkos::Impl::is_view<IdxView>::value,
802 "IdxView must be a Kokkos::View.");
803 static_assert (static_cast<int> (DstView::rank) == 2,
804 "DstView must be a rank-2 Kokkos::View.");
805 static_assert (static_cast<int> (SrcView::rank) == 1,
806 "SrcView must be a rank-1 Kokkos::View.");
807 static_assert (static_cast<int> (IdxView::rank) == 1,
808 "IdxView must be a rank-1 Kokkos::View.");
811 typedef typename ExecutionSpace::execution_space execution_space;
812 typedef typename execution_space::size_type size_type;
822 UnpackArrayMultiColumn (
const ExecutionSpace& ,
827 const size_t numCols_) :
835 template<
class TagType>
836 KOKKOS_INLINE_FUNCTION
void
837 operator() (TagType tag,
const size_type k)
const
840 (std::is_same<TagType, atomic_tag>::value ||
841 std::is_same<TagType, nonatomic_tag>::value,
842 "TagType must be atomic_tag or nonatomic_tag.");
844 const typename IdxView::value_type localRow = idx(k);
845 const size_t offset = k*numCols;
846 for (
size_t j = 0; j < numCols; ++j) {
847 op (tag, dst(localRow, j), src(offset+j));
852 unpack (
const ExecutionSpace& execSpace,
857 const size_t numCols,
858 const bool use_atomic_updates)
860 if (use_atomic_updates) {
862 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
864 (
"Tpetra::MultiVector unpack const stride atomic",
865 range_type (0, idx.size ()),
866 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
870 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
872 (
"Tpetra::MultiVector unpack const stride nonatomic",
873 range_type (0, idx.size ()),
874 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
879 template <
typename ExecutionSpace,
884 typename SizeType =
typename ExecutionSpace::execution_space::size_type>
885 class UnpackArrayMultiColumnWithBoundsCheck {
887 static_assert (Kokkos::Impl::is_view<DstView>::value,
888 "DstView must be a Kokkos::View.");
889 static_assert (Kokkos::Impl::is_view<SrcView>::value,
890 "SrcView must be a Kokkos::View.");
891 static_assert (Kokkos::Impl::is_view<IdxView>::value,
892 "IdxView must be a Kokkos::View.");
893 static_assert (static_cast<int> (DstView::rank) == 2,
894 "DstView must be a rank-2 Kokkos::View.");
895 static_assert (static_cast<int> (SrcView::rank) == 1,
896 "SrcView must be a rank-1 Kokkos::View.");
897 static_assert (static_cast<int> (IdxView::rank) == 1,
898 "IdxView must be a rank-1 Kokkos::View.");
899 static_assert (std::is_integral<SizeType>::value,
900 "SizeType must be a built-in integer type.");
903 using execution_space =
typename ExecutionSpace::execution_space;
904 using size_type = SizeType;
905 using value_type = size_t;
915 UnpackArrayMultiColumnWithBoundsCheck (
const ExecutionSpace& ,
920 const size_type numCols_) :
928 template<
class TagType>
929 KOKKOS_INLINE_FUNCTION
void
930 operator() (TagType tag,
932 size_t& lclErrCount)
const
935 (std::is_same<TagType, atomic_tag>::value ||
936 std::is_same<TagType, nonatomic_tag>::value,
937 "TagType must be atomic_tag or nonatomic_tag.");
938 using index_type =
typename IdxView::non_const_value_type;
940 const index_type lclRow = idx(k);
941 if (lclRow < static_cast<index_type> (0) ||
942 lclRow >= static_cast<index_type> (dst.extent (0))) {
946 const size_type offset = k*numCols;
947 for (size_type j = 0; j < numCols; ++j) {
948 op (tag, dst(lclRow,j), src(offset+j));
953 template<
class TagType>
954 KOKKOS_INLINE_FUNCTION
void
955 init (TagType,
size_t& initialErrorCount)
const {
956 initialErrorCount = 0;
959 template<
class TagType>
960 KOKKOS_INLINE_FUNCTION
void
962 volatile size_t& dstErrorCount,
963 const volatile size_t& srcErrorCount)
const
965 dstErrorCount += srcErrorCount;
969 unpack (
const ExecutionSpace& execSpace,
974 const size_type numCols,
975 const bool use_atomic_updates)
977 using index_type =
typename IdxView::non_const_value_type;
979 size_t errorCount = 0;
980 if (use_atomic_updates) {
982 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
983 Kokkos::parallel_reduce
984 (
"Tpetra::MultiVector unpack multicol const stride atomic debug only",
985 range_type (0, idx.size ()),
986 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
992 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
993 Kokkos::parallel_reduce
994 (
"Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
995 range_type (0, idx.size ()),
996 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1001 if (errorCount != 0) {
1005 auto idx_h = Kokkos::create_mirror_view (idx);
1008 std::vector<index_type> badIndices;
1009 const size_type numInds = idx_h.extent (0);
1010 for (size_type k = 0; k < numInds; ++k) {
1011 if (idx_h(k) < static_cast<index_type> (0) ||
1012 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1013 badIndices.push_back (idx_h(k));
1017 if (errorCount != badIndices.size ()) {
1018 std::ostringstream os;
1019 os <<
"MultiVector unpack kernel: errorCount = " << errorCount
1020 <<
" != badIndices.size() = " << badIndices.size ()
1021 <<
". This should never happen. "
1022 "Please report this to the Tpetra developers.";
1023 throw std::logic_error (os.str ());
1026 std::ostringstream os;
1027 os <<
"MultiVector unpack kernel had " << badIndices.size ()
1028 <<
" out-of bounds index/ices. Here they are: [";
1029 for (
size_t k = 0; k < badIndices.size (); ++k) {
1030 os << badIndices[k];
1031 if (k + 1 < badIndices.size ()) {
1036 throw std::runtime_error (os.str ());
1041 template <
typename ExecutionSpace,
1047 unpack_array_multi_column (
const ExecutionSpace& execSpace,
1052 const size_t numCols,
1053 const bool use_atomic_updates,
1056 static_assert (Kokkos::Impl::is_view<DstView>::value,
1057 "DstView must be a Kokkos::View.");
1058 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1059 "SrcView must be a Kokkos::View.");
1060 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1061 "IdxView must be a Kokkos::View.");
1062 static_assert (static_cast<int> (DstView::rank) == 2,
1063 "DstView must be a rank-2 Kokkos::View.");
1064 static_assert (static_cast<int> (SrcView::rank) == 1,
1065 "SrcView must be a rank-1 Kokkos::View.");
1066 static_assert (static_cast<int> (IdxView::rank) == 1,
1067 "IdxView must be a rank-1 Kokkos::View.");
1070 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1071 DstView, SrcView, IdxView, Op> impl_type;
1072 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1073 use_atomic_updates);
1076 typedef UnpackArrayMultiColumn<ExecutionSpace,
1077 DstView, SrcView, IdxView, Op> impl_type;
1078 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1079 use_atomic_updates);
1083 template <
typename ExecutionSpace,
1089 class UnpackArrayMultiColumnVariableStride {
1091 static_assert (Kokkos::Impl::is_view<DstView>::value,
1092 "DstView must be a Kokkos::View.");
1093 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1094 "SrcView must be a Kokkos::View.");
1095 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1096 "IdxView must be a Kokkos::View.");
1097 static_assert (Kokkos::Impl::is_view<ColView>::value,
1098 "ColView must be a Kokkos::View.");
1099 static_assert (static_cast<int> (DstView::rank) == 2,
1100 "DstView must be a rank-2 Kokkos::View.");
1101 static_assert (static_cast<int> (SrcView::rank) == 1,
1102 "SrcView must be a rank-1 Kokkos::View.");
1103 static_assert (static_cast<int> (IdxView::rank) == 1,
1104 "IdxView must be a rank-1 Kokkos::View.");
1105 static_assert (static_cast<int> (ColView::rank) == 1,
1106 "ColView must be a rank-1 Kokkos::View.");
1109 using execution_space =
typename ExecutionSpace::execution_space;
1110 using size_type =
typename execution_space::size_type;
1121 UnpackArrayMultiColumnVariableStride (
const ExecutionSpace& ,
1122 const DstView& dst_,
1123 const SrcView& src_,
1124 const IdxView& idx_,
1125 const ColView& col_,
1127 const size_t numCols_) :
1136 template<
class TagType>
1137 KOKKOS_INLINE_FUNCTION
void
1138 operator() (TagType tag,
const size_type k)
const
1141 (std::is_same<TagType, atomic_tag>::value ||
1142 std::is_same<TagType, nonatomic_tag>::value,
1143 "TagType must be atomic_tag or nonatomic_tag.");
1145 const typename IdxView::value_type localRow = idx(k);
1146 const size_t offset = k*numCols;
1147 for (
size_t j = 0; j < numCols; ++j) {
1148 op (tag, dst(localRow, col(j)), src(offset+j));
1153 unpack (
const ExecutionSpace& execSpace,
1159 const size_t numCols,
1160 const bool use_atomic_updates)
1162 if (use_atomic_updates) {
1164 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1165 Kokkos::parallel_for
1166 (
"Tpetra::MultiVector unpack var stride atomic",
1167 range_type (0, idx.size ()),
1168 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1169 idx, col, op, numCols));
1173 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1174 Kokkos::parallel_for
1175 (
"Tpetra::MultiVector unpack var stride nonatomic",
1176 range_type (0, idx.size ()),
1177 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1178 idx, col, op, numCols));
1183 template <
typename ExecutionSpace,
1189 typename SizeType =
typename ExecutionSpace::execution_space::size_type>
1190 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1192 static_assert (Kokkos::Impl::is_view<DstView>::value,
1193 "DstView must be a Kokkos::View.");
1194 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1195 "SrcView must be a Kokkos::View.");
1196 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1197 "IdxView must be a Kokkos::View.");
1198 static_assert (Kokkos::Impl::is_view<ColView>::value,
1199 "ColView must be a Kokkos::View.");
1200 static_assert (static_cast<int> (DstView::rank) == 2,
1201 "DstView must be a rank-2 Kokkos::View.");
1202 static_assert (static_cast<int> (SrcView::rank) == 1,
1203 "SrcView must be a rank-1 Kokkos::View.");
1204 static_assert (static_cast<int> (IdxView::rank) == 1,
1205 "IdxView must be a rank-1 Kokkos::View.");
1206 static_assert (static_cast<int> (ColView::rank) == 1,
1207 "ColView must be a rank-1 Kokkos::View.");
1208 static_assert (std::is_integral<SizeType>::value,
1209 "SizeType must be a built-in integer type.");
1212 using execution_space =
typename ExecutionSpace::execution_space;
1213 using size_type = SizeType;
1214 using value_type = size_t;
1225 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1226 (
const ExecutionSpace& ,
1227 const DstView& dst_,
1228 const SrcView& src_,
1229 const IdxView& idx_,
1230 const ColView& col_,
1232 const size_t numCols_) :
1241 template<
class TagType>
1242 KOKKOS_INLINE_FUNCTION
void
1243 operator() (TagType tag,
1245 value_type& lclErrorCount)
const
1248 (std::is_same<TagType, atomic_tag>::value ||
1249 std::is_same<TagType, nonatomic_tag>::value,
1250 "TagType must be atomic_tag or nonatomic_tag.");
1251 using row_index_type =
typename IdxView::non_const_value_type;
1252 using col_index_type =
typename ColView::non_const_value_type;
1254 const row_index_type lclRow = idx(k);
1255 if (lclRow < static_cast<row_index_type> (0) ||
1256 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1260 const size_type offset = k * numCols;
1261 for (size_type j = 0; j < numCols; ++j) {
1262 const col_index_type lclCol = col(j);
1263 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1267 op (tag, dst(lclRow, col(j)), src(offset+j));
1273 KOKKOS_INLINE_FUNCTION
void
1274 init (value_type& initialErrorCount)
const {
1275 initialErrorCount = 0;
1278 KOKKOS_INLINE_FUNCTION
void
1279 join (
volatile value_type& dstErrorCount,
1280 const volatile value_type& srcErrorCount)
const
1282 dstErrorCount += srcErrorCount;
1286 unpack (
const ExecutionSpace& execSpace,
1292 const size_type numCols,
1293 const bool use_atomic_updates)
1295 using row_index_type =
typename IdxView::non_const_value_type;
1296 using col_index_type =
typename ColView::non_const_value_type;
1298 size_t errorCount = 0;
1299 if (use_atomic_updates) {
1301 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1302 Kokkos::parallel_reduce
1303 (
"Tpetra::MultiVector unpack var stride atomic debug only",
1304 range_type (0, idx.size ()),
1305 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1306 (execSpace, dst, src, idx, col, op, numCols),
1311 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1312 Kokkos::parallel_reduce
1313 (
"Tpetra::MultiVector unpack var stride nonatomic debug only",
1314 range_type (0, idx.size ()),
1315 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1316 (execSpace, dst, src, idx, col, op, numCols),
1320 if (errorCount != 0) {
1321 constexpr
size_t maxNumBadIndicesToPrint = 100;
1323 std::ostringstream os;
1324 os <<
"Tpetra::MultiVector multicolumn variable stride unpack kernel "
1325 "found " << errorCount
1326 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
1332 auto idx_h = Kokkos::create_mirror_view (idx);
1335 std::vector<row_index_type> badRows;
1336 const size_type numRowInds = idx_h.extent (0);
1337 for (size_type k = 0; k < numRowInds; ++k) {
1338 if (idx_h(k) < static_cast<row_index_type> (0) ||
1339 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1340 badRows.push_back (idx_h(k));
1344 if (badRows.size () != 0) {
1345 os << badRows.size () <<
" out-of-bounds row ind"
1346 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
1347 if (badRows.size () <= maxNumBadIndicesToPrint) {
1349 for (
size_t k = 0; k < badRows.size (); ++k) {
1351 if (k + 1 < badRows.size ()) {
1362 os <<
"No out-of-bounds row indices. ";
1367 auto col_h = Kokkos::create_mirror_view (col);
1370 std::vector<col_index_type> badCols;
1371 const size_type numColInds = col_h.extent (0);
1372 for (size_type k = 0; k < numColInds; ++k) {
1373 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1374 badCols.push_back (col_h(k));
1378 if (badCols.size () != 0) {
1379 os << badCols.size () <<
" out-of-bounds column ind"
1380 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
1381 if (badCols.size () <= maxNumBadIndicesToPrint) {
1382 for (
size_t k = 0; k < badCols.size (); ++k) {
1385 if (k + 1 < badCols.size ()) {
1396 os <<
"No out-of-bounds column indices. ";
1399 TEUCHOS_TEST_FOR_EXCEPTION
1400 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1401 std::logic_error,
"Tpetra::MultiVector variable stride unpack "
1402 "kernel reports errorCount=" << errorCount <<
", but we failed "
1403 "to find any bad rows or columns. This should never happen. "
1404 "Please report this to the Tpetra developers.");
1406 throw std::runtime_error (os.str ());
1411 template <
typename ExecutionSpace,
1418 unpack_array_multi_column_variable_stride (
const ExecutionSpace& execSpace,
1424 const size_t numCols,
1425 const bool use_atomic_updates,
1428 static_assert (Kokkos::Impl::is_view<DstView>::value,
1429 "DstView must be a Kokkos::View.");
1430 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1431 "SrcView must be a Kokkos::View.");
1432 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1433 "IdxView must be a Kokkos::View.");
1434 static_assert (Kokkos::Impl::is_view<ColView>::value,
1435 "ColView must be a Kokkos::View.");
1436 static_assert (static_cast<int> (DstView::rank) == 2,
1437 "DstView must be a rank-2 Kokkos::View.");
1438 static_assert (static_cast<int> (SrcView::rank) == 1,
1439 "SrcView must be a rank-1 Kokkos::View.");
1440 static_assert (static_cast<int> (IdxView::rank) == 1,
1441 "IdxView must be a rank-1 Kokkos::View.");
1442 static_assert (static_cast<int> (ColView::rank) == 1,
1443 "ColView must be a rank-1 Kokkos::View.");
1447 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1448 DstView, SrcView, IdxView, ColView, Op>;
1449 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1450 use_atomic_updates);
1453 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1454 DstView, SrcView, IdxView, ColView, Op>;
1455 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1456 use_atomic_updates);
1460 template <
typename DstView,
typename SrcView,
1461 typename DstIdxView,
typename SrcIdxView>
1462 struct PermuteArrayMultiColumn {
1463 typedef typename DstView::execution_space execution_space;
1464 typedef typename execution_space::size_type size_type;
1472 PermuteArrayMultiColumn (
const DstView& dst_,
1473 const SrcView& src_,
1474 const DstIdxView& dst_idx_,
1475 const SrcIdxView& src_idx_,
1476 const size_t numCols_) :
1477 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1478 numCols(numCols_) {}
1480 KOKKOS_INLINE_FUNCTION
void
1481 operator() (
const size_type k)
const {
1482 const typename DstIdxView::value_type toRow = dst_idx(k);
1483 const typename SrcIdxView::value_type fromRow = src_idx(k);
1484 for (
size_t j = 0; j < numCols; ++j) {
1485 dst(toRow, j) = src(fromRow, j);
1490 permute (
const DstView& dst,
1492 const DstIdxView& dst_idx,
1493 const SrcIdxView& src_idx,
1494 const size_t numCols)
1496 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1497 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1498 Kokkos::parallel_for
1499 (
"Tpetra::MultiVector permute multicol const stride",
1501 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols));
1507 template <
typename DstView,
typename SrcView,
1508 typename DstIdxView,
typename SrcIdxView>
1509 void permute_array_multi_column(
const DstView& dst,
1511 const DstIdxView& dst_idx,
1512 const SrcIdxView& src_idx,
1514 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1515 dst, src, dst_idx, src_idx, numCols);
1518 template <
typename DstView,
typename SrcView,
1519 typename DstIdxView,
typename SrcIdxView,
1520 typename DstColView,
typename SrcColView>
1521 struct PermuteArrayMultiColumnVariableStride {
1522 typedef typename DstView::execution_space execution_space;
1523 typedef typename execution_space::size_type size_type;
1533 PermuteArrayMultiColumnVariableStride(
const DstView& dst_,
1534 const SrcView& src_,
1535 const DstIdxView& dst_idx_,
1536 const SrcIdxView& src_idx_,
1537 const DstColView& dst_col_,
1538 const SrcColView& src_col_,
1539 const size_t numCols_) :
1540 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1541 dst_col(dst_col_), src_col(src_col_),
1542 numCols(numCols_) {}
1544 KOKKOS_INLINE_FUNCTION
void
1545 operator() (
const size_type k)
const {
1546 const typename DstIdxView::value_type toRow = dst_idx(k);
1547 const typename SrcIdxView::value_type fromRow = src_idx(k);
1548 for (
size_t j = 0; j < numCols; ++j) {
1549 dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1554 permute (
const DstView& dst,
1556 const DstIdxView& dst_idx,
1557 const SrcIdxView& src_idx,
1558 const DstColView& dst_col,
1559 const SrcColView& src_col,
1560 const size_t numCols)
1562 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1563 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1564 Kokkos::parallel_for
1565 (
"Tpetra::MultiVector permute multicol var stride",
1567 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1568 dst_col, src_col, numCols));
1574 template <
typename DstView,
typename SrcView,
1575 typename DstIdxView,
typename SrcIdxView,
1576 typename DstColView,
typename SrcColView>
1577 void permute_array_multi_column_variable_stride(
const DstView& dst,
1579 const DstIdxView& dst_idx,
1580 const SrcIdxView& src_idx,
1581 const DstColView& dst_col,
1582 const SrcColView& src_col,
1584 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1585 DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1586 dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1593 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...