49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 #include "impl/Kokkos_Atomic_Generic.hpp"
59 namespace KokkosRefactor {
// Primary template of a compile-time-dispatched bounds test.
// The second template parameter defaults to whether IntegerType is a signed
// type, so that signed and unsigned index types can get different tests.
// NOTE(review): the line naming the enclosing struct is not visible in this
// excerpt; presumably `struct OutOfBounds`, matching the partial
// specialization `OutOfBounds<IntegerType, false>` that appears later.
75 template<
class IntegerType,
76 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
// Declaration only: takes an index x and an exclusive upper bound and
// returns bool. Callable from Kokkos kernels (KOKKOS_INLINE_FUNCTION).
78 static KOKKOS_INLINE_FUNCTION
bool
79 test (
const IntegerType x,
80 const IntegerType exclusiveUpperBound);
// Bounds test that also rejects negative values.
// NOTE(review): the specialization header is not visible in this excerpt;
// presumably this is the isSigned == true case — confirm.
84 template<
class IntegerType>
86 static KOKKOS_INLINE_FUNCTION
bool
87 test (
const IntegerType x,
88 const IntegerType exclusiveUpperBound)
// True when x is negative or when x is not strictly below the upper bound.
90 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
// Partial specialization for isSigned == false (the primary template derives
// that flag from std::numeric_limits<IntegerType>::is_signed).
// Only the upper bound is tested here; there is no comparison against zero.
95 template<
class IntegerType>
96 struct OutOfBounds<IntegerType, false> {
97 static KOKKOS_INLINE_FUNCTION
bool
98 test (
const IntegerType x,
99 const IntegerType exclusiveUpperBound)
// True when x is not strictly below the upper bound.
101 return x >= exclusiveUpperBound;
// Free-function entry point for the bounds test, usable inside kernels.
// Takes an index x and an exclusive upper bound, both of IntegerType, and
// returns bool.
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to OutOfBounds<IntegerType>::test — confirm.
107 template<
class IntegerType>
108 KOKKOS_INLINE_FUNCTION
bool
109 outOfBounds (
const IntegerType x,
const IntegerType exclusiveUpperBound)
// Functor that packs one column of a two-index source view into a
// one-index destination view, with no bounds checking.
// For each k it stores src(idx(k), col) into dst(k).
119 template <
typename DstView,
typename SrcView,
typename IdxView>
120 struct PackArraySingleColumn {
// The execution space and its size_type are taken from the destination view.
121 typedef typename DstView::execution_space execution_space;
122 typedef typename execution_space::size_type size_type;
// Constructor: stores the views and the column index as members.
// NOTE(review): the remaining constructor parameters are not visible in this
// excerpt.
129 PackArraySingleColumn (
const DstView& dst_,
133 dst(dst_), src(src_), idx(idx_), col(col_) {}
// Kernel body: idx(k) selects the source row; the column is fixed.
135 KOKKOS_INLINE_FUNCTION
void
136 operator() (
const size_type k)
const {
137 dst(k) = src(idx(k), col);
// Launch helper: builds the functor and runs it over [0, idx.size()).
// NOTE(review): the name of the Kokkos launch call is not visible in this
// excerpt; presumably Kokkos::parallel_for, since the functor takes no
// reduction argument.
141 pack (
const DstView& dst,
146 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
148 (
"Tpetra::MultiVector pack one col",
149 range_type (0, idx.size ()),
150 PackArraySingleColumn (dst, src, idx, col));
// Bounds-checking variant of the single-column pack functor.
// It is written as a reduction functor: operator() takes a per-thread error
// counter, and init/join define how counters start and are combined.
// SizeType defaults to the size_type of DstView's execution space.
154 template <
typename DstView,
157 typename SizeType =
typename DstView::execution_space::size_type>
158 class PackArraySingleColumnWithBoundsCheck {
// Compile-time checks: all three arguments are Kokkos Views, dst and idx
// are rank 1, src is rank 2, and SizeType is a built-in integer.
160 static_assert (Kokkos::Impl::is_view<DstView>::value,
161 "DstView must be a Kokkos::View.");
162 static_assert (Kokkos::Impl::is_view<SrcView>::value,
163 "SrcView must be a Kokkos::View.");
164 static_assert (Kokkos::Impl::is_view<IdxView>::value,
165 "IdxView must be a Kokkos::View.");
166 static_assert (static_cast<int> (DstView::rank) == 1,
167 "DstView must be a rank-1 Kokkos::View.");
168 static_assert (static_cast<int> (SrcView::rank) == 2,
169 "SrcView must be a rank-2 Kokkos::View.");
170 static_assert (static_cast<int> (IdxView::rank) == 1,
171 "IdxView must be a rank-1 Kokkos::View.");
172 static_assert (std::is_integral<SizeType>::value,
173 "SizeType must be a built-in integer type.");
175 typedef SizeType size_type;
// value_type is the reduction result type: a count of errors.
176 using value_type = size_t;
// Constructor: stores the views and the column index as members.
185 PackArraySingleColumnWithBoundsCheck (
const DstView& dst_,
188 const size_type col_) :
189 dst (dst_), src (src_), idx (idx_), col (col_) {}
// Kernel body for entry k, with lclErrCount as the reduction accumulator.
191 KOKKOS_INLINE_FUNCTION
void
192 operator() (
const size_type k, value_type& lclErrCount)
const {
193 using index_type =
typename IdxView::non_const_value_type;
195 const index_type lclRow = idx(k);
// The row index is out of range if negative or >= src.extent(0).
196 if (lclRow < static_cast<index_type> (0) ||
197 lclRow >= static_cast<index_type> (src.extent (0))) {
// NOTE(review): the lines between the test above and the store below are
// not visible in this excerpt; presumably the error count is incremented
// and the store happens only for in-range rows — confirm.
201 dst(k) = src(lclRow, col);
// Reduction identity: the error count starts at zero.
205 KOKKOS_INLINE_FUNCTION
206 void init (value_type& initialErrorCount)
const {
207 initialErrorCount = 0;
// Reduction combine step: error counts are summed.
210 KOKKOS_INLINE_FUNCTION
void
211 join (
volatile value_type& dstErrorCount,
212 const volatile value_type& srcErrorCount)
const
214 dstErrorCount += srcErrorCount;
// Launch helper: runs the functor as a parallel_reduce over
// [0, idx.size()) and reports any out-of-range indices by throwing.
218 pack (
const DstView& dst,
223 typedef typename DstView::execution_space execution_space;
224 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
225 typedef typename IdxView::non_const_value_type index_type;
227 size_t errorCount = 0;
228 Kokkos::parallel_reduce
229 (
"Tpetra::MultiVector pack one col debug only",
230 range_type (0, idx.size ()),
231 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
// Error path: re-scan the indices to list the offending values.
234 if (errorCount != 0) {
// Mirror view of idx, scanned by the serial loop below.
// NOTE(review): the copy of idx's contents into idx_h is not visible in
// this excerpt — confirm it is present.
238 auto idx_h = Kokkos::create_mirror_view (idx);
241 std::vector<index_type> badIndices;
242 const size_type numInds = idx_h.extent (0);
243 for (size_type k = 0; k < numInds; ++k) {
// Same range test as in operator().
244 if (idx_h(k) < static_cast<index_type> (0) ||
245 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
246 badIndices.push_back (idx_h(k));
// Consistency check: the kernel's count must equal the number of bad
// indices found by the re-scan; a mismatch raises std::logic_error.
250 TEUCHOS_TEST_FOR_EXCEPTION
251 (errorCount != badIndices.size (), std::logic_error,
252 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
253 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
254 "uld never happen. Please report this to the Tpetra developers.");
// Build the message for the std::runtime_error thrown below.
256 std::ostringstream os;
257 os <<
"MultiVector single-column pack kernel had "
258 << badIndices.size () <<
" out-of bounds index/ices. "
260 for (
size_t k = 0; k < badIndices.size (); ++k) {
// True for every element except the last.
262 if (k + 1 < badIndices.size ()) {
267 throw std::runtime_error (os.str ());
// Public entry point for packing a single column.
// Chooses between the bounds-checking and the unchecked implementation.
// `debug` defaults to true.
// NOTE(review): the branch on `debug` is not visible in this excerpt;
// presumably debug == true selects the WithBoundsCheck variant — confirm.
273 template <
typename DstView,
typename SrcView,
typename IdxView>
275 pack_array_single_column (
const DstView& dst,
279 const bool debug =
true)
// Compile-time checks: all three arguments are Kokkos Views, dst and idx
// are rank 1, and src is rank 2.
281 static_assert (Kokkos::Impl::is_view<DstView>::value,
282 "DstView must be a Kokkos::View.");
283 static_assert (Kokkos::Impl::is_view<SrcView>::value,
284 "SrcView must be a Kokkos::View.");
285 static_assert (Kokkos::Impl::is_view<IdxView>::value,
286 "IdxView must be a Kokkos::View.");
287 static_assert (static_cast<int> (DstView::rank) == 1,
288 "DstView must be a rank-1 Kokkos::View.");
289 static_assert (static_cast<int> (SrcView::rank) == 2,
290 "SrcView must be a rank-2 Kokkos::View.");
291 static_assert (static_cast<int> (IdxView::rank) == 1,
292 "IdxView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
295 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
296 impl_type::pack (dst, src, idx, col);
// Unchecked implementation.
299 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
300 impl_type::pack (dst, src, idx, col);
// Functor that packs the first numCols columns of selected rows of a
// two-index source view into a one-index destination view, with no bounds
// checking. Row k of the selection lands at dst(k*numCols + j), j < numCols.
304 template <
typename DstView,
typename SrcView,
typename IdxView>
305 struct PackArrayMultiColumn {
// The execution space and its size_type are taken from the destination view.
306 typedef typename DstView::execution_space execution_space;
307 typedef typename execution_space::size_type size_type;
// Constructor: stores the views and the column count as members.
314 PackArrayMultiColumn (
const DstView& dst_,
317 const size_t numCols_) :
318 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
// Kernel body: copies one selected row into its contiguous slot in dst.
320 KOKKOS_INLINE_FUNCTION
void
321 operator() (
const size_type k)
const {
322 const typename IdxView::value_type localRow = idx(k);
// Each packed row occupies numCols consecutive entries of dst.
323 const size_t offset = k*numCols;
324 for (
size_t j = 0; j < numCols; ++j) {
325 dst(offset + j) = src(localRow, j);
// Launch helper: builds the functor and runs it over [0, idx.size()).
// NOTE(review): the name of the Kokkos launch call is not visible in this
// excerpt; presumably Kokkos::parallel_for.
329 static void pack(
const DstView& dst,
333 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
335 (
"Tpetra::MultiVector pack multicol const stride",
336 range_type (0, idx.size ()),
337 PackArrayMultiColumn (dst, src, idx, numCols));
// Bounds-checking variant of the constant-stride multi-column pack functor.
// It is written as a reduction functor: operator() takes a per-thread error
// counter, and init/join define how counters start and are combined.
// SizeType defaults to the size_type of DstView's execution space.
341 template <
typename DstView,
344 typename SizeType =
typename DstView::execution_space::size_type>
345 class PackArrayMultiColumnWithBoundsCheck {
347 using size_type = SizeType;
// value_type is the reduction result type: a count of errors.
348 using value_type = size_t;
// Constructor: stores the views and the column count as members.
357 PackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
360 const size_type numCols_) :
361 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
// Kernel body for entry k, with lclErrorCount as the reduction accumulator.
363 KOKKOS_INLINE_FUNCTION
void
364 operator() (
const size_type k, value_type& lclErrorCount)
const {
365 typedef typename IdxView::non_const_value_type index_type;
367 const index_type lclRow = idx(k);
// The row index is out of range if negative or >= src.extent(0).
368 if (lclRow < static_cast<index_type> (0) ||
369 lclRow >= static_cast<index_type> (src.extent (0))) {
// NOTE(review): the lines between the test above and the copy loop below
// are not visible in this excerpt; presumably the error count is
// incremented and the copy happens only for in-range rows — confirm.
// Each packed row occupies numCols consecutive entries of dst.
373 const size_type offset = k*numCols;
374 for (size_type j = 0; j < numCols; ++j) {
375 dst(offset + j) = src(lclRow, j);
// Reduction identity: the error count starts at zero.
380 KOKKOS_INLINE_FUNCTION
381 void init (value_type& initialErrorCount)
const {
382 initialErrorCount = 0;
// Reduction combine step: error counts are summed.
385 KOKKOS_INLINE_FUNCTION
void
386 join (
volatile value_type& dstErrorCount,
387 const volatile value_type& srcErrorCount)
const
389 dstErrorCount += srcErrorCount;
// Launch helper: runs the functor as a parallel_reduce over
// [0, idx.size()) and reports any out-of-range indices by throwing.
393 pack (
const DstView& dst,
396 const size_type numCols)
398 typedef typename DstView::execution_space execution_space;
399 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
400 typedef typename IdxView::non_const_value_type index_type;
402 size_t errorCount = 0;
403 Kokkos::parallel_reduce
404 (
"Tpetra::MultiVector pack multicol const stride debug only",
405 range_type (0, idx.size ()),
406 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
// Error path: re-scan the indices to list the offending values.
408 if (errorCount != 0) {
// Mirror view of idx, scanned by the serial loop below.
// NOTE(review): the copy of idx's contents into idx_h is not visible in
// this excerpt — confirm it is present.
412 auto idx_h = Kokkos::create_mirror_view (idx);
415 std::vector<index_type> badIndices;
416 const size_type numInds = idx_h.extent (0);
417 for (size_type k = 0; k < numInds; ++k) {
// Same range test as in operator().
418 if (idx_h(k) < static_cast<index_type> (0) ||
419 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
420 badIndices.push_back (idx_h(k));
// Consistency check: the kernel's count must equal the number of bad
// indices found by the re-scan; a mismatch raises std::logic_error.
// The message names this class (it previously named the single-column
// class, which pointed readers at the wrong kernel).
424 TEUCHOS_TEST_FOR_EXCEPTION
425 (errorCount != badIndices.size (), std::logic_error,
426 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
427 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
428 "uld never happen. Please report this to the Tpetra developers.");
// Build the message for the std::runtime_error thrown below.
430 std::ostringstream os;
431 os <<
"Tpetra::MultiVector multiple-column pack kernel had "
432 << badIndices.size () <<
" out-of bounds index/ices (errorCount = "
433 << errorCount <<
"): [";
434 for (
size_t k = 0; k < badIndices.size (); ++k) {
// True for every element except the last.
436 if (k + 1 < badIndices.size ()) {
441 throw std::runtime_error (os.str ());
// Public entry point for the constant-stride multi-column pack.
// Chooses between the bounds-checking and the unchecked implementation.
// `debug` defaults to true.
// NOTE(review): the branch on `debug` is not visible in this excerpt;
// presumably debug == true selects the WithBoundsCheck variant — confirm.
447 template <
typename DstView,
451 pack_array_multi_column (
const DstView& dst,
454 const size_t numCols,
455 const bool debug =
true)
// Compile-time checks: all three arguments are Kokkos Views, dst and idx
// are rank 1, and src is rank 2.
457 static_assert (Kokkos::Impl::is_view<DstView>::value,
458 "DstView must be a Kokkos::View.");
459 static_assert (Kokkos::Impl::is_view<SrcView>::value,
460 "SrcView must be a Kokkos::View.");
461 static_assert (Kokkos::Impl::is_view<IdxView>::value,
462 "IdxView must be a Kokkos::View.");
463 static_assert (static_cast<int> (DstView::rank) == 1,
464 "DstView must be a rank-1 Kokkos::View.");
465 static_assert (static_cast<int> (SrcView::rank) == 2,
466 "SrcView must be a rank-2 Kokkos::View.");
467 static_assert (static_cast<int> (IdxView::rank) == 1,
468 "IdxView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
471 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
472 SrcView, IdxView> impl_type;
473 impl_type::pack (dst, src, idx, numCols);
// Unchecked implementation.
476 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
477 impl_type::pack (dst, src, idx, numCols);
// Functor that packs selected rows and selected columns of a two-index
// source view into a one-index destination view, with no bounds checking.
// The source column for packed position j is looked up as col(j).
481 template <
typename DstView,
typename SrcView,
typename IdxView,
483 struct PackArrayMultiColumnVariableStride {
// The execution space and its size_type are taken from the destination view.
484 typedef typename DstView::execution_space execution_space;
485 typedef typename execution_space::size_type size_type;
// Constructor: stores the views and the column count as members.
493 PackArrayMultiColumnVariableStride (
const DstView& dst_,
497 const size_t numCols_) :
498 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
// Kernel body: copies one selected row into its contiguous slot in dst.
500 KOKKOS_INLINE_FUNCTION
501 void operator() (
const size_type k)
const {
502 const typename IdxView::value_type localRow = idx(k);
// Each packed row occupies numCols consecutive entries of dst.
503 const size_t offset = k*numCols;
504 for (
size_t j = 0; j < numCols; ++j) {
505 dst(offset + j) = src(localRow, col(j));
// Launch helper: builds the functor and runs it over [0, idx.size()).
// NOTE(review): the name of the Kokkos launch call is not visible in this
// excerpt; presumably Kokkos::parallel_for.
509 static void pack(
const DstView& dst,
514 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
516 (
"Tpetra::MultiVector pack multicol var stride",
517 range_type (0, idx.size ()),
518 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
// Bounds-checking variant of the variable-stride multi-column pack functor.
// Both the row index idx(k) and every column index col(j) are tested.
// It is written as a reduction functor whose result is an error count.
// SizeType defaults to the size_type of DstView's execution space.
522 template <
typename DstView,
526 typename SizeType =
typename DstView::execution_space::size_type>
527 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
529 using size_type = SizeType;
// value_type is the reduction result type: a count of errors.
530 using value_type = size_t;
// Constructor: stores the views and the column count as members.
540 PackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
544 const size_type numCols_) :
545 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
// Kernel body for entry k, with lclErrorCount as the reduction accumulator.
547 KOKKOS_INLINE_FUNCTION
void
548 operator() (
const size_type k, value_type& lclErrorCount)
const {
549 typedef typename IdxView::non_const_value_type row_index_type;
550 typedef typename ColView::non_const_value_type col_index_type;
552 const row_index_type lclRow = idx(k);
// The row index is out of range if negative or >= src.extent(0).
553 if (lclRow < static_cast<row_index_type> (0) ||
554 lclRow >= static_cast<row_index_type> (src.extent (0))) {
// NOTE(review): the lines between the row test above and the loop below
// are not visible in this excerpt; presumably the error count is
// incremented for a bad row — confirm.
// Each packed row occupies numCols consecutive entries of dst.
558 const size_type offset = k*numCols;
559 for (size_type j = 0; j < numCols; ++j) {
560 const col_index_type lclCol = col(j);
// Column index is tested against src.extent(1).
561 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
// NOTE(review): the branch structure around this store is not visible in
// this excerpt; presumably it runs only for in-range columns — confirm.
565 dst(offset + j) = src(lclRow, lclCol);
// Reduction identity: the error count starts at zero.
571 KOKKOS_INLINE_FUNCTION
void
572 init (value_type& initialErrorCount)
const {
573 initialErrorCount = 0;
// Reduction combine step: error counts are summed.
576 KOKKOS_INLINE_FUNCTION
void
577 join (
volatile value_type& dstErrorCount,
578 const volatile value_type& srcErrorCount)
const
580 dstErrorCount += srcErrorCount;
// Launch helper: runs the functor as a parallel_reduce over
// [0, idx.size()) and reports bad row and column indices by throwing.
584 pack (
const DstView& dst,
588 const size_type numCols)
590 using execution_space =
typename DstView::execution_space;
591 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
592 using row_index_type =
typename IdxView::non_const_value_type;
593 using col_index_type =
typename ColView::non_const_value_type;
595 size_t errorCount = 0;
596 Kokkos::parallel_reduce
597 (
"Tpetra::MultiVector pack multicol var stride debug only",
598 range_type (0, idx.size ()),
599 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
// Error path: build a message describing the bad rows and columns.
602 if (errorCount != 0) {
// Lists of bad indices are compared against this limit before printing.
603 constexpr
size_t maxNumBadIndicesToPrint = 100;
605 std::ostringstream os;
// The message pluralizes "error" unless the count is exactly one.
606 os <<
"Tpetra::MultiVector multicolumn variable stride pack kernel "
607 "found " << errorCount
608 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
// Mirror view of idx, scanned by the serial loop below.
// NOTE(review): the copy of idx's contents into idx_h is not visible in
// this excerpt — confirm it is present.
613 auto idx_h = Kokkos::create_mirror_view (idx);
616 std::vector<row_index_type> badRows;
617 const size_type numRowInds = idx_h.extent (0);
618 for (size_type k = 0; k < numRowInds; ++k) {
// Row indices are tested against src.extent(0).
619 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
620 badRows.push_back (idx_h(k));
624 if (badRows.size () != 0) {
625 os << badRows.size () <<
" out-of-bounds row ind"
626 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
// Only short lists are printed element by element.
627 if (badRows.size () <= maxNumBadIndicesToPrint) {
629 for (
size_t k = 0; k < badRows.size (); ++k) {
// True for every element except the last.
631 if (k + 1 < badRows.size ()) {
642 os <<
"No out-of-bounds row indices. ";
// Same procedure for the column indices.
// NOTE(review): the copy of col's contents into col_h is not visible in
// this excerpt — confirm it is present.
647 auto col_h = Kokkos::create_mirror_view (col);
650 std::vector<col_index_type> badCols;
651 const size_type numColInds = col_h.extent (0);
652 for (size_type k = 0; k < numColInds; ++k) {
// Column indices are tested against src.extent(1).
653 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
654 badCols.push_back (col_h(k));
658 if (badCols.size () != 0) {
659 os << badCols.size () <<
" out-of-bounds column ind"
660 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
661 if (badCols.size () <= maxNumBadIndicesToPrint) {
663 for (
size_t k = 0; k < badCols.size (); ++k) {
// True for every element except the last.
665 if (k + 1 < badCols.size ()) {
676 os <<
"No out-of-bounds column indices. ";
// Consistency check: a nonzero kernel error count with no bad row or
// column found by the re-scan raises std::logic_error.
679 TEUCHOS_TEST_FOR_EXCEPTION
680 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
681 std::logic_error,
"Tpetra::MultiVector variable stride pack "
682 "kernel reports errorCount=" << errorCount <<
", but we failed "
683 "to find any bad rows or columns. This should never happen. "
684 "Please report this to the Tpetra developers.");
// Report the collected diagnostics.
686 throw std::runtime_error (os.str ());
// Public entry point for the variable-stride multi-column pack.
// Chooses between the bounds-checking and the unchecked implementation.
// `debug` defaults to true.
// NOTE(review): the branch on `debug` is not visible in this excerpt;
// presumably debug == true selects the WithBoundsCheck variant — confirm.
691 template <
typename DstView,
696 pack_array_multi_column_variable_stride (
const DstView& dst,
700 const size_t numCols,
701 const bool debug =
true)
// Compile-time checks: all four arguments are Kokkos Views; dst, idx and
// col are rank 1, and src is rank 2.
703 static_assert (Kokkos::Impl::is_view<DstView>::value,
704 "DstView must be a Kokkos::View.");
705 static_assert (Kokkos::Impl::is_view<SrcView>::value,
706 "SrcView must be a Kokkos::View.");
707 static_assert (Kokkos::Impl::is_view<IdxView>::value,
708 "IdxView must be a Kokkos::View.");
709 static_assert (Kokkos::Impl::is_view<ColView>::value,
710 "ColView must be a Kokkos::View.");
711 static_assert (static_cast<int> (DstView::rank) == 1,
712 "DstView must be a rank-1 Kokkos::View.");
713 static_assert (static_cast<int> (SrcView::rank) == 2,
714 "SrcView must be a rank-2 Kokkos::View.");
715 static_assert (static_cast<int> (IdxView::rank) == 1,
716 "IdxView must be a rank-1 Kokkos::View.");
717 static_assert (static_cast<int> (ColView::rank) == 1,
718 "ColView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
721 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
722 SrcView, IdxView, ColView> impl_type;
723 impl_type::pack (dst, src, idx, col, numCols);
// Unchecked implementation.
726 typedef PackArrayMultiColumnVariableStride<DstView,
727 SrcView, IdxView, ColView> impl_type;
728 impl_type::pack (dst, src, idx, col, numCols);
// Empty tag types used for overload selection: the op functors in this file
// overload operator() on them, and the unpack kernels use them as the tag
// argument of Kokkos::RangePolicy to pick atomic or non-atomic updates.
734 struct atomic_tag {};
735 struct nonatomic_tag {};
// Call operators of an update functor, overloaded on the atomicity tag.
// NOTE(review): the enclosing struct header is not visible in this excerpt;
// presumably this is the "add" operation, given the atomic_add call.
// Atomic overload: adds src into dest with Kokkos::atomic_add.
739 KOKKOS_INLINE_FUNCTION
740 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
741 Kokkos::atomic_add (&dest, src);
// Non-atomic overload.
// NOTE(review): its body is not visible in this excerpt.
744 KOKKOS_INLINE_FUNCTION
745 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
// Call operators of a second update functor, overloaded on the atomicity
// tag, with the same signatures as the functor above.
// NOTE(review): neither the enclosing struct header nor the bodies are
// visible in this excerpt, so the operation performed cannot be stated here.
756 KOKKOS_INLINE_FUNCTION
757 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
761 KOKKOS_INLINE_FUNCTION
762 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
// Binary operation returning the larger of two absolute values.
// NOTE(review): the enclosing struct header is not visible in this excerpt;
// presumably `struct AbsMaxOper`, as used by the call operators that follow.
768 template<
class Scalar1,
class Scalar2>
// Returns max(|val1|, |val2|), converted to Scalar1.
// Absolute values come from Kokkos::ArithTraits for each scalar type.
770 KOKKOS_INLINE_FUNCTION
771 static Scalar1 apply(
const Scalar1& val1,
const Scalar2& val2) {
772 const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
773 const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
// The result is always one of the absolute values, never a signed input.
774 return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
// Update functor that replaces dest with max(|dest|, |src|), overloaded on
// the atomicity tag.
// NOTE(review): the enclosing struct header is not visible in this excerpt;
// presumably `struct AbsMaxOp` — confirm.
778 template <
typename SC>
// Atomic overload: applies AbsMaxOper through Kokkos' generic atomic
// fetch-and-operate helper (a Kokkos::Impl function).
780 KOKKOS_INLINE_FUNCTION
781 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
782 Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
// Non-atomic overload: plain read-modify-write of dest.
785 KOKKOS_INLINE_FUNCTION
786 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
787 dest = AbsMaxOper<SC, SC> ().apply (dest, src);
// Functor that unpacks a one-index source buffer into selected rows of a
// two-index destination view, combining values with `op`, with no bounds
// checking. Packed row k is read from src(k*numCols + j), j < numCols.
791 template <
typename ExecutionSpace,
796 class UnpackArrayMultiColumn {
// Compile-time checks: all three view arguments are Kokkos Views, dst is
// rank 2, and src and idx are rank 1.
798 static_assert (Kokkos::Impl::is_view<DstView>::value,
799 "DstView must be a Kokkos::View.");
800 static_assert (Kokkos::Impl::is_view<SrcView>::value,
801 "SrcView must be a Kokkos::View.");
802 static_assert (Kokkos::Impl::is_view<IdxView>::value,
803 "IdxView must be a Kokkos::View.");
804 static_assert (static_cast<int> (DstView::rank) == 2,
805 "DstView must be a rank-2 Kokkos::View.");
806 static_assert (static_cast<int> (SrcView::rank) == 1,
807 "SrcView must be a rank-1 Kokkos::View.");
808 static_assert (static_cast<int> (IdxView::rank) == 1,
809 "IdxView must be a rank-1 Kokkos::View.");
// Here the execution space comes from the ExecutionSpace template
// parameter rather than from a view.
812 typedef typename ExecutionSpace::execution_space execution_space;
813 typedef typename execution_space::size_type size_type;
// Constructor: the execution-space argument is unnamed, so it is not used
// by name inside the constructor.
823 UnpackArrayMultiColumn (
const ExecutionSpace& ,
828 const size_t numCols_) :
// Kernel body, templated on the tag supplied by the RangePolicy.
836 template<
class TagType>
837 KOKKOS_INLINE_FUNCTION
void
838 operator() (TagType tag,
const size_type k)
const
// Condition and message of a compile-time check restricting TagType to the
// two tags.
// NOTE(review): the `static_assert` keyword line is not visible in this
// excerpt.
841 (std::is_same<TagType, atomic_tag>::value ||
842 std::is_same<TagType, nonatomic_tag>::value,
843 "TagType must be atomic_tag or nonatomic_tag.");
845 const typename IdxView::value_type localRow = idx(k);
// Each packed row occupies numCols consecutive entries of src.
846 const size_t offset = k*numCols;
847 for (
size_t j = 0; j < numCols; ++j) {
// The tag is forwarded so that op picks its atomic or non-atomic overload.
848 op (tag, dst(localRow, j), src(offset+j));
// Launch helper: picks the tagged RangePolicy from use_atomic_updates and
// runs the functor over [0, idx.size()).
// NOTE(review): the names of the Kokkos launch calls are not visible in
// this excerpt; presumably Kokkos::parallel_for.
853 unpack (
const ExecutionSpace& execSpace,
858 const size_t numCols,
859 const bool use_atomic_updates)
861 if (use_atomic_updates) {
863 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
865 (
"Tpetra::MultiVector unpack const stride atomic",
866 range_type (0, idx.size ()),
867 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
// Non-atomic launch.
871 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
873 (
"Tpetra::MultiVector unpack const stride nonatomic",
874 range_type (0, idx.size ()),
875 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
// Bounds-checking variant of the constant-stride unpack functor.
// It is written as a tagged reduction functor whose result is an error
// count. SizeType defaults to the size_type of the execution space.
880 template <
typename ExecutionSpace,
885 typename SizeType =
typename ExecutionSpace::execution_space::size_type>
886 class UnpackArrayMultiColumnWithBoundsCheck {
// Compile-time checks: all three view arguments are Kokkos Views, dst is
// rank 2, src and idx are rank 1, and SizeType is a built-in integer.
888 static_assert (Kokkos::Impl::is_view<DstView>::value,
889 "DstView must be a Kokkos::View.");
890 static_assert (Kokkos::Impl::is_view<SrcView>::value,
891 "SrcView must be a Kokkos::View.");
892 static_assert (Kokkos::Impl::is_view<IdxView>::value,
893 "IdxView must be a Kokkos::View.");
894 static_assert (static_cast<int> (DstView::rank) == 2,
895 "DstView must be a rank-2 Kokkos::View.");
896 static_assert (static_cast<int> (SrcView::rank) == 1,
897 "SrcView must be a rank-1 Kokkos::View.");
898 static_assert (static_cast<int> (IdxView::rank) == 1,
899 "IdxView must be a rank-1 Kokkos::View.");
900 static_assert (std::is_integral<SizeType>::value,
901 "SizeType must be a built-in integer type.");
904 using execution_space =
typename ExecutionSpace::execution_space;
905 using size_type = SizeType;
// value_type is the reduction result type: a count of errors.
906 using value_type = size_t;
// Constructor: the execution-space argument is unnamed, so it is not used
// by name inside the constructor.
916 UnpackArrayMultiColumnWithBoundsCheck (
const ExecutionSpace& ,
921 const size_type numCols_) :
// Kernel body, templated on the tag supplied by the RangePolicy;
// lclErrCount is the reduction accumulator.
929 template<
class TagType>
930 KOKKOS_INLINE_FUNCTION
void
931 operator() (TagType tag,
933 size_t& lclErrCount)
const
// Condition and message of a compile-time check restricting TagType to the
// two tags.
// NOTE(review): the `static_assert` keyword line is not visible in this
// excerpt.
936 (std::is_same<TagType, atomic_tag>::value ||
937 std::is_same<TagType, nonatomic_tag>::value,
938 "TagType must be atomic_tag or nonatomic_tag.");
939 using index_type =
typename IdxView::non_const_value_type;
941 const index_type lclRow = idx(k);
// The row index is out of range if negative or >= dst.extent(0); rows
// index the destination here.
942 if (lclRow < static_cast<index_type> (0) ||
943 lclRow >= static_cast<index_type> (dst.extent (0))) {
// NOTE(review): the lines between the test above and the loop below are
// not visible in this excerpt; presumably the error count is incremented
// and the update happens only for in-range rows — confirm.
// Each packed row occupies numCols consecutive entries of src.
947 const size_type offset = k*numCols;
948 for (size_type j = 0; j < numCols; ++j) {
949 op (tag, dst(lclRow,j), src(offset+j));
// Reduction identity, tagged form: the error count starts at zero.
954 template<
class TagType>
955 KOKKOS_INLINE_FUNCTION
void
956 init (TagType,
size_t& initialErrorCount)
const {
957 initialErrorCount = 0;
// Reduction combine step: error counts are summed.
// NOTE(review): the line carrying the function name is not visible in this
// excerpt; presumably a tagged `join` — confirm.
960 template<
class TagType>
961 KOKKOS_INLINE_FUNCTION
void
963 volatile size_t& dstErrorCount,
964 const volatile size_t& srcErrorCount)
const
966 dstErrorCount += srcErrorCount;
// Launch helper: picks the tagged RangePolicy from use_atomic_updates,
// runs the functor as a parallel_reduce over [0, idx.size()), and reports
// any out-of-range row indices by throwing.
970 unpack (
const ExecutionSpace& execSpace,
975 const size_type numCols,
976 const bool use_atomic_updates)
978 using index_type =
typename IdxView::non_const_value_type;
980 size_t errorCount = 0;
981 if (use_atomic_updates) {
983 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
984 Kokkos::parallel_reduce
985 (
"Tpetra::MultiVector unpack multicol const stride atomic debug only",
986 range_type (0, idx.size ()),
987 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
// Non-atomic launch.
993 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
994 Kokkos::parallel_reduce
995 (
"Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
996 range_type (0, idx.size ()),
997 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
// Error path: re-scan the indices to list the offending values.
1002 if (errorCount != 0) {
// Mirror view of idx, scanned by the serial loop below.
// NOTE(review): the copy of idx's contents into idx_h is not visible in
// this excerpt — confirm it is present.
1006 auto idx_h = Kokkos::create_mirror_view (idx);
1009 std::vector<index_type> badIndices;
1010 const size_type numInds = idx_h.extent (0);
1011 for (size_type k = 0; k < numInds; ++k) {
// Same range test as in operator().
1012 if (idx_h(k) < static_cast<index_type> (0) ||
1013 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1014 badIndices.push_back (idx_h(k));
// Consistency check: the kernel's count must equal the number of bad
// indices found by the re-scan; a mismatch throws std::logic_error.
1018 if (errorCount != badIndices.size ()) {
1019 std::ostringstream os;
1020 os <<
"MultiVector unpack kernel: errorCount = " << errorCount
1021 <<
" != badIndices.size() = " << badIndices.size ()
1022 <<
". This should never happen. "
1023 "Please report this to the Tpetra developers.";
1024 throw std::logic_error (os.str ());
// Normal error report: list the bad indices in a std::runtime_error.
1027 std::ostringstream os;
1028 os <<
"MultiVector unpack kernel had " << badIndices.size ()
1029 <<
" out-of bounds index/ices. Here they are: [";
1030 for (
size_t k = 0; k < badIndices.size (); ++k) {
1031 os << badIndices[k];
// True for every element except the last.
1032 if (k + 1 < badIndices.size ()) {
1037 throw std::runtime_error (os.str ());
// Public entry point for the constant-stride multi-column unpack.
// Forwards to either the bounds-checking or the unchecked implementation.
// NOTE(review): the parameter and branch that select between the two are
// not visible in this excerpt; presumably a `debug` flag, as in the pack
// entry points — confirm.
1042 template <
typename ExecutionSpace,
1048 unpack_array_multi_column (
const ExecutionSpace& execSpace,
1053 const size_t numCols,
1054 const bool use_atomic_updates,
// Compile-time checks: all three view arguments are Kokkos Views, dst is
// rank 2, and src and idx are rank 1.
1057 static_assert (Kokkos::Impl::is_view<DstView>::value,
1058 "DstView must be a Kokkos::View.");
1059 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1060 "SrcView must be a Kokkos::View.");
1061 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1062 "IdxView must be a Kokkos::View.");
1063 static_assert (static_cast<int> (DstView::rank) == 2,
1064 "DstView must be a rank-2 Kokkos::View.");
1065 static_assert (static_cast<int> (SrcView::rank) == 1,
1066 "SrcView must be a rank-1 Kokkos::View.");
1067 static_assert (static_cast<int> (IdxView::rank) == 1,
1068 "IdxView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
1071 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1072 DstView, SrcView, IdxView, Op> impl_type;
1073 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1074 use_atomic_updates);
// Unchecked implementation.
1077 typedef UnpackArrayMultiColumn<ExecutionSpace,
1078 DstView, SrcView, IdxView, Op> impl_type;
1079 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1080 use_atomic_updates);
// Functor that unpacks a one-index source buffer into selected rows and
// selected columns of a two-index destination view, combining values with
// `op`, with no bounds checking. The destination column for packed
// position j is looked up as col(j).
1084 template <
typename ExecutionSpace,
1090 class UnpackArrayMultiColumnVariableStride {
// Compile-time checks: all four view arguments are Kokkos Views; dst is
// rank 2; src, idx and col are rank 1.
1092 static_assert (Kokkos::Impl::is_view<DstView>::value,
1093 "DstView must be a Kokkos::View.");
1094 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1095 "SrcView must be a Kokkos::View.");
1096 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1097 "IdxView must be a Kokkos::View.");
1098 static_assert (Kokkos::Impl::is_view<ColView>::value,
1099 "ColView must be a Kokkos::View.");
1100 static_assert (static_cast<int> (DstView::rank) == 2,
1101 "DstView must be a rank-2 Kokkos::View.");
1102 static_assert (static_cast<int> (SrcView::rank) == 1,
1103 "SrcView must be a rank-1 Kokkos::View.");
1104 static_assert (static_cast<int> (IdxView::rank) == 1,
1105 "IdxView must be a rank-1 Kokkos::View.");
1106 static_assert (static_cast<int> (ColView::rank) == 1,
1107 "ColView must be a rank-1 Kokkos::View.");
1110 using execution_space =
typename ExecutionSpace::execution_space;
1111 using size_type =
typename execution_space::size_type;
// Constructor: the execution-space argument is unnamed, so it is not used
// by name inside the constructor.
// NOTE(review): the `op` parameter line and the member-initializer list
// are not visible in this excerpt.
1122 UnpackArrayMultiColumnVariableStride (
const ExecutionSpace& ,
1123 const DstView& dst_,
1124 const SrcView& src_,
1125 const IdxView& idx_,
1126 const ColView& col_,
1128 const size_t numCols_) :
// Kernel body, templated on the tag supplied by the RangePolicy.
1137 template<
class TagType>
1138 KOKKOS_INLINE_FUNCTION
void
1139 operator() (TagType tag,
const size_type k)
const
// Condition and message of a compile-time check restricting TagType to the
// two tags.
// NOTE(review): the `static_assert` keyword line is not visible in this
// excerpt.
1142 (std::is_same<TagType, atomic_tag>::value ||
1143 std::is_same<TagType, nonatomic_tag>::value,
1144 "TagType must be atomic_tag or nonatomic_tag.");
1146 const typename IdxView::value_type localRow = idx(k);
// Each packed row occupies numCols consecutive entries of src.
1147 const size_t offset = k*numCols;
1148 for (
size_t j = 0; j < numCols; ++j) {
// The tag is forwarded so that op picks its atomic or non-atomic overload.
1149 op (tag, dst(localRow, col(j)), src(offset+j));
// Launch helper: picks the tagged RangePolicy from use_atomic_updates and
// runs the functor with Kokkos::parallel_for over [0, idx.size()).
1154 unpack (
const ExecutionSpace& execSpace,
1160 const size_t numCols,
1161 const bool use_atomic_updates)
1163 if (use_atomic_updates) {
1165 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1166 Kokkos::parallel_for
1167 (
"Tpetra::MultiVector unpack var stride atomic",
1168 range_type (0, idx.size ()),
1169 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1170 idx, col, op, numCols));
// Non-atomic launch.
1174 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1175 Kokkos::parallel_for
1176 (
"Tpetra::MultiVector unpack var stride nonatomic",
1177 range_type (0, idx.size ()),
1178 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1179 idx, col, op, numCols));
// Bounds-checking variant of the variable-stride unpack functor.
// Both the row index idx(k) and every column index col(j) are tested
// against the destination's extents. It is written as a reduction functor
// whose result is an error count.
// SizeType defaults to the size_type of the execution space.
1184 template <
typename ExecutionSpace,
1190 typename SizeType =
typename ExecutionSpace::execution_space::size_type>
1191 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
// Compile-time checks: all four view arguments are Kokkos Views; dst is
// rank 2; src, idx and col are rank 1; SizeType is a built-in integer.
1193 static_assert (Kokkos::Impl::is_view<DstView>::value,
1194 "DstView must be a Kokkos::View.");
1195 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1196 "SrcView must be a Kokkos::View.");
1197 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1198 "IdxView must be a Kokkos::View.");
1199 static_assert (Kokkos::Impl::is_view<ColView>::value,
1200 "ColView must be a Kokkos::View.");
1201 static_assert (static_cast<int> (DstView::rank) == 2,
1202 "DstView must be a rank-2 Kokkos::View.");
1203 static_assert (static_cast<int> (SrcView::rank) == 1,
1204 "SrcView must be a rank-1 Kokkos::View.");
1205 static_assert (static_cast<int> (IdxView::rank) == 1,
1206 "IdxView must be a rank-1 Kokkos::View.");
1207 static_assert (static_cast<int> (ColView::rank) == 1,
1208 "ColView must be a rank-1 Kokkos::View.");
1209 static_assert (std::is_integral<SizeType>::value,
1210 "SizeType must be a built-in integer type.");
1213 using execution_space =
typename ExecutionSpace::execution_space;
1214 using size_type = SizeType;
// value_type is the reduction result type: a count of errors.
1215 using value_type = size_t;
// Constructor: the execution-space argument is unnamed, so it is not used
// by name inside the constructor.
// NOTE(review): the `op` parameter line and the member-initializer list
// are not visible in this excerpt.
1226 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1227 (
const ExecutionSpace& ,
1228 const DstView& dst_,
1229 const SrcView& src_,
1230 const IdxView& idx_,
1231 const ColView& col_,
1233 const size_t numCols_) :
// Kernel body, templated on the tag supplied by the RangePolicy;
// lclErrorCount is the reduction accumulator.
1242 template<
class TagType>
1243 KOKKOS_INLINE_FUNCTION
void
1244 operator() (TagType tag,
1246 value_type& lclErrorCount)
const
// Condition and message of a compile-time check restricting TagType to the
// two tags.
// NOTE(review): the `static_assert` keyword line is not visible in this
// excerpt.
1249 (std::is_same<TagType, atomic_tag>::value ||
1250 std::is_same<TagType, nonatomic_tag>::value,
1251 "TagType must be atomic_tag or nonatomic_tag.");
1252 using row_index_type =
typename IdxView::non_const_value_type;
1253 using col_index_type =
typename ColView::non_const_value_type;
1255 const row_index_type lclRow = idx(k);
// The row index is out of range if negative or >= dst.extent(0).
1256 if (lclRow < static_cast<row_index_type> (0) ||
1257 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
// NOTE(review): the lines between the row test above and the loop below
// are not visible in this excerpt; presumably the error count is
// incremented for a bad row — confirm.
// Each packed row occupies numCols consecutive entries of src.
1261 const size_type offset = k * numCols;
1262 for (size_type j = 0; j < numCols; ++j) {
1263 const col_index_type lclCol = col(j);
// Column index is tested against dst.extent(1).
1264 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
// NOTE(review): the branch structure around this update is not visible in
// this excerpt; presumably it runs only for in-range columns — confirm.
// col(j) is read again here; it holds the same value as lclCol above.
1268 op (tag, dst(lclRow, col(j)), src(offset+j));
// Reduction identity: the error count starts at zero.
1274 KOKKOS_INLINE_FUNCTION
void
1275 init (value_type& initialErrorCount)
const {
1276 initialErrorCount = 0;
// Reduction combine step: error counts are summed.
1279 KOKKOS_INLINE_FUNCTION
void
1280 join (
volatile value_type& dstErrorCount,
1281 const volatile value_type& srcErrorCount)
const
1283 dstErrorCount += srcErrorCount;
// Launch helper: picks the tagged RangePolicy from use_atomic_updates,
// runs the functor as a parallel_reduce over [0, idx.size()), and reports
// bad row and column indices by throwing.
1287 unpack (
const ExecutionSpace& execSpace,
1293 const size_type numCols,
1294 const bool use_atomic_updates)
1296 using row_index_type =
typename IdxView::non_const_value_type;
1297 using col_index_type =
typename ColView::non_const_value_type;
1299 size_t errorCount = 0;
1300 if (use_atomic_updates) {
1302 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1303 Kokkos::parallel_reduce
1304 (
"Tpetra::MultiVector unpack var stride atomic debug only",
1305 range_type (0, idx.size ()),
1306 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1307 (execSpace, dst, src, idx, col, op, numCols),
// Non-atomic launch.
1312 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1313 Kokkos::parallel_reduce
1314 (
"Tpetra::MultiVector unpack var stride nonatomic debug only",
1315 range_type (0, idx.size ()),
1316 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1317 (execSpace, dst, src, idx, col, op, numCols),
// Error path: build a message describing the bad rows and columns.
1321 if (errorCount != 0) {
// Lists of bad indices are compared against this limit before printing.
1322 constexpr
size_t maxNumBadIndicesToPrint = 100;
1324 std::ostringstream os;
// The message pluralizes "error" unless the count is exactly one.
1325 os <<
"Tpetra::MultiVector multicolumn variable stride unpack kernel "
1326 "found " << errorCount
1327 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
// Mirror view of idx, scanned by the serial loop below.
// NOTE(review): the copy of idx's contents into idx_h is not visible in
// this excerpt — confirm it is present.
1333 auto idx_h = Kokkos::create_mirror_view (idx);
1336 std::vector<row_index_type> badRows;
1337 const size_type numRowInds = idx_h.extent (0);
1338 for (size_type k = 0; k < numRowInds; ++k) {
// Same row range test as in operator().
1339 if (idx_h(k) < static_cast<row_index_type> (0) ||
1340 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1341 badRows.push_back (idx_h(k));
1345 if (badRows.size () != 0) {
1346 os << badRows.size () <<
" out-of-bounds row ind"
1347 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
// Only short lists are printed element by element.
1348 if (badRows.size () <= maxNumBadIndicesToPrint) {
1350 for (
size_t k = 0; k < badRows.size (); ++k) {
// True for every element except the last.
1352 if (k + 1 < badRows.size ()) {
1363 os <<
"No out-of-bounds row indices. ";
// Same procedure for the column indices.
// NOTE(review): the copy of col's contents into col_h is not visible in
// this excerpt — confirm it is present.
1368 auto col_h = Kokkos::create_mirror_view (col);
1371 std::vector<col_index_type> badCols;
1372 const size_type numColInds = col_h.extent (0);
1373 for (size_type k = 0; k < numColInds; ++k) {
// Column indices are tested against dst.extent(1).
1374 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1375 badCols.push_back (col_h(k));
1379 if (badCols.size () != 0) {
1380 os << badCols.size () <<
" out-of-bounds column ind"
1381 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
1382 if (badCols.size () <= maxNumBadIndicesToPrint) {
1383 for (
size_t k = 0; k < badCols.size (); ++k) {
// True for every element except the last.
1386 if (k + 1 < badCols.size ()) {
1397 os <<
"No out-of-bounds column indices. ";
// Consistency check: a nonzero kernel error count with no bad row or
// column found by the re-scan raises std::logic_error.
1400 TEUCHOS_TEST_FOR_EXCEPTION
1401 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1402 std::logic_error,
"Tpetra::MultiVector variable stride unpack "
1403 "kernel reports errorCount=" << errorCount <<
", but we failed "
1404 "to find any bad rows or columns. This should never happen. "
1405 "Please report this to the Tpetra developers.");
// Report the collected diagnostics.
1407 throw std::runtime_error (os.str ());
// Public entry point for the variable-stride multi-column unpack.
// Forwards to either the bounds-checking or the unchecked implementation.
// NOTE(review): the parameter and branch that select between the two are
// not visible in this excerpt; presumably a `debug` flag, as in the pack
// entry points — confirm.
1412 template <
typename ExecutionSpace,
1419 unpack_array_multi_column_variable_stride (
const ExecutionSpace& execSpace,
1425 const size_t numCols,
1426 const bool use_atomic_updates,
// Compile-time checks: all four view arguments are Kokkos Views; dst is
// rank 2; src, idx and col are rank 1.
1429 static_assert (Kokkos::Impl::is_view<DstView>::value,
1430 "DstView must be a Kokkos::View.");
1431 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1432 "SrcView must be a Kokkos::View.");
1433 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1434 "IdxView must be a Kokkos::View.");
1435 static_assert (Kokkos::Impl::is_view<ColView>::value,
1436 "ColView must be a Kokkos::View.");
1437 static_assert (static_cast<int> (DstView::rank) == 2,
1438 "DstView must be a rank-2 Kokkos::View.");
1439 static_assert (static_cast<int> (SrcView::rank) == 1,
1440 "SrcView must be a rank-1 Kokkos::View.");
1441 static_assert (static_cast<int> (IdxView::rank) == 1,
1442 "IdxView must be a rank-1 Kokkos::View.");
1443 static_assert (static_cast<int> (ColView::rank) == 1,
1444 "ColView must be a rank-1 Kokkos::View.");
// Bounds-checking implementation.
// NOTE(review): the `using impl_type =` line that introduces this alias is
// not visible in this excerpt.
1448 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1449 DstView, SrcView, IdxView, ColView, Op>;
1450 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1451 use_atomic_updates);
// Unchecked implementation.
1454 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1455 DstView, SrcView, IdxView, ColView, Op>;
1456 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1457 use_atomic_updates);
1461 template <
typename DstView,
typename SrcView,
1462 typename DstIdxView,
typename SrcIdxView>
1463 struct PermuteArrayMultiColumn {
1464 typedef typename DstView::execution_space execution_space;
1465 typedef typename execution_space::size_type size_type;
1473 PermuteArrayMultiColumn (
const DstView& dst_,
1474 const SrcView& src_,
1475 const DstIdxView& dst_idx_,
1476 const SrcIdxView& src_idx_,
1477 const size_t numCols_) :
1478 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1479 numCols(numCols_) {}
1481 KOKKOS_INLINE_FUNCTION
void
1482 operator() (
const size_type k)
const {
1483 const typename DstIdxView::value_type toRow = dst_idx(k);
1484 const typename SrcIdxView::value_type fromRow = src_idx(k);
1485 for (
size_t j = 0; j < numCols; ++j) {
1486 dst(toRow, j) = src(fromRow, j);
1491 permute (
const DstView& dst,
1493 const DstIdxView& dst_idx,
1494 const SrcIdxView& src_idx,
1495 const size_t numCols)
1497 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1498 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1499 Kokkos::parallel_for
1500 (
"Tpetra::MultiVector permute multicol const stride",
1502 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols));
1508 template <
typename DstView,
typename SrcView,
1509 typename DstIdxView,
typename SrcIdxView>
1510 void permute_array_multi_column(
const DstView& dst,
1512 const DstIdxView& dst_idx,
1513 const SrcIdxView& src_idx,
1515 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1516 dst, src, dst_idx, src_idx, numCols);
1519 template <
typename DstView,
typename SrcView,
1520 typename DstIdxView,
typename SrcIdxView,
1521 typename DstColView,
typename SrcColView>
1522 struct PermuteArrayMultiColumnVariableStride {
1523 typedef typename DstView::execution_space execution_space;
1524 typedef typename execution_space::size_type size_type;
1534 PermuteArrayMultiColumnVariableStride(
const DstView& dst_,
1535 const SrcView& src_,
1536 const DstIdxView& dst_idx_,
1537 const SrcIdxView& src_idx_,
1538 const DstColView& dst_col_,
1539 const SrcColView& src_col_,
1540 const size_t numCols_) :
1541 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1542 dst_col(dst_col_), src_col(src_col_),
1543 numCols(numCols_) {}
1545 KOKKOS_INLINE_FUNCTION
void
1546 operator() (
const size_type k)
const {
1547 const typename DstIdxView::value_type toRow = dst_idx(k);
1548 const typename SrcIdxView::value_type fromRow = src_idx(k);
1549 for (
size_t j = 0; j < numCols; ++j) {
1550 dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1555 permute (
const DstView& dst,
1557 const DstIdxView& dst_idx,
1558 const SrcIdxView& src_idx,
1559 const DstColView& dst_col,
1560 const SrcColView& src_col,
1561 const size_t numCols)
1563 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1564 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1565 Kokkos::parallel_for
1566 (
"Tpetra::MultiVector permute multicol var stride",
1568 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1569 dst_col, src_col, numCols));
1575 template <
typename DstView,
typename SrcView,
1576 typename DstIdxView,
typename SrcIdxView,
1577 typename DstColView,
typename SrcColView>
1578 void permute_array_multi_column_variable_stride(
const DstView& dst,
1580 const DstIdxView& dst_idx,
1581 const SrcIdxView& src_idx,
1582 const DstColView& dst_col,
1583 const SrcColView& src_col,
1585 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1586 DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1587 dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1594 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upper bound?
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upper bound?