Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
11 // incorrect (as() is not a device function) and usually irrelevant
12 // (it would only matter if LocalOrdinal were bigger than size_t on a
13 // particular platform, which is unlikely).
14 
15 // KDD Aug 2020: In the permute/pack/unpack functors,
16 // the Enabled template parameter is specialized in
17 // downstream packages like Stokhos using SFINAE to provide partial
18 // specializations based on the scalar type of the SrcView and DstView
19 // template parameters. See #7898.
20 // Do not remove it before checking with Stokhos and other specializing users.
21 
22 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
23 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
24 
25 #include "Kokkos_Core.hpp"
26 #if KOKKOS_VERSION >= 40799
27 #include "KokkosKernels_ArithTraits.hpp"
28 #else
29 #include "Kokkos_ArithTraits.hpp"
30 #endif
31 #include <sstream>
32 #include <stdexcept>
33 
34 namespace Tpetra {
35 namespace KokkosRefactor {
36 namespace Details {
37 
43 namespace Impl {
44 
51 template <class IntegerType,
52  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
53 struct OutOfBounds {
54  static KOKKOS_INLINE_FUNCTION bool
55  test(const IntegerType x,
56  const IntegerType exclusiveUpperBound);
57 };
58 
59 // Partial specialization for the case where IntegerType IS signed.
60 template <class IntegerType>
61 struct OutOfBounds<IntegerType, true> {
62  static KOKKOS_INLINE_FUNCTION bool
63  test(const IntegerType x,
64  const IntegerType exclusiveUpperBound) {
65  return x < static_cast<IntegerType>(0) || x >= exclusiveUpperBound;
66  }
67 };
68 
69 // Partial specialization for the case where IntegerType is NOT signed.
70 template <class IntegerType>
71 struct OutOfBounds<IntegerType, false> {
72  static KOKKOS_INLINE_FUNCTION bool
73  test(const IntegerType x,
74  const IntegerType exclusiveUpperBound) {
75  return x >= exclusiveUpperBound;
76  }
77 };
78 
81 template <class IntegerType>
82 KOKKOS_INLINE_FUNCTION bool
83 outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound) {
84  return OutOfBounds<IntegerType>::test(x, exclusiveUpperBound);
85 }
86 
87 } // namespace Impl
88 
89 // Functors for implementing packAndPrepare and unpackAndCombine
90 // through parallel_for
91 
92 template <typename DstView, typename SrcView, typename IdxView,
93  typename Enabled = void>
94 struct PackArraySingleColumn {
95  typedef typename DstView::execution_space execution_space;
96  typedef typename execution_space::size_type size_type;
97 
98  DstView dst;
99  SrcView src;
100  IdxView idx;
101  size_t col;
102 
103  PackArraySingleColumn(const DstView& dst_,
104  const SrcView& src_,
105  const IdxView& idx_,
106  const size_t col_)
107  : dst(dst_)
108  , src(src_)
109  , idx(idx_)
110  , col(col_) {}
111 
112  KOKKOS_INLINE_FUNCTION void
113  operator()(const size_type k) const {
114  dst(k) = src(idx(k), col);
115  }
116 
117  static void
118  pack(const DstView& dst,
119  const SrcView& src,
120  const IdxView& idx,
121  const size_t col,
122  const execution_space& space) {
123  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
124  Kokkos::parallel_for("Tpetra::MultiVector pack one col",
125  range_type(space, 0, idx.size()),
126  PackArraySingleColumn(dst, src, idx, col));
127  }
128 };
129 
130 template <typename DstView,
131  typename SrcView,
132  typename IdxView,
133  typename SizeType = typename DstView::execution_space::size_type,
134  typename Enabled = void>
135 class PackArraySingleColumnWithBoundsCheck {
136  private:
137  static_assert(Kokkos::is_view<DstView>::value,
138  "DstView must be a Kokkos::View.");
139  static_assert(Kokkos::is_view<SrcView>::value,
140  "SrcView must be a Kokkos::View.");
141  static_assert(Kokkos::is_view<IdxView>::value,
142  "IdxView must be a Kokkos::View.");
143  static_assert(static_cast<int>(DstView::rank) == 1,
144  "DstView must be a rank-1 Kokkos::View.");
145  static_assert(static_cast<int>(SrcView::rank) == 2,
146  "SrcView must be a rank-2 Kokkos::View.");
147  static_assert(static_cast<int>(IdxView::rank) == 1,
148  "IdxView must be a rank-1 Kokkos::View.");
149  static_assert(std::is_integral<SizeType>::value,
150  "SizeType must be a built-in integer type.");
151 
152  using execution_space = typename DstView::execution_space;
153 
154  public:
155  typedef SizeType size_type;
156  using value_type = size_t;
157 
158  private:
159  DstView dst;
160  SrcView src;
161  IdxView idx;
162  size_type col;
163  execution_space space;
164 
165  public:
166  PackArraySingleColumnWithBoundsCheck(const DstView& dst_,
167  const SrcView& src_,
168  const IdxView& idx_,
169  const size_type col_)
170  : dst(dst_)
171  , src(src_)
172  , idx(idx_)
173  , col(col_) {}
174 
175  KOKKOS_INLINE_FUNCTION void
176  operator()(const size_type k, value_type& lclErrCount) const {
177  using index_type = typename IdxView::non_const_value_type;
178 
179  const index_type lclRow = idx(k);
180  if (lclRow < static_cast<index_type>(0) ||
181  lclRow >= static_cast<index_type>(src.extent(0))) {
182  ++lclErrCount;
183  } else {
184  dst(k) = src(lclRow, col);
185  }
186  }
187 
188  KOKKOS_INLINE_FUNCTION
189  void init(value_type& initialErrorCount) const {
190  initialErrorCount = 0;
191  }
192 
193  KOKKOS_INLINE_FUNCTION void
194  join(value_type& dstErrorCount,
195  const value_type& srcErrorCount) const {
196  dstErrorCount += srcErrorCount;
197  }
198 
199  static void
200  pack(const DstView& dst,
201  const SrcView& src,
202  const IdxView& idx,
203  const size_type col,
204  const execution_space& space) {
205  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
206  typedef typename IdxView::non_const_value_type index_type;
207 
208  size_t errorCount = 0;
209  Kokkos::parallel_reduce("Tpetra::MultiVector pack one col debug only",
210  range_type(space, 0, idx.size()),
211  PackArraySingleColumnWithBoundsCheck(dst, src, idx, col),
212  errorCount);
213 
214  if (errorCount != 0) {
215  // Go back and find the out-of-bounds entries in the index
216  // array. Performance doesn't matter since we are already in
217  // an error state, so we can do this sequentially, on host.
218  auto idx_h = Kokkos::create_mirror_view(idx);
219 
220  // DEEP_COPY REVIEW - NOT TESTED
221  Kokkos::deep_copy(idx_h, idx);
222 
223  std::vector<index_type> badIndices;
224  const size_type numInds = idx_h.extent(0);
225  for (size_type k = 0; k < numInds; ++k) {
226  if (idx_h(k) < static_cast<index_type>(0) ||
227  idx_h(k) >= static_cast<index_type>(src.extent(0))) {
228  badIndices.push_back(idx_h(k));
229  }
230  }
231 
232  TEUCHOS_TEST_FOR_EXCEPTION(errorCount != badIndices.size(), std::logic_error,
233  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
234  << " != badIndices.size() = " << badIndices.size() << ". This sho"
235  "uld never happen. Please report this to the Tpetra developers.");
236 
237  std::ostringstream os;
238  os << "MultiVector single-column pack kernel had "
239  << badIndices.size() << " out-of bounds index/ices. "
240  "Here they are: [";
241  for (size_t k = 0; k < badIndices.size(); ++k) {
242  os << badIndices[k];
243  if (k + 1 < badIndices.size()) {
244  os << ", ";
245  }
246  }
247  os << "].";
248  throw std::runtime_error(os.str());
249  }
250  }
251 };
252 
253 template <typename DstView, typename SrcView, typename IdxView>
254 void pack_array_single_column(const DstView& dst,
255  const SrcView& src,
256  const IdxView& idx,
257  const size_t col,
258  const bool debug,
259  const typename DstView::execution_space& space) {
260  static_assert(Kokkos::is_view<DstView>::value,
261  "DstView must be a Kokkos::View.");
262  static_assert(Kokkos::is_view<SrcView>::value,
263  "SrcView must be a Kokkos::View.");
264  static_assert(Kokkos::is_view<IdxView>::value,
265  "IdxView must be a Kokkos::View.");
266  static_assert(static_cast<int>(DstView::rank) == 1,
267  "DstView must be a rank-1 Kokkos::View.");
268  static_assert(static_cast<int>(SrcView::rank) == 2,
269  "SrcView must be a rank-2 Kokkos::View.");
270  static_assert(static_cast<int>(IdxView::rank) == 1,
271  "IdxView must be a rank-1 Kokkos::View.");
272 
273  using execution_space = typename DstView::execution_space;
274 
275  static_assert(Kokkos::SpaceAccessibility<execution_space,
276  typename DstView::memory_space>::accessible,
277  "DstView not accessible from execution space");
278  static_assert(Kokkos::SpaceAccessibility<execution_space,
279  typename SrcView::memory_space>::accessible,
280  "SrcView not accessible from execution space");
281  static_assert(Kokkos::SpaceAccessibility<execution_space,
282  typename IdxView::memory_space>::accessible,
283  "IdxView not accessible from execution space");
284 
285  if (debug) {
286  typedef PackArraySingleColumnWithBoundsCheck<DstView, SrcView, IdxView> impl_type;
287  impl_type::pack(dst, src, idx, col, space);
288  } else {
289  typedef PackArraySingleColumn<DstView, SrcView, IdxView> impl_type;
290  impl_type::pack(dst, src, idx, col, space);
291  }
292 }
293 
296 template <typename DstView, typename SrcView, typename IdxView>
297 void pack_array_single_column(const DstView& dst,
298  const SrcView& src,
299  const IdxView& idx,
300  const size_t col,
301  const bool debug = true) {
302  pack_array_single_column(dst, src, idx, col, debug, typename DstView::execution_space());
303 }
304 
305 template <typename DstView, typename SrcView, typename IdxView,
306  typename Enabled = void>
307 struct PackArrayMultiColumn {
308  using execution_space = typename DstView::execution_space;
309  typedef typename execution_space::size_type size_type;
310 
311  DstView dst;
312  SrcView src;
313  IdxView idx;
314  size_t numCols;
315 
316  PackArrayMultiColumn(const DstView& dst_,
317  const SrcView& src_,
318  const IdxView& idx_,
319  const size_t numCols_)
320  : dst(dst_)
321  , src(src_)
322  , idx(idx_)
323  , numCols(numCols_) {}
324 
325  KOKKOS_INLINE_FUNCTION void
326  operator()(const size_type k) const {
327  const typename IdxView::value_type localRow = idx(k);
328  const size_t offset = k * numCols;
329  for (size_t j = 0; j < numCols; ++j) {
330  dst(offset + j) = src(localRow, j);
331  }
332  }
333 
334  static void pack(const DstView& dst,
335  const SrcView& src,
336  const IdxView& idx,
337  size_t numCols,
338  const execution_space& space) {
339  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
340  Kokkos::parallel_for("Tpetra::MultiVector pack multicol const stride",
341  range_type(space, 0, idx.size()),
342  PackArrayMultiColumn(dst, src, idx, numCols));
343  }
344 };
345 
346 template <typename DstView,
347  typename SrcView,
348  typename IdxView,
349  typename SizeType = typename DstView::execution_space::size_type,
350  typename Enabled = void>
351 class PackArrayMultiColumnWithBoundsCheck {
352  public:
353  using size_type = SizeType;
354  using value_type = size_t;
355  using execution_space = typename DstView::execution_space;
356 
357  private:
358  DstView dst;
359  SrcView src;
360  IdxView idx;
361  size_type numCols;
362 
363  public:
364  PackArrayMultiColumnWithBoundsCheck(const DstView& dst_,
365  const SrcView& src_,
366  const IdxView& idx_,
367  const size_type numCols_)
368  : dst(dst_)
369  , src(src_)
370  , idx(idx_)
371  , numCols(numCols_) {}
372 
373  KOKKOS_INLINE_FUNCTION void
374  operator()(const size_type k, value_type& lclErrorCount) const {
375  typedef typename IdxView::non_const_value_type index_type;
376 
377  const index_type lclRow = idx(k);
378  if (lclRow < static_cast<index_type>(0) ||
379  lclRow >= static_cast<index_type>(src.extent(0))) {
380  ++lclErrorCount; // failed
381  } else {
382  const size_type offset = k * numCols;
383  for (size_type j = 0; j < numCols; ++j) {
384  dst(offset + j) = src(lclRow, j);
385  }
386  }
387  }
388 
389  KOKKOS_INLINE_FUNCTION
390  void init(value_type& initialErrorCount) const {
391  initialErrorCount = 0;
392  }
393 
394  KOKKOS_INLINE_FUNCTION void
395  join(value_type& dstErrorCount,
396  const value_type& srcErrorCount) const {
397  dstErrorCount += srcErrorCount;
398  }
399 
400  static void
401  pack(const DstView& dst,
402  const SrcView& src,
403  const IdxView& idx,
404  const size_type numCols,
405  const execution_space& space) {
406  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
407  typedef typename IdxView::non_const_value_type index_type;
408 
409  size_t errorCount = 0;
410  Kokkos::parallel_reduce("Tpetra::MultiVector pack multicol const stride debug only",
411  range_type(space, 0, idx.size()),
412  PackArrayMultiColumnWithBoundsCheck(dst, src, idx, numCols),
413  errorCount);
414  if (errorCount != 0) {
415  // Go back and find the out-of-bounds entries in the index
416  // array. Performance doesn't matter since we are already in
417  // an error state, so we can do this sequentially, on host.
418  auto idx_h = Kokkos::create_mirror_view(idx);
419 
420  // DEEP_COPY REVIEW - NOT TESTED
421  Kokkos::deep_copy(idx_h, idx);
422 
423  std::vector<index_type> badIndices;
424  const size_type numInds = idx_h.extent(0);
425  for (size_type k = 0; k < numInds; ++k) {
426  if (idx_h(k) < static_cast<index_type>(0) ||
427  idx_h(k) >= static_cast<index_type>(src.extent(0))) {
428  badIndices.push_back(idx_h(k));
429  }
430  }
431 
432  TEUCHOS_TEST_FOR_EXCEPTION(errorCount != badIndices.size(), std::logic_error,
433  "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
434  << " != badIndices.size() = " << badIndices.size() << ". This sho"
435  "uld never happen. Please report this to the Tpetra developers.");
436 
437  std::ostringstream os;
438  os << "Tpetra::MultiVector multiple-column pack kernel had "
439  << badIndices.size() << " out-of bounds index/ices (errorCount = "
440  << errorCount << "): [";
441  for (size_t k = 0; k < badIndices.size(); ++k) {
442  os << badIndices[k];
443  if (k + 1 < badIndices.size()) {
444  os << ", ";
445  }
446  }
447  os << "].";
448  throw std::runtime_error(os.str());
449  }
450  }
451 };
452 
453 template <typename DstView,
454  typename SrcView,
455  typename IdxView>
456 void pack_array_multi_column(const DstView& dst,
457  const SrcView& src,
458  const IdxView& idx,
459  const size_t numCols,
460  const bool debug,
461  const typename DstView::execution_space& space) {
462  static_assert(Kokkos::is_view<DstView>::value,
463  "DstView must be a Kokkos::View.");
464  static_assert(Kokkos::is_view<SrcView>::value,
465  "SrcView must be a Kokkos::View.");
466  static_assert(Kokkos::is_view<IdxView>::value,
467  "IdxView must be a Kokkos::View.");
468  static_assert(static_cast<int>(DstView::rank) == 1,
469  "DstView must be a rank-1 Kokkos::View.");
470  static_assert(static_cast<int>(SrcView::rank) == 2,
471  "SrcView must be a rank-2 Kokkos::View.");
472  static_assert(static_cast<int>(IdxView::rank) == 1,
473  "IdxView must be a rank-1 Kokkos::View.");
474 
475  using execution_space = typename DstView::execution_space;
476 
477  static_assert(Kokkos::SpaceAccessibility<execution_space,
478  typename DstView::memory_space>::accessible,
479  "DstView not accessible from execution space");
480  static_assert(Kokkos::SpaceAccessibility<execution_space,
481  typename SrcView::memory_space>::accessible,
482  "SrcView not accessible from execution space");
483  static_assert(Kokkos::SpaceAccessibility<execution_space,
484  typename IdxView::memory_space>::accessible,
485  "IdxView not accessible from execution space");
486 
487  if (debug) {
488  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
489  SrcView, IdxView>
490  impl_type;
491  impl_type::pack(dst, src, idx, numCols, space);
492  } else {
493  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
494  impl_type::pack(dst, src, idx, numCols, space);
495  }
496 }
497 
498 template <typename DstView,
499  typename SrcView,
500  typename IdxView>
501 void pack_array_multi_column(const DstView& dst,
502  const SrcView& src,
503  const IdxView& idx,
504  const size_t numCols,
505  const bool debug = true) {
506  pack_array_multi_column(dst, src, idx, numCols, debug, typename DstView::execution_space());
507 }
508 
509 template <typename DstView, typename SrcView, typename IdxView,
510  typename ColView, typename Enabled = void>
511 struct PackArrayMultiColumnVariableStride {
512  using execution_space = typename DstView::execution_space;
513  typedef typename execution_space::size_type size_type;
514 
515  DstView dst;
516  SrcView src;
517  IdxView idx;
518  ColView col;
519  size_t numCols;
520 
521  PackArrayMultiColumnVariableStride(const DstView& dst_,
522  const SrcView& src_,
523  const IdxView& idx_,
524  const ColView& col_,
525  const size_t numCols_)
526  : dst(dst_)
527  , src(src_)
528  , idx(idx_)
529  , col(col_)
530  , numCols(numCols_) {}
531 
532  KOKKOS_INLINE_FUNCTION
533  void operator()(const size_type k) const {
534  const typename IdxView::value_type localRow = idx(k);
535  const size_t offset = k * numCols;
536  for (size_t j = 0; j < numCols; ++j) {
537  dst(offset + j) = src(localRow, col(j));
538  }
539  }
540 
541  static void pack(const DstView& dst,
542  const SrcView& src,
543  const IdxView& idx,
544  const ColView& col,
545  size_t numCols,
546  const execution_space& space) {
547  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
548  Kokkos::parallel_for("Tpetra::MultiVector pack multicol var stride",
549  range_type(space, 0, idx.size()),
550  PackArrayMultiColumnVariableStride(dst, src, idx, col, numCols));
551  }
552 };
553 
554 template <typename DstView,
555  typename SrcView,
556  typename IdxView,
557  typename ColView,
558  typename SizeType = typename DstView::execution_space::size_type,
559  typename Enabled = void>
560 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
561  public:
562  using size_type = SizeType;
563  using value_type = size_t;
564  using execution_space = typename DstView::execution_space;
565 
566  private:
567  DstView dst;
568  SrcView src;
569  IdxView idx;
570  ColView col;
571  size_type numCols;
572 
573  public:
574  PackArrayMultiColumnVariableStrideWithBoundsCheck(const DstView& dst_,
575  const SrcView& src_,
576  const IdxView& idx_,
577  const ColView& col_,
578  const size_type numCols_)
579  : dst(dst_)
580  , src(src_)
581  , idx(idx_)
582  , col(col_)
583  , numCols(numCols_) {}
584 
585  KOKKOS_INLINE_FUNCTION void
586  operator()(const size_type k, value_type& lclErrorCount) const {
587  typedef typename IdxView::non_const_value_type row_index_type;
588  typedef typename ColView::non_const_value_type col_index_type;
589 
590  const row_index_type lclRow = idx(k);
591  if (lclRow < static_cast<row_index_type>(0) ||
592  lclRow >= static_cast<row_index_type>(src.extent(0))) {
593  ++lclErrorCount = 0;
594  } else {
595  const size_type offset = k * numCols;
596  for (size_type j = 0; j < numCols; ++j) {
597  const col_index_type lclCol = col(j);
598  if (Impl::outOfBounds<col_index_type>(lclCol, src.extent(1))) {
599  ++lclErrorCount = 0;
600  } else { // all indices are valid; do the assignment
601  dst(offset + j) = src(lclRow, lclCol);
602  }
603  }
604  }
605  }
606 
607  KOKKOS_INLINE_FUNCTION void
608  init(value_type& initialErrorCount) const {
609  initialErrorCount = 0;
610  }
611 
612  KOKKOS_INLINE_FUNCTION void
613  join(value_type& dstErrorCount,
614  const value_type& srcErrorCount) const {
615  dstErrorCount += srcErrorCount;
616  }
617 
618  static void
619  pack(const DstView& dst,
620  const SrcView& src,
621  const IdxView& idx,
622  const ColView& col,
623  const size_type numCols,
624  const execution_space& space) {
625  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
626  using row_index_type = typename IdxView::non_const_value_type;
627  using col_index_type = typename ColView::non_const_value_type;
628 
629  size_t errorCount = 0;
630  Kokkos::parallel_reduce("Tpetra::MultiVector pack multicol var stride debug only",
631  range_type(space, 0, idx.size()),
632  PackArrayMultiColumnVariableStrideWithBoundsCheck(dst, src, idx,
633  col, numCols),
634  errorCount);
635  if (errorCount != 0) {
636  constexpr size_t maxNumBadIndicesToPrint = 100;
637 
638  std::ostringstream os; // for error reporting
639  os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
640  "found "
641  << errorCount
642  << " error" << (errorCount != size_t(1) ? "s" : "") << ". ";
643 
644  // Go back and find any out-of-bounds entries in the array of
645  // row indices. Performance doesn't matter since we are already
646  // in an error state, so we can do this sequentially, on host.
647  auto idx_h = Kokkos::create_mirror_view(idx);
648 
649  // DEEP_COPY REVIEW - NOT TESTED
650  Kokkos::deep_copy(idx_h, idx);
651 
652  std::vector<row_index_type> badRows;
653  const size_type numRowInds = idx_h.extent(0);
654  for (size_type k = 0; k < numRowInds; ++k) {
655  if (Impl::outOfBounds<row_index_type>(idx_h(k), src.extent(0))) {
656  badRows.push_back(idx_h(k));
657  }
658  }
659 
660  if (badRows.size() != 0) {
661  os << badRows.size() << " out-of-bounds row ind"
662  << (badRows.size() != size_t(1) ? "ices" : "ex");
663  if (badRows.size() <= maxNumBadIndicesToPrint) {
664  os << ": [";
665  for (size_t k = 0; k < badRows.size(); ++k) {
666  os << badRows[k];
667  if (k + 1 < badRows.size()) {
668  os << ", ";
669  }
670  }
671  os << "]. ";
672  } else {
673  os << ". ";
674  }
675  } else {
676  os << "No out-of-bounds row indices. ";
677  }
678 
679  // Go back and find any out-of-bounds entries in the array
680  // of column indices.
681  auto col_h = Kokkos::create_mirror_view(col);
682 
683  // DEEP_COPY REVIEW - NOT TESTED
684  Kokkos::deep_copy(col_h, col);
685 
686  std::vector<col_index_type> badCols;
687  const size_type numColInds = col_h.extent(0);
688  for (size_type k = 0; k < numColInds; ++k) {
689  if (Impl::outOfBounds<col_index_type>(col_h(k), src.extent(1))) {
690  badCols.push_back(col_h(k));
691  }
692  }
693 
694  if (badCols.size() != 0) {
695  os << badCols.size() << " out-of-bounds column ind"
696  << (badCols.size() != size_t(1) ? "ices" : "ex");
697  if (badCols.size() <= maxNumBadIndicesToPrint) {
698  os << ": [";
699  for (size_t k = 0; k < badCols.size(); ++k) {
700  os << badCols[k];
701  if (k + 1 < badCols.size()) {
702  os << ", ";
703  }
704  }
705  os << "]. ";
706  } else {
707  os << ". ";
708  }
709  } else {
710  os << "No out-of-bounds column indices. ";
711  }
712 
713  TEUCHOS_TEST_FOR_EXCEPTION(errorCount != 0 && badRows.size() == 0 && badCols.size() == 0,
714  std::logic_error,
715  "Tpetra::MultiVector variable stride pack "
716  "kernel reports errorCount="
717  << errorCount << ", but we failed "
718  "to find any bad rows or columns. This should never happen. "
719  "Please report this to the Tpetra developers.");
720 
721  throw std::runtime_error(os.str());
722  } // hasErr
723  }
724 };
725 
726 template <typename DstView,
727  typename SrcView,
728  typename IdxView,
729  typename ColView>
730 void pack_array_multi_column_variable_stride(const DstView& dst,
731  const SrcView& src,
732  const IdxView& idx,
733  const ColView& col,
734  const size_t numCols,
735  const bool debug,
736  const typename DstView::execution_space& space) {
737  static_assert(Kokkos::is_view<DstView>::value,
738  "DstView must be a Kokkos::View.");
739  static_assert(Kokkos::is_view<SrcView>::value,
740  "SrcView must be a Kokkos::View.");
741  static_assert(Kokkos::is_view<IdxView>::value,
742  "IdxView must be a Kokkos::View.");
743  static_assert(Kokkos::is_view<ColView>::value,
744  "ColView must be a Kokkos::View.");
745  static_assert(static_cast<int>(DstView::rank) == 1,
746  "DstView must be a rank-1 Kokkos::View.");
747  static_assert(static_cast<int>(SrcView::rank) == 2,
748  "SrcView must be a rank-2 Kokkos::View.");
749  static_assert(static_cast<int>(IdxView::rank) == 1,
750  "IdxView must be a rank-1 Kokkos::View.");
751  static_assert(static_cast<int>(ColView::rank) == 1,
752  "ColView must be a rank-1 Kokkos::View.");
753 
754  using execution_space = typename DstView::execution_space;
755 
756  static_assert(Kokkos::SpaceAccessibility<execution_space,
757  typename DstView::memory_space>::accessible,
758  "DstView not accessible from execution space");
759  static_assert(Kokkos::SpaceAccessibility<execution_space,
760  typename SrcView::memory_space>::accessible,
761  "SrcView not accessible from execution space");
762  static_assert(Kokkos::SpaceAccessibility<execution_space,
763  typename IdxView::memory_space>::accessible,
764  "IdxView not accessible from execution space");
765 
766  if (debug) {
767  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
768  SrcView, IdxView, ColView>
769  impl_type;
770  impl_type::pack(dst, src, idx, col, numCols, space);
771  } else {
772  typedef PackArrayMultiColumnVariableStride<DstView,
773  SrcView, IdxView, ColView>
774  impl_type;
775  impl_type::pack(dst, src, idx, col, numCols, space);
776  }
777 }
778 
779 template <typename DstView,
780  typename SrcView,
781  typename IdxView,
782  typename ColView>
783 void pack_array_multi_column_variable_stride(const DstView& dst,
784  const SrcView& src,
785  const IdxView& idx,
786  const ColView& col,
787  const size_t numCols,
788  const bool debug = true) {
789  pack_array_multi_column_variable_stride(dst, src, idx, col, numCols, debug,
790  typename DstView::execution_space());
791 }
792 
793 // Tag types to indicate whether to use atomic updates in the
794 // various CombineMode "Op"s.
795 struct atomic_tag {};
796 struct nonatomic_tag {};
797 
798 struct AddOp {
799  template <class SC>
800  KOKKOS_INLINE_FUNCTION void operator()(atomic_tag, SC& dest, const SC& src) const {
801  Kokkos::atomic_add(&dest, src);
802  }
803 
804  template <class SC>
805  KOKKOS_INLINE_FUNCTION void operator()(nonatomic_tag, SC& dest, const SC& src) const {
806  dest += src;
807  }
808 };
809 
810 struct InsertOp {
811  // There's no point to using Kokkos::atomic_assign for the REPLACE
812  // or INSERT CombineModes, since this is not a well-defined
813  // reduction for MultiVector anyway. See GitHub Issue #4417
814  // (which this fixes).
815  template <class SC>
816  KOKKOS_INLINE_FUNCTION void operator()(atomic_tag, SC& dest, const SC& src) const {
817  dest = src;
818  }
819 
820  template <class SC>
821  KOKKOS_INLINE_FUNCTION void operator()(nonatomic_tag, SC& dest, const SC& src) const {
822  dest = src;
823  }
824 };
825 
826 struct AbsMaxOp {
827  template <class Scalar>
828  struct AbsMaxHelper {
829  Scalar value;
830 
831  KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
832 #if KOKKOS_VERSION >= 40799
833  auto lhs_abs_value = KokkosKernels::ArithTraits<Scalar>::abs(value);
834 #else
835  auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
836 #endif
837 #if KOKKOS_VERSION >= 40799
838  auto rhs_abs_value = KokkosKernels::ArithTraits<Scalar>::abs(rhs.value);
839 #else
840  auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
841 #endif
842  value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
843  return *this;
844  }
845 
846  KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
847  AbsMaxHelper ret = *this;
848  ret += rhs;
849  return ret;
850  }
851  };
852 
853  template <typename SC>
854  KOKKOS_INLINE_FUNCTION void operator()(atomic_tag, SC& dst, const SC& src) const {
855  Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
856  }
857 
858  template <typename SC>
859  KOKKOS_INLINE_FUNCTION void operator()(nonatomic_tag, SC& dst, const SC& src) const {
860 #if KOKKOS_VERSION >= 40799
861  auto dst_abs_value = KokkosKernels::ArithTraits<SC>::abs(dst);
862 #else
863  auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
864 #endif
865 #if KOKKOS_VERSION >= 40799
866  auto src_abs_value = KokkosKernels::ArithTraits<SC>::abs(src);
867 #else
868  auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
869 #endif
870  dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
871  }
872 };
873 
874 template <typename ExecutionSpace,
875  typename DstView,
876  typename SrcView,
877  typename IdxView,
878  typename Op,
879  typename Enabled = void>
880 class UnpackArrayMultiColumn {
881  private:
882  static_assert(Kokkos::is_view<DstView>::value,
883  "DstView must be a Kokkos::View.");
884  static_assert(Kokkos::is_view<SrcView>::value,
885  "SrcView must be a Kokkos::View.");
886  static_assert(Kokkos::is_view<IdxView>::value,
887  "IdxView must be a Kokkos::View.");
888  static_assert(static_cast<int>(DstView::rank) == 2,
889  "DstView must be a rank-2 Kokkos::View.");
890  static_assert(static_cast<int>(SrcView::rank) == 1,
891  "SrcView must be a rank-1 Kokkos::View.");
892  static_assert(static_cast<int>(IdxView::rank) == 1,
893  "IdxView must be a rank-1 Kokkos::View.");
894 
895  public:
896  typedef typename ExecutionSpace::execution_space execution_space;
897  typedef typename execution_space::size_type size_type;
898 
899  private:
900  DstView dst;
901  SrcView src;
902  IdxView idx;
903  Op op;
904  size_t numCols;
905 
906  public:
907  UnpackArrayMultiColumn(const ExecutionSpace& /* execSpace */,
908  const DstView& dst_,
909  const SrcView& src_,
910  const IdxView& idx_,
911  const Op& op_,
912  const size_t numCols_)
913  : dst(dst_)
914  , src(src_)
915  , idx(idx_)
916  , op(op_)
917  , numCols(numCols_) {}
918 
919  template <class TagType>
920  KOKKOS_INLINE_FUNCTION void
921  operator()(TagType tag, const size_type k) const {
922  static_assert(std::is_same<TagType, atomic_tag>::value ||
923  std::is_same<TagType, nonatomic_tag>::value,
924  "TagType must be atomic_tag or nonatomic_tag.");
925 
926  const typename IdxView::value_type localRow = idx(k);
927  const size_t offset = k * numCols;
928  for (size_t j = 0; j < numCols; ++j) {
929  op(tag, dst(localRow, j), src(offset + j));
930  }
931  }
932 
933  static void
934  unpack(const ExecutionSpace& execSpace,
935  const DstView& dst,
936  const SrcView& src,
937  const IdxView& idx,
938  const Op& op,
939  const size_t numCols,
940  const bool use_atomic_updates) {
941  if (use_atomic_updates) {
942  using range_type =
943  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
944  Kokkos::parallel_for("Tpetra::MultiVector unpack const stride atomic",
945  range_type(0, idx.size()),
946  UnpackArrayMultiColumn(execSpace, dst, src, idx, op, numCols));
947  } else {
948  using range_type =
949  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
950  Kokkos::parallel_for("Tpetra::MultiVector unpack const stride nonatomic",
951  range_type(0, idx.size()),
952  UnpackArrayMultiColumn(execSpace, dst, src, idx, op, numCols));
953  }
954  }
955 };
956 
957 template <typename ExecutionSpace,
958  typename DstView,
959  typename SrcView,
960  typename IdxView,
961  typename Op,
962  typename SizeType = typename ExecutionSpace::execution_space::size_type,
963  typename Enabled = void>
964 class UnpackArrayMultiColumnWithBoundsCheck {
965  private:
966  static_assert(Kokkos::is_view<DstView>::value,
967  "DstView must be a Kokkos::View.");
968  static_assert(Kokkos::is_view<SrcView>::value,
969  "SrcView must be a Kokkos::View.");
970  static_assert(Kokkos::is_view<IdxView>::value,
971  "IdxView must be a Kokkos::View.");
972  static_assert(static_cast<int>(DstView::rank) == 2,
973  "DstView must be a rank-2 Kokkos::View.");
974  static_assert(static_cast<int>(SrcView::rank) == 1,
975  "SrcView must be a rank-1 Kokkos::View.");
976  static_assert(static_cast<int>(IdxView::rank) == 1,
977  "IdxView must be a rank-1 Kokkos::View.");
978  static_assert(std::is_integral<SizeType>::value,
979  "SizeType must be a built-in integer type.");
980 
981  public:
982  using execution_space = typename ExecutionSpace::execution_space;
983  using size_type = SizeType;
984  using value_type = size_t;
985 
986  private:
987  DstView dst;
988  SrcView src;
989  IdxView idx;
990  Op op;
991  size_type numCols;
992 
993  public:
994  UnpackArrayMultiColumnWithBoundsCheck(const ExecutionSpace& /* execSpace */,
995  const DstView& dst_,
996  const SrcView& src_,
997  const IdxView& idx_,
998  const Op& op_,
999  const size_type numCols_)
1000  : dst(dst_)
1001  , src(src_)
1002  , idx(idx_)
1003  , op(op_)
1004  , numCols(numCols_) {}
1005 
1006  template <class TagType>
1007  KOKKOS_INLINE_FUNCTION void
1008  operator()(TagType tag,
1009  const size_type k,
1010  size_t& lclErrCount) const {
1011  static_assert(std::is_same<TagType, atomic_tag>::value ||
1012  std::is_same<TagType, nonatomic_tag>::value,
1013  "TagType must be atomic_tag or nonatomic_tag.");
1014  using index_type = typename IdxView::non_const_value_type;
1015 
1016  const index_type lclRow = idx(k);
1017  if (lclRow < static_cast<index_type>(0) ||
1018  lclRow >= static_cast<index_type>(dst.extent(0))) {
1019  ++lclErrCount;
1020  } else {
1021  const size_type offset = k * numCols;
1022  for (size_type j = 0; j < numCols; ++j) {
1023  op(tag, dst(lclRow, j), src(offset + j));
1024  }
1025  }
1026  }
1027 
1028  template <class TagType>
1029  KOKKOS_INLINE_FUNCTION void
1030  init(TagType, size_t& initialErrorCount) const {
1031  initialErrorCount = 0;
1032  }
1033 
1034  template <class TagType>
1035  KOKKOS_INLINE_FUNCTION void
1036  join(TagType,
1037  size_t& dstErrorCount,
1038  const size_t& srcErrorCount) const {
1039  dstErrorCount += srcErrorCount;
1040  }
1041 
1042  static void
1043  unpack(const ExecutionSpace& execSpace,
1044  const DstView& dst,
1045  const SrcView& src,
1046  const IdxView& idx,
1047  const Op& op,
1048  const size_type numCols,
1049  const bool use_atomic_updates) {
1050  using index_type = typename IdxView::non_const_value_type;
1051 
1052  size_t errorCount = 0;
1053  if (use_atomic_updates) {
1054  using range_type =
1055  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1056  Kokkos::parallel_reduce("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1057  range_type(0, idx.size()),
1058  UnpackArrayMultiColumnWithBoundsCheck(execSpace, dst, src,
1059  idx, op, numCols),
1060  errorCount);
1061  } else {
1062  using range_type =
1063  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1064  Kokkos::parallel_reduce("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1065  range_type(0, idx.size()),
1066  UnpackArrayMultiColumnWithBoundsCheck(execSpace, dst, src,
1067  idx, op, numCols),
1068  errorCount);
1069  }
1070 
1071  if (errorCount != 0) {
1072  // Go back and find the out-of-bounds entries in the index
1073  // array. Performance doesn't matter since we are already in
1074  // an error state, so we can do this sequentially, on host.
1075  auto idx_h = Kokkos::create_mirror_view(idx);
1076 
1077  // DEEP_COPY REVIEW - NOT TESTED
1078  Kokkos::deep_copy(idx_h, idx);
1079 
1080  std::vector<index_type> badIndices;
1081  const size_type numInds = idx_h.extent(0);
1082  for (size_type k = 0; k < numInds; ++k) {
1083  if (idx_h(k) < static_cast<index_type>(0) ||
1084  idx_h(k) >= static_cast<index_type>(dst.extent(0))) {
1085  badIndices.push_back(idx_h(k));
1086  }
1087  }
1088 
1089  if (errorCount != badIndices.size()) {
1090  std::ostringstream os;
1091  os << "MultiVector unpack kernel: errorCount = " << errorCount
1092  << " != badIndices.size() = " << badIndices.size()
1093  << ". This should never happen. "
1094  "Please report this to the Tpetra developers.";
1095  throw std::logic_error(os.str());
1096  }
1097 
1098  std::ostringstream os;
1099  os << "MultiVector unpack kernel had " << badIndices.size()
1100  << " out-of bounds index/ices. Here they are: [";
1101  for (size_t k = 0; k < badIndices.size(); ++k) {
1102  os << badIndices[k];
1103  if (k + 1 < badIndices.size()) {
1104  os << ", ";
1105  }
1106  }
1107  os << "].";
1108  throw std::runtime_error(os.str());
1109  }
1110  }
1111 };
1112 
1113 template <typename ExecutionSpace,
1114  typename DstView,
1115  typename SrcView,
1116  typename IdxView,
1117  typename Op>
1118 void unpack_array_multi_column(const ExecutionSpace& execSpace,
1119  const DstView& dst,
1120  const SrcView& src,
1121  const IdxView& idx,
1122  const Op& op,
1123  const size_t numCols,
1124  const bool use_atomic_updates,
1125  const bool debug) {
1126  static_assert(Kokkos::is_view<DstView>::value,
1127  "DstView must be a Kokkos::View.");
1128  static_assert(Kokkos::is_view<SrcView>::value,
1129  "SrcView must be a Kokkos::View.");
1130  static_assert(Kokkos::is_view<IdxView>::value,
1131  "IdxView must be a Kokkos::View.");
1132  static_assert(static_cast<int>(DstView::rank) == 2,
1133  "DstView must be a rank-2 Kokkos::View.");
1134  static_assert(static_cast<int>(SrcView::rank) == 1,
1135  "SrcView must be a rank-1 Kokkos::View.");
1136  static_assert(static_cast<int>(IdxView::rank) == 1,
1137  "IdxView must be a rank-1 Kokkos::View.");
1138 
1139  if (debug) {
1140  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1141  DstView, SrcView, IdxView, Op>
1142  impl_type;
1143  impl_type::unpack(execSpace, dst, src, idx, op, numCols,
1144  use_atomic_updates);
1145  } else {
1146  typedef UnpackArrayMultiColumn<ExecutionSpace,
1147  DstView, SrcView, IdxView, Op>
1148  impl_type;
1149  impl_type::unpack(execSpace, dst, src, idx, op, numCols,
1150  use_atomic_updates);
1151  }
1152 }
1153 
1154 template <typename ExecutionSpace,
1155  typename DstView,
1156  typename SrcView,
1157  typename IdxView,
1158  typename ColView,
1159  typename Op,
1160  typename Enabled = void>
1161 class UnpackArrayMultiColumnVariableStride {
1162  private:
1163  static_assert(Kokkos::is_view<DstView>::value,
1164  "DstView must be a Kokkos::View.");
1165  static_assert(Kokkos::is_view<SrcView>::value,
1166  "SrcView must be a Kokkos::View.");
1167  static_assert(Kokkos::is_view<IdxView>::value,
1168  "IdxView must be a Kokkos::View.");
1169  static_assert(Kokkos::is_view<ColView>::value,
1170  "ColView must be a Kokkos::View.");
1171  static_assert(static_cast<int>(DstView::rank) == 2,
1172  "DstView must be a rank-2 Kokkos::View.");
1173  static_assert(static_cast<int>(SrcView::rank) == 1,
1174  "SrcView must be a rank-1 Kokkos::View.");
1175  static_assert(static_cast<int>(IdxView::rank) == 1,
1176  "IdxView must be a rank-1 Kokkos::View.");
1177  static_assert(static_cast<int>(ColView::rank) == 1,
1178  "ColView must be a rank-1 Kokkos::View.");
1179 
1180  public:
1181  using execution_space = typename ExecutionSpace::execution_space;
1182  using size_type = typename execution_space::size_type;
1183 
1184  private:
1185  DstView dst;
1186  SrcView src;
1187  IdxView idx;
1188  ColView col;
1189  Op op;
1190  size_t numCols;
1191 
1192  public:
1193  UnpackArrayMultiColumnVariableStride(const ExecutionSpace& /* execSpace */,
1194  const DstView& dst_,
1195  const SrcView& src_,
1196  const IdxView& idx_,
1197  const ColView& col_,
1198  const Op& op_,
1199  const size_t numCols_)
1200  : dst(dst_)
1201  , src(src_)
1202  , idx(idx_)
1203  , col(col_)
1204  , op(op_)
1205  , numCols(numCols_) {}
1206 
1207  template <class TagType>
1208  KOKKOS_INLINE_FUNCTION void
1209  operator()(TagType tag, const size_type k) const {
1210  static_assert(std::is_same<TagType, atomic_tag>::value ||
1211  std::is_same<TagType, nonatomic_tag>::value,
1212  "TagType must be atomic_tag or nonatomic_tag.");
1213 
1214  const typename IdxView::value_type localRow = idx(k);
1215  const size_t offset = k * numCols;
1216  for (size_t j = 0; j < numCols; ++j) {
1217  op(tag, dst(localRow, col(j)), src(offset + j));
1218  }
1219  }
1220 
1221  static void
1222  unpack(const ExecutionSpace& execSpace,
1223  const DstView& dst,
1224  const SrcView& src,
1225  const IdxView& idx,
1226  const ColView& col,
1227  const Op& op,
1228  const size_t numCols,
1229  const bool use_atomic_updates) {
1230  if (use_atomic_updates) {
1231  using range_type =
1232  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1233  Kokkos::parallel_for("Tpetra::MultiVector unpack var stride atomic",
1234  range_type(0, idx.size()),
1235  UnpackArrayMultiColumnVariableStride(execSpace, dst, src,
1236  idx, col, op, numCols));
1237  } else {
1238  using range_type =
1239  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1240  Kokkos::parallel_for("Tpetra::MultiVector unpack var stride nonatomic",
1241  range_type(0, idx.size()),
1242  UnpackArrayMultiColumnVariableStride(execSpace, dst, src,
1243  idx, col, op, numCols));
1244  }
1245  }
1246 };
1247 
1248 template <typename ExecutionSpace,
1249  typename DstView,
1250  typename SrcView,
1251  typename IdxView,
1252  typename ColView,
1253  typename Op,
1254  typename SizeType = typename ExecutionSpace::execution_space::size_type,
1255  typename Enabled = void>
1256 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1257  private:
1258  static_assert(Kokkos::is_view<DstView>::value,
1259  "DstView must be a Kokkos::View.");
1260  static_assert(Kokkos::is_view<SrcView>::value,
1261  "SrcView must be a Kokkos::View.");
1262  static_assert(Kokkos::is_view<IdxView>::value,
1263  "IdxView must be a Kokkos::View.");
1264  static_assert(Kokkos::is_view<ColView>::value,
1265  "ColView must be a Kokkos::View.");
1266  static_assert(static_cast<int>(DstView::rank) == 2,
1267  "DstView must be a rank-2 Kokkos::View.");
1268  static_assert(static_cast<int>(SrcView::rank) == 1,
1269  "SrcView must be a rank-1 Kokkos::View.");
1270  static_assert(static_cast<int>(IdxView::rank) == 1,
1271  "IdxView must be a rank-1 Kokkos::View.");
1272  static_assert(static_cast<int>(ColView::rank) == 1,
1273  "ColView must be a rank-1 Kokkos::View.");
1274  static_assert(std::is_integral<SizeType>::value,
1275  "SizeType must be a built-in integer type.");
1276 
1277  public:
1278  using execution_space = typename ExecutionSpace::execution_space;
1279  using size_type = SizeType;
1280  using value_type = size_t;
1281 
1282  private:
1283  DstView dst;
1284  SrcView src;
1285  IdxView idx;
1286  ColView col;
1287  Op op;
1288  size_type numCols;
1289 
1290  public:
1291  UnpackArrayMultiColumnVariableStrideWithBoundsCheck(const ExecutionSpace& /* execSpace */,
1292  const DstView& dst_,
1293  const SrcView& src_,
1294  const IdxView& idx_,
1295  const ColView& col_,
1296  const Op& op_,
1297  const size_t numCols_)
1298  : dst(dst_)
1299  , src(src_)
1300  , idx(idx_)
1301  , col(col_)
1302  , op(op_)
1303  , numCols(numCols_) {}
1304 
1305  template <class TagType>
1306  KOKKOS_INLINE_FUNCTION void
1307  operator()(TagType tag,
1308  const size_type k,
1309  value_type& lclErrorCount) const {
1310  static_assert(std::is_same<TagType, atomic_tag>::value ||
1311  std::is_same<TagType, nonatomic_tag>::value,
1312  "TagType must be atomic_tag or nonatomic_tag.");
1313  using row_index_type = typename IdxView::non_const_value_type;
1314  using col_index_type = typename ColView::non_const_value_type;
1315 
1316  const row_index_type lclRow = idx(k);
1317  if (lclRow < static_cast<row_index_type>(0) ||
1318  lclRow >= static_cast<row_index_type>(dst.extent(0))) {
1319  ++lclErrorCount;
1320  } else {
1321  const size_type offset = k * numCols;
1322  for (size_type j = 0; j < numCols; ++j) {
1323  const col_index_type lclCol = col(j);
1324  if (Impl::outOfBounds<col_index_type>(lclCol, dst.extent(1))) {
1325  ++lclErrorCount;
1326  } else { // all indices are valid; apply the op
1327  op(tag, dst(lclRow, col(j)), src(offset + j));
1328  }
1329  }
1330  }
1331  }
1332 
1333  KOKKOS_INLINE_FUNCTION void
1334  init(value_type& initialErrorCount) const {
1335  initialErrorCount = 0;
1336  }
1337 
1338  KOKKOS_INLINE_FUNCTION void
1339  join(value_type& dstErrorCount,
1340  const value_type& srcErrorCount) const {
1341  dstErrorCount += srcErrorCount;
1342  }
1343 
1344  static void
1345  unpack(const ExecutionSpace& execSpace,
1346  const DstView& dst,
1347  const SrcView& src,
1348  const IdxView& idx,
1349  const ColView& col,
1350  const Op& op,
1351  const size_type numCols,
1352  const bool use_atomic_updates) {
1353  using row_index_type = typename IdxView::non_const_value_type;
1354  using col_index_type = typename ColView::non_const_value_type;
1355 
1356  size_t errorCount = 0;
1357  if (use_atomic_updates) {
1358  using range_type =
1359  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1360  Kokkos::parallel_reduce("Tpetra::MultiVector unpack var stride atomic debug only",
1361  range_type(0, idx.size()),
1362  UnpackArrayMultiColumnVariableStrideWithBoundsCheck(execSpace, dst, src, idx, col, op, numCols),
1363  errorCount);
1364  } else {
1365  using range_type =
1366  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1367  Kokkos::parallel_reduce("Tpetra::MultiVector unpack var stride nonatomic debug only",
1368  range_type(0, idx.size()),
1369  UnpackArrayMultiColumnVariableStrideWithBoundsCheck(execSpace, dst, src, idx, col, op, numCols),
1370  errorCount);
1371  }
1372 
1373  if (errorCount != 0) {
1374  constexpr size_t maxNumBadIndicesToPrint = 100;
1375 
1376  std::ostringstream os; // for error reporting
1377  os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1378  "found "
1379  << errorCount
1380  << " error" << (errorCount != size_t(1) ? "s" : "") << ". ";
1381 
1382  // Go back and find any out-of-bounds entries in the array of
1383  // row indices. Performance doesn't matter since we are
1384  // already in an error state, so we can do this sequentially,
1385  // on host.
1386  auto idx_h = Kokkos::create_mirror_view(idx);
1387 
1388  // DEEP_COPY REVIEW - NOT TESTED
1389  Kokkos::deep_copy(idx_h, idx);
1390 
1391  std::vector<row_index_type> badRows;
1392  const size_type numRowInds = idx_h.extent(0);
1393  for (size_type k = 0; k < numRowInds; ++k) {
1394  if (idx_h(k) < static_cast<row_index_type>(0) ||
1395  idx_h(k) >= static_cast<row_index_type>(dst.extent(0))) {
1396  badRows.push_back(idx_h(k));
1397  }
1398  }
1399 
1400  if (badRows.size() != 0) {
1401  os << badRows.size() << " out-of-bounds row ind"
1402  << (badRows.size() != size_t(1) ? "ices" : "ex");
1403  if (badRows.size() <= maxNumBadIndicesToPrint) {
1404  os << ": [";
1405  for (size_t k = 0; k < badRows.size(); ++k) {
1406  os << badRows[k];
1407  if (k + 1 < badRows.size()) {
1408  os << ", ";
1409  }
1410  }
1411  os << "]. ";
1412  } else {
1413  os << ". ";
1414  }
1415  } else {
1416  os << "No out-of-bounds row indices. ";
1417  }
1418 
1419  // Go back and find any out-of-bounds entries in the array
1420  // of column indices.
1421  auto col_h = Kokkos::create_mirror_view(col);
1422 
1423  // DEEP_COPY REVIEW - NOT TESTED
1424  Kokkos::deep_copy(col_h, col);
1425 
1426  std::vector<col_index_type> badCols;
1427  const size_type numColInds = col_h.extent(0);
1428  for (size_type k = 0; k < numColInds; ++k) {
1429  if (Impl::outOfBounds<col_index_type>(col_h(k), dst.extent(1))) {
1430  badCols.push_back(col_h(k));
1431  }
1432  }
1433 
1434  if (badCols.size() != 0) {
1435  os << badCols.size() << " out-of-bounds column ind"
1436  << (badCols.size() != size_t(1) ? "ices" : "ex");
1437  if (badCols.size() <= maxNumBadIndicesToPrint) {
1438  for (size_t k = 0; k < badCols.size(); ++k) {
1439  os << ": [";
1440  os << badCols[k];
1441  if (k + 1 < badCols.size()) {
1442  os << ", ";
1443  }
1444  }
1445  os << "]. ";
1446  } else {
1447  os << ". ";
1448  }
1449  } else {
1450  os << "No out-of-bounds column indices. ";
1451  }
1452 
1453  TEUCHOS_TEST_FOR_EXCEPTION(errorCount != 0 && badRows.size() == 0 && badCols.size() == 0,
1454  std::logic_error,
1455  "Tpetra::MultiVector variable stride unpack "
1456  "kernel reports errorCount="
1457  << errorCount << ", but we failed "
1458  "to find any bad rows or columns. This should never happen. "
1459  "Please report this to the Tpetra developers.");
1460 
1461  throw std::runtime_error(os.str());
1462  } // hasErr
1463  }
1464 };
1465 
1466 template <typename ExecutionSpace,
1467  typename DstView,
1468  typename SrcView,
1469  typename IdxView,
1470  typename ColView,
1471  typename Op>
1472 void unpack_array_multi_column_variable_stride(const ExecutionSpace& execSpace,
1473  const DstView& dst,
1474  const SrcView& src,
1475  const IdxView& idx,
1476  const ColView& col,
1477  const Op& op,
1478  const size_t numCols,
1479  const bool use_atomic_updates,
1480  const bool debug) {
1481  static_assert(Kokkos::is_view<DstView>::value,
1482  "DstView must be a Kokkos::View.");
1483  static_assert(Kokkos::is_view<SrcView>::value,
1484  "SrcView must be a Kokkos::View.");
1485  static_assert(Kokkos::is_view<IdxView>::value,
1486  "IdxView must be a Kokkos::View.");
1487  static_assert(Kokkos::is_view<ColView>::value,
1488  "ColView must be a Kokkos::View.");
1489  static_assert(static_cast<int>(DstView::rank) == 2,
1490  "DstView must be a rank-2 Kokkos::View.");
1491  static_assert(static_cast<int>(SrcView::rank) == 1,
1492  "SrcView must be a rank-1 Kokkos::View.");
1493  static_assert(static_cast<int>(IdxView::rank) == 1,
1494  "IdxView must be a rank-1 Kokkos::View.");
1495  static_assert(static_cast<int>(ColView::rank) == 1,
1496  "ColView must be a rank-1 Kokkos::View.");
1497 
1498  if (debug) {
1499  using impl_type =
1500  UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1501  DstView, SrcView, IdxView, ColView, Op>;
1502  impl_type::unpack(execSpace, dst, src, idx, col, op, numCols,
1503  use_atomic_updates);
1504  } else {
1505  using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1506  DstView, SrcView, IdxView, ColView, Op>;
1507  impl_type::unpack(execSpace, dst, src, idx, col, op, numCols,
1508  use_atomic_updates);
1509  }
1510 }
1511 
1512 template <typename DstView, typename SrcView,
1513  typename DstIdxView, typename SrcIdxView, typename Op,
1514  typename Enabled = void>
1515 struct PermuteArrayMultiColumn {
1516  using size_type = typename DstView::size_type;
1517 
1518  DstView dst;
1519  SrcView src;
1520  DstIdxView dst_idx;
1521  SrcIdxView src_idx;
1522  size_t numCols;
1523  Op op;
1524 
1525  PermuteArrayMultiColumn(const DstView& dst_,
1526  const SrcView& src_,
1527  const DstIdxView& dst_idx_,
1528  const SrcIdxView& src_idx_,
1529  const size_t numCols_,
1530  const Op& op_)
1531  : dst(dst_)
1532  , src(src_)
1533  , dst_idx(dst_idx_)
1534  , src_idx(src_idx_)
1535  , numCols(numCols_)
1536  , op(op_) {}
1537 
1538  KOKKOS_INLINE_FUNCTION void
1539  operator()(const size_type k) const {
1540  const typename DstIdxView::value_type toRow = dst_idx(k);
1541  const typename SrcIdxView::value_type fromRow = src_idx(k);
1542  nonatomic_tag tag; // permute does not need atomics
1543  for (size_t j = 0; j < numCols; ++j) {
1544  op(tag, dst(toRow, j), src(fromRow, j));
1545  }
1546  }
1547 
1548  template <typename ExecutionSpace>
1549  static void
1550  permute(const ExecutionSpace& space,
1551  const DstView& dst,
1552  const SrcView& src,
1553  const DstIdxView& dst_idx,
1554  const SrcIdxView& src_idx,
1555  const size_t numCols,
1556  const Op& op) {
1557  using range_type =
1558  Kokkos::RangePolicy<ExecutionSpace, size_type>;
1559  // permute does not need atomics for Op
1560  const size_type n = std::min(dst_idx.size(), src_idx.size());
1561  Kokkos::parallel_for("Tpetra::MultiVector permute multicol const stride",
1562  range_type(space, 0, n),
1563  PermuteArrayMultiColumn(dst, src, dst_idx, src_idx, numCols, op));
1564  }
1565 };
1566 
1567 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1568 // SrcView::rank == 2
1569 template <typename ExecutionSpace, typename DstView, typename SrcView,
1570  typename DstIdxView, typename SrcIdxView, typename Op>
1571 void permute_array_multi_column(const ExecutionSpace& space, const DstView& dst,
1572  const SrcView& src, const DstIdxView& dst_idx,
1573  const SrcIdxView& src_idx, size_t numCols,
1574  const Op& op) {
1575  PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView,
1576  Op>::permute(space, dst, src, dst_idx, src_idx,
1577  numCols, op);
1578 }
1579 
1580 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1581 // SrcView::rank == 2
1582 template <typename DstView, typename SrcView,
1583  typename DstIdxView, typename SrcIdxView, typename Op>
1584 void permute_array_multi_column(const DstView& dst,
1585  const SrcView& src,
1586  const DstIdxView& dst_idx,
1587  const SrcIdxView& src_idx,
1588  size_t numCols,
1589  const Op& op) {
1590  using execution_space = typename DstView::execution_space;
1591  PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView, Op>::permute(
1592  execution_space(), dst, src, dst_idx, src_idx, numCols, op);
1593 }
1594 
1595 template <typename DstView, typename SrcView,
1596  typename DstIdxView, typename SrcIdxView,
1597  typename DstColView, typename SrcColView, typename Op,
1598  typename Enabled = void>
1599 struct PermuteArrayMultiColumnVariableStride {
1600  using size_type = typename DstView::size_type;
1601 
1602  DstView dst;
1603  SrcView src;
1604  DstIdxView dst_idx;
1605  SrcIdxView src_idx;
1606  DstColView dst_col;
1607  SrcColView src_col;
1608  size_t numCols;
1609  Op op;
1610 
1611  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1612  const SrcView& src_,
1613  const DstIdxView& dst_idx_,
1614  const SrcIdxView& src_idx_,
1615  const DstColView& dst_col_,
1616  const SrcColView& src_col_,
1617  const size_t numCols_,
1618  const Op& op_)
1619  : dst(dst_)
1620  , src(src_)
1621  , dst_idx(dst_idx_)
1622  , src_idx(src_idx_)
1623  , dst_col(dst_col_)
1624  , src_col(src_col_)
1625  , numCols(numCols_)
1626  , op(op_) {}
1627 
1628  KOKKOS_INLINE_FUNCTION void
1629  operator()(const size_type k) const {
1630  const typename DstIdxView::value_type toRow = dst_idx(k);
1631  const typename SrcIdxView::value_type fromRow = src_idx(k);
1632  const nonatomic_tag tag; // permute does not need atomics
1633  for (size_t j = 0; j < numCols; ++j) {
1634  op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1635  }
1636  }
1637 
1638  template <typename ExecutionSpace>
1639  static void
1640  permute(const ExecutionSpace& space,
1641  const DstView& dst,
1642  const SrcView& src,
1643  const DstIdxView& dst_idx,
1644  const SrcIdxView& src_idx,
1645  const DstColView& dst_col,
1646  const SrcColView& src_col,
1647  const size_t numCols,
1648  const Op& op) {
1649  static_assert(Kokkos::SpaceAccessibility<
1650  ExecutionSpace, typename DstView::memory_space>::accessible,
1651  "ExecutionSpace must be able to access DstView");
1652 
1653  using range_type = Kokkos::RangePolicy<ExecutionSpace, size_type>;
1654  const size_type n = std::min(dst_idx.size(), src_idx.size());
1655  Kokkos::parallel_for("Tpetra::MultiVector permute multicol var stride",
1656  range_type(space, 0, n),
1657  PermuteArrayMultiColumnVariableStride(dst, src, dst_idx, src_idx,
1658  dst_col, src_col, numCols, op));
1659  }
1660 };
1661 
1662 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1663 // SrcView::rank == 2
1664 template <typename ExecutionSpace, typename DstView, typename SrcView,
1665  typename DstIdxView, typename SrcIdxView, typename DstColView,
1666  typename SrcColView, typename Op>
1667 void permute_array_multi_column_variable_stride(
1668  const ExecutionSpace& space, const DstView& dst, const SrcView& src,
1669  const DstIdxView& dst_idx, const SrcIdxView& src_idx,
1670  const DstColView& dst_col, const SrcColView& src_col, size_t numCols,
1671  const Op& op) {
1672  PermuteArrayMultiColumnVariableStride<DstView, SrcView, DstIdxView,
1673  SrcIdxView, DstColView, SrcColView,
1674  Op>::permute(space, dst, src, dst_idx,
1675  src_idx, dst_col, src_col,
1676  numCols, op);
1677 }
1678 
1679 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1680 // SrcView::rank == 2
1681 template <typename DstView, typename SrcView,
1682  typename DstIdxView, typename SrcIdxView,
1683  typename DstColView, typename SrcColView, typename Op>
1684 void permute_array_multi_column_variable_stride(const DstView& dst,
1685  const SrcView& src,
1686  const DstIdxView& dst_idx,
1687  const SrcIdxView& src_idx,
1688  const DstColView& dst_col,
1689  const SrcColView& src_col,
1690  size_t numCols,
1691  const Op& op) {
1692  using execution_space = typename DstView::execution_space;
1693  PermuteArrayMultiColumnVariableStride<DstView, SrcView,
1694  DstIdxView, SrcIdxView, DstColView, SrcColView, Op>::permute(execution_space(), dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1695 }
1696 
1697 } // namespace Details
1698 } // namespace KokkosRefactor
1699 } // namespace Tpetra
1700 
1701 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...