Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // clang-format off
11 
12 
13 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
14 // incorrect (as() is not a device function) and usually irrelevant
15 // (it would only matter if LocalOrdinal were bigger than size_t on a
16 // particular platform, which is unlikely).
17 
18 // KDD Aug 2020: In the permute/pack/unpack functors,
19 // the Enabled template parameter is specialized in
20 // downstream packages like Stokhos using SFINAE to provide partial
21 // specializations based on the scalar type of the SrcView and DstView
22 // template parameters. See #7898.
23 // Do not remove it before checking with Stokhos and other specializing users.
24 
25 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
26 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
27 
28 #include "Kokkos_Core.hpp"
29 #include "Kokkos_ArithTraits.hpp"
30 #include <sstream>
31 #include <stdexcept>
32 
33 namespace Tpetra {
34 namespace KokkosRefactor {
35 namespace Details {
36 
42 namespace Impl {
43 
50 template<class IntegerType,
51  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
52 struct OutOfBounds {
53  static KOKKOS_INLINE_FUNCTION bool
54  test (const IntegerType x,
55  const IntegerType exclusiveUpperBound);
56 };
57 
58 // Partial specialization for the case where IntegerType IS signed.
59 template<class IntegerType>
60 struct OutOfBounds<IntegerType, true> {
61  static KOKKOS_INLINE_FUNCTION bool
62  test (const IntegerType x,
63  const IntegerType exclusiveUpperBound)
64  {
65  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
66  }
67 };
68 
69 // Partial specialization for the case where IntegerType is NOT signed.
70 template<class IntegerType>
71 struct OutOfBounds<IntegerType, false> {
72  static KOKKOS_INLINE_FUNCTION bool
73  test (const IntegerType x,
74  const IntegerType exclusiveUpperBound)
75  {
76  return x >= exclusiveUpperBound;
77  }
78 };
79 
82 template<class IntegerType>
83 KOKKOS_INLINE_FUNCTION bool
84 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
85 {
86  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
87 }
88 
89 } // namespace Impl
90 
91  // Functors for implementing packAndPrepare and unpackAndCombine
92  // through parallel_for
93 
94  template <typename DstView, typename SrcView, typename IdxView,
95  typename Enabled = void>
96  struct PackArraySingleColumn {
97  typedef typename DstView::execution_space execution_space;
98  typedef typename execution_space::size_type size_type;
99 
100  DstView dst;
101  SrcView src;
102  IdxView idx;
103  size_t col;
104 
105  PackArraySingleColumn (const DstView& dst_,
106  const SrcView& src_,
107  const IdxView& idx_,
108  const size_t col_) :
109  dst(dst_), src(src_), idx(idx_), col(col_) {}
110 
111  KOKKOS_INLINE_FUNCTION void
112  operator() (const size_type k) const {
113  dst(k) = src(idx(k), col);
114  }
115 
116  static void
117  pack (const DstView& dst,
118  const SrcView& src,
119  const IdxView& idx,
120  const size_t col,
121  const execution_space &space)
122  {
123  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
124  Kokkos::parallel_for
125  ("Tpetra::MultiVector pack one col",
126  range_type (space, 0, idx.size ()),
127  PackArraySingleColumn (dst, src, idx, col));
128  }
129  };
130 
131  template <typename DstView,
132  typename SrcView,
133  typename IdxView,
134  typename SizeType = typename DstView::execution_space::size_type,
135  typename Enabled = void>
136  class PackArraySingleColumnWithBoundsCheck {
137  private:
138  static_assert (Kokkos::is_view<DstView>::value,
139  "DstView must be a Kokkos::View.");
140  static_assert (Kokkos::is_view<SrcView>::value,
141  "SrcView must be a Kokkos::View.");
142  static_assert (Kokkos::is_view<IdxView>::value,
143  "IdxView must be a Kokkos::View.");
144  static_assert (static_cast<int> (DstView::rank) == 1,
145  "DstView must be a rank-1 Kokkos::View.");
146  static_assert (static_cast<int> (SrcView::rank) == 2,
147  "SrcView must be a rank-2 Kokkos::View.");
148  static_assert (static_cast<int> (IdxView::rank) == 1,
149  "IdxView must be a rank-1 Kokkos::View.");
150  static_assert (std::is_integral<SizeType>::value,
151  "SizeType must be a built-in integer type.");
152 
153  using execution_space = typename DstView::execution_space;
154 
155  public:
156  typedef SizeType size_type;
157  using value_type = size_t;
158 
159  private:
160  DstView dst;
161  SrcView src;
162  IdxView idx;
163  size_type col;
164  execution_space space;
165 
166  public:
167  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
168  const SrcView& src_,
169  const IdxView& idx_,
170  const size_type col_) :
171  dst (dst_), src (src_), idx (idx_), col (col_) {}
172 
173  KOKKOS_INLINE_FUNCTION void
174  operator() (const size_type k, value_type& lclErrCount) const {
175  using index_type = typename IdxView::non_const_value_type;
176 
177  const index_type lclRow = idx(k);
178  if (lclRow < static_cast<index_type> (0) ||
179  lclRow >= static_cast<index_type> (src.extent (0))) {
180  ++lclErrCount;
181  }
182  else {
183  dst(k) = src(lclRow, col);
184  }
185  }
186 
187  KOKKOS_INLINE_FUNCTION
188  void init (value_type& initialErrorCount) const {
189  initialErrorCount = 0;
190  }
191 
192  KOKKOS_INLINE_FUNCTION void
193  join (value_type& dstErrorCount,
194  const value_type& srcErrorCount) const
195  {
196  dstErrorCount += srcErrorCount;
197  }
198 
199  static void
200  pack (const DstView& dst,
201  const SrcView& src,
202  const IdxView& idx,
203  const size_type col,
204  const execution_space &space)
205  {
206  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
207  typedef typename IdxView::non_const_value_type index_type;
208 
209  size_t errorCount = 0;
210  Kokkos::parallel_reduce
211  ("Tpetra::MultiVector pack one col debug only",
212  range_type (space, 0, idx.size ()),
213  PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
214  errorCount);
215 
216  if (errorCount != 0) {
217  // Go back and find the out-of-bounds entries in the index
218  // array. Performance doesn't matter since we are already in
219  // an error state, so we can do this sequentially, on host.
220  auto idx_h = Kokkos::create_mirror_view (idx);
221 
222  // DEEP_COPY REVIEW - NOT TESTED
223  Kokkos::deep_copy (idx_h, idx);
224 
225  std::vector<index_type> badIndices;
226  const size_type numInds = idx_h.extent (0);
227  for (size_type k = 0; k < numInds; ++k) {
228  if (idx_h(k) < static_cast<index_type> (0) ||
229  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
230  badIndices.push_back (idx_h(k));
231  }
232  }
233 
234  TEUCHOS_TEST_FOR_EXCEPTION
235  (errorCount != badIndices.size (), std::logic_error,
236  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
237  << " != badIndices.size() = " << badIndices.size () << ". This sho"
238  "uld never happen. Please report this to the Tpetra developers.");
239 
240  std::ostringstream os;
241  os << "MultiVector single-column pack kernel had "
242  << badIndices.size () << " out-of bounds index/ices. "
243  "Here they are: [";
244  for (size_t k = 0; k < badIndices.size (); ++k) {
245  os << badIndices[k];
246  if (k + 1 < badIndices.size ()) {
247  os << ", ";
248  }
249  }
250  os << "].";
251  throw std::runtime_error (os.str ());
252  }
253  }
254  };
255 
256 
257  template <typename DstView, typename SrcView, typename IdxView>
258  void
259  pack_array_single_column (const DstView& dst,
260  const SrcView& src,
261  const IdxView& idx,
262  const size_t col,
263  const bool debug,
264  const typename DstView::execution_space &space)
265  {
266  static_assert (Kokkos::is_view<DstView>::value,
267  "DstView must be a Kokkos::View.");
268  static_assert (Kokkos::is_view<SrcView>::value,
269  "SrcView must be a Kokkos::View.");
270  static_assert (Kokkos::is_view<IdxView>::value,
271  "IdxView must be a Kokkos::View.");
272  static_assert (static_cast<int> (DstView::rank) == 1,
273  "DstView must be a rank-1 Kokkos::View.");
274  static_assert (static_cast<int> (SrcView::rank) == 2,
275  "SrcView must be a rank-2 Kokkos::View.");
276  static_assert (static_cast<int> (IdxView::rank) == 1,
277  "IdxView must be a rank-1 Kokkos::View.");
278 
279  using execution_space = typename DstView::execution_space;
280 
281  static_assert (Kokkos::SpaceAccessibility<execution_space,
282  typename DstView::memory_space>::accessible,
283  "DstView not accessible from execution space");
284  static_assert (Kokkos::SpaceAccessibility<execution_space,
285  typename SrcView::memory_space>::accessible,
286  "SrcView not accessible from execution space");
287  static_assert (Kokkos::SpaceAccessibility<execution_space,
288  typename IdxView::memory_space>::accessible,
289  "IdxView not accessible from execution space");
290 
291  if (debug) {
292  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
293  impl_type::pack (dst, src, idx, col, space);
294  }
295  else {
296  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
297  impl_type::pack (dst, src, idx, col, space);
298  }
299  }
300 
303  template <typename DstView, typename SrcView, typename IdxView>
304  void
305  pack_array_single_column (const DstView& dst,
306  const SrcView& src,
307  const IdxView& idx,
308  const size_t col,
309  const bool debug = true)
310  {
311  pack_array_single_column(dst, src, idx, col, debug, typename DstView::execution_space());
312  }
313 
314  template <typename DstView, typename SrcView, typename IdxView,
315  typename Enabled = void>
316  struct PackArrayMultiColumn {
317  using execution_space = typename DstView::execution_space;
318  typedef typename execution_space::size_type size_type;
319 
320  DstView dst;
321  SrcView src;
322  IdxView idx;
323  size_t numCols;
324 
325  PackArrayMultiColumn (const DstView& dst_,
326  const SrcView& src_,
327  const IdxView& idx_,
328  const size_t numCols_) :
329  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
330 
331  KOKKOS_INLINE_FUNCTION void
332  operator() (const size_type k) const {
333  const typename IdxView::value_type localRow = idx(k);
334  const size_t offset = k*numCols;
335  for (size_t j = 0; j < numCols; ++j) {
336  dst(offset + j) = src(localRow, j);
337  }
338  }
339 
340  static void pack(const DstView& dst,
341  const SrcView& src,
342  const IdxView& idx,
343  size_t numCols,
344  const execution_space &space) {
345  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
346  Kokkos::parallel_for
347  ("Tpetra::MultiVector pack multicol const stride",
348  range_type (space, 0, idx.size ()),
349  PackArrayMultiColumn (dst, src, idx, numCols));
350  }
351  };
352 
353  template <typename DstView,
354  typename SrcView,
355  typename IdxView,
356  typename SizeType = typename DstView::execution_space::size_type,
357  typename Enabled = void>
358  class PackArrayMultiColumnWithBoundsCheck {
359  public:
360  using size_type = SizeType;
361  using value_type = size_t;
362  using execution_space = typename DstView::execution_space;
363 
364  private:
365  DstView dst;
366  SrcView src;
367  IdxView idx;
368  size_type numCols;
369 
370  public:
371  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
372  const SrcView& src_,
373  const IdxView& idx_,
374  const size_type numCols_) :
375  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
376 
377  KOKKOS_INLINE_FUNCTION void
378  operator() (const size_type k, value_type& lclErrorCount) const {
379  typedef typename IdxView::non_const_value_type index_type;
380 
381  const index_type lclRow = idx(k);
382  if (lclRow < static_cast<index_type> (0) ||
383  lclRow >= static_cast<index_type> (src.extent (0))) {
384  ++lclErrorCount; // failed
385  }
386  else {
387  const size_type offset = k*numCols;
388  for (size_type j = 0; j < numCols; ++j) {
389  dst(offset + j) = src(lclRow, j);
390  }
391  }
392  }
393 
394  KOKKOS_INLINE_FUNCTION
395  void init (value_type& initialErrorCount) const {
396  initialErrorCount = 0;
397  }
398 
399  KOKKOS_INLINE_FUNCTION void
400  join (value_type& dstErrorCount,
401  const value_type& srcErrorCount) const
402  {
403  dstErrorCount += srcErrorCount;
404  }
405 
406  static void
407  pack (const DstView& dst,
408  const SrcView& src,
409  const IdxView& idx,
410  const size_type numCols,
411  const execution_space &space)
412  {
413  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
414  typedef typename IdxView::non_const_value_type index_type;
415 
416  size_t errorCount = 0;
417  Kokkos::parallel_reduce
418  ("Tpetra::MultiVector pack multicol const stride debug only",
419  range_type (space, 0, idx.size ()),
420  PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
421  errorCount);
422  if (errorCount != 0) {
423  // Go back and find the out-of-bounds entries in the index
424  // array. Performance doesn't matter since we are already in
425  // an error state, so we can do this sequentially, on host.
426  auto idx_h = Kokkos::create_mirror_view (idx);
427 
428  // DEEP_COPY REVIEW - NOT TESTED
429  Kokkos::deep_copy (idx_h, idx);
430 
431  std::vector<index_type> badIndices;
432  const size_type numInds = idx_h.extent (0);
433  for (size_type k = 0; k < numInds; ++k) {
434  if (idx_h(k) < static_cast<index_type> (0) ||
435  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
436  badIndices.push_back (idx_h(k));
437  }
438  }
439 
440  TEUCHOS_TEST_FOR_EXCEPTION
441  (errorCount != badIndices.size (), std::logic_error,
442  "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
443  << " != badIndices.size() = " << badIndices.size () << ". This sho"
444  "uld never happen. Please report this to the Tpetra developers.");
445 
446  std::ostringstream os;
447  os << "Tpetra::MultiVector multiple-column pack kernel had "
448  << badIndices.size () << " out-of bounds index/ices (errorCount = "
449  << errorCount << "): [";
450  for (size_t k = 0; k < badIndices.size (); ++k) {
451  os << badIndices[k];
452  if (k + 1 < badIndices.size ()) {
453  os << ", ";
454  }
455  }
456  os << "].";
457  throw std::runtime_error (os.str ());
458  }
459  }
460  };
461 
462 
463  template <typename DstView,
464  typename SrcView,
465  typename IdxView>
466  void
467  pack_array_multi_column (const DstView& dst,
468  const SrcView& src,
469  const IdxView& idx,
470  const size_t numCols,
471  const bool debug,
472  const typename DstView::execution_space &space)
473  {
474  static_assert (Kokkos::is_view<DstView>::value,
475  "DstView must be a Kokkos::View.");
476  static_assert (Kokkos::is_view<SrcView>::value,
477  "SrcView must be a Kokkos::View.");
478  static_assert (Kokkos::is_view<IdxView>::value,
479  "IdxView must be a Kokkos::View.");
480  static_assert (static_cast<int> (DstView::rank) == 1,
481  "DstView must be a rank-1 Kokkos::View.");
482  static_assert (static_cast<int> (SrcView::rank) == 2,
483  "SrcView must be a rank-2 Kokkos::View.");
484  static_assert (static_cast<int> (IdxView::rank) == 1,
485  "IdxView must be a rank-1 Kokkos::View.");
486 
487  using execution_space = typename DstView::execution_space;
488 
489  static_assert (Kokkos::SpaceAccessibility<execution_space,
490  typename DstView::memory_space>::accessible,
491  "DstView not accessible from execution space");
492  static_assert (Kokkos::SpaceAccessibility<execution_space,
493  typename SrcView::memory_space>::accessible,
494  "SrcView not accessible from execution space");
495  static_assert (Kokkos::SpaceAccessibility<execution_space,
496  typename IdxView::memory_space>::accessible,
497  "IdxView not accessible from execution space");
498 
499  if (debug) {
500  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
501  SrcView, IdxView> impl_type;
502  impl_type::pack (dst, src, idx, numCols, space);
503  }
504  else {
505  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
506  impl_type::pack (dst, src, idx, numCols, space);
507  }
508  }
509 
510  template <typename DstView,
511  typename SrcView,
512  typename IdxView>
513  void
514  pack_array_multi_column (const DstView& dst,
515  const SrcView& src,
516  const IdxView& idx,
517  const size_t numCols,
518  const bool debug = true) {
519  pack_array_multi_column(dst, src, idx, numCols, debug, typename DstView::execution_space());
520  }
521 
522  template <typename DstView, typename SrcView, typename IdxView,
523  typename ColView, typename Enabled = void>
524  struct PackArrayMultiColumnVariableStride {
525  using execution_space = typename DstView::execution_space;
526  typedef typename execution_space::size_type size_type;
527 
528  DstView dst;
529  SrcView src;
530  IdxView idx;
531  ColView col;
532  size_t numCols;
533 
534  PackArrayMultiColumnVariableStride (const DstView& dst_,
535  const SrcView& src_,
536  const IdxView& idx_,
537  const ColView& col_,
538  const size_t numCols_) :
539  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
540 
541  KOKKOS_INLINE_FUNCTION
542  void operator() (const size_type k) const {
543  const typename IdxView::value_type localRow = idx(k);
544  const size_t offset = k*numCols;
545  for (size_t j = 0; j < numCols; ++j) {
546  dst(offset + j) = src(localRow, col(j));
547  }
548  }
549 
550  static void pack(const DstView& dst,
551  const SrcView& src,
552  const IdxView& idx,
553  const ColView& col,
554  size_t numCols,
555  const execution_space &space) {
556  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
557  Kokkos::parallel_for
558  ("Tpetra::MultiVector pack multicol var stride",
559  range_type (space, 0, idx.size ()),
560  PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
561  }
562  };
563 
564  template <typename DstView,
565  typename SrcView,
566  typename IdxView,
567  typename ColView,
568  typename SizeType = typename DstView::execution_space::size_type,
569  typename Enabled = void>
570  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
571  public:
572  using size_type = SizeType;
573  using value_type = size_t;
574  using execution_space = typename DstView::execution_space;
575 
576  private:
577  DstView dst;
578  SrcView src;
579  IdxView idx;
580  ColView col;
581  size_type numCols;
582 
583  public:
584  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
585  const SrcView& src_,
586  const IdxView& idx_,
587  const ColView& col_,
588  const size_type numCols_) :
589  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
590 
591  KOKKOS_INLINE_FUNCTION void
592  operator() (const size_type k, value_type& lclErrorCount) const {
593  typedef typename IdxView::non_const_value_type row_index_type;
594  typedef typename ColView::non_const_value_type col_index_type;
595 
596  const row_index_type lclRow = idx(k);
597  if (lclRow < static_cast<row_index_type> (0) ||
598  lclRow >= static_cast<row_index_type> (src.extent (0))) {
599  ++lclErrorCount = 0;
600  }
601  else {
602  const size_type offset = k*numCols;
603  for (size_type j = 0; j < numCols; ++j) {
604  const col_index_type lclCol = col(j);
605  if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
606  ++lclErrorCount = 0;
607  }
608  else { // all indices are valid; do the assignment
609  dst(offset + j) = src(lclRow, lclCol);
610  }
611  }
612  }
613  }
614 
615  KOKKOS_INLINE_FUNCTION void
616  init (value_type& initialErrorCount) const {
617  initialErrorCount = 0;
618  }
619 
620  KOKKOS_INLINE_FUNCTION void
621  join (value_type& dstErrorCount,
622  const value_type& srcErrorCount) const
623  {
624  dstErrorCount += srcErrorCount;
625  }
626 
627  static void
628  pack (const DstView& dst,
629  const SrcView& src,
630  const IdxView& idx,
631  const ColView& col,
632  const size_type numCols,
633  const execution_space &space)
634  {
635  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
636  using row_index_type = typename IdxView::non_const_value_type;
637  using col_index_type = typename ColView::non_const_value_type;
638 
639  size_t errorCount = 0;
640  Kokkos::parallel_reduce
641  ("Tpetra::MultiVector pack multicol var stride debug only",
642  range_type (space, 0, idx.size ()),
643  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
644  col, numCols),
645  errorCount);
646  if (errorCount != 0) {
647  constexpr size_t maxNumBadIndicesToPrint = 100;
648 
649  std::ostringstream os; // for error reporting
650  os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
651  "found " << errorCount
652  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
653 
654  // Go back and find any out-of-bounds entries in the array of
655  // row indices. Performance doesn't matter since we are already
656  // in an error state, so we can do this sequentially, on host.
657  auto idx_h = Kokkos::create_mirror_view (idx);
658 
659  // DEEP_COPY REVIEW - NOT TESTED
660  Kokkos::deep_copy (idx_h, idx);
661 
662  std::vector<row_index_type> badRows;
663  const size_type numRowInds = idx_h.extent (0);
664  for (size_type k = 0; k < numRowInds; ++k) {
665  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
666  badRows.push_back (idx_h(k));
667  }
668  }
669 
670  if (badRows.size () != 0) {
671  os << badRows.size () << " out-of-bounds row ind"
672  << (badRows.size () != size_t (1) ? "ices" : "ex");
673  if (badRows.size () <= maxNumBadIndicesToPrint) {
674  os << ": [";
675  for (size_t k = 0; k < badRows.size (); ++k) {
676  os << badRows[k];
677  if (k + 1 < badRows.size ()) {
678  os << ", ";
679  }
680  }
681  os << "]. ";
682  }
683  else {
684  os << ". ";
685  }
686  }
687  else {
688  os << "No out-of-bounds row indices. ";
689  }
690 
691  // Go back and find any out-of-bounds entries in the array
692  // of column indices.
693  auto col_h = Kokkos::create_mirror_view (col);
694 
695  // DEEP_COPY REVIEW - NOT TESTED
696  Kokkos::deep_copy (col_h, col);
697 
698  std::vector<col_index_type> badCols;
699  const size_type numColInds = col_h.extent (0);
700  for (size_type k = 0; k < numColInds; ++k) {
701  if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
702  badCols.push_back (col_h(k));
703  }
704  }
705 
706  if (badCols.size () != 0) {
707  os << badCols.size () << " out-of-bounds column ind"
708  << (badCols.size () != size_t (1) ? "ices" : "ex");
709  if (badCols.size () <= maxNumBadIndicesToPrint) {
710  os << ": [";
711  for (size_t k = 0; k < badCols.size (); ++k) {
712  os << badCols[k];
713  if (k + 1 < badCols.size ()) {
714  os << ", ";
715  }
716  }
717  os << "]. ";
718  }
719  else {
720  os << ". ";
721  }
722  }
723  else {
724  os << "No out-of-bounds column indices. ";
725  }
726 
727  TEUCHOS_TEST_FOR_EXCEPTION
728  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
729  std::logic_error, "Tpetra::MultiVector variable stride pack "
730  "kernel reports errorCount=" << errorCount << ", but we failed "
731  "to find any bad rows or columns. This should never happen. "
732  "Please report this to the Tpetra developers.");
733 
734  throw std::runtime_error (os.str ());
735  } // hasErr
736  }
737  };
738 
739  template <typename DstView,
740  typename SrcView,
741  typename IdxView,
742  typename ColView>
743  void
744  pack_array_multi_column_variable_stride (const DstView& dst,
745  const SrcView& src,
746  const IdxView& idx,
747  const ColView& col,
748  const size_t numCols,
749  const bool debug,
750  const typename DstView::execution_space &space)
751  {
752  static_assert (Kokkos::is_view<DstView>::value,
753  "DstView must be a Kokkos::View.");
754  static_assert (Kokkos::is_view<SrcView>::value,
755  "SrcView must be a Kokkos::View.");
756  static_assert (Kokkos::is_view<IdxView>::value,
757  "IdxView must be a Kokkos::View.");
758  static_assert (Kokkos::is_view<ColView>::value,
759  "ColView must be a Kokkos::View.");
760  static_assert (static_cast<int> (DstView::rank) == 1,
761  "DstView must be a rank-1 Kokkos::View.");
762  static_assert (static_cast<int> (SrcView::rank) == 2,
763  "SrcView must be a rank-2 Kokkos::View.");
764  static_assert (static_cast<int> (IdxView::rank) == 1,
765  "IdxView must be a rank-1 Kokkos::View.");
766  static_assert (static_cast<int> (ColView::rank) == 1,
767  "ColView must be a rank-1 Kokkos::View.");
768 
769  using execution_space = typename DstView::execution_space;
770 
771  static_assert (Kokkos::SpaceAccessibility<execution_space,
772  typename DstView::memory_space>::accessible,
773  "DstView not accessible from execution space");
774  static_assert (Kokkos::SpaceAccessibility<execution_space,
775  typename SrcView::memory_space>::accessible,
776  "SrcView not accessible from execution space");
777  static_assert (Kokkos::SpaceAccessibility<execution_space,
778  typename IdxView::memory_space>::accessible,
779  "IdxView not accessible from execution space");
780 
781  if (debug) {
782  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
783  SrcView, IdxView, ColView> impl_type;
784  impl_type::pack (dst, src, idx, col, numCols, space);
785  }
786  else {
787  typedef PackArrayMultiColumnVariableStride<DstView,
788  SrcView, IdxView, ColView> impl_type;
789  impl_type::pack (dst, src, idx, col, numCols, space);
790  }
791  }
792 
793  template <typename DstView,
794  typename SrcView,
795  typename IdxView,
796  typename ColView>
797  void
798  pack_array_multi_column_variable_stride (const DstView& dst,
799  const SrcView& src,
800  const IdxView& idx,
801  const ColView& col,
802  const size_t numCols,
803  const bool debug = true) {
804  pack_array_multi_column_variable_stride(dst, src, idx, col, numCols, debug,
805  typename DstView::execution_space());
806  }
807 
808  // Tag types to indicate whether to use atomic updates in the
809  // various CombineMode "Op"s.
810  struct atomic_tag {};
811  struct nonatomic_tag {};
812 
813  struct AddOp {
814  template<class SC>
815  KOKKOS_INLINE_FUNCTION
816  void operator() (atomic_tag, SC& dest, const SC& src) const {
817  Kokkos::atomic_add (&dest, src);
818  }
819 
820  template<class SC>
821  KOKKOS_INLINE_FUNCTION
822  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
823  dest += src;
824  }
825  };
826 
827  struct InsertOp {
828  // There's no point to using Kokkos::atomic_assign for the REPLACE
829  // or INSERT CombineModes, since this is not a well-defined
830  // reduction for MultiVector anyway. See GitHub Issue #4417
831  // (which this fixes).
832  template<class SC>
833  KOKKOS_INLINE_FUNCTION
834  void operator() (atomic_tag, SC& dest, const SC& src) const {
835  dest = src;
836  }
837 
838  template<class SC>
839  KOKKOS_INLINE_FUNCTION
840  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
841  dest = src;
842  }
843  };
844 
845  struct AbsMaxOp {
846  template <class Scalar>
847  struct AbsMaxHelper{
848  Scalar value;
849 
850  KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
851  auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
852  auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
853  value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
854  return *this;
855  }
856 
857  KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
858  AbsMaxHelper ret = *this;
859  ret += rhs;
860  return ret;
861  }
862  };
863 
864  template <typename SC>
865  KOKKOS_INLINE_FUNCTION
866  void operator() (atomic_tag, SC& dst, const SC& src) const {
867  Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
868  }
869 
870  template <typename SC>
871  KOKKOS_INLINE_FUNCTION
872  void operator() (nonatomic_tag, SC& dst, const SC& src) const {
873  auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
874  auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
875  dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
876  }
877  };
878 
879  template <typename ExecutionSpace,
880  typename DstView,
881  typename SrcView,
882  typename IdxView,
883  typename Op,
884  typename Enabled = void>
885  class UnpackArrayMultiColumn {
886  private:
887  static_assert (Kokkos::is_view<DstView>::value,
888  "DstView must be a Kokkos::View.");
889  static_assert (Kokkos::is_view<SrcView>::value,
890  "SrcView must be a Kokkos::View.");
891  static_assert (Kokkos::is_view<IdxView>::value,
892  "IdxView must be a Kokkos::View.");
893  static_assert (static_cast<int> (DstView::rank) == 2,
894  "DstView must be a rank-2 Kokkos::View.");
895  static_assert (static_cast<int> (SrcView::rank) == 1,
896  "SrcView must be a rank-1 Kokkos::View.");
897  static_assert (static_cast<int> (IdxView::rank) == 1,
898  "IdxView must be a rank-1 Kokkos::View.");
899 
900  public:
901  typedef typename ExecutionSpace::execution_space execution_space;
902  typedef typename execution_space::size_type size_type;
903 
904  private:
905  DstView dst;
906  SrcView src;
907  IdxView idx;
908  Op op;
909  size_t numCols;
910 
911  public:
912  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
913  const DstView& dst_,
914  const SrcView& src_,
915  const IdxView& idx_,
916  const Op& op_,
917  const size_t numCols_) :
918  dst (dst_),
919  src (src_),
920  idx (idx_),
921  op (op_),
922  numCols (numCols_)
923  {}
924 
925  template<class TagType>
926  KOKKOS_INLINE_FUNCTION void
927  operator() (TagType tag, const size_type k) const
928  {
929  static_assert
930  (std::is_same<TagType, atomic_tag>::value ||
931  std::is_same<TagType, nonatomic_tag>::value,
932  "TagType must be atomic_tag or nonatomic_tag.");
933 
934  const typename IdxView::value_type localRow = idx(k);
935  const size_t offset = k*numCols;
936  for (size_t j = 0; j < numCols; ++j) {
937  op (tag, dst(localRow, j), src(offset+j));
938  }
939  }
940 
941  static void
942  unpack (const ExecutionSpace& execSpace,
943  const DstView& dst,
944  const SrcView& src,
945  const IdxView& idx,
946  const Op& op,
947  const size_t numCols,
948  const bool use_atomic_updates)
949  {
950  if (use_atomic_updates) {
951  using range_type =
952  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
953  Kokkos::parallel_for
954  ("Tpetra::MultiVector unpack const stride atomic",
955  range_type (0, idx.size ()),
956  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
957  }
958  else {
959  using range_type =
960  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
961  Kokkos::parallel_for
962  ("Tpetra::MultiVector unpack const stride nonatomic",
963  range_type (0, idx.size ()),
964  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
965  }
966  }
967  };
968 
969  template <typename ExecutionSpace,
970  typename DstView,
971  typename SrcView,
972  typename IdxView,
973  typename Op,
974  typename SizeType = typename ExecutionSpace::execution_space::size_type,
975  typename Enabled = void>
976  class UnpackArrayMultiColumnWithBoundsCheck {
977  private:
978  static_assert (Kokkos::is_view<DstView>::value,
979  "DstView must be a Kokkos::View.");
980  static_assert (Kokkos::is_view<SrcView>::value,
981  "SrcView must be a Kokkos::View.");
982  static_assert (Kokkos::is_view<IdxView>::value,
983  "IdxView must be a Kokkos::View.");
984  static_assert (static_cast<int> (DstView::rank) == 2,
985  "DstView must be a rank-2 Kokkos::View.");
986  static_assert (static_cast<int> (SrcView::rank) == 1,
987  "SrcView must be a rank-1 Kokkos::View.");
988  static_assert (static_cast<int> (IdxView::rank) == 1,
989  "IdxView must be a rank-1 Kokkos::View.");
990  static_assert (std::is_integral<SizeType>::value,
991  "SizeType must be a built-in integer type.");
992 
993  public:
994  using execution_space = typename ExecutionSpace::execution_space;
995  using size_type = SizeType;
996  using value_type = size_t;
997 
998  private:
999  DstView dst;
1000  SrcView src;
1001  IdxView idx;
1002  Op op;
1003  size_type numCols;
1004 
1005  public:
1006  UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
1007  const DstView& dst_,
1008  const SrcView& src_,
1009  const IdxView& idx_,
1010  const Op& op_,
1011  const size_type numCols_) :
1012  dst (dst_),
1013  src (src_),
1014  idx (idx_),
1015  op (op_),
1016  numCols (numCols_)
1017  {}
1018 
1019  template<class TagType>
1020  KOKKOS_INLINE_FUNCTION void
1021  operator() (TagType tag,
1022  const size_type k,
1023  size_t& lclErrCount) const
1024  {
1025  static_assert
1026  (std::is_same<TagType, atomic_tag>::value ||
1027  std::is_same<TagType, nonatomic_tag>::value,
1028  "TagType must be atomic_tag or nonatomic_tag.");
1029  using index_type = typename IdxView::non_const_value_type;
1030 
1031  const index_type lclRow = idx(k);
1032  if (lclRow < static_cast<index_type> (0) ||
1033  lclRow >= static_cast<index_type> (dst.extent (0))) {
1034  ++lclErrCount;
1035  }
1036  else {
1037  const size_type offset = k*numCols;
1038  for (size_type j = 0; j < numCols; ++j) {
1039  op (tag, dst(lclRow,j), src(offset+j));
1040  }
1041  }
1042  }
1043 
1044  template<class TagType>
1045  KOKKOS_INLINE_FUNCTION void
1046  init (TagType, size_t& initialErrorCount) const {
1047  initialErrorCount = 0;
1048  }
1049 
1050  template<class TagType>
1051  KOKKOS_INLINE_FUNCTION void
1052  join (TagType,
1053  size_t& dstErrorCount,
1054  const size_t& srcErrorCount) const
1055  {
1056  dstErrorCount += srcErrorCount;
1057  }
1058 
1059  static void
1060  unpack (const ExecutionSpace& execSpace,
1061  const DstView& dst,
1062  const SrcView& src,
1063  const IdxView& idx,
1064  const Op& op,
1065  const size_type numCols,
1066  const bool use_atomic_updates)
1067  {
1068  using index_type = typename IdxView::non_const_value_type;
1069 
1070  size_t errorCount = 0;
1071  if (use_atomic_updates) {
1072  using range_type =
1073  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1074  Kokkos::parallel_reduce
1075  ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1076  range_type (0, idx.size ()),
1077  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1078  idx, op, numCols),
1079  errorCount);
1080  }
1081  else {
1082  using range_type =
1083  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1084  Kokkos::parallel_reduce
1085  ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1086  range_type (0, idx.size ()),
1087  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1088  idx, op, numCols),
1089  errorCount);
1090  }
1091 
1092  if (errorCount != 0) {
1093  // Go back and find the out-of-bounds entries in the index
1094  // array. Performance doesn't matter since we are already in
1095  // an error state, so we can do this sequentially, on host.
1096  auto idx_h = Kokkos::create_mirror_view (idx);
1097 
1098  // DEEP_COPY REVIEW - NOT TESTED
1099  Kokkos::deep_copy (idx_h, idx);
1100 
1101  std::vector<index_type> badIndices;
1102  const size_type numInds = idx_h.extent (0);
1103  for (size_type k = 0; k < numInds; ++k) {
1104  if (idx_h(k) < static_cast<index_type> (0) ||
1105  idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1106  badIndices.push_back (idx_h(k));
1107  }
1108  }
1109 
1110  if (errorCount != badIndices.size ()) {
1111  std::ostringstream os;
1112  os << "MultiVector unpack kernel: errorCount = " << errorCount
1113  << " != badIndices.size() = " << badIndices.size ()
1114  << ". This should never happen. "
1115  "Please report this to the Tpetra developers.";
1116  throw std::logic_error (os.str ());
1117  }
1118 
1119  std::ostringstream os;
1120  os << "MultiVector unpack kernel had " << badIndices.size ()
1121  << " out-of bounds index/ices. Here they are: [";
1122  for (size_t k = 0; k < badIndices.size (); ++k) {
1123  os << badIndices[k];
1124  if (k + 1 < badIndices.size ()) {
1125  os << ", ";
1126  }
1127  }
1128  os << "].";
1129  throw std::runtime_error (os.str ());
1130  }
1131  }
1132  };
1133 
1134  template <typename ExecutionSpace,
1135  typename DstView,
1136  typename SrcView,
1137  typename IdxView,
1138  typename Op>
1139  void
1140  unpack_array_multi_column (const ExecutionSpace& execSpace,
1141  const DstView& dst,
1142  const SrcView& src,
1143  const IdxView& idx,
1144  const Op& op,
1145  const size_t numCols,
1146  const bool use_atomic_updates,
1147  const bool debug)
1148  {
1149  static_assert (Kokkos::is_view<DstView>::value,
1150  "DstView must be a Kokkos::View.");
1151  static_assert (Kokkos::is_view<SrcView>::value,
1152  "SrcView must be a Kokkos::View.");
1153  static_assert (Kokkos::is_view<IdxView>::value,
1154  "IdxView must be a Kokkos::View.");
1155  static_assert (static_cast<int> (DstView::rank) == 2,
1156  "DstView must be a rank-2 Kokkos::View.");
1157  static_assert (static_cast<int> (SrcView::rank) == 1,
1158  "SrcView must be a rank-1 Kokkos::View.");
1159  static_assert (static_cast<int> (IdxView::rank) == 1,
1160  "IdxView must be a rank-1 Kokkos::View.");
1161 
1162  if (debug) {
1163  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1164  DstView, SrcView, IdxView, Op> impl_type;
1165  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1166  use_atomic_updates);
1167  }
1168  else {
1169  typedef UnpackArrayMultiColumn<ExecutionSpace,
1170  DstView, SrcView, IdxView, Op> impl_type;
1171  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1172  use_atomic_updates);
1173  }
1174  }
1175 
1176  template <typename ExecutionSpace,
1177  typename DstView,
1178  typename SrcView,
1179  typename IdxView,
1180  typename ColView,
1181  typename Op,
1182  typename Enabled = void>
1183  class UnpackArrayMultiColumnVariableStride {
1184  private:
1185  static_assert (Kokkos::is_view<DstView>::value,
1186  "DstView must be a Kokkos::View.");
1187  static_assert (Kokkos::is_view<SrcView>::value,
1188  "SrcView must be a Kokkos::View.");
1189  static_assert (Kokkos::is_view<IdxView>::value,
1190  "IdxView must be a Kokkos::View.");
1191  static_assert (Kokkos::is_view<ColView>::value,
1192  "ColView must be a Kokkos::View.");
1193  static_assert (static_cast<int> (DstView::rank) == 2,
1194  "DstView must be a rank-2 Kokkos::View.");
1195  static_assert (static_cast<int> (SrcView::rank) == 1,
1196  "SrcView must be a rank-1 Kokkos::View.");
1197  static_assert (static_cast<int> (IdxView::rank) == 1,
1198  "IdxView must be a rank-1 Kokkos::View.");
1199  static_assert (static_cast<int> (ColView::rank) == 1,
1200  "ColView must be a rank-1 Kokkos::View.");
1201 
1202  public:
1203  using execution_space = typename ExecutionSpace::execution_space;
1204  using size_type = typename execution_space::size_type;
1205 
1206  private:
1207  DstView dst;
1208  SrcView src;
1209  IdxView idx;
1210  ColView col;
1211  Op op;
1212  size_t numCols;
1213 
1214  public:
1215  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1216  const DstView& dst_,
1217  const SrcView& src_,
1218  const IdxView& idx_,
1219  const ColView& col_,
1220  const Op& op_,
1221  const size_t numCols_) :
1222  dst (dst_),
1223  src (src_),
1224  idx (idx_),
1225  col (col_),
1226  op (op_),
1227  numCols (numCols_)
1228  {}
1229 
1230  template<class TagType>
1231  KOKKOS_INLINE_FUNCTION void
1232  operator() (TagType tag, const size_type k) const
1233  {
1234  static_assert
1235  (std::is_same<TagType, atomic_tag>::value ||
1236  std::is_same<TagType, nonatomic_tag>::value,
1237  "TagType must be atomic_tag or nonatomic_tag.");
1238 
1239  const typename IdxView::value_type localRow = idx(k);
1240  const size_t offset = k*numCols;
1241  for (size_t j = 0; j < numCols; ++j) {
1242  op (tag, dst(localRow, col(j)), src(offset+j));
1243  }
1244  }
1245 
1246  static void
1247  unpack (const ExecutionSpace& execSpace,
1248  const DstView& dst,
1249  const SrcView& src,
1250  const IdxView& idx,
1251  const ColView& col,
1252  const Op& op,
1253  const size_t numCols,
1254  const bool use_atomic_updates)
1255  {
1256  if (use_atomic_updates) {
1257  using range_type =
1258  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1259  Kokkos::parallel_for
1260  ("Tpetra::MultiVector unpack var stride atomic",
1261  range_type (0, idx.size ()),
1262  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1263  idx, col, op, numCols));
1264  }
1265  else {
1266  using range_type =
1267  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1268  Kokkos::parallel_for
1269  ("Tpetra::MultiVector unpack var stride nonatomic",
1270  range_type (0, idx.size ()),
1271  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1272  idx, col, op, numCols));
1273  }
1274  }
1275  };
1276 
1277  template <typename ExecutionSpace,
1278  typename DstView,
1279  typename SrcView,
1280  typename IdxView,
1281  typename ColView,
1282  typename Op,
1283  typename SizeType = typename ExecutionSpace::execution_space::size_type,
1284  typename Enabled = void>
1285  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1286  private:
1287  static_assert (Kokkos::is_view<DstView>::value,
1288  "DstView must be a Kokkos::View.");
1289  static_assert (Kokkos::is_view<SrcView>::value,
1290  "SrcView must be a Kokkos::View.");
1291  static_assert (Kokkos::is_view<IdxView>::value,
1292  "IdxView must be a Kokkos::View.");
1293  static_assert (Kokkos::is_view<ColView>::value,
1294  "ColView must be a Kokkos::View.");
1295  static_assert (static_cast<int> (DstView::rank) == 2,
1296  "DstView must be a rank-2 Kokkos::View.");
1297  static_assert (static_cast<int> (SrcView::rank) == 1,
1298  "SrcView must be a rank-1 Kokkos::View.");
1299  static_assert (static_cast<int> (IdxView::rank) == 1,
1300  "IdxView must be a rank-1 Kokkos::View.");
1301  static_assert (static_cast<int> (ColView::rank) == 1,
1302  "ColView must be a rank-1 Kokkos::View.");
1303  static_assert (std::is_integral<SizeType>::value,
1304  "SizeType must be a built-in integer type.");
1305 
1306  public:
1307  using execution_space = typename ExecutionSpace::execution_space;
1308  using size_type = SizeType;
1309  using value_type = size_t;
1310 
1311  private:
1312  DstView dst;
1313  SrcView src;
1314  IdxView idx;
1315  ColView col;
1316  Op op;
1317  size_type numCols;
1318 
1319  public:
1320  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1321  (const ExecutionSpace& /* execSpace */,
1322  const DstView& dst_,
1323  const SrcView& src_,
1324  const IdxView& idx_,
1325  const ColView& col_,
1326  const Op& op_,
1327  const size_t numCols_) :
1328  dst (dst_),
1329  src (src_),
1330  idx (idx_),
1331  col (col_),
1332  op (op_),
1333  numCols (numCols_)
1334  {}
1335 
1336  template<class TagType>
1337  KOKKOS_INLINE_FUNCTION void
1338  operator() (TagType tag,
1339  const size_type k,
1340  value_type& lclErrorCount) const
1341  {
1342  static_assert
1343  (std::is_same<TagType, atomic_tag>::value ||
1344  std::is_same<TagType, nonatomic_tag>::value,
1345  "TagType must be atomic_tag or nonatomic_tag.");
1346  using row_index_type = typename IdxView::non_const_value_type;
1347  using col_index_type = typename ColView::non_const_value_type;
1348 
1349  const row_index_type lclRow = idx(k);
1350  if (lclRow < static_cast<row_index_type> (0) ||
1351  lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1352  ++lclErrorCount;
1353  }
1354  else {
1355  const size_type offset = k * numCols;
1356  for (size_type j = 0; j < numCols; ++j) {
1357  const col_index_type lclCol = col(j);
1358  if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1359  ++lclErrorCount;
1360  }
1361  else { // all indices are valid; apply the op
1362  op (tag, dst(lclRow, col(j)), src(offset+j));
1363  }
1364  }
1365  }
1366  }
1367 
1368  KOKKOS_INLINE_FUNCTION void
1369  init (value_type& initialErrorCount) const {
1370  initialErrorCount = 0;
1371  }
1372 
1373  KOKKOS_INLINE_FUNCTION void
1374  join (value_type& dstErrorCount,
1375  const value_type& srcErrorCount) const
1376  {
1377  dstErrorCount += srcErrorCount;
1378  }
1379 
1380  static void
1381  unpack (const ExecutionSpace& execSpace,
1382  const DstView& dst,
1383  const SrcView& src,
1384  const IdxView& idx,
1385  const ColView& col,
1386  const Op& op,
1387  const size_type numCols,
1388  const bool use_atomic_updates)
1389  {
1390  using row_index_type = typename IdxView::non_const_value_type;
1391  using col_index_type = typename ColView::non_const_value_type;
1392 
1393  size_t errorCount = 0;
1394  if (use_atomic_updates) {
1395  using range_type =
1396  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1397  Kokkos::parallel_reduce
1398  ("Tpetra::MultiVector unpack var stride atomic debug only",
1399  range_type (0, idx.size ()),
1400  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1401  (execSpace, dst, src, idx, col, op, numCols),
1402  errorCount);
1403  }
1404  else {
1405  using range_type =
1406  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1407  Kokkos::parallel_reduce
1408  ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1409  range_type (0, idx.size ()),
1410  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1411  (execSpace, dst, src, idx, col, op, numCols),
1412  errorCount);
1413  }
1414 
1415  if (errorCount != 0) {
1416  constexpr size_t maxNumBadIndicesToPrint = 100;
1417 
1418  std::ostringstream os; // for error reporting
1419  os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1420  "found " << errorCount
1421  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1422 
1423  // Go back and find any out-of-bounds entries in the array of
1424  // row indices. Performance doesn't matter since we are
1425  // already in an error state, so we can do this sequentially,
1426  // on host.
1427  auto idx_h = Kokkos::create_mirror_view (idx);
1428 
1429  // DEEP_COPY REVIEW - NOT TESTED
1430  Kokkos::deep_copy (idx_h, idx);
1431 
1432  std::vector<row_index_type> badRows;
1433  const size_type numRowInds = idx_h.extent (0);
1434  for (size_type k = 0; k < numRowInds; ++k) {
1435  if (idx_h(k) < static_cast<row_index_type> (0) ||
1436  idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1437  badRows.push_back (idx_h(k));
1438  }
1439  }
1440 
1441  if (badRows.size () != 0) {
1442  os << badRows.size () << " out-of-bounds row ind"
1443  << (badRows.size () != size_t (1) ? "ices" : "ex");
1444  if (badRows.size () <= maxNumBadIndicesToPrint) {
1445  os << ": [";
1446  for (size_t k = 0; k < badRows.size (); ++k) {
1447  os << badRows[k];
1448  if (k + 1 < badRows.size ()) {
1449  os << ", ";
1450  }
1451  }
1452  os << "]. ";
1453  }
1454  else {
1455  os << ". ";
1456  }
1457  }
1458  else {
1459  os << "No out-of-bounds row indices. ";
1460  }
1461 
1462  // Go back and find any out-of-bounds entries in the array
1463  // of column indices.
1464  auto col_h = Kokkos::create_mirror_view (col);
1465 
1466  // DEEP_COPY REVIEW - NOT TESTED
1467  Kokkos::deep_copy (col_h, col);
1468 
1469  std::vector<col_index_type> badCols;
1470  const size_type numColInds = col_h.extent (0);
1471  for (size_type k = 0; k < numColInds; ++k) {
1472  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1473  badCols.push_back (col_h(k));
1474  }
1475  }
1476 
1477  if (badCols.size () != 0) {
1478  os << badCols.size () << " out-of-bounds column ind"
1479  << (badCols.size () != size_t (1) ? "ices" : "ex");
1480  if (badCols.size () <= maxNumBadIndicesToPrint) {
1481  for (size_t k = 0; k < badCols.size (); ++k) {
1482  os << ": [";
1483  os << badCols[k];
1484  if (k + 1 < badCols.size ()) {
1485  os << ", ";
1486  }
1487  }
1488  os << "]. ";
1489  }
1490  else {
1491  os << ". ";
1492  }
1493  }
1494  else {
1495  os << "No out-of-bounds column indices. ";
1496  }
1497 
1498  TEUCHOS_TEST_FOR_EXCEPTION
1499  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1500  std::logic_error, "Tpetra::MultiVector variable stride unpack "
1501  "kernel reports errorCount=" << errorCount << ", but we failed "
1502  "to find any bad rows or columns. This should never happen. "
1503  "Please report this to the Tpetra developers.");
1504 
1505  throw std::runtime_error (os.str ());
1506  } // hasErr
1507  }
1508  };
1509 
1510  template <typename ExecutionSpace,
1511  typename DstView,
1512  typename SrcView,
1513  typename IdxView,
1514  typename ColView,
1515  typename Op>
1516  void
1517  unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1518  const DstView& dst,
1519  const SrcView& src,
1520  const IdxView& idx,
1521  const ColView& col,
1522  const Op& op,
1523  const size_t numCols,
1524  const bool use_atomic_updates,
1525  const bool debug)
1526  {
1527  static_assert (Kokkos::is_view<DstView>::value,
1528  "DstView must be a Kokkos::View.");
1529  static_assert (Kokkos::is_view<SrcView>::value,
1530  "SrcView must be a Kokkos::View.");
1531  static_assert (Kokkos::is_view<IdxView>::value,
1532  "IdxView must be a Kokkos::View.");
1533  static_assert (Kokkos::is_view<ColView>::value,
1534  "ColView must be a Kokkos::View.");
1535  static_assert (static_cast<int> (DstView::rank) == 2,
1536  "DstView must be a rank-2 Kokkos::View.");
1537  static_assert (static_cast<int> (SrcView::rank) == 1,
1538  "SrcView must be a rank-1 Kokkos::View.");
1539  static_assert (static_cast<int> (IdxView::rank) == 1,
1540  "IdxView must be a rank-1 Kokkos::View.");
1541  static_assert (static_cast<int> (ColView::rank) == 1,
1542  "ColView must be a rank-1 Kokkos::View.");
1543 
1544  if (debug) {
1545  using impl_type =
1546  UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1547  DstView, SrcView, IdxView, ColView, Op>;
1548  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1549  use_atomic_updates);
1550  }
1551  else {
1552  using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1553  DstView, SrcView, IdxView, ColView, Op>;
1554  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1555  use_atomic_updates);
1556  }
1557  }
1558 
1559  template <typename DstView, typename SrcView,
1560  typename DstIdxView, typename SrcIdxView, typename Op,
1561  typename Enabled = void>
1562  struct PermuteArrayMultiColumn {
1563  using size_type = typename DstView::size_type;
1564 
1565  DstView dst;
1566  SrcView src;
1567  DstIdxView dst_idx;
1568  SrcIdxView src_idx;
1569  size_t numCols;
1570  Op op;
1571 
1572  PermuteArrayMultiColumn (const DstView& dst_,
1573  const SrcView& src_,
1574  const DstIdxView& dst_idx_,
1575  const SrcIdxView& src_idx_,
1576  const size_t numCols_,
1577  const Op& op_) :
1578  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1579  numCols(numCols_), op(op_) {}
1580 
1581  KOKKOS_INLINE_FUNCTION void
1582  operator() (const size_type k) const {
1583  const typename DstIdxView::value_type toRow = dst_idx(k);
1584  const typename SrcIdxView::value_type fromRow = src_idx(k);
1585  nonatomic_tag tag; // permute does not need atomics
1586  for (size_t j = 0; j < numCols; ++j) {
1587  op(tag, dst(toRow, j), src(fromRow, j));
1588  }
1589  }
1590 
1591  template <typename ExecutionSpace>
1592  static void
1593  permute (const ExecutionSpace &space,
1594  const DstView& dst,
1595  const SrcView& src,
1596  const DstIdxView& dst_idx,
1597  const SrcIdxView& src_idx,
1598  const size_t numCols,
1599  const Op& op)
1600  {
1601  using range_type =
1602  Kokkos::RangePolicy<ExecutionSpace, size_type>;
1603  // permute does not need atomics for Op
1604  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1605  Kokkos::parallel_for
1606  ("Tpetra::MultiVector permute multicol const stride",
1607  range_type (space, 0, n),
1608  PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1609  }
1610  };
1611 
1612 // clang-format on
1613 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1614 // SrcView::rank == 2
1615 template <typename ExecutionSpace, typename DstView, typename SrcView,
1616  typename DstIdxView, typename SrcIdxView, typename Op>
1617 void permute_array_multi_column(const ExecutionSpace &space, const DstView &dst,
1618  const SrcView &src, const DstIdxView &dst_idx,
1619  const SrcIdxView &src_idx, size_t numCols,
1620  const Op &op) {
1621  PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView,
1622  Op>::permute(space, dst, src, dst_idx, src_idx,
1623  numCols, op);
1624 }
1625 // clang-format off
1626 
1627  // To do: Add enable_if<> restrictions on DstView::rank == 1,
1628  // SrcView::rank == 2
1629  template <typename DstView, typename SrcView,
1630  typename DstIdxView, typename SrcIdxView, typename Op>
1631  void permute_array_multi_column(const DstView& dst,
1632  const SrcView& src,
1633  const DstIdxView& dst_idx,
1634  const SrcIdxView& src_idx,
1635  size_t numCols,
1636  const Op& op) {
1637  using execution_space = typename DstView::execution_space;
1638  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1639  execution_space(), dst, src, dst_idx, src_idx, numCols, op);
1640  }
1641 
1642  template <typename DstView, typename SrcView,
1643  typename DstIdxView, typename SrcIdxView,
1644  typename DstColView, typename SrcColView, typename Op,
1645  typename Enabled = void>
1646  struct PermuteArrayMultiColumnVariableStride {
1647  using size_type = typename DstView::size_type;
1648 
1649  DstView dst;
1650  SrcView src;
1651  DstIdxView dst_idx;
1652  SrcIdxView src_idx;
1653  DstColView dst_col;
1654  SrcColView src_col;
1655  size_t numCols;
1656  Op op;
1657 
1658  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1659  const SrcView& src_,
1660  const DstIdxView& dst_idx_,
1661  const SrcIdxView& src_idx_,
1662  const DstColView& dst_col_,
1663  const SrcColView& src_col_,
1664  const size_t numCols_,
1665  const Op& op_) :
1666  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1667  dst_col(dst_col_), src_col(src_col_),
1668  numCols(numCols_), op(op_) {}
1669 
1670  KOKKOS_INLINE_FUNCTION void
1671  operator() (const size_type k) const {
1672  const typename DstIdxView::value_type toRow = dst_idx(k);
1673  const typename SrcIdxView::value_type fromRow = src_idx(k);
1674  const nonatomic_tag tag; // permute does not need atomics
1675  for (size_t j = 0; j < numCols; ++j) {
1676  op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1677  }
1678  }
1679 
1680  template <typename ExecutionSpace>
1681  static void
1682  permute ( const ExecutionSpace &space,
1683  const DstView& dst,
1684  const SrcView& src,
1685  const DstIdxView& dst_idx,
1686  const SrcIdxView& src_idx,
1687  const DstColView& dst_col,
1688  const SrcColView& src_col,
1689  const size_t numCols,
1690  const Op& op)
1691  {
1692 
1693  static_assert(Kokkos::SpaceAccessibility<
1694  ExecutionSpace, typename DstView::memory_space>::accessible,
1695  "ExecutionSpace must be able to access DstView");
1696 
1697  using range_type = Kokkos::RangePolicy<ExecutionSpace, size_type>;
1698  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1699  Kokkos::parallel_for
1700  ("Tpetra::MultiVector permute multicol var stride",
1701  range_type (space, 0, n),
1702  PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1703  dst_col, src_col, numCols, op));
1704  }
1705  };
1706 
1707 // clang-format on
1708 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1709 // SrcView::rank == 2
1710 template <typename ExecutionSpace, typename DstView, typename SrcView,
1711  typename DstIdxView, typename SrcIdxView, typename DstColView,
1712  typename SrcColView, typename Op>
1713 void permute_array_multi_column_variable_stride(
1714  const ExecutionSpace &space, const DstView &dst, const SrcView &src,
1715  const DstIdxView &dst_idx, const SrcIdxView &src_idx,
1716  const DstColView &dst_col, const SrcColView &src_col, size_t numCols,
1717  const Op &op) {
1718  PermuteArrayMultiColumnVariableStride<DstView, SrcView, DstIdxView,
1719  SrcIdxView, DstColView, SrcColView,
1720  Op>::permute(space, dst, src, dst_idx,
1721  src_idx, dst_col, src_col,
1722  numCols, op);
1723 }
1724 // clang-format off
1725 
1726  // To do: Add enable_if<> restrictions on DstView::rank == 1,
1727  // SrcView::rank == 2
1728  template <typename DstView, typename SrcView,
1729  typename DstIdxView, typename SrcIdxView,
1730  typename DstColView, typename SrcColView, typename Op>
1731  void permute_array_multi_column_variable_stride(const DstView& dst,
1732  const SrcView& src,
1733  const DstIdxView& dst_idx,
1734  const SrcIdxView& src_idx,
1735  const DstColView& dst_col,
1736  const SrcColView& src_col,
1737  size_t numCols,
1738  const Op& op) {
1739  using execution_space = typename DstView::execution_space;
1740  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1741  DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1742  execution_space(), dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1743  }
1744 
1745 } // Details namespace
1746 } // KokkosRefactor namespace
1747 } // Tpetra namespace
1748 
1749 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...