Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
51 
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 #include "impl/Kokkos_Atomic_Generic.hpp"
55 #include <sstream>
56 #include <stdexcept>
57 
58 namespace Tpetra {
59 namespace KokkosRefactor {
60 namespace Details {
61 
67 namespace Impl {
68 
75 template<class IntegerType,
76  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
77 struct OutOfBounds {
78  static KOKKOS_INLINE_FUNCTION bool
79  test (const IntegerType x,
80  const IntegerType exclusiveUpperBound);
81 };
82 
83 // Partial specialization for the case where IntegerType IS signed.
84 template<class IntegerType>
85 struct OutOfBounds<IntegerType, true> {
86  static KOKKOS_INLINE_FUNCTION bool
87  test (const IntegerType x,
88  const IntegerType exclusiveUpperBound)
89  {
90  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
91  }
92 };
93 
94 // Partial specialization for the case where IntegerType is NOT signed.
95 template<class IntegerType>
96 struct OutOfBounds<IntegerType, false> {
97  static KOKKOS_INLINE_FUNCTION bool
98  test (const IntegerType x,
99  const IntegerType exclusiveUpperBound)
100  {
101  return x >= exclusiveUpperBound;
102  }
103 };
104 
107 template<class IntegerType>
108 KOKKOS_INLINE_FUNCTION bool
109 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
110 {
111  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
112 }
113 
114 } // namespace Impl
115 
116  // Functors for implementing packAndPrepare and unpackAndCombine
117  // through parallel_for
118 
119  template <typename DstView, typename SrcView, typename IdxView>
120  struct PackArraySingleColumn {
121  typedef typename DstView::execution_space execution_space;
122  typedef typename execution_space::size_type size_type;
123 
124  DstView dst;
125  SrcView src;
126  IdxView idx;
127  size_t col;
128 
129  PackArraySingleColumn (const DstView& dst_,
130  const SrcView& src_,
131  const IdxView& idx_,
132  const size_t col_) :
133  dst(dst_), src(src_), idx(idx_), col(col_) {}
134 
135  KOKKOS_INLINE_FUNCTION void
136  operator() (const size_type k) const {
137  dst(k) = src(idx(k), col);
138  }
139 
140  static void
141  pack (const DstView& dst,
142  const SrcView& src,
143  const IdxView& idx,
144  const size_t col)
145  {
146  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
147  Kokkos::parallel_for
148  ("Tpetra::MultiVector pack one col",
149  range_type (0, idx.size ()),
150  PackArraySingleColumn (dst, src, idx, col));
151  }
152  };
153 
154  template <typename DstView,
155  typename SrcView,
156  typename IdxView,
157  typename SizeType = typename DstView::execution_space::size_type>
158  class PackArraySingleColumnWithBoundsCheck {
159  private:
160  static_assert (Kokkos::Impl::is_view<DstView>::value,
161  "DstView must be a Kokkos::View.");
162  static_assert (Kokkos::Impl::is_view<SrcView>::value,
163  "SrcView must be a Kokkos::View.");
164  static_assert (Kokkos::Impl::is_view<IdxView>::value,
165  "IdxView must be a Kokkos::View.");
166  static_assert (static_cast<int> (DstView::rank) == 1,
167  "DstView must be a rank-1 Kokkos::View.");
168  static_assert (static_cast<int> (SrcView::rank) == 2,
169  "SrcView must be a rank-2 Kokkos::View.");
170  static_assert (static_cast<int> (IdxView::rank) == 1,
171  "IdxView must be a rank-1 Kokkos::View.");
172  static_assert (std::is_integral<SizeType>::value,
173  "SizeType must be a built-in integer type.");
174  public:
175  typedef SizeType size_type;
176  using value_type = size_t;
177 
178  private:
179  DstView dst;
180  SrcView src;
181  IdxView idx;
182  size_type col;
183 
184  public:
185  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
186  const SrcView& src_,
187  const IdxView& idx_,
188  const size_type col_) :
189  dst (dst_), src (src_), idx (idx_), col (col_) {}
190 
191  KOKKOS_INLINE_FUNCTION void
192  operator() (const size_type k, value_type& lclErrCount) const {
193  using index_type = typename IdxView::non_const_value_type;
194 
195  const index_type lclRow = idx(k);
196  if (lclRow < static_cast<index_type> (0) ||
197  lclRow >= static_cast<index_type> (src.extent (0))) {
198  ++lclErrCount;
199  }
200  else {
201  dst(k) = src(lclRow, col);
202  }
203  }
204 
205  KOKKOS_INLINE_FUNCTION
206  void init (value_type& initialErrorCount) const {
207  initialErrorCount = 0;
208  }
209 
210  KOKKOS_INLINE_FUNCTION void
211  join (volatile value_type& dstErrorCount,
212  const volatile value_type& srcErrorCount) const
213  {
214  dstErrorCount += srcErrorCount;
215  }
216 
217  static void
218  pack (const DstView& dst,
219  const SrcView& src,
220  const IdxView& idx,
221  const size_type col)
222  {
223  typedef typename DstView::execution_space execution_space;
224  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
225  typedef typename IdxView::non_const_value_type index_type;
226 
227  size_t errorCount = 0;
228  Kokkos::parallel_reduce
229  ("Tpetra::MultiVector pack one col debug only",
230  range_type (0, idx.size ()),
231  PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
232  errorCount);
233 
234  if (errorCount != 0) {
235  // Go back and find the out-of-bounds entries in the index
236  // array. Performance doesn't matter since we are already in
237  // an error state, so we can do this sequentially, on host.
238  auto idx_h = Kokkos::create_mirror_view (idx);
239  Kokkos::deep_copy (idx_h, idx);
240 
241  std::vector<index_type> badIndices;
242  const size_type numInds = idx_h.extent (0);
243  for (size_type k = 0; k < numInds; ++k) {
244  if (idx_h(k) < static_cast<index_type> (0) ||
245  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
246  badIndices.push_back (idx_h(k));
247  }
248  }
249 
250  TEUCHOS_TEST_FOR_EXCEPTION
251  (errorCount != badIndices.size (), std::logic_error,
252  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
253  << " != badIndices.size() = " << badIndices.size () << ". This sho"
254  "uld never happen. Please report this to the Tpetra developers.");
255 
256  std::ostringstream os;
257  os << "MultiVector single-column pack kernel had "
258  << badIndices.size () << " out-of bounds index/ices. "
259  "Here they are: [";
260  for (size_t k = 0; k < badIndices.size (); ++k) {
261  os << badIndices[k];
262  if (k + 1 < badIndices.size ()) {
263  os << ", ";
264  }
265  }
266  os << "].";
267  throw std::runtime_error (os.str ());
268  }
269  }
270  };
271 
272 
273  template <typename DstView, typename SrcView, typename IdxView>
274  void
275  pack_array_single_column (const DstView& dst,
276  const SrcView& src,
277  const IdxView& idx,
278  const size_t col,
279  const bool debug = true)
280  {
281  static_assert (Kokkos::Impl::is_view<DstView>::value,
282  "DstView must be a Kokkos::View.");
283  static_assert (Kokkos::Impl::is_view<SrcView>::value,
284  "SrcView must be a Kokkos::View.");
285  static_assert (Kokkos::Impl::is_view<IdxView>::value,
286  "IdxView must be a Kokkos::View.");
287  static_assert (static_cast<int> (DstView::rank) == 1,
288  "DstView must be a rank-1 Kokkos::View.");
289  static_assert (static_cast<int> (SrcView::rank) == 2,
290  "SrcView must be a rank-2 Kokkos::View.");
291  static_assert (static_cast<int> (IdxView::rank) == 1,
292  "IdxView must be a rank-1 Kokkos::View.");
293 
294  if (debug) {
295  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
296  impl_type::pack (dst, src, idx, col);
297  }
298  else {
299  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
300  impl_type::pack (dst, src, idx, col);
301  }
302  }
303 
304  template <typename DstView, typename SrcView, typename IdxView>
305  struct PackArrayMultiColumn {
306  typedef typename DstView::execution_space execution_space;
307  typedef typename execution_space::size_type size_type;
308 
309  DstView dst;
310  SrcView src;
311  IdxView idx;
312  size_t numCols;
313 
314  PackArrayMultiColumn (const DstView& dst_,
315  const SrcView& src_,
316  const IdxView& idx_,
317  const size_t numCols_) :
318  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
319 
320  KOKKOS_INLINE_FUNCTION void
321  operator() (const size_type k) const {
322  const typename IdxView::value_type localRow = idx(k);
323  const size_t offset = k*numCols;
324  for (size_t j = 0; j < numCols; ++j) {
325  dst(offset + j) = src(localRow, j);
326  }
327  }
328 
329  static void pack(const DstView& dst,
330  const SrcView& src,
331  const IdxView& idx,
332  size_t numCols) {
333  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
334  Kokkos::parallel_for
335  ("Tpetra::MultiVector pack multicol const stride",
336  range_type (0, idx.size ()),
337  PackArrayMultiColumn (dst, src, idx, numCols));
338  }
339  };
340 
341  template <typename DstView,
342  typename SrcView,
343  typename IdxView,
344  typename SizeType = typename DstView::execution_space::size_type>
345  class PackArrayMultiColumnWithBoundsCheck {
346  public:
347  using size_type = SizeType;
348  using value_type = size_t;
349 
350  private:
351  DstView dst;
352  SrcView src;
353  IdxView idx;
354  size_type numCols;
355 
356  public:
357  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
358  const SrcView& src_,
359  const IdxView& idx_,
360  const size_type numCols_) :
361  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
362 
363  KOKKOS_INLINE_FUNCTION void
364  operator() (const size_type k, value_type& lclErrorCount) const {
365  typedef typename IdxView::non_const_value_type index_type;
366 
367  const index_type lclRow = idx(k);
368  if (lclRow < static_cast<index_type> (0) ||
369  lclRow >= static_cast<index_type> (src.extent (0))) {
370  ++lclErrorCount; // failed
371  }
372  else {
373  const size_type offset = k*numCols;
374  for (size_type j = 0; j < numCols; ++j) {
375  dst(offset + j) = src(lclRow, j);
376  }
377  }
378  }
379 
380  KOKKOS_INLINE_FUNCTION
381  void init (value_type& initialErrorCount) const {
382  initialErrorCount = 0;
383  }
384 
385  KOKKOS_INLINE_FUNCTION void
386  join (volatile value_type& dstErrorCount,
387  const volatile value_type& srcErrorCount) const
388  {
389  dstErrorCount += srcErrorCount;
390  }
391 
392  static void
393  pack (const DstView& dst,
394  const SrcView& src,
395  const IdxView& idx,
396  const size_type numCols)
397  {
398  typedef typename DstView::execution_space execution_space;
399  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
400  typedef typename IdxView::non_const_value_type index_type;
401 
402  size_t errorCount = 0;
403  Kokkos::parallel_reduce
404  ("Tpetra::MultiVector pack multicol const stride debug only",
405  range_type (0, idx.size ()),
406  PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
407  errorCount);
408  if (errorCount != 0) {
409  // Go back and find the out-of-bounds entries in the index
410  // array. Performance doesn't matter since we are already in
411  // an error state, so we can do this sequentially, on host.
412  auto idx_h = Kokkos::create_mirror_view (idx);
413  Kokkos::deep_copy (idx_h, idx);
414 
415  std::vector<index_type> badIndices;
416  const size_type numInds = idx_h.extent (0);
417  for (size_type k = 0; k < numInds; ++k) {
418  if (idx_h(k) < static_cast<index_type> (0) ||
419  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
420  badIndices.push_back (idx_h(k));
421  }
422  }
423 
424  TEUCHOS_TEST_FOR_EXCEPTION
425  (errorCount != badIndices.size (), std::logic_error,
426  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
427  << " != badIndices.size() = " << badIndices.size () << ". This sho"
428  "uld never happen. Please report this to the Tpetra developers.");
429 
430  std::ostringstream os;
431  os << "Tpetra::MultiVector multiple-column pack kernel had "
432  << badIndices.size () << " out-of bounds index/ices (errorCount = "
433  << errorCount << "): [";
434  for (size_t k = 0; k < badIndices.size (); ++k) {
435  os << badIndices[k];
436  if (k + 1 < badIndices.size ()) {
437  os << ", ";
438  }
439  }
440  os << "].";
441  throw std::runtime_error (os.str ());
442  }
443  }
444  };
445 
446 
447  template <typename DstView,
448  typename SrcView,
449  typename IdxView>
450  void
451  pack_array_multi_column (const DstView& dst,
452  const SrcView& src,
453  const IdxView& idx,
454  const size_t numCols,
455  const bool debug = true)
456  {
457  static_assert (Kokkos::Impl::is_view<DstView>::value,
458  "DstView must be a Kokkos::View.");
459  static_assert (Kokkos::Impl::is_view<SrcView>::value,
460  "SrcView must be a Kokkos::View.");
461  static_assert (Kokkos::Impl::is_view<IdxView>::value,
462  "IdxView must be a Kokkos::View.");
463  static_assert (static_cast<int> (DstView::rank) == 1,
464  "DstView must be a rank-1 Kokkos::View.");
465  static_assert (static_cast<int> (SrcView::rank) == 2,
466  "SrcView must be a rank-2 Kokkos::View.");
467  static_assert (static_cast<int> (IdxView::rank) == 1,
468  "IdxView must be a rank-1 Kokkos::View.");
469 
470  if (debug) {
471  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
472  SrcView, IdxView> impl_type;
473  impl_type::pack (dst, src, idx, numCols);
474  }
475  else {
476  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
477  impl_type::pack (dst, src, idx, numCols);
478  }
479  }
480 
481  template <typename DstView, typename SrcView, typename IdxView,
482  typename ColView>
483  struct PackArrayMultiColumnVariableStride {
484  typedef typename DstView::execution_space execution_space;
485  typedef typename execution_space::size_type size_type;
486 
487  DstView dst;
488  SrcView src;
489  IdxView idx;
490  ColView col;
491  size_t numCols;
492 
493  PackArrayMultiColumnVariableStride (const DstView& dst_,
494  const SrcView& src_,
495  const IdxView& idx_,
496  const ColView& col_,
497  const size_t numCols_) :
498  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
499 
500  KOKKOS_INLINE_FUNCTION
501  void operator() (const size_type k) const {
502  const typename IdxView::value_type localRow = idx(k);
503  const size_t offset = k*numCols;
504  for (size_t j = 0; j < numCols; ++j) {
505  dst(offset + j) = src(localRow, col(j));
506  }
507  }
508 
509  static void pack(const DstView& dst,
510  const SrcView& src,
511  const IdxView& idx,
512  const ColView& col,
513  size_t numCols) {
514  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
515  Kokkos::parallel_for
516  ("Tpetra::MultiVector pack multicol var stride",
517  range_type (0, idx.size ()),
518  PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
519  }
520  };
521 
522  template <typename DstView,
523  typename SrcView,
524  typename IdxView,
525  typename ColView,
526  typename SizeType = typename DstView::execution_space::size_type>
527  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
528  public:
529  using size_type = SizeType;
530  using value_type = size_t;
531 
532  private:
533  DstView dst;
534  SrcView src;
535  IdxView idx;
536  ColView col;
537  size_type numCols;
538 
539  public:
540  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
541  const SrcView& src_,
542  const IdxView& idx_,
543  const ColView& col_,
544  const size_type numCols_) :
545  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
546 
547  KOKKOS_INLINE_FUNCTION void
548  operator() (const size_type k, value_type& lclErrorCount) const {
549  typedef typename IdxView::non_const_value_type row_index_type;
550  typedef typename ColView::non_const_value_type col_index_type;
551 
552  const row_index_type lclRow = idx(k);
553  if (lclRow < static_cast<row_index_type> (0) ||
554  lclRow >= static_cast<row_index_type> (src.extent (0))) {
555  ++lclErrorCount = 0;
556  }
557  else {
558  const size_type offset = k*numCols;
559  for (size_type j = 0; j < numCols; ++j) {
560  const col_index_type lclCol = col(j);
561  if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
562  ++lclErrorCount = 0;
563  }
564  else { // all indices are valid; do the assignment
565  dst(offset + j) = src(lclRow, lclCol);
566  }
567  }
568  }
569  }
570 
571  KOKKOS_INLINE_FUNCTION void
572  init (value_type& initialErrorCount) const {
573  initialErrorCount = 0;
574  }
575 
576  KOKKOS_INLINE_FUNCTION void
577  join (volatile value_type& dstErrorCount,
578  const volatile value_type& srcErrorCount) const
579  {
580  dstErrorCount += srcErrorCount;
581  }
582 
583  static void
584  pack (const DstView& dst,
585  const SrcView& src,
586  const IdxView& idx,
587  const ColView& col,
588  const size_type numCols)
589  {
590  using execution_space = typename DstView::execution_space;
591  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
592  using row_index_type = typename IdxView::non_const_value_type;
593  using col_index_type = typename ColView::non_const_value_type;
594 
595  size_t errorCount = 0;
596  Kokkos::parallel_reduce
597  ("Tpetra::MultiVector pack multicol var stride debug only",
598  range_type (0, idx.size ()),
599  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
600  col, numCols),
601  errorCount);
602  if (errorCount != 0) {
603  constexpr size_t maxNumBadIndicesToPrint = 100;
604 
605  std::ostringstream os; // for error reporting
606  os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
607  "found " << errorCount
608  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
609 
610  // Go back and find any out-of-bounds entries in the array of
611  // row indices. Performance doesn't matter since we are already
612  // in an error state, so we can do this sequentially, on host.
613  auto idx_h = Kokkos::create_mirror_view (idx);
614  Kokkos::deep_copy (idx_h, idx);
615 
616  std::vector<row_index_type> badRows;
617  const size_type numRowInds = idx_h.extent (0);
618  for (size_type k = 0; k < numRowInds; ++k) {
619  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
620  badRows.push_back (idx_h(k));
621  }
622  }
623 
624  if (badRows.size () != 0) {
625  os << badRows.size () << " out-of-bounds row ind"
626  << (badRows.size () != size_t (1) ? "ices" : "ex");
627  if (badRows.size () <= maxNumBadIndicesToPrint) {
628  os << ": [";
629  for (size_t k = 0; k < badRows.size (); ++k) {
630  os << badRows[k];
631  if (k + 1 < badRows.size ()) {
632  os << ", ";
633  }
634  }
635  os << "]. ";
636  }
637  else {
638  os << ". ";
639  }
640  }
641  else {
642  os << "No out-of-bounds row indices. ";
643  }
644 
645  // Go back and find any out-of-bounds entries in the array
646  // of column indices.
647  auto col_h = Kokkos::create_mirror_view (col);
648  Kokkos::deep_copy (col_h, col);
649 
650  std::vector<col_index_type> badCols;
651  const size_type numColInds = col_h.extent (0);
652  for (size_type k = 0; k < numColInds; ++k) {
653  if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
654  badCols.push_back (col_h(k));
655  }
656  }
657 
658  if (badCols.size () != 0) {
659  os << badCols.size () << " out-of-bounds column ind"
660  << (badCols.size () != size_t (1) ? "ices" : "ex");
661  if (badCols.size () <= maxNumBadIndicesToPrint) {
662  os << ": [";
663  for (size_t k = 0; k < badCols.size (); ++k) {
664  os << badCols[k];
665  if (k + 1 < badCols.size ()) {
666  os << ", ";
667  }
668  }
669  os << "]. ";
670  }
671  else {
672  os << ". ";
673  }
674  }
675  else {
676  os << "No out-of-bounds column indices. ";
677  }
678 
679  TEUCHOS_TEST_FOR_EXCEPTION
680  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
681  std::logic_error, "Tpetra::MultiVector variable stride pack "
682  "kernel reports errorCount=" << errorCount << ", but we failed "
683  "to find any bad rows or columns. This should never happen. "
684  "Please report this to the Tpetra developers.");
685 
686  throw std::runtime_error (os.str ());
687  } // hasErr
688  }
689  };
690 
691  template <typename DstView,
692  typename SrcView,
693  typename IdxView,
694  typename ColView>
695  void
696  pack_array_multi_column_variable_stride (const DstView& dst,
697  const SrcView& src,
698  const IdxView& idx,
699  const ColView& col,
700  const size_t numCols,
701  const bool debug = true)
702  {
703  static_assert (Kokkos::Impl::is_view<DstView>::value,
704  "DstView must be a Kokkos::View.");
705  static_assert (Kokkos::Impl::is_view<SrcView>::value,
706  "SrcView must be a Kokkos::View.");
707  static_assert (Kokkos::Impl::is_view<IdxView>::value,
708  "IdxView must be a Kokkos::View.");
709  static_assert (Kokkos::Impl::is_view<ColView>::value,
710  "ColView must be a Kokkos::View.");
711  static_assert (static_cast<int> (DstView::rank) == 1,
712  "DstView must be a rank-1 Kokkos::View.");
713  static_assert (static_cast<int> (SrcView::rank) == 2,
714  "SrcView must be a rank-2 Kokkos::View.");
715  static_assert (static_cast<int> (IdxView::rank) == 1,
716  "IdxView must be a rank-1 Kokkos::View.");
717  static_assert (static_cast<int> (ColView::rank) == 1,
718  "ColView must be a rank-1 Kokkos::View.");
719 
720  if (debug) {
721  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
722  SrcView, IdxView, ColView> impl_type;
723  impl_type::pack (dst, src, idx, col, numCols);
724  }
725  else {
726  typedef PackArrayMultiColumnVariableStride<DstView,
727  SrcView, IdxView, ColView> impl_type;
728  impl_type::pack (dst, src, idx, col, numCols);
729  }
730  }
731 
732  // Tag types to indicate whether to use atomic updates in the
733  // various CombineMode "Op"s.
734  struct atomic_tag {};
735  struct nonatomic_tag {};
736 
737  template<class SC>
738  struct AddOp {
739  KOKKOS_INLINE_FUNCTION
740  void operator() (atomic_tag, SC& dest, const SC& src) const {
741  Kokkos::atomic_add (&dest, src);
742  }
743 
744  KOKKOS_INLINE_FUNCTION
745  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
746  dest += src;
747  }
748  };
749 
750  template<class SC>
751  struct InsertOp {
752  // There's no point to using Kokkos::atomic_assign for the REPLACE
753  // or INSERT CombineModes, since this is not a well-defined
754  // reduction for MultiVector anyway. See GitHub Issue #4417
755  // (which this fixes).
756  KOKKOS_INLINE_FUNCTION
757  void operator() (atomic_tag, SC& dest, const SC& src) const {
758  dest = src;
759  }
760 
761  KOKKOS_INLINE_FUNCTION
762  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
763  dest = src;
764  }
765  };
766 
767  // Kokkos::Impl::atomic_fetch_oper wants a class like this.
768  template<class Scalar1, class Scalar2>
769  struct AbsMaxOper {
770  KOKKOS_INLINE_FUNCTION
771  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
772  const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
773  const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
774  return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
775  }
776  };
777 
778  template <typename SC>
779  struct AbsMaxOp {
780  KOKKOS_INLINE_FUNCTION
781  void operator() (atomic_tag, SC& dest, const SC& src) const {
782  Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
783  }
784 
785  KOKKOS_INLINE_FUNCTION
786  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
787  dest = AbsMaxOper<SC, SC> ().apply (dest, src);
788  }
789  };
790 
791  template <typename ExecutionSpace,
792  typename DstView,
793  typename SrcView,
794  typename IdxView,
795  typename Op>
796  class UnpackArrayMultiColumn {
797  private:
798  static_assert (Kokkos::Impl::is_view<DstView>::value,
799  "DstView must be a Kokkos::View.");
800  static_assert (Kokkos::Impl::is_view<SrcView>::value,
801  "SrcView must be a Kokkos::View.");
802  static_assert (Kokkos::Impl::is_view<IdxView>::value,
803  "IdxView must be a Kokkos::View.");
804  static_assert (static_cast<int> (DstView::rank) == 2,
805  "DstView must be a rank-2 Kokkos::View.");
806  static_assert (static_cast<int> (SrcView::rank) == 1,
807  "SrcView must be a rank-1 Kokkos::View.");
808  static_assert (static_cast<int> (IdxView::rank) == 1,
809  "IdxView must be a rank-1 Kokkos::View.");
810 
811  public:
812  typedef typename ExecutionSpace::execution_space execution_space;
813  typedef typename execution_space::size_type size_type;
814 
815  private:
816  DstView dst;
817  SrcView src;
818  IdxView idx;
819  Op op;
820  size_t numCols;
821 
822  public:
823  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
824  const DstView& dst_,
825  const SrcView& src_,
826  const IdxView& idx_,
827  const Op& op_,
828  const size_t numCols_) :
829  dst (dst_),
830  src (src_),
831  idx (idx_),
832  op (op_),
833  numCols (numCols_)
834  {}
835 
836  template<class TagType>
837  KOKKOS_INLINE_FUNCTION void
838  operator() (TagType tag, const size_type k) const
839  {
840  static_assert
841  (std::is_same<TagType, atomic_tag>::value ||
842  std::is_same<TagType, nonatomic_tag>::value,
843  "TagType must be atomic_tag or nonatomic_tag.");
844 
845  const typename IdxView::value_type localRow = idx(k);
846  const size_t offset = k*numCols;
847  for (size_t j = 0; j < numCols; ++j) {
848  op (tag, dst(localRow, j), src(offset+j));
849  }
850  }
851 
852  static void
853  unpack (const ExecutionSpace& execSpace,
854  const DstView& dst,
855  const SrcView& src,
856  const IdxView& idx,
857  const Op& op,
858  const size_t numCols,
859  const bool use_atomic_updates)
860  {
861  if (use_atomic_updates) {
862  using range_type =
863  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
864  Kokkos::parallel_for
865  ("Tpetra::MultiVector unpack const stride atomic",
866  range_type (0, idx.size ()),
867  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
868  }
869  else {
870  using range_type =
871  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
872  Kokkos::parallel_for
873  ("Tpetra::MultiVector unpack const stride nonatomic",
874  range_type (0, idx.size ()),
875  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
876  }
877  }
878  };
879 
880  template <typename ExecutionSpace,
881  typename DstView,
882  typename SrcView,
883  typename IdxView,
884  typename Op,
885  typename SizeType = typename ExecutionSpace::execution_space::size_type>
886  class UnpackArrayMultiColumnWithBoundsCheck {
887  private:
888  static_assert (Kokkos::Impl::is_view<DstView>::value,
889  "DstView must be a Kokkos::View.");
890  static_assert (Kokkos::Impl::is_view<SrcView>::value,
891  "SrcView must be a Kokkos::View.");
892  static_assert (Kokkos::Impl::is_view<IdxView>::value,
893  "IdxView must be a Kokkos::View.");
894  static_assert (static_cast<int> (DstView::rank) == 2,
895  "DstView must be a rank-2 Kokkos::View.");
896  static_assert (static_cast<int> (SrcView::rank) == 1,
897  "SrcView must be a rank-1 Kokkos::View.");
898  static_assert (static_cast<int> (IdxView::rank) == 1,
899  "IdxView must be a rank-1 Kokkos::View.");
900  static_assert (std::is_integral<SizeType>::value,
901  "SizeType must be a built-in integer type.");
902 
903  public:
904  using execution_space = typename ExecutionSpace::execution_space;
905  using size_type = SizeType;
906  using value_type = size_t;
907 
908  private:
909  DstView dst;
910  SrcView src;
911  IdxView idx;
912  Op op;
913  size_type numCols;
914 
915  public:
916  UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
917  const DstView& dst_,
918  const SrcView& src_,
919  const IdxView& idx_,
920  const Op& op_,
921  const size_type numCols_) :
922  dst (dst_),
923  src (src_),
924  idx (idx_),
925  op (op_),
926  numCols (numCols_)
927  {}
928 
929  template<class TagType>
930  KOKKOS_INLINE_FUNCTION void
931  operator() (TagType tag,
932  const size_type k,
933  size_t& lclErrCount) const
934  {
935  static_assert
936  (std::is_same<TagType, atomic_tag>::value ||
937  std::is_same<TagType, nonatomic_tag>::value,
938  "TagType must be atomic_tag or nonatomic_tag.");
939  using index_type = typename IdxView::non_const_value_type;
940 
941  const index_type lclRow = idx(k);
942  if (lclRow < static_cast<index_type> (0) ||
943  lclRow >= static_cast<index_type> (dst.extent (0))) {
944  ++lclErrCount;
945  }
946  else {
947  const size_type offset = k*numCols;
948  for (size_type j = 0; j < numCols; ++j) {
949  op (tag, dst(lclRow,j), src(offset+j));
950  }
951  }
952  }
953 
954  template<class TagType>
955  KOKKOS_INLINE_FUNCTION void
956  init (TagType, size_t& initialErrorCount) const {
957  initialErrorCount = 0;
958  }
959 
960  template<class TagType>
961  KOKKOS_INLINE_FUNCTION void
962  join (TagType,
963  volatile size_t& dstErrorCount,
964  const volatile size_t& srcErrorCount) const
965  {
966  dstErrorCount += srcErrorCount;
967  }
968 
969  static void
970  unpack (const ExecutionSpace& execSpace,
971  const DstView& dst,
972  const SrcView& src,
973  const IdxView& idx,
974  const Op& op,
975  const size_type numCols,
976  const bool use_atomic_updates)
977  {
978  using index_type = typename IdxView::non_const_value_type;
979 
980  size_t errorCount = 0;
981  if (use_atomic_updates) {
982  using range_type =
983  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
984  Kokkos::parallel_reduce
985  ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
986  range_type (0, idx.size ()),
987  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
988  idx, op, numCols),
989  errorCount);
990  }
991  else {
992  using range_type =
993  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
994  Kokkos::parallel_reduce
995  ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
996  range_type (0, idx.size ()),
997  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
998  idx, op, numCols),
999  errorCount);
1000  }
1001 
1002  if (errorCount != 0) {
1003  // Go back and find the out-of-bounds entries in the index
1004  // array. Performance doesn't matter since we are already in
1005  // an error state, so we can do this sequentially, on host.
1006  auto idx_h = Kokkos::create_mirror_view (idx);
1007  Kokkos::deep_copy (idx_h, idx);
1008 
1009  std::vector<index_type> badIndices;
1010  const size_type numInds = idx_h.extent (0);
1011  for (size_type k = 0; k < numInds; ++k) {
1012  if (idx_h(k) < static_cast<index_type> (0) ||
1013  idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1014  badIndices.push_back (idx_h(k));
1015  }
1016  }
1017 
1018  if (errorCount != badIndices.size ()) {
1019  std::ostringstream os;
1020  os << "MultiVector unpack kernel: errorCount = " << errorCount
1021  << " != badIndices.size() = " << badIndices.size ()
1022  << ". This should never happen. "
1023  "Please report this to the Tpetra developers.";
1024  throw std::logic_error (os.str ());
1025  }
1026 
1027  std::ostringstream os;
1028  os << "MultiVector unpack kernel had " << badIndices.size ()
1029  << " out-of bounds index/ices. Here they are: [";
1030  for (size_t k = 0; k < badIndices.size (); ++k) {
1031  os << badIndices[k];
1032  if (k + 1 < badIndices.size ()) {
1033  os << ", ";
1034  }
1035  }
1036  os << "].";
1037  throw std::runtime_error (os.str ());
1038  }
1039  }
1040  };
1041 
1042  template <typename ExecutionSpace,
1043  typename DstView,
1044  typename SrcView,
1045  typename IdxView,
1046  typename Op>
1047  void
1048  unpack_array_multi_column (const ExecutionSpace& execSpace,
1049  const DstView& dst,
1050  const SrcView& src,
1051  const IdxView& idx,
1052  const Op& op,
1053  const size_t numCols,
1054  const bool use_atomic_updates,
1055  const bool debug)
1056  {
1057  static_assert (Kokkos::Impl::is_view<DstView>::value,
1058  "DstView must be a Kokkos::View.");
1059  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1060  "SrcView must be a Kokkos::View.");
1061  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1062  "IdxView must be a Kokkos::View.");
1063  static_assert (static_cast<int> (DstView::rank) == 2,
1064  "DstView must be a rank-2 Kokkos::View.");
1065  static_assert (static_cast<int> (SrcView::rank) == 1,
1066  "SrcView must be a rank-1 Kokkos::View.");
1067  static_assert (static_cast<int> (IdxView::rank) == 1,
1068  "IdxView must be a rank-1 Kokkos::View.");
1069 
1070  if (debug) {
1071  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1072  DstView, SrcView, IdxView, Op> impl_type;
1073  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1074  use_atomic_updates);
1075  }
1076  else {
1077  typedef UnpackArrayMultiColumn<ExecutionSpace,
1078  DstView, SrcView, IdxView, Op> impl_type;
1079  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1080  use_atomic_updates);
1081  }
1082  }
1083 
1084  template <typename ExecutionSpace,
1085  typename DstView,
1086  typename SrcView,
1087  typename IdxView,
1088  typename ColView,
1089  typename Op>
1090  class UnpackArrayMultiColumnVariableStride {
1091  private:
1092  static_assert (Kokkos::Impl::is_view<DstView>::value,
1093  "DstView must be a Kokkos::View.");
1094  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1095  "SrcView must be a Kokkos::View.");
1096  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1097  "IdxView must be a Kokkos::View.");
1098  static_assert (Kokkos::Impl::is_view<ColView>::value,
1099  "ColView must be a Kokkos::View.");
1100  static_assert (static_cast<int> (DstView::rank) == 2,
1101  "DstView must be a rank-2 Kokkos::View.");
1102  static_assert (static_cast<int> (SrcView::rank) == 1,
1103  "SrcView must be a rank-1 Kokkos::View.");
1104  static_assert (static_cast<int> (IdxView::rank) == 1,
1105  "IdxView must be a rank-1 Kokkos::View.");
1106  static_assert (static_cast<int> (ColView::rank) == 1,
1107  "ColView must be a rank-1 Kokkos::View.");
1108 
1109  public:
1110  using execution_space = typename ExecutionSpace::execution_space;
1111  using size_type = typename execution_space::size_type;
1112 
1113  private:
1114  DstView dst;
1115  SrcView src;
1116  IdxView idx;
1117  ColView col;
1118  Op op;
1119  size_t numCols;
1120 
1121  public:
1122  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1123  const DstView& dst_,
1124  const SrcView& src_,
1125  const IdxView& idx_,
1126  const ColView& col_,
1127  const Op& op_,
1128  const size_t numCols_) :
1129  dst (dst_),
1130  src (src_),
1131  idx (idx_),
1132  col (col_),
1133  op (op_),
1134  numCols (numCols_)
1135  {}
1136 
1137  template<class TagType>
1138  KOKKOS_INLINE_FUNCTION void
1139  operator() (TagType tag, const size_type k) const
1140  {
1141  static_assert
1142  (std::is_same<TagType, atomic_tag>::value ||
1143  std::is_same<TagType, nonatomic_tag>::value,
1144  "TagType must be atomic_tag or nonatomic_tag.");
1145 
1146  const typename IdxView::value_type localRow = idx(k);
1147  const size_t offset = k*numCols;
1148  for (size_t j = 0; j < numCols; ++j) {
1149  op (tag, dst(localRow, col(j)), src(offset+j));
1150  }
1151  }
1152 
1153  static void
1154  unpack (const ExecutionSpace& execSpace,
1155  const DstView& dst,
1156  const SrcView& src,
1157  const IdxView& idx,
1158  const ColView& col,
1159  const Op& op,
1160  const size_t numCols,
1161  const bool use_atomic_updates)
1162  {
1163  if (use_atomic_updates) {
1164  using range_type =
1165  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1166  Kokkos::parallel_for
1167  ("Tpetra::MultiVector unpack var stride atomic",
1168  range_type (0, idx.size ()),
1169  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1170  idx, col, op, numCols));
1171  }
1172  else {
1173  using range_type =
1174  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1175  Kokkos::parallel_for
1176  ("Tpetra::MultiVector unpack var stride nonatomic",
1177  range_type (0, idx.size ()),
1178  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1179  idx, col, op, numCols));
1180  }
1181  }
1182  };
1183 
1184  template <typename ExecutionSpace,
1185  typename DstView,
1186  typename SrcView,
1187  typename IdxView,
1188  typename ColView,
1189  typename Op,
1190  typename SizeType = typename ExecutionSpace::execution_space::size_type>
1191  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1192  private:
1193  static_assert (Kokkos::Impl::is_view<DstView>::value,
1194  "DstView must be a Kokkos::View.");
1195  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1196  "SrcView must be a Kokkos::View.");
1197  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1198  "IdxView must be a Kokkos::View.");
1199  static_assert (Kokkos::Impl::is_view<ColView>::value,
1200  "ColView must be a Kokkos::View.");
1201  static_assert (static_cast<int> (DstView::rank) == 2,
1202  "DstView must be a rank-2 Kokkos::View.");
1203  static_assert (static_cast<int> (SrcView::rank) == 1,
1204  "SrcView must be a rank-1 Kokkos::View.");
1205  static_assert (static_cast<int> (IdxView::rank) == 1,
1206  "IdxView must be a rank-1 Kokkos::View.");
1207  static_assert (static_cast<int> (ColView::rank) == 1,
1208  "ColView must be a rank-1 Kokkos::View.");
1209  static_assert (std::is_integral<SizeType>::value,
1210  "SizeType must be a built-in integer type.");
1211 
1212  public:
1213  using execution_space = typename ExecutionSpace::execution_space;
1214  using size_type = SizeType;
1215  using value_type = size_t;
1216 
1217  private:
1218  DstView dst;
1219  SrcView src;
1220  IdxView idx;
1221  ColView col;
1222  Op op;
1223  size_type numCols;
1224 
1225  public:
1226  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1227  (const ExecutionSpace& /* execSpace */,
1228  const DstView& dst_,
1229  const SrcView& src_,
1230  const IdxView& idx_,
1231  const ColView& col_,
1232  const Op& op_,
1233  const size_t numCols_) :
1234  dst (dst_),
1235  src (src_),
1236  idx (idx_),
1237  col (col_),
1238  op (op_),
1239  numCols (numCols_)
1240  {}
1241 
1242  template<class TagType>
1243  KOKKOS_INLINE_FUNCTION void
1244  operator() (TagType tag,
1245  const size_type k,
1246  value_type& lclErrorCount) const
1247  {
1248  static_assert
1249  (std::is_same<TagType, atomic_tag>::value ||
1250  std::is_same<TagType, nonatomic_tag>::value,
1251  "TagType must be atomic_tag or nonatomic_tag.");
1252  using row_index_type = typename IdxView::non_const_value_type;
1253  using col_index_type = typename ColView::non_const_value_type;
1254 
1255  const row_index_type lclRow = idx(k);
1256  if (lclRow < static_cast<row_index_type> (0) ||
1257  lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1258  ++lclErrorCount;
1259  }
1260  else {
1261  const size_type offset = k * numCols;
1262  for (size_type j = 0; j < numCols; ++j) {
1263  const col_index_type lclCol = col(j);
1264  if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1265  ++lclErrorCount;
1266  }
1267  else { // all indices are valid; apply the op
1268  op (tag, dst(lclRow, col(j)), src(offset+j));
1269  }
1270  }
1271  }
1272  }
1273 
1274  KOKKOS_INLINE_FUNCTION void
1275  init (value_type& initialErrorCount) const {
1276  initialErrorCount = 0;
1277  }
1278 
1279  KOKKOS_INLINE_FUNCTION void
1280  join (volatile value_type& dstErrorCount,
1281  const volatile value_type& srcErrorCount) const
1282  {
1283  dstErrorCount += srcErrorCount;
1284  }
1285 
1286  static void
1287  unpack (const ExecutionSpace& execSpace,
1288  const DstView& dst,
1289  const SrcView& src,
1290  const IdxView& idx,
1291  const ColView& col,
1292  const Op& op,
1293  const size_type numCols,
1294  const bool use_atomic_updates)
1295  {
1296  using row_index_type = typename IdxView::non_const_value_type;
1297  using col_index_type = typename ColView::non_const_value_type;
1298 
1299  size_t errorCount = 0;
1300  if (use_atomic_updates) {
1301  using range_type =
1302  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1303  Kokkos::parallel_reduce
1304  ("Tpetra::MultiVector unpack var stride atomic debug only",
1305  range_type (0, idx.size ()),
1306  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1307  (execSpace, dst, src, idx, col, op, numCols),
1308  errorCount);
1309  }
1310  else {
1311  using range_type =
1312  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1313  Kokkos::parallel_reduce
1314  ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1315  range_type (0, idx.size ()),
1316  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1317  (execSpace, dst, src, idx, col, op, numCols),
1318  errorCount);
1319  }
1320 
1321  if (errorCount != 0) {
1322  constexpr size_t maxNumBadIndicesToPrint = 100;
1323 
1324  std::ostringstream os; // for error reporting
1325  os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1326  "found " << errorCount
1327  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1328 
1329  // Go back and find any out-of-bounds entries in the array of
1330  // row indices. Performance doesn't matter since we are
1331  // already in an error state, so we can do this sequentially,
1332  // on host.
1333  auto idx_h = Kokkos::create_mirror_view (idx);
1334  Kokkos::deep_copy (idx_h, idx);
1335 
1336  std::vector<row_index_type> badRows;
1337  const size_type numRowInds = idx_h.extent (0);
1338  for (size_type k = 0; k < numRowInds; ++k) {
1339  if (idx_h(k) < static_cast<row_index_type> (0) ||
1340  idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1341  badRows.push_back (idx_h(k));
1342  }
1343  }
1344 
1345  if (badRows.size () != 0) {
1346  os << badRows.size () << " out-of-bounds row ind"
1347  << (badRows.size () != size_t (1) ? "ices" : "ex");
1348  if (badRows.size () <= maxNumBadIndicesToPrint) {
1349  os << ": [";
1350  for (size_t k = 0; k < badRows.size (); ++k) {
1351  os << badRows[k];
1352  if (k + 1 < badRows.size ()) {
1353  os << ", ";
1354  }
1355  }
1356  os << "]. ";
1357  }
1358  else {
1359  os << ". ";
1360  }
1361  }
1362  else {
1363  os << "No out-of-bounds row indices. ";
1364  }
1365 
1366  // Go back and find any out-of-bounds entries in the array
1367  // of column indices.
1368  auto col_h = Kokkos::create_mirror_view (col);
1369  Kokkos::deep_copy (col_h, col);
1370 
1371  std::vector<col_index_type> badCols;
1372  const size_type numColInds = col_h.extent (0);
1373  for (size_type k = 0; k < numColInds; ++k) {
1374  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1375  badCols.push_back (col_h(k));
1376  }
1377  }
1378 
1379  if (badCols.size () != 0) {
1380  os << badCols.size () << " out-of-bounds column ind"
1381  << (badCols.size () != size_t (1) ? "ices" : "ex");
1382  if (badCols.size () <= maxNumBadIndicesToPrint) {
1383  for (size_t k = 0; k < badCols.size (); ++k) {
1384  os << ": [";
1385  os << badCols[k];
1386  if (k + 1 < badCols.size ()) {
1387  os << ", ";
1388  }
1389  }
1390  os << "]. ";
1391  }
1392  else {
1393  os << ". ";
1394  }
1395  }
1396  else {
1397  os << "No out-of-bounds column indices. ";
1398  }
1399 
1400  TEUCHOS_TEST_FOR_EXCEPTION
1401  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1402  std::logic_error, "Tpetra::MultiVector variable stride unpack "
1403  "kernel reports errorCount=" << errorCount << ", but we failed "
1404  "to find any bad rows or columns. This should never happen. "
1405  "Please report this to the Tpetra developers.");
1406 
1407  throw std::runtime_error (os.str ());
1408  } // hasErr
1409  }
1410  };
1411 
1412  template <typename ExecutionSpace,
1413  typename DstView,
1414  typename SrcView,
1415  typename IdxView,
1416  typename ColView,
1417  typename Op>
1418  void
1419  unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1420  const DstView& dst,
1421  const SrcView& src,
1422  const IdxView& idx,
1423  const ColView& col,
1424  const Op& op,
1425  const size_t numCols,
1426  const bool use_atomic_updates,
1427  const bool debug)
1428  {
1429  static_assert (Kokkos::Impl::is_view<DstView>::value,
1430  "DstView must be a Kokkos::View.");
1431  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1432  "SrcView must be a Kokkos::View.");
1433  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1434  "IdxView must be a Kokkos::View.");
1435  static_assert (Kokkos::Impl::is_view<ColView>::value,
1436  "ColView must be a Kokkos::View.");
1437  static_assert (static_cast<int> (DstView::rank) == 2,
1438  "DstView must be a rank-2 Kokkos::View.");
1439  static_assert (static_cast<int> (SrcView::rank) == 1,
1440  "SrcView must be a rank-1 Kokkos::View.");
1441  static_assert (static_cast<int> (IdxView::rank) == 1,
1442  "IdxView must be a rank-1 Kokkos::View.");
1443  static_assert (static_cast<int> (ColView::rank) == 1,
1444  "ColView must be a rank-1 Kokkos::View.");
1445 
1446  if (debug) {
1447  using impl_type =
1448  UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1449  DstView, SrcView, IdxView, ColView, Op>;
1450  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1451  use_atomic_updates);
1452  }
1453  else {
1454  using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1455  DstView, SrcView, IdxView, ColView, Op>;
1456  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1457  use_atomic_updates);
1458  }
1459  }
1460 
1461  template <typename DstView, typename SrcView,
1462  typename DstIdxView, typename SrcIdxView>
1463  struct PermuteArrayMultiColumn {
1464  typedef typename DstView::execution_space execution_space;
1465  typedef typename execution_space::size_type size_type;
1466 
1467  DstView dst;
1468  SrcView src;
1469  DstIdxView dst_idx;
1470  SrcIdxView src_idx;
1471  size_t numCols;
1472 
1473  PermuteArrayMultiColumn (const DstView& dst_,
1474  const SrcView& src_,
1475  const DstIdxView& dst_idx_,
1476  const SrcIdxView& src_idx_,
1477  const size_t numCols_) :
1478  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1479  numCols(numCols_) {}
1480 
1481  KOKKOS_INLINE_FUNCTION void
1482  operator() (const size_type k) const {
1483  const typename DstIdxView::value_type toRow = dst_idx(k);
1484  const typename SrcIdxView::value_type fromRow = src_idx(k);
1485  for (size_t j = 0; j < numCols; ++j) {
1486  dst(toRow, j) = src(fromRow, j);
1487  }
1488  }
1489 
1490  static void
1491  permute (const DstView& dst,
1492  const SrcView& src,
1493  const DstIdxView& dst_idx,
1494  const SrcIdxView& src_idx,
1495  const size_t numCols)
1496  {
1497  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1498  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1499  Kokkos::parallel_for
1500  ("Tpetra::MultiVector permute multicol const stride",
1501  range_type (0, n),
1502  PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols));
1503  }
1504  };
1505 
1506  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1507  // SrcView::Rank == 2
1508  template <typename DstView, typename SrcView,
1509  typename DstIdxView, typename SrcIdxView>
1510  void permute_array_multi_column(const DstView& dst,
1511  const SrcView& src,
1512  const DstIdxView& dst_idx,
1513  const SrcIdxView& src_idx,
1514  size_t numCols) {
1515  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1516  dst, src, dst_idx, src_idx, numCols);
1517  }
1518 
1519  template <typename DstView, typename SrcView,
1520  typename DstIdxView, typename SrcIdxView,
1521  typename DstColView, typename SrcColView>
1522  struct PermuteArrayMultiColumnVariableStride {
1523  typedef typename DstView::execution_space execution_space;
1524  typedef typename execution_space::size_type size_type;
1525 
1526  DstView dst;
1527  SrcView src;
1528  DstIdxView dst_idx;
1529  SrcIdxView src_idx;
1530  DstColView dst_col;
1531  SrcColView src_col;
1532  size_t numCols;
1533 
1534  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1535  const SrcView& src_,
1536  const DstIdxView& dst_idx_,
1537  const SrcIdxView& src_idx_,
1538  const DstColView& dst_col_,
1539  const SrcColView& src_col_,
1540  const size_t numCols_) :
1541  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1542  dst_col(dst_col_), src_col(src_col_),
1543  numCols(numCols_) {}
1544 
1545  KOKKOS_INLINE_FUNCTION void
1546  operator() (const size_type k) const {
1547  const typename DstIdxView::value_type toRow = dst_idx(k);
1548  const typename SrcIdxView::value_type fromRow = src_idx(k);
1549  for (size_t j = 0; j < numCols; ++j) {
1550  dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1551  }
1552  }
1553 
1554  static void
1555  permute (const DstView& dst,
1556  const SrcView& src,
1557  const DstIdxView& dst_idx,
1558  const SrcIdxView& src_idx,
1559  const DstColView& dst_col,
1560  const SrcColView& src_col,
1561  const size_t numCols)
1562  {
1563  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1564  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1565  Kokkos::parallel_for
1566  ("Tpetra::MultiVector permute multicol var stride",
1567  range_type (0, n),
1568  PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1569  dst_col, src_col, numCols));
1570  }
1571  };
1572 
1573  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1574  // SrcView::Rank == 2
1575  template <typename DstView, typename SrcView,
1576  typename DstIdxView, typename SrcIdxView,
1577  typename DstColView, typename SrcColView>
1578  void permute_array_multi_column_variable_stride(const DstView& dst,
1579  const SrcView& src,
1580  const DstIdxView& dst_idx,
1581  const SrcIdxView& src_idx,
1582  const DstColView& dst_col,
1583  const SrcColView& src_col,
1584  size_t numCols) {
1585  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1586  DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1587  dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1588  }
1589 
1590 } // Details namespace
1591 } // KokkosRefactor namespace
1592 } // Tpetra namespace
1593 
1594 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...