Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
51 
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 #include "impl/Kokkos_Atomic_Generic.hpp"
55 #include <sstream>
56 #include <stdexcept>
57 
58 namespace Tpetra {
59 namespace KokkosRefactor {
60 namespace Details {
61 
67 namespace Impl {
68 
75 template<class IntegerType,
76  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
77 struct OutOfBounds {
78  static KOKKOS_INLINE_FUNCTION bool
79  test (const IntegerType x,
80  const IntegerType exclusiveUpperBound);
81 };
82 
83 // Partial specialization for the case where IntegerType IS signed.
84 template<class IntegerType>
85 struct OutOfBounds<IntegerType, true> {
86  static KOKKOS_INLINE_FUNCTION bool
87  test (const IntegerType x,
88  const IntegerType exclusiveUpperBound)
89  {
90  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
91  }
92 };
93 
94 // Partial specialization for the case where IntegerType is NOT signed.
95 template<class IntegerType>
96 struct OutOfBounds<IntegerType, false> {
97  static KOKKOS_INLINE_FUNCTION bool
98  test (const IntegerType x,
99  const IntegerType exclusiveUpperBound)
100  {
101  return x >= exclusiveUpperBound;
102  }
103 };
104 
107 template<class IntegerType>
108 KOKKOS_INLINE_FUNCTION bool
109 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
110 {
111  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
112 }
113 
114 } // namespace Impl
115 
116  // Functors for implementing packAndPrepare and unpackAndCombine
117  // through parallel_for
118 
119  template <typename DstView, typename SrcView, typename IdxView>
120  struct PackArraySingleColumn {
121  typedef typename DstView::execution_space execution_space;
122  typedef typename execution_space::size_type size_type;
123 
124  DstView dst;
125  SrcView src;
126  IdxView idx;
127  size_t col;
128 
129  PackArraySingleColumn (const DstView& dst_,
130  const SrcView& src_,
131  const IdxView& idx_,
132  const size_t col_) :
133  dst(dst_), src(src_), idx(idx_), col(col_) {}
134 
135  KOKKOS_INLINE_FUNCTION void
136  operator() (const size_type k) const {
137  dst(k) = src(idx(k), col);
138  }
139 
140  static void
141  pack (const DstView& dst,
142  const SrcView& src,
143  const IdxView& idx,
144  const size_t col)
145  {
146  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
147  Kokkos::parallel_for
148  ("Tpetra::MultiVector pack one col",
149  range_type (0, idx.size ()),
150  PackArraySingleColumn (dst, src, idx, col));
151  }
152  };
153 
154  template <typename DstView,
155  typename SrcView,
156  typename IdxView,
157  typename SizeType = typename DstView::execution_space::size_type>
158  class PackArraySingleColumnWithBoundsCheck {
159  private:
160  static_assert (Kokkos::Impl::is_view<DstView>::value,
161  "DstView must be a Kokkos::View.");
162  static_assert (Kokkos::Impl::is_view<SrcView>::value,
163  "SrcView must be a Kokkos::View.");
164  static_assert (Kokkos::Impl::is_view<IdxView>::value,
165  "IdxView must be a Kokkos::View.");
166  static_assert (static_cast<int> (DstView::rank) == 1,
167  "DstView must be a rank-1 Kokkos::View.");
168  static_assert (static_cast<int> (SrcView::rank) == 2,
169  "SrcView must be a rank-2 Kokkos::View.");
170  static_assert (static_cast<int> (IdxView::rank) == 1,
171  "IdxView must be a rank-1 Kokkos::View.");
172  static_assert (std::is_integral<SizeType>::value,
173  "SizeType must be a built-in integer type.");
174  public:
175  typedef SizeType size_type;
176  using value_type = size_t;
177 
178  private:
179  DstView dst;
180  SrcView src;
181  IdxView idx;
182  size_type col;
183 
184  public:
185  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
186  const SrcView& src_,
187  const IdxView& idx_,
188  const size_type col_) :
189  dst (dst_), src (src_), idx (idx_), col (col_) {}
190 
191  KOKKOS_INLINE_FUNCTION void
192  operator() (const size_type k, value_type& lclErrCount) const {
193  using index_type = typename IdxView::non_const_value_type;
194 
195  const index_type lclRow = idx(k);
196  if (lclRow < static_cast<index_type> (0) ||
197  lclRow >= static_cast<index_type> (src.extent (0))) {
198  ++lclErrCount;
199  }
200  else {
201  dst(k) = src(lclRow, col);
202  }
203  }
204 
205  KOKKOS_INLINE_FUNCTION
206  void init (value_type& initialErrorCount) const {
207  initialErrorCount = 0;
208  }
209 
210  KOKKOS_INLINE_FUNCTION void
211  join (volatile value_type& dstErrorCount,
212  const volatile value_type& srcErrorCount) const
213  {
214  dstErrorCount += srcErrorCount;
215  }
216 
217  static void
218  pack (const DstView& dst,
219  const SrcView& src,
220  const IdxView& idx,
221  const size_type col)
222  {
223  typedef typename DstView::execution_space execution_space;
224  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
225  typedef typename IdxView::non_const_value_type index_type;
226 
227  size_t errorCount = 0;
228  Kokkos::parallel_reduce
229  ("Tpetra::MultiVector pack one col debug only",
230  range_type (0, idx.size ()),
231  PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
232  errorCount);
233  if (errorCount != 0) {
234  // Go back and find the out-of-bounds entries in the index
235  // array. Performance doesn't matter since we are already in
236  // an error state, so we can do this sequentially, on host.
237  auto idx_h = Kokkos::create_mirror_view (idx);
238  Kokkos::deep_copy (idx_h, idx);
239 
240  std::vector<index_type> badIndices;
241  const size_type numInds = idx_h.extent (0);
242  for (size_type k = 0; k < numInds; ++k) {
243  if (idx_h(k) < static_cast<index_type> (0) ||
244  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
245  badIndices.push_back (idx_h(k));
246  }
247  }
248 
249  TEUCHOS_TEST_FOR_EXCEPTION
250  (errorCount != badIndices.size (), std::logic_error,
251  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
252  << " != badIndices.size() = " << badIndices.size () << ". This sho"
253  "uld never happen. Please report this to the Tpetra developers.");
254 
255  std::ostringstream os;
256  os << "MultiVector single-column pack kernel had "
257  << badIndices.size () << " out-of bounds index/ices. "
258  "Here they are: [";
259  for (size_t k = 0; k < badIndices.size (); ++k) {
260  os << badIndices[k];
261  if (k + 1 < badIndices.size ()) {
262  os << ", ";
263  }
264  }
265  os << "].";
266  throw std::runtime_error (os.str ());
267  }
268  }
269  };
270 
271 
272  template <typename DstView, typename SrcView, typename IdxView>
273  void
274  pack_array_single_column (const DstView& dst,
275  const SrcView& src,
276  const IdxView& idx,
277  const size_t col,
278  const bool debug = true)
279  {
280  static_assert (Kokkos::Impl::is_view<DstView>::value,
281  "DstView must be a Kokkos::View.");
282  static_assert (Kokkos::Impl::is_view<SrcView>::value,
283  "SrcView must be a Kokkos::View.");
284  static_assert (Kokkos::Impl::is_view<IdxView>::value,
285  "IdxView must be a Kokkos::View.");
286  static_assert (static_cast<int> (DstView::rank) == 1,
287  "DstView must be a rank-1 Kokkos::View.");
288  static_assert (static_cast<int> (SrcView::rank) == 2,
289  "SrcView must be a rank-2 Kokkos::View.");
290  static_assert (static_cast<int> (IdxView::rank) == 1,
291  "IdxView must be a rank-1 Kokkos::View.");
292 
293  if (debug) {
294  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
295  impl_type::pack (dst, src, idx, col);
296  }
297  else {
298  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
299  impl_type::pack (dst, src, idx, col);
300  }
301  }
302 
303  template <typename DstView, typename SrcView, typename IdxView>
304  struct PackArrayMultiColumn {
305  typedef typename DstView::execution_space execution_space;
306  typedef typename execution_space::size_type size_type;
307 
308  DstView dst;
309  SrcView src;
310  IdxView idx;
311  size_t numCols;
312 
313  PackArrayMultiColumn (const DstView& dst_,
314  const SrcView& src_,
315  const IdxView& idx_,
316  const size_t numCols_) :
317  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
318 
319  KOKKOS_INLINE_FUNCTION void
320  operator() (const size_type k) const {
321  const typename IdxView::value_type localRow = idx(k);
322  const size_t offset = k*numCols;
323  for (size_t j = 0; j < numCols; ++j) {
324  dst(offset + j) = src(localRow, j);
325  }
326  }
327 
328  static void pack(const DstView& dst,
329  const SrcView& src,
330  const IdxView& idx,
331  size_t numCols) {
332  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
333  Kokkos::parallel_for
334  ("Tpetra::MultiVector pack multicol const stride",
335  range_type (0, idx.size ()),
336  PackArrayMultiColumn (dst, src, idx, numCols));
337  }
338  };
339 
340  template <typename DstView,
341  typename SrcView,
342  typename IdxView,
343  typename SizeType = typename DstView::execution_space::size_type>
344  class PackArrayMultiColumnWithBoundsCheck {
345  public:
346  using size_type = SizeType;
347  using value_type = size_t;
348 
349  private:
350  DstView dst;
351  SrcView src;
352  IdxView idx;
353  size_type numCols;
354 
355  public:
356  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
357  const SrcView& src_,
358  const IdxView& idx_,
359  const size_type numCols_) :
360  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
361 
362  KOKKOS_INLINE_FUNCTION void
363  operator() (const size_type k, value_type& lclErrorCount) const {
364  typedef typename IdxView::non_const_value_type index_type;
365 
366  const index_type lclRow = idx(k);
367  if (lclRow < static_cast<index_type> (0) ||
368  lclRow >= static_cast<index_type> (src.extent (0))) {
369  ++lclErrorCount; // failed
370  }
371  else {
372  const size_type offset = k*numCols;
373  for (size_type j = 0; j < numCols; ++j) {
374  dst(offset + j) = src(lclRow, j);
375  }
376  }
377  }
378 
379  KOKKOS_INLINE_FUNCTION
380  void init (value_type& initialErrorCount) const {
381  initialErrorCount = 0;
382  }
383 
384  KOKKOS_INLINE_FUNCTION void
385  join (volatile value_type& dstErrorCount,
386  const volatile value_type& srcErrorCount) const
387  {
388  dstErrorCount += srcErrorCount;
389  }
390 
391  static void
392  pack (const DstView& dst,
393  const SrcView& src,
394  const IdxView& idx,
395  const size_type numCols)
396  {
397  typedef typename DstView::execution_space execution_space;
398  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
399  typedef typename IdxView::non_const_value_type index_type;
400 
401  size_t errorCount = 0;
402  Kokkos::parallel_reduce
403  ("Tpetra::MultiVector pack multicol const stride debug only",
404  range_type (0, idx.size ()),
405  PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
406  errorCount);
407  if (errorCount != 0) {
408  // Go back and find the out-of-bounds entries in the index
409  // array. Performance doesn't matter since we are already in
410  // an error state, so we can do this sequentially, on host.
411  auto idx_h = Kokkos::create_mirror_view (idx);
412  Kokkos::deep_copy (idx_h, idx);
413 
414  std::vector<index_type> badIndices;
415  const size_type numInds = idx_h.extent (0);
416  for (size_type k = 0; k < numInds; ++k) {
417  if (idx_h(k) < static_cast<index_type> (0) ||
418  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
419  badIndices.push_back (idx_h(k));
420  }
421  }
422 
423  TEUCHOS_TEST_FOR_EXCEPTION
424  (errorCount != badIndices.size (), std::logic_error,
425  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
426  << " != badIndices.size() = " << badIndices.size () << ". This sho"
427  "uld never happen. Please report this to the Tpetra developers.");
428 
429  std::ostringstream os;
430  os << "Tpetra::MultiVector multiple-column pack kernel had "
431  << badIndices.size () << " out-of bounds index/ices (errorCount = "
432  << errorCount << "): [";
433  for (size_t k = 0; k < badIndices.size (); ++k) {
434  os << badIndices[k];
435  if (k + 1 < badIndices.size ()) {
436  os << ", ";
437  }
438  }
439  os << "].";
440  throw std::runtime_error (os.str ());
441  }
442  }
443  };
444 
445 
446  template <typename DstView,
447  typename SrcView,
448  typename IdxView>
449  void
450  pack_array_multi_column (const DstView& dst,
451  const SrcView& src,
452  const IdxView& idx,
453  const size_t numCols,
454  const bool debug = true)
455  {
456  static_assert (Kokkos::Impl::is_view<DstView>::value,
457  "DstView must be a Kokkos::View.");
458  static_assert (Kokkos::Impl::is_view<SrcView>::value,
459  "SrcView must be a Kokkos::View.");
460  static_assert (Kokkos::Impl::is_view<IdxView>::value,
461  "IdxView must be a Kokkos::View.");
462  static_assert (static_cast<int> (DstView::rank) == 1,
463  "DstView must be a rank-1 Kokkos::View.");
464  static_assert (static_cast<int> (SrcView::rank) == 2,
465  "SrcView must be a rank-2 Kokkos::View.");
466  static_assert (static_cast<int> (IdxView::rank) == 1,
467  "IdxView must be a rank-1 Kokkos::View.");
468 
469  if (debug) {
470  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
471  SrcView, IdxView> impl_type;
472  impl_type::pack (dst, src, idx, numCols);
473  }
474  else {
475  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
476  impl_type::pack (dst, src, idx, numCols);
477  }
478  }
479 
480  template <typename DstView, typename SrcView, typename IdxView,
481  typename ColView>
482  struct PackArrayMultiColumnVariableStride {
483  typedef typename DstView::execution_space execution_space;
484  typedef typename execution_space::size_type size_type;
485 
486  DstView dst;
487  SrcView src;
488  IdxView idx;
489  ColView col;
490  size_t numCols;
491 
492  PackArrayMultiColumnVariableStride (const DstView& dst_,
493  const SrcView& src_,
494  const IdxView& idx_,
495  const ColView& col_,
496  const size_t numCols_) :
497  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
498 
499  KOKKOS_INLINE_FUNCTION
500  void operator() (const size_type k) const {
501  const typename IdxView::value_type localRow = idx(k);
502  const size_t offset = k*numCols;
503  for (size_t j = 0; j < numCols; ++j) {
504  dst(offset + j) = src(localRow, col(j));
505  }
506  }
507 
508  static void pack(const DstView& dst,
509  const SrcView& src,
510  const IdxView& idx,
511  const ColView& col,
512  size_t numCols) {
513  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
514  Kokkos::parallel_for
515  ("Tpetra::MultiVector pack multicol var stride",
516  range_type (0, idx.size ()),
517  PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
518  }
519  };
520 
521  template <typename DstView,
522  typename SrcView,
523  typename IdxView,
524  typename ColView,
525  typename SizeType = typename DstView::execution_space::size_type>
526  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
527  public:
528  using size_type = SizeType;
529  using value_type = size_t;
530 
531  private:
532  DstView dst;
533  SrcView src;
534  IdxView idx;
535  ColView col;
536  size_type numCols;
537 
538  public:
539  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
540  const SrcView& src_,
541  const IdxView& idx_,
542  const ColView& col_,
543  const size_type numCols_) :
544  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
545 
546  KOKKOS_INLINE_FUNCTION void
547  operator() (const size_type k, value_type& lclErrorCount) const {
548  typedef typename IdxView::non_const_value_type row_index_type;
549  typedef typename ColView::non_const_value_type col_index_type;
550 
551  const row_index_type lclRow = idx(k);
552  if (lclRow < static_cast<row_index_type> (0) ||
553  lclRow >= static_cast<row_index_type> (src.extent (0))) {
554  ++lclErrorCount = 0;
555  }
556  else {
557  const size_type offset = k*numCols;
558  for (size_type j = 0; j < numCols; ++j) {
559  const col_index_type lclCol = col(j);
560  if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
561  ++lclErrorCount = 0;
562  }
563  else { // all indices are valid; do the assignment
564  dst(offset + j) = src(lclRow, lclCol);
565  }
566  }
567  }
568  }
569 
570  KOKKOS_INLINE_FUNCTION void
571  init (value_type& initialErrorCount) const {
572  initialErrorCount = 0;
573  }
574 
575  KOKKOS_INLINE_FUNCTION void
576  join (volatile value_type& dstErrorCount,
577  const volatile value_type& srcErrorCount) const
578  {
579  dstErrorCount += srcErrorCount;
580  }
581 
582  static void
583  pack (const DstView& dst,
584  const SrcView& src,
585  const IdxView& idx,
586  const ColView& col,
587  const size_type numCols)
588  {
589  using execution_space = typename DstView::execution_space;
590  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
591  using row_index_type = typename IdxView::non_const_value_type;
592  using col_index_type = typename ColView::non_const_value_type;
593 
594  size_t errorCount = 0;
595  Kokkos::parallel_reduce
596  ("Tpetra::MultiVector pack multicol var stride debug only",
597  range_type (0, idx.size ()),
598  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
599  col, numCols),
600  errorCount);
601  if (errorCount != 0) {
602  constexpr size_t maxNumBadIndicesToPrint = 100;
603 
604  std::ostringstream os; // for error reporting
605  os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
606  "found " << errorCount
607  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
608 
609  // Go back and find any out-of-bounds entries in the array of
610  // row indices. Performance doesn't matter since we are already
611  // in an error state, so we can do this sequentially, on host.
612  auto idx_h = Kokkos::create_mirror_view (idx);
613  Kokkos::deep_copy (idx_h, idx);
614 
615  std::vector<row_index_type> badRows;
616  const size_type numRowInds = idx_h.extent (0);
617  for (size_type k = 0; k < numRowInds; ++k) {
618  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
619  badRows.push_back (idx_h(k));
620  }
621  }
622 
623  if (badRows.size () != 0) {
624  os << badRows.size () << " out-of-bounds row ind"
625  << (badRows.size () != size_t (1) ? "ices" : "ex");
626  if (badRows.size () <= maxNumBadIndicesToPrint) {
627  os << ": [";
628  for (size_t k = 0; k < badRows.size (); ++k) {
629  os << badRows[k];
630  if (k + 1 < badRows.size ()) {
631  os << ", ";
632  }
633  }
634  os << "]. ";
635  }
636  else {
637  os << ". ";
638  }
639  }
640  else {
641  os << "No out-of-bounds row indices. ";
642  }
643 
644  // Go back and find any out-of-bounds entries in the array
645  // of column indices.
646  auto col_h = Kokkos::create_mirror_view (col);
647  Kokkos::deep_copy (col_h, col);
648 
649  std::vector<col_index_type> badCols;
650  const size_type numColInds = col_h.extent (0);
651  for (size_type k = 0; k < numColInds; ++k) {
652  if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
653  badCols.push_back (col_h(k));
654  }
655  }
656 
657  if (badCols.size () != 0) {
658  os << badCols.size () << " out-of-bounds column ind"
659  << (badCols.size () != size_t (1) ? "ices" : "ex");
660  if (badCols.size () <= maxNumBadIndicesToPrint) {
661  os << ": [";
662  for (size_t k = 0; k < badCols.size (); ++k) {
663  os << badCols[k];
664  if (k + 1 < badCols.size ()) {
665  os << ", ";
666  }
667  }
668  os << "]. ";
669  }
670  else {
671  os << ". ";
672  }
673  }
674  else {
675  os << "No out-of-bounds column indices. ";
676  }
677 
678  TEUCHOS_TEST_FOR_EXCEPTION
679  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
680  std::logic_error, "Tpetra::MultiVector variable stride pack "
681  "kernel reports errorCount=" << errorCount << ", but we failed "
682  "to find any bad rows or columns. This should never happen. "
683  "Please report this to the Tpetra developers.");
684 
685  throw std::runtime_error (os.str ());
686  } // hasErr
687  }
688  };
689 
690  template <typename DstView,
691  typename SrcView,
692  typename IdxView,
693  typename ColView>
694  void
695  pack_array_multi_column_variable_stride (const DstView& dst,
696  const SrcView& src,
697  const IdxView& idx,
698  const ColView& col,
699  const size_t numCols,
700  const bool debug = true)
701  {
702  static_assert (Kokkos::Impl::is_view<DstView>::value,
703  "DstView must be a Kokkos::View.");
704  static_assert (Kokkos::Impl::is_view<SrcView>::value,
705  "SrcView must be a Kokkos::View.");
706  static_assert (Kokkos::Impl::is_view<IdxView>::value,
707  "IdxView must be a Kokkos::View.");
708  static_assert (Kokkos::Impl::is_view<ColView>::value,
709  "ColView must be a Kokkos::View.");
710  static_assert (static_cast<int> (DstView::rank) == 1,
711  "DstView must be a rank-1 Kokkos::View.");
712  static_assert (static_cast<int> (SrcView::rank) == 2,
713  "SrcView must be a rank-2 Kokkos::View.");
714  static_assert (static_cast<int> (IdxView::rank) == 1,
715  "IdxView must be a rank-1 Kokkos::View.");
716  static_assert (static_cast<int> (ColView::rank) == 1,
717  "ColView must be a rank-1 Kokkos::View.");
718 
719  if (debug) {
720  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
721  SrcView, IdxView, ColView> impl_type;
722  impl_type::pack (dst, src, idx, col, numCols);
723  }
724  else {
725  typedef PackArrayMultiColumnVariableStride<DstView,
726  SrcView, IdxView, ColView> impl_type;
727  impl_type::pack (dst, src, idx, col, numCols);
728  }
729  }
730 
731  // Tag types to indicate whether to use atomic updates in the
732  // various CombineMode "Op"s.
733  struct atomic_tag {};
734  struct nonatomic_tag {};
735 
736  template<class SC>
737  struct AddOp {
738  KOKKOS_INLINE_FUNCTION
739  void operator() (atomic_tag, SC& dest, const SC& src) const {
740  Kokkos::atomic_add (&dest, src);
741  }
742 
743  KOKKOS_INLINE_FUNCTION
744  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
745  dest += src;
746  }
747  };
748 
749  template<class SC>
750  struct InsertOp {
751  // There's no point to using Kokkos::atomic_assign for the REPLACE
752  // or INSERT CombineModes, since this is not a well-defined
753  // reduction for MultiVector anyway. See GitHub Issue #4417
754  // (which this fixes).
755  KOKKOS_INLINE_FUNCTION
756  void operator() (atomic_tag, SC& dest, const SC& src) const {
757  dest = src;
758  }
759 
760  KOKKOS_INLINE_FUNCTION
761  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
762  dest = src;
763  }
764  };
765 
766  // Kokkos::Impl::atomic_fetch_oper wants a class like this.
767  template<class Scalar1, class Scalar2>
768  struct AbsMaxOper {
769  KOKKOS_INLINE_FUNCTION
770  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
771  const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
772  const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
773  return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
774  }
775  };
776 
777  template <typename SC>
778  struct AbsMaxOp {
779  KOKKOS_INLINE_FUNCTION
780  void operator() (atomic_tag, SC& dest, const SC& src) const {
781  Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
782  }
783 
784  KOKKOS_INLINE_FUNCTION
785  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
786  dest = AbsMaxOper<SC, SC> ().apply (dest, src);
787  }
788  };
789 
790  template <typename ExecutionSpace,
791  typename DstView,
792  typename SrcView,
793  typename IdxView,
794  typename Op>
795  class UnpackArrayMultiColumn {
796  private:
797  static_assert (Kokkos::Impl::is_view<DstView>::value,
798  "DstView must be a Kokkos::View.");
799  static_assert (Kokkos::Impl::is_view<SrcView>::value,
800  "SrcView must be a Kokkos::View.");
801  static_assert (Kokkos::Impl::is_view<IdxView>::value,
802  "IdxView must be a Kokkos::View.");
803  static_assert (static_cast<int> (DstView::rank) == 2,
804  "DstView must be a rank-2 Kokkos::View.");
805  static_assert (static_cast<int> (SrcView::rank) == 1,
806  "SrcView must be a rank-1 Kokkos::View.");
807  static_assert (static_cast<int> (IdxView::rank) == 1,
808  "IdxView must be a rank-1 Kokkos::View.");
809 
810  public:
811  typedef typename ExecutionSpace::execution_space execution_space;
812  typedef typename execution_space::size_type size_type;
813 
814  private:
815  DstView dst;
816  SrcView src;
817  IdxView idx;
818  Op op;
819  size_t numCols;
820 
821  public:
822  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
823  const DstView& dst_,
824  const SrcView& src_,
825  const IdxView& idx_,
826  const Op& op_,
827  const size_t numCols_) :
828  dst (dst_),
829  src (src_),
830  idx (idx_),
831  op (op_),
832  numCols (numCols_)
833  {}
834 
835  template<class TagType>
836  KOKKOS_INLINE_FUNCTION void
837  operator() (TagType tag, const size_type k) const
838  {
839  static_assert
840  (std::is_same<TagType, atomic_tag>::value ||
841  std::is_same<TagType, nonatomic_tag>::value,
842  "TagType must be atomic_tag or nonatomic_tag.");
843 
844  const typename IdxView::value_type localRow = idx(k);
845  const size_t offset = k*numCols;
846  for (size_t j = 0; j < numCols; ++j) {
847  op (tag, dst(localRow, j), src(offset+j));
848  }
849  }
850 
851  static void
852  unpack (const ExecutionSpace& execSpace,
853  const DstView& dst,
854  const SrcView& src,
855  const IdxView& idx,
856  const Op& op,
857  const size_t numCols,
858  const bool use_atomic_updates)
859  {
860  if (use_atomic_updates) {
861  using range_type =
862  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
863  Kokkos::parallel_for
864  ("Tpetra::MultiVector unpack const stride atomic",
865  range_type (0, idx.size ()),
866  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
867  }
868  else {
869  using range_type =
870  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
871  Kokkos::parallel_for
872  ("Tpetra::MultiVector unpack const stride nonatomic",
873  range_type (0, idx.size ()),
874  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
875  }
876  }
877  };
878 
879  template <typename ExecutionSpace,
880  typename DstView,
881  typename SrcView,
882  typename IdxView,
883  typename Op,
884  typename SizeType = typename ExecutionSpace::execution_space::size_type>
885  class UnpackArrayMultiColumnWithBoundsCheck {
886  private:
887  static_assert (Kokkos::Impl::is_view<DstView>::value,
888  "DstView must be a Kokkos::View.");
889  static_assert (Kokkos::Impl::is_view<SrcView>::value,
890  "SrcView must be a Kokkos::View.");
891  static_assert (Kokkos::Impl::is_view<IdxView>::value,
892  "IdxView must be a Kokkos::View.");
893  static_assert (static_cast<int> (DstView::rank) == 2,
894  "DstView must be a rank-2 Kokkos::View.");
895  static_assert (static_cast<int> (SrcView::rank) == 1,
896  "SrcView must be a rank-1 Kokkos::View.");
897  static_assert (static_cast<int> (IdxView::rank) == 1,
898  "IdxView must be a rank-1 Kokkos::View.");
899  static_assert (std::is_integral<SizeType>::value,
900  "SizeType must be a built-in integer type.");
901 
902  public:
903  using execution_space = typename ExecutionSpace::execution_space;
904  using size_type = SizeType;
905  using value_type = size_t;
906 
907  private:
908  DstView dst;
909  SrcView src;
910  IdxView idx;
911  Op op;
912  size_type numCols;
913 
914  public:
915  UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
916  const DstView& dst_,
917  const SrcView& src_,
918  const IdxView& idx_,
919  const Op& op_,
920  const size_type numCols_) :
921  dst (dst_),
922  src (src_),
923  idx (idx_),
924  op (op_),
925  numCols (numCols_)
926  {}
927 
928  template<class TagType>
929  KOKKOS_INLINE_FUNCTION void
930  operator() (TagType tag,
931  const size_type k,
932  size_t& lclErrCount) const
933  {
934  static_assert
935  (std::is_same<TagType, atomic_tag>::value ||
936  std::is_same<TagType, nonatomic_tag>::value,
937  "TagType must be atomic_tag or nonatomic_tag.");
938  using index_type = typename IdxView::non_const_value_type;
939 
940  const index_type lclRow = idx(k);
941  if (lclRow < static_cast<index_type> (0) ||
942  lclRow >= static_cast<index_type> (dst.extent (0))) {
943  ++lclErrCount;
944  }
945  else {
946  const size_type offset = k*numCols;
947  for (size_type j = 0; j < numCols; ++j) {
948  op (tag, dst(lclRow,j), src(offset+j));
949  }
950  }
951  }
952 
953  template<class TagType>
954  KOKKOS_INLINE_FUNCTION void
955  init (TagType, size_t& initialErrorCount) const {
956  initialErrorCount = 0;
957  }
958 
959  template<class TagType>
960  KOKKOS_INLINE_FUNCTION void
961  join (TagType,
962  volatile size_t& dstErrorCount,
963  const volatile size_t& srcErrorCount) const
964  {
965  dstErrorCount += srcErrorCount;
966  }
967 
968  static void
969  unpack (const ExecutionSpace& execSpace,
970  const DstView& dst,
971  const SrcView& src,
972  const IdxView& idx,
973  const Op& op,
974  const size_type numCols,
975  const bool use_atomic_updates)
976  {
977  using index_type = typename IdxView::non_const_value_type;
978 
979  size_t errorCount = 0;
980  if (use_atomic_updates) {
981  using range_type =
982  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
983  Kokkos::parallel_reduce
984  ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
985  range_type (0, idx.size ()),
986  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
987  idx, op, numCols),
988  errorCount);
989  }
990  else {
991  using range_type =
992  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
993  Kokkos::parallel_reduce
994  ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
995  range_type (0, idx.size ()),
996  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
997  idx, op, numCols),
998  errorCount);
999  }
1000 
1001  if (errorCount != 0) {
1002  // Go back and find the out-of-bounds entries in the index
1003  // array. Performance doesn't matter since we are already in
1004  // an error state, so we can do this sequentially, on host.
1005  auto idx_h = Kokkos::create_mirror_view (idx);
1006  Kokkos::deep_copy (idx_h, idx);
1007 
1008  std::vector<index_type> badIndices;
1009  const size_type numInds = idx_h.extent (0);
1010  for (size_type k = 0; k < numInds; ++k) {
1011  if (idx_h(k) < static_cast<index_type> (0) ||
1012  idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1013  badIndices.push_back (idx_h(k));
1014  }
1015  }
1016 
1017  if (errorCount != badIndices.size ()) {
1018  std::ostringstream os;
1019  os << "MultiVector unpack kernel: errorCount = " << errorCount
1020  << " != badIndices.size() = " << badIndices.size ()
1021  << ". This should never happen. "
1022  "Please report this to the Tpetra developers.";
1023  throw std::logic_error (os.str ());
1024  }
1025 
1026  std::ostringstream os;
1027  os << "MultiVector unpack kernel had " << badIndices.size ()
1028  << " out-of bounds index/ices. Here they are: [";
1029  for (size_t k = 0; k < badIndices.size (); ++k) {
1030  os << badIndices[k];
1031  if (k + 1 < badIndices.size ()) {
1032  os << ", ";
1033  }
1034  }
1035  os << "].";
1036  throw std::runtime_error (os.str ());
1037  }
1038  }
1039  };
1040 
1041  template <typename ExecutionSpace,
1042  typename DstView,
1043  typename SrcView,
1044  typename IdxView,
1045  typename Op>
1046  void
1047  unpack_array_multi_column (const ExecutionSpace& execSpace,
1048  const DstView& dst,
1049  const SrcView& src,
1050  const IdxView& idx,
1051  const Op& op,
1052  const size_t numCols,
1053  const bool use_atomic_updates,
1054  const bool debug)
1055  {
1056  static_assert (Kokkos::Impl::is_view<DstView>::value,
1057  "DstView must be a Kokkos::View.");
1058  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1059  "SrcView must be a Kokkos::View.");
1060  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1061  "IdxView must be a Kokkos::View.");
1062  static_assert (static_cast<int> (DstView::rank) == 2,
1063  "DstView must be a rank-2 Kokkos::View.");
1064  static_assert (static_cast<int> (SrcView::rank) == 1,
1065  "SrcView must be a rank-1 Kokkos::View.");
1066  static_assert (static_cast<int> (IdxView::rank) == 1,
1067  "IdxView must be a rank-1 Kokkos::View.");
1068 
1069  if (debug) {
1070  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1071  DstView, SrcView, IdxView, Op> impl_type;
1072  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1073  use_atomic_updates);
1074  }
1075  else {
1076  typedef UnpackArrayMultiColumn<ExecutionSpace,
1077  DstView, SrcView, IdxView, Op> impl_type;
1078  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1079  use_atomic_updates);
1080  }
1081  }
1082 
1083  template <typename ExecutionSpace,
1084  typename DstView,
1085  typename SrcView,
1086  typename IdxView,
1087  typename ColView,
1088  typename Op>
1089  class UnpackArrayMultiColumnVariableStride {
1090  private:
1091  static_assert (Kokkos::Impl::is_view<DstView>::value,
1092  "DstView must be a Kokkos::View.");
1093  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1094  "SrcView must be a Kokkos::View.");
1095  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1096  "IdxView must be a Kokkos::View.");
1097  static_assert (Kokkos::Impl::is_view<ColView>::value,
1098  "ColView must be a Kokkos::View.");
1099  static_assert (static_cast<int> (DstView::rank) == 2,
1100  "DstView must be a rank-2 Kokkos::View.");
1101  static_assert (static_cast<int> (SrcView::rank) == 1,
1102  "SrcView must be a rank-1 Kokkos::View.");
1103  static_assert (static_cast<int> (IdxView::rank) == 1,
1104  "IdxView must be a rank-1 Kokkos::View.");
1105  static_assert (static_cast<int> (ColView::rank) == 1,
1106  "ColView must be a rank-1 Kokkos::View.");
1107 
1108  public:
1109  using execution_space = typename ExecutionSpace::execution_space;
1110  using size_type = typename execution_space::size_type;
1111 
1112  private:
1113  DstView dst;
1114  SrcView src;
1115  IdxView idx;
1116  ColView col;
1117  Op op;
1118  size_t numCols;
1119 
1120  public:
1121  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1122  const DstView& dst_,
1123  const SrcView& src_,
1124  const IdxView& idx_,
1125  const ColView& col_,
1126  const Op& op_,
1127  const size_t numCols_) :
1128  dst (dst_),
1129  src (src_),
1130  idx (idx_),
1131  col (col_),
1132  op (op_),
1133  numCols (numCols_)
1134  {}
1135 
1136  template<class TagType>
1137  KOKKOS_INLINE_FUNCTION void
1138  operator() (TagType tag, const size_type k) const
1139  {
1140  static_assert
1141  (std::is_same<TagType, atomic_tag>::value ||
1142  std::is_same<TagType, nonatomic_tag>::value,
1143  "TagType must be atomic_tag or nonatomic_tag.");
1144 
1145  const typename IdxView::value_type localRow = idx(k);
1146  const size_t offset = k*numCols;
1147  for (size_t j = 0; j < numCols; ++j) {
1148  op (tag, dst(localRow, col(j)), src(offset+j));
1149  }
1150  }
1151 
1152  static void
1153  unpack (const ExecutionSpace& execSpace,
1154  const DstView& dst,
1155  const SrcView& src,
1156  const IdxView& idx,
1157  const ColView& col,
1158  const Op& op,
1159  const size_t numCols,
1160  const bool use_atomic_updates)
1161  {
1162  if (use_atomic_updates) {
1163  using range_type =
1164  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1165  Kokkos::parallel_for
1166  ("Tpetra::MultiVector unpack var stride atomic",
1167  range_type (0, idx.size ()),
1168  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1169  idx, col, op, numCols));
1170  }
1171  else {
1172  using range_type =
1173  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1174  Kokkos::parallel_for
1175  ("Tpetra::MultiVector unpack var stride nonatomic",
1176  range_type (0, idx.size ()),
1177  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1178  idx, col, op, numCols));
1179  }
1180  }
1181  };
1182 
1183  template <typename ExecutionSpace,
1184  typename DstView,
1185  typename SrcView,
1186  typename IdxView,
1187  typename ColView,
1188  typename Op,
1189  typename SizeType = typename ExecutionSpace::execution_space::size_type>
1190  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1191  private:
1192  static_assert (Kokkos::Impl::is_view<DstView>::value,
1193  "DstView must be a Kokkos::View.");
1194  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1195  "SrcView must be a Kokkos::View.");
1196  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1197  "IdxView must be a Kokkos::View.");
1198  static_assert (Kokkos::Impl::is_view<ColView>::value,
1199  "ColView must be a Kokkos::View.");
1200  static_assert (static_cast<int> (DstView::rank) == 2,
1201  "DstView must be a rank-2 Kokkos::View.");
1202  static_assert (static_cast<int> (SrcView::rank) == 1,
1203  "SrcView must be a rank-1 Kokkos::View.");
1204  static_assert (static_cast<int> (IdxView::rank) == 1,
1205  "IdxView must be a rank-1 Kokkos::View.");
1206  static_assert (static_cast<int> (ColView::rank) == 1,
1207  "ColView must be a rank-1 Kokkos::View.");
1208  static_assert (std::is_integral<SizeType>::value,
1209  "SizeType must be a built-in integer type.");
1210 
1211  public:
1212  using execution_space = typename ExecutionSpace::execution_space;
1213  using size_type = SizeType;
1214  using value_type = size_t;
1215 
1216  private:
1217  DstView dst;
1218  SrcView src;
1219  IdxView idx;
1220  ColView col;
1221  Op op;
1222  size_type numCols;
1223 
1224  public:
1225  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1226  (const ExecutionSpace& /* execSpace */,
1227  const DstView& dst_,
1228  const SrcView& src_,
1229  const IdxView& idx_,
1230  const ColView& col_,
1231  const Op& op_,
1232  const size_t numCols_) :
1233  dst (dst_),
1234  src (src_),
1235  idx (idx_),
1236  col (col_),
1237  op (op_),
1238  numCols (numCols_)
1239  {}
1240 
1241  template<class TagType>
1242  KOKKOS_INLINE_FUNCTION void
1243  operator() (TagType tag,
1244  const size_type k,
1245  value_type& lclErrorCount) const
1246  {
1247  static_assert
1248  (std::is_same<TagType, atomic_tag>::value ||
1249  std::is_same<TagType, nonatomic_tag>::value,
1250  "TagType must be atomic_tag or nonatomic_tag.");
1251  using row_index_type = typename IdxView::non_const_value_type;
1252  using col_index_type = typename ColView::non_const_value_type;
1253 
1254  const row_index_type lclRow = idx(k);
1255  if (lclRow < static_cast<row_index_type> (0) ||
1256  lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1257  ++lclErrorCount;
1258  }
1259  else {
1260  const size_type offset = k * numCols;
1261  for (size_type j = 0; j < numCols; ++j) {
1262  const col_index_type lclCol = col(j);
1263  if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1264  ++lclErrorCount;
1265  }
1266  else { // all indices are valid; apply the op
1267  op (tag, dst(lclRow, col(j)), src(offset+j));
1268  }
1269  }
1270  }
1271  }
1272 
1273  KOKKOS_INLINE_FUNCTION void
1274  init (value_type& initialErrorCount) const {
1275  initialErrorCount = 0;
1276  }
1277 
1278  KOKKOS_INLINE_FUNCTION void
1279  join (volatile value_type& dstErrorCount,
1280  const volatile value_type& srcErrorCount) const
1281  {
1282  dstErrorCount += srcErrorCount;
1283  }
1284 
1285  static void
1286  unpack (const ExecutionSpace& execSpace,
1287  const DstView& dst,
1288  const SrcView& src,
1289  const IdxView& idx,
1290  const ColView& col,
1291  const Op& op,
1292  const size_type numCols,
1293  const bool use_atomic_updates)
1294  {
1295  using row_index_type = typename IdxView::non_const_value_type;
1296  using col_index_type = typename ColView::non_const_value_type;
1297 
1298  size_t errorCount = 0;
1299  if (use_atomic_updates) {
1300  using range_type =
1301  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1302  Kokkos::parallel_reduce
1303  ("Tpetra::MultiVector unpack var stride atomic debug only",
1304  range_type (0, idx.size ()),
1305  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1306  (execSpace, dst, src, idx, col, op, numCols),
1307  errorCount);
1308  }
1309  else {
1310  using range_type =
1311  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1312  Kokkos::parallel_reduce
1313  ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1314  range_type (0, idx.size ()),
1315  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1316  (execSpace, dst, src, idx, col, op, numCols),
1317  errorCount);
1318  }
1319 
1320  if (errorCount != 0) {
1321  constexpr size_t maxNumBadIndicesToPrint = 100;
1322 
1323  std::ostringstream os; // for error reporting
1324  os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1325  "found " << errorCount
1326  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1327 
1328  // Go back and find any out-of-bounds entries in the array of
1329  // row indices. Performance doesn't matter since we are
1330  // already in an error state, so we can do this sequentially,
1331  // on host.
1332  auto idx_h = Kokkos::create_mirror_view (idx);
1333  Kokkos::deep_copy (idx_h, idx);
1334 
1335  std::vector<row_index_type> badRows;
1336  const size_type numRowInds = idx_h.extent (0);
1337  for (size_type k = 0; k < numRowInds; ++k) {
1338  if (idx_h(k) < static_cast<row_index_type> (0) ||
1339  idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1340  badRows.push_back (idx_h(k));
1341  }
1342  }
1343 
1344  if (badRows.size () != 0) {
1345  os << badRows.size () << " out-of-bounds row ind"
1346  << (badRows.size () != size_t (1) ? "ices" : "ex");
1347  if (badRows.size () <= maxNumBadIndicesToPrint) {
1348  os << ": [";
1349  for (size_t k = 0; k < badRows.size (); ++k) {
1350  os << badRows[k];
1351  if (k + 1 < badRows.size ()) {
1352  os << ", ";
1353  }
1354  }
1355  os << "]. ";
1356  }
1357  else {
1358  os << ". ";
1359  }
1360  }
1361  else {
1362  os << "No out-of-bounds row indices. ";
1363  }
1364 
1365  // Go back and find any out-of-bounds entries in the array
1366  // of column indices.
1367  auto col_h = Kokkos::create_mirror_view (col);
1368  Kokkos::deep_copy (col_h, col);
1369 
1370  std::vector<col_index_type> badCols;
1371  const size_type numColInds = col_h.extent (0);
1372  for (size_type k = 0; k < numColInds; ++k) {
1373  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1374  badCols.push_back (col_h(k));
1375  }
1376  }
1377 
1378  if (badCols.size () != 0) {
1379  os << badCols.size () << " out-of-bounds column ind"
1380  << (badCols.size () != size_t (1) ? "ices" : "ex");
1381  if (badCols.size () <= maxNumBadIndicesToPrint) {
1382  for (size_t k = 0; k < badCols.size (); ++k) {
1383  os << ": [";
1384  os << badCols[k];
1385  if (k + 1 < badCols.size ()) {
1386  os << ", ";
1387  }
1388  }
1389  os << "]. ";
1390  }
1391  else {
1392  os << ". ";
1393  }
1394  }
1395  else {
1396  os << "No out-of-bounds column indices. ";
1397  }
1398 
1399  TEUCHOS_TEST_FOR_EXCEPTION
1400  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1401  std::logic_error, "Tpetra::MultiVector variable stride unpack "
1402  "kernel reports errorCount=" << errorCount << ", but we failed "
1403  "to find any bad rows or columns. This should never happen. "
1404  "Please report this to the Tpetra developers.");
1405 
1406  throw std::runtime_error (os.str ());
1407  } // hasErr
1408  }
1409  };
1410 
1411  template <typename ExecutionSpace,
1412  typename DstView,
1413  typename SrcView,
1414  typename IdxView,
1415  typename ColView,
1416  typename Op>
1417  void
1418  unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1419  const DstView& dst,
1420  const SrcView& src,
1421  const IdxView& idx,
1422  const ColView& col,
1423  const Op& op,
1424  const size_t numCols,
1425  const bool use_atomic_updates,
1426  const bool debug)
1427  {
1428  static_assert (Kokkos::Impl::is_view<DstView>::value,
1429  "DstView must be a Kokkos::View.");
1430  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1431  "SrcView must be a Kokkos::View.");
1432  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1433  "IdxView must be a Kokkos::View.");
1434  static_assert (Kokkos::Impl::is_view<ColView>::value,
1435  "ColView must be a Kokkos::View.");
1436  static_assert (static_cast<int> (DstView::rank) == 2,
1437  "DstView must be a rank-2 Kokkos::View.");
1438  static_assert (static_cast<int> (SrcView::rank) == 1,
1439  "SrcView must be a rank-1 Kokkos::View.");
1440  static_assert (static_cast<int> (IdxView::rank) == 1,
1441  "IdxView must be a rank-1 Kokkos::View.");
1442  static_assert (static_cast<int> (ColView::rank) == 1,
1443  "ColView must be a rank-1 Kokkos::View.");
1444 
1445  if (debug) {
1446  using impl_type =
1447  UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1448  DstView, SrcView, IdxView, ColView, Op>;
1449  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1450  use_atomic_updates);
1451  }
1452  else {
1453  using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1454  DstView, SrcView, IdxView, ColView, Op>;
1455  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1456  use_atomic_updates);
1457  }
1458  }
1459 
1460  template <typename DstView, typename SrcView,
1461  typename DstIdxView, typename SrcIdxView>
1462  struct PermuteArrayMultiColumn {
1463  typedef typename DstView::execution_space execution_space;
1464  typedef typename execution_space::size_type size_type;
1465 
1466  DstView dst;
1467  SrcView src;
1468  DstIdxView dst_idx;
1469  SrcIdxView src_idx;
1470  size_t numCols;
1471 
1472  PermuteArrayMultiColumn (const DstView& dst_,
1473  const SrcView& src_,
1474  const DstIdxView& dst_idx_,
1475  const SrcIdxView& src_idx_,
1476  const size_t numCols_) :
1477  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1478  numCols(numCols_) {}
1479 
1480  KOKKOS_INLINE_FUNCTION void
1481  operator() (const size_type k) const {
1482  const typename DstIdxView::value_type toRow = dst_idx(k);
1483  const typename SrcIdxView::value_type fromRow = src_idx(k);
1484  for (size_t j = 0; j < numCols; ++j) {
1485  dst(toRow, j) = src(fromRow, j);
1486  }
1487  }
1488 
1489  static void
1490  permute (const DstView& dst,
1491  const SrcView& src,
1492  const DstIdxView& dst_idx,
1493  const SrcIdxView& src_idx,
1494  const size_t numCols)
1495  {
1496  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1497  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1498  Kokkos::parallel_for
1499  ("Tpetra::MultiVector permute multicol const stride",
1500  range_type (0, n),
1501  PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols));
1502  }
1503  };
1504 
1505  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1506  // SrcView::Rank == 2
1507  template <typename DstView, typename SrcView,
1508  typename DstIdxView, typename SrcIdxView>
1509  void permute_array_multi_column(const DstView& dst,
1510  const SrcView& src,
1511  const DstIdxView& dst_idx,
1512  const SrcIdxView& src_idx,
1513  size_t numCols) {
1514  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1515  dst, src, dst_idx, src_idx, numCols);
1516  }
1517 
1518  template <typename DstView, typename SrcView,
1519  typename DstIdxView, typename SrcIdxView,
1520  typename DstColView, typename SrcColView>
1521  struct PermuteArrayMultiColumnVariableStride {
1522  typedef typename DstView::execution_space execution_space;
1523  typedef typename execution_space::size_type size_type;
1524 
1525  DstView dst;
1526  SrcView src;
1527  DstIdxView dst_idx;
1528  SrcIdxView src_idx;
1529  DstColView dst_col;
1530  SrcColView src_col;
1531  size_t numCols;
1532 
1533  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1534  const SrcView& src_,
1535  const DstIdxView& dst_idx_,
1536  const SrcIdxView& src_idx_,
1537  const DstColView& dst_col_,
1538  const SrcColView& src_col_,
1539  const size_t numCols_) :
1540  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1541  dst_col(dst_col_), src_col(src_col_),
1542  numCols(numCols_) {}
1543 
1544  KOKKOS_INLINE_FUNCTION void
1545  operator() (const size_type k) const {
1546  const typename DstIdxView::value_type toRow = dst_idx(k);
1547  const typename SrcIdxView::value_type fromRow = src_idx(k);
1548  for (size_t j = 0; j < numCols; ++j) {
1549  dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1550  }
1551  }
1552 
1553  static void
1554  permute (const DstView& dst,
1555  const SrcView& src,
1556  const DstIdxView& dst_idx,
1557  const SrcIdxView& src_idx,
1558  const DstColView& dst_col,
1559  const SrcColView& src_col,
1560  const size_t numCols)
1561  {
1562  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1563  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1564  Kokkos::parallel_for
1565  ("Tpetra::MultiVector permute multicol var stride",
1566  range_type (0, n),
1567  PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1568  dst_col, src_col, numCols));
1569  }
1570  };
1571 
1572  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1573  // SrcView::Rank == 2
1574  template <typename DstView, typename SrcView,
1575  typename DstIdxView, typename SrcIdxView,
1576  typename DstColView, typename SrcColView>
1577  void permute_array_multi_column_variable_stride(const DstView& dst,
1578  const SrcView& src,
1579  const DstIdxView& dst_idx,
1580  const SrcIdxView& src_idx,
1581  const DstColView& dst_col,
1582  const SrcColView& src_col,
1583  size_t numCols) {
1584  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1585  DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1586  dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1587  }
1588 
1589 } // Details namespace
1590 } // KokkosRefactor namespace
1591 } // Tpetra namespace
1592 
1593 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...