Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 // clang-format off
44 
45 
46 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
47 // incorrect (as() is not a device function) and usually irrelevant
48 // (it would only matter if LocalOrdinal were bigger than size_t on a
49 // particular platform, which is unlikely).
50 
51 // KDD Aug 2020: In the permute/pack/unpack functors,
52 // the Enabled template parameter is specialized in
53 // downstream packages like Stokhos using SFINAE to provide partial
54 // specializations based on the scalar type of the SrcView and DstView
55 // template parameters. See #7898.
56 // Do not remove it before checking with Stokhos and other specializing users.
57 
58 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
59 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
60 
61 #include "Kokkos_Core.hpp"
62 #include "Kokkos_ArithTraits.hpp"
63 #include <sstream>
64 #include <stdexcept>
65 
66 namespace Tpetra {
67 namespace KokkosRefactor {
68 namespace Details {
69 
75 namespace Impl {
76 
83 template<class IntegerType,
84  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
85 struct OutOfBounds {
86  static KOKKOS_INLINE_FUNCTION bool
87  test (const IntegerType x,
88  const IntegerType exclusiveUpperBound);
89 };
90 
91 // Partial specialization for the case where IntegerType IS signed.
92 template<class IntegerType>
93 struct OutOfBounds<IntegerType, true> {
94  static KOKKOS_INLINE_FUNCTION bool
95  test (const IntegerType x,
96  const IntegerType exclusiveUpperBound)
97  {
98  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
99  }
100 };
101 
102 // Partial specialization for the case where IntegerType is NOT signed.
103 template<class IntegerType>
104 struct OutOfBounds<IntegerType, false> {
105  static KOKKOS_INLINE_FUNCTION bool
106  test (const IntegerType x,
107  const IntegerType exclusiveUpperBound)
108  {
109  return x >= exclusiveUpperBound;
110  }
111 };
112 
115 template<class IntegerType>
116 KOKKOS_INLINE_FUNCTION bool
117 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
118 {
119  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
120 }
121 
122 } // namespace Impl
123 
124  // Functors for implementing packAndPrepare and unpackAndCombine
125  // through parallel_for
126 
127  template <typename DstView, typename SrcView, typename IdxView,
128  typename Enabled = void>
129  struct PackArraySingleColumn {
130  typedef typename DstView::execution_space execution_space;
131  typedef typename execution_space::size_type size_type;
132 
133  DstView dst;
134  SrcView src;
135  IdxView idx;
136  size_t col;
137 
138  PackArraySingleColumn (const DstView& dst_,
139  const SrcView& src_,
140  const IdxView& idx_,
141  const size_t col_) :
142  dst(dst_), src(src_), idx(idx_), col(col_) {}
143 
144  KOKKOS_INLINE_FUNCTION void
145  operator() (const size_type k) const {
146  dst(k) = src(idx(k), col);
147  }
148 
149  static void
150  pack (const DstView& dst,
151  const SrcView& src,
152  const IdxView& idx,
153  const size_t col,
154  const execution_space &space)
155  {
156  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
157  Kokkos::parallel_for
158  ("Tpetra::MultiVector pack one col",
159  range_type (space, 0, idx.size ()),
160  PackArraySingleColumn (dst, src, idx, col));
161  }
162  };
163 
164  template <typename DstView,
165  typename SrcView,
166  typename IdxView,
167  typename SizeType = typename DstView::execution_space::size_type,
168  typename Enabled = void>
169  class PackArraySingleColumnWithBoundsCheck {
170  private:
171  static_assert (Kokkos::is_view<DstView>::value,
172  "DstView must be a Kokkos::View.");
173  static_assert (Kokkos::is_view<SrcView>::value,
174  "SrcView must be a Kokkos::View.");
175  static_assert (Kokkos::is_view<IdxView>::value,
176  "IdxView must be a Kokkos::View.");
177  static_assert (static_cast<int> (DstView::rank) == 1,
178  "DstView must be a rank-1 Kokkos::View.");
179  static_assert (static_cast<int> (SrcView::rank) == 2,
180  "SrcView must be a rank-2 Kokkos::View.");
181  static_assert (static_cast<int> (IdxView::rank) == 1,
182  "IdxView must be a rank-1 Kokkos::View.");
183  static_assert (std::is_integral<SizeType>::value,
184  "SizeType must be a built-in integer type.");
185 
186  using execution_space = typename DstView::execution_space;
187 
188  public:
189  typedef SizeType size_type;
190  using value_type = size_t;
191 
192  private:
193  DstView dst;
194  SrcView src;
195  IdxView idx;
196  size_type col;
197  execution_space space;
198 
199  public:
200  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
201  const SrcView& src_,
202  const IdxView& idx_,
203  const size_type col_) :
204  dst (dst_), src (src_), idx (idx_), col (col_) {}
205 
206  KOKKOS_INLINE_FUNCTION void
207  operator() (const size_type k, value_type& lclErrCount) const {
208  using index_type = typename IdxView::non_const_value_type;
209 
210  const index_type lclRow = idx(k);
211  if (lclRow < static_cast<index_type> (0) ||
212  lclRow >= static_cast<index_type> (src.extent (0))) {
213  ++lclErrCount;
214  }
215  else {
216  dst(k) = src(lclRow, col);
217  }
218  }
219 
220  KOKKOS_INLINE_FUNCTION
221  void init (value_type& initialErrorCount) const {
222  initialErrorCount = 0;
223  }
224 
225  KOKKOS_INLINE_FUNCTION void
226  join (value_type& dstErrorCount,
227  const value_type& srcErrorCount) const
228  {
229  dstErrorCount += srcErrorCount;
230  }
231 
232  static void
233  pack (const DstView& dst,
234  const SrcView& src,
235  const IdxView& idx,
236  const size_type col,
237  const execution_space &space)
238  {
239  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
240  typedef typename IdxView::non_const_value_type index_type;
241 
242  size_t errorCount = 0;
243  Kokkos::parallel_reduce
244  ("Tpetra::MultiVector pack one col debug only",
245  range_type (space, 0, idx.size ()),
246  PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
247  errorCount);
248 
249  if (errorCount != 0) {
250  // Go back and find the out-of-bounds entries in the index
251  // array. Performance doesn't matter since we are already in
252  // an error state, so we can do this sequentially, on host.
253  auto idx_h = Kokkos::create_mirror_view (idx);
254 
255  // DEEP_COPY REVIEW - NOT TESTED
256  Kokkos::deep_copy (idx_h, idx);
257 
258  std::vector<index_type> badIndices;
259  const size_type numInds = idx_h.extent (0);
260  for (size_type k = 0; k < numInds; ++k) {
261  if (idx_h(k) < static_cast<index_type> (0) ||
262  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
263  badIndices.push_back (idx_h(k));
264  }
265  }
266 
267  TEUCHOS_TEST_FOR_EXCEPTION
268  (errorCount != badIndices.size (), std::logic_error,
269  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
270  << " != badIndices.size() = " << badIndices.size () << ". This sho"
271  "uld never happen. Please report this to the Tpetra developers.");
272 
273  std::ostringstream os;
274  os << "MultiVector single-column pack kernel had "
275  << badIndices.size () << " out-of bounds index/ices. "
276  "Here they are: [";
277  for (size_t k = 0; k < badIndices.size (); ++k) {
278  os << badIndices[k];
279  if (k + 1 < badIndices.size ()) {
280  os << ", ";
281  }
282  }
283  os << "].";
284  throw std::runtime_error (os.str ());
285  }
286  }
287  };
288 
289 
290  template <typename DstView, typename SrcView, typename IdxView>
291  void
292  pack_array_single_column (const DstView& dst,
293  const SrcView& src,
294  const IdxView& idx,
295  const size_t col,
296  const bool debug,
297  const typename DstView::execution_space &space)
298  {
299  static_assert (Kokkos::is_view<DstView>::value,
300  "DstView must be a Kokkos::View.");
301  static_assert (Kokkos::is_view<SrcView>::value,
302  "SrcView must be a Kokkos::View.");
303  static_assert (Kokkos::is_view<IdxView>::value,
304  "IdxView must be a Kokkos::View.");
305  static_assert (static_cast<int> (DstView::rank) == 1,
306  "DstView must be a rank-1 Kokkos::View.");
307  static_assert (static_cast<int> (SrcView::rank) == 2,
308  "SrcView must be a rank-2 Kokkos::View.");
309  static_assert (static_cast<int> (IdxView::rank) == 1,
310  "IdxView must be a rank-1 Kokkos::View.");
311 
312  using execution_space = typename DstView::execution_space;
313 
314  static_assert (Kokkos::SpaceAccessibility<execution_space,
315  typename DstView::memory_space>::accessible,
316  "DstView not accessible from execution space");
317  static_assert (Kokkos::SpaceAccessibility<execution_space,
318  typename SrcView::memory_space>::accessible,
319  "SrcView not accessible from execution space");
320  static_assert (Kokkos::SpaceAccessibility<execution_space,
321  typename IdxView::memory_space>::accessible,
322  "IdxView not accessible from execution space");
323 
324  if (debug) {
325  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
326  impl_type::pack (dst, src, idx, col, space);
327  }
328  else {
329  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
330  impl_type::pack (dst, src, idx, col, space);
331  }
332  }
333 
336  template <typename DstView, typename SrcView, typename IdxView>
337  void
338  pack_array_single_column (const DstView& dst,
339  const SrcView& src,
340  const IdxView& idx,
341  const size_t col,
342  const bool debug = true)
343  {
344  pack_array_single_column(dst, src, idx, col, debug, typename DstView::execution_space());
345  }
346 
347  template <typename DstView, typename SrcView, typename IdxView,
348  typename Enabled = void>
349  struct PackArrayMultiColumn {
350  using execution_space = typename DstView::execution_space;
351  typedef typename execution_space::size_type size_type;
352 
353  DstView dst;
354  SrcView src;
355  IdxView idx;
356  size_t numCols;
357 
358  PackArrayMultiColumn (const DstView& dst_,
359  const SrcView& src_,
360  const IdxView& idx_,
361  const size_t numCols_) :
362  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
363 
364  KOKKOS_INLINE_FUNCTION void
365  operator() (const size_type k) const {
366  const typename IdxView::value_type localRow = idx(k);
367  const size_t offset = k*numCols;
368  for (size_t j = 0; j < numCols; ++j) {
369  dst(offset + j) = src(localRow, j);
370  }
371  }
372 
373  static void pack(const DstView& dst,
374  const SrcView& src,
375  const IdxView& idx,
376  size_t numCols,
377  const execution_space &space) {
378  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
379  Kokkos::parallel_for
380  ("Tpetra::MultiVector pack multicol const stride",
381  range_type (space, 0, idx.size ()),
382  PackArrayMultiColumn (dst, src, idx, numCols));
383  }
384  };
385 
386  template <typename DstView,
387  typename SrcView,
388  typename IdxView,
389  typename SizeType = typename DstView::execution_space::size_type,
390  typename Enabled = void>
391  class PackArrayMultiColumnWithBoundsCheck {
392  public:
393  using size_type = SizeType;
394  using value_type = size_t;
395  using execution_space = typename DstView::execution_space;
396 
397  private:
398  DstView dst;
399  SrcView src;
400  IdxView idx;
401  size_type numCols;
402 
403  public:
404  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
405  const SrcView& src_,
406  const IdxView& idx_,
407  const size_type numCols_) :
408  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
409 
410  KOKKOS_INLINE_FUNCTION void
411  operator() (const size_type k, value_type& lclErrorCount) const {
412  typedef typename IdxView::non_const_value_type index_type;
413 
414  const index_type lclRow = idx(k);
415  if (lclRow < static_cast<index_type> (0) ||
416  lclRow >= static_cast<index_type> (src.extent (0))) {
417  ++lclErrorCount; // failed
418  }
419  else {
420  const size_type offset = k*numCols;
421  for (size_type j = 0; j < numCols; ++j) {
422  dst(offset + j) = src(lclRow, j);
423  }
424  }
425  }
426 
427  KOKKOS_INLINE_FUNCTION
428  void init (value_type& initialErrorCount) const {
429  initialErrorCount = 0;
430  }
431 
432  KOKKOS_INLINE_FUNCTION void
433  join (value_type& dstErrorCount,
434  const value_type& srcErrorCount) const
435  {
436  dstErrorCount += srcErrorCount;
437  }
438 
439  static void
440  pack (const DstView& dst,
441  const SrcView& src,
442  const IdxView& idx,
443  const size_type numCols,
444  const execution_space &space)
445  {
446  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
447  typedef typename IdxView::non_const_value_type index_type;
448 
449  size_t errorCount = 0;
450  Kokkos::parallel_reduce
451  ("Tpetra::MultiVector pack multicol const stride debug only",
452  range_type (space, 0, idx.size ()),
453  PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
454  errorCount);
455  if (errorCount != 0) {
456  // Go back and find the out-of-bounds entries in the index
457  // array. Performance doesn't matter since we are already in
458  // an error state, so we can do this sequentially, on host.
459  auto idx_h = Kokkos::create_mirror_view (idx);
460 
461  // DEEP_COPY REVIEW - NOT TESTED
462  Kokkos::deep_copy (idx_h, idx);
463 
464  std::vector<index_type> badIndices;
465  const size_type numInds = idx_h.extent (0);
466  for (size_type k = 0; k < numInds; ++k) {
467  if (idx_h(k) < static_cast<index_type> (0) ||
468  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
469  badIndices.push_back (idx_h(k));
470  }
471  }
472 
473  TEUCHOS_TEST_FOR_EXCEPTION
474  (errorCount != badIndices.size (), std::logic_error,
475  "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
476  << " != badIndices.size() = " << badIndices.size () << ". This sho"
477  "uld never happen. Please report this to the Tpetra developers.");
478 
479  std::ostringstream os;
480  os << "Tpetra::MultiVector multiple-column pack kernel had "
481  << badIndices.size () << " out-of bounds index/ices (errorCount = "
482  << errorCount << "): [";
483  for (size_t k = 0; k < badIndices.size (); ++k) {
484  os << badIndices[k];
485  if (k + 1 < badIndices.size ()) {
486  os << ", ";
487  }
488  }
489  os << "].";
490  throw std::runtime_error (os.str ());
491  }
492  }
493  };
494 
495 
496  template <typename DstView,
497  typename SrcView,
498  typename IdxView>
499  void
500  pack_array_multi_column (const DstView& dst,
501  const SrcView& src,
502  const IdxView& idx,
503  const size_t numCols,
504  const bool debug,
505  const typename DstView::execution_space &space)
506  {
507  static_assert (Kokkos::is_view<DstView>::value,
508  "DstView must be a Kokkos::View.");
509  static_assert (Kokkos::is_view<SrcView>::value,
510  "SrcView must be a Kokkos::View.");
511  static_assert (Kokkos::is_view<IdxView>::value,
512  "IdxView must be a Kokkos::View.");
513  static_assert (static_cast<int> (DstView::rank) == 1,
514  "DstView must be a rank-1 Kokkos::View.");
515  static_assert (static_cast<int> (SrcView::rank) == 2,
516  "SrcView must be a rank-2 Kokkos::View.");
517  static_assert (static_cast<int> (IdxView::rank) == 1,
518  "IdxView must be a rank-1 Kokkos::View.");
519 
520  using execution_space = typename DstView::execution_space;
521 
522  static_assert (Kokkos::SpaceAccessibility<execution_space,
523  typename DstView::memory_space>::accessible,
524  "DstView not accessible from execution space");
525  static_assert (Kokkos::SpaceAccessibility<execution_space,
526  typename SrcView::memory_space>::accessible,
527  "SrcView not accessible from execution space");
528  static_assert (Kokkos::SpaceAccessibility<execution_space,
529  typename IdxView::memory_space>::accessible,
530  "IdxView not accessible from execution space");
531 
532  if (debug) {
533  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
534  SrcView, IdxView> impl_type;
535  impl_type::pack (dst, src, idx, numCols, space);
536  }
537  else {
538  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
539  impl_type::pack (dst, src, idx, numCols, space);
540  }
541  }
542 
543  template <typename DstView,
544  typename SrcView,
545  typename IdxView>
546  void
547  pack_array_multi_column (const DstView& dst,
548  const SrcView& src,
549  const IdxView& idx,
550  const size_t numCols,
551  const bool debug = true) {
552  pack_array_multi_column(dst, src, idx, numCols, debug, typename DstView::execution_space());
553  }
554 
555  template <typename DstView, typename SrcView, typename IdxView,
556  typename ColView, typename Enabled = void>
557  struct PackArrayMultiColumnVariableStride {
558  using execution_space = typename DstView::execution_space;
559  typedef typename execution_space::size_type size_type;
560 
561  DstView dst;
562  SrcView src;
563  IdxView idx;
564  ColView col;
565  size_t numCols;
566 
567  PackArrayMultiColumnVariableStride (const DstView& dst_,
568  const SrcView& src_,
569  const IdxView& idx_,
570  const ColView& col_,
571  const size_t numCols_) :
572  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
573 
574  KOKKOS_INLINE_FUNCTION
575  void operator() (const size_type k) const {
576  const typename IdxView::value_type localRow = idx(k);
577  const size_t offset = k*numCols;
578  for (size_t j = 0; j < numCols; ++j) {
579  dst(offset + j) = src(localRow, col(j));
580  }
581  }
582 
583  static void pack(const DstView& dst,
584  const SrcView& src,
585  const IdxView& idx,
586  const ColView& col,
587  size_t numCols,
588  const execution_space &space) {
589  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
590  Kokkos::parallel_for
591  ("Tpetra::MultiVector pack multicol var stride",
592  range_type (space, 0, idx.size ()),
593  PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
594  }
595  };
596 
597  template <typename DstView,
598  typename SrcView,
599  typename IdxView,
600  typename ColView,
601  typename SizeType = typename DstView::execution_space::size_type,
602  typename Enabled = void>
603  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
604  public:
605  using size_type = SizeType;
606  using value_type = size_t;
607  using execution_space = typename DstView::execution_space;
608 
609  private:
610  DstView dst;
611  SrcView src;
612  IdxView idx;
613  ColView col;
614  size_type numCols;
615 
616  public:
617  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
618  const SrcView& src_,
619  const IdxView& idx_,
620  const ColView& col_,
621  const size_type numCols_) :
622  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
623 
624  KOKKOS_INLINE_FUNCTION void
625  operator() (const size_type k, value_type& lclErrorCount) const {
626  typedef typename IdxView::non_const_value_type row_index_type;
627  typedef typename ColView::non_const_value_type col_index_type;
628 
629  const row_index_type lclRow = idx(k);
630  if (lclRow < static_cast<row_index_type> (0) ||
631  lclRow >= static_cast<row_index_type> (src.extent (0))) {
632  ++lclErrorCount = 0;
633  }
634  else {
635  const size_type offset = k*numCols;
636  for (size_type j = 0; j < numCols; ++j) {
637  const col_index_type lclCol = col(j);
638  if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
639  ++lclErrorCount = 0;
640  }
641  else { // all indices are valid; do the assignment
642  dst(offset + j) = src(lclRow, lclCol);
643  }
644  }
645  }
646  }
647 
648  KOKKOS_INLINE_FUNCTION void
649  init (value_type& initialErrorCount) const {
650  initialErrorCount = 0;
651  }
652 
653  KOKKOS_INLINE_FUNCTION void
654  join (value_type& dstErrorCount,
655  const value_type& srcErrorCount) const
656  {
657  dstErrorCount += srcErrorCount;
658  }
659 
660  static void
661  pack (const DstView& dst,
662  const SrcView& src,
663  const IdxView& idx,
664  const ColView& col,
665  const size_type numCols,
666  const execution_space &space)
667  {
668  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
669  using row_index_type = typename IdxView::non_const_value_type;
670  using col_index_type = typename ColView::non_const_value_type;
671 
672  size_t errorCount = 0;
673  Kokkos::parallel_reduce
674  ("Tpetra::MultiVector pack multicol var stride debug only",
675  range_type (space, 0, idx.size ()),
676  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
677  col, numCols),
678  errorCount);
679  if (errorCount != 0) {
680  constexpr size_t maxNumBadIndicesToPrint = 100;
681 
682  std::ostringstream os; // for error reporting
683  os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
684  "found " << errorCount
685  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
686 
687  // Go back and find any out-of-bounds entries in the array of
688  // row indices. Performance doesn't matter since we are already
689  // in an error state, so we can do this sequentially, on host.
690  auto idx_h = Kokkos::create_mirror_view (idx);
691 
692  // DEEP_COPY REVIEW - NOT TESTED
693  Kokkos::deep_copy (idx_h, idx);
694 
695  std::vector<row_index_type> badRows;
696  const size_type numRowInds = idx_h.extent (0);
697  for (size_type k = 0; k < numRowInds; ++k) {
698  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
699  badRows.push_back (idx_h(k));
700  }
701  }
702 
703  if (badRows.size () != 0) {
704  os << badRows.size () << " out-of-bounds row ind"
705  << (badRows.size () != size_t (1) ? "ices" : "ex");
706  if (badRows.size () <= maxNumBadIndicesToPrint) {
707  os << ": [";
708  for (size_t k = 0; k < badRows.size (); ++k) {
709  os << badRows[k];
710  if (k + 1 < badRows.size ()) {
711  os << ", ";
712  }
713  }
714  os << "]. ";
715  }
716  else {
717  os << ". ";
718  }
719  }
720  else {
721  os << "No out-of-bounds row indices. ";
722  }
723 
724  // Go back and find any out-of-bounds entries in the array
725  // of column indices.
726  auto col_h = Kokkos::create_mirror_view (col);
727 
728  // DEEP_COPY REVIEW - NOT TESTED
729  Kokkos::deep_copy (col_h, col);
730 
731  std::vector<col_index_type> badCols;
732  const size_type numColInds = col_h.extent (0);
733  for (size_type k = 0; k < numColInds; ++k) {
734  if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
735  badCols.push_back (col_h(k));
736  }
737  }
738 
739  if (badCols.size () != 0) {
740  os << badCols.size () << " out-of-bounds column ind"
741  << (badCols.size () != size_t (1) ? "ices" : "ex");
742  if (badCols.size () <= maxNumBadIndicesToPrint) {
743  os << ": [";
744  for (size_t k = 0; k < badCols.size (); ++k) {
745  os << badCols[k];
746  if (k + 1 < badCols.size ()) {
747  os << ", ";
748  }
749  }
750  os << "]. ";
751  }
752  else {
753  os << ". ";
754  }
755  }
756  else {
757  os << "No out-of-bounds column indices. ";
758  }
759 
760  TEUCHOS_TEST_FOR_EXCEPTION
761  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
762  std::logic_error, "Tpetra::MultiVector variable stride pack "
763  "kernel reports errorCount=" << errorCount << ", but we failed "
764  "to find any bad rows or columns. This should never happen. "
765  "Please report this to the Tpetra developers.");
766 
767  throw std::runtime_error (os.str ());
768  } // hasErr
769  }
770  };
771 
772  template <typename DstView,
773  typename SrcView,
774  typename IdxView,
775  typename ColView>
776  void
777  pack_array_multi_column_variable_stride (const DstView& dst,
778  const SrcView& src,
779  const IdxView& idx,
780  const ColView& col,
781  const size_t numCols,
782  const bool debug,
783  const typename DstView::execution_space &space)
784  {
785  static_assert (Kokkos::is_view<DstView>::value,
786  "DstView must be a Kokkos::View.");
787  static_assert (Kokkos::is_view<SrcView>::value,
788  "SrcView must be a Kokkos::View.");
789  static_assert (Kokkos::is_view<IdxView>::value,
790  "IdxView must be a Kokkos::View.");
791  static_assert (Kokkos::is_view<ColView>::value,
792  "ColView must be a Kokkos::View.");
793  static_assert (static_cast<int> (DstView::rank) == 1,
794  "DstView must be a rank-1 Kokkos::View.");
795  static_assert (static_cast<int> (SrcView::rank) == 2,
796  "SrcView must be a rank-2 Kokkos::View.");
797  static_assert (static_cast<int> (IdxView::rank) == 1,
798  "IdxView must be a rank-1 Kokkos::View.");
799  static_assert (static_cast<int> (ColView::rank) == 1,
800  "ColView must be a rank-1 Kokkos::View.");
801 
802  using execution_space = typename DstView::execution_space;
803 
804  static_assert (Kokkos::SpaceAccessibility<execution_space,
805  typename DstView::memory_space>::accessible,
806  "DstView not accessible from execution space");
807  static_assert (Kokkos::SpaceAccessibility<execution_space,
808  typename SrcView::memory_space>::accessible,
809  "SrcView not accessible from execution space");
810  static_assert (Kokkos::SpaceAccessibility<execution_space,
811  typename IdxView::memory_space>::accessible,
812  "IdxView not accessible from execution space");
813 
814  if (debug) {
815  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
816  SrcView, IdxView, ColView> impl_type;
817  impl_type::pack (dst, src, idx, col, numCols, space);
818  }
819  else {
820  typedef PackArrayMultiColumnVariableStride<DstView,
821  SrcView, IdxView, ColView> impl_type;
822  impl_type::pack (dst, src, idx, col, numCols, space);
823  }
824  }
825 
826  template <typename DstView,
827  typename SrcView,
828  typename IdxView,
829  typename ColView>
830  void
831  pack_array_multi_column_variable_stride (const DstView& dst,
832  const SrcView& src,
833  const IdxView& idx,
834  const ColView& col,
835  const size_t numCols,
836  const bool debug = true) {
837  pack_array_multi_column_variable_stride(dst, src, idx, col, numCols, debug,
838  typename DstView::execution_space());
839  }
840 
841  // Tag types to indicate whether to use atomic updates in the
842  // various CombineMode "Op"s.
843  struct atomic_tag {};
844  struct nonatomic_tag {};
845 
846  struct AddOp {
847  template<class SC>
848  KOKKOS_INLINE_FUNCTION
849  void operator() (atomic_tag, SC& dest, const SC& src) const {
850  Kokkos::atomic_add (&dest, src);
851  }
852 
853  template<class SC>
854  KOKKOS_INLINE_FUNCTION
855  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
856  dest += src;
857  }
858  };
859 
860  struct InsertOp {
861  // There's no point to using Kokkos::atomic_assign for the REPLACE
862  // or INSERT CombineModes, since this is not a well-defined
863  // reduction for MultiVector anyway. See GitHub Issue #4417
864  // (which this fixes).
865  template<class SC>
866  KOKKOS_INLINE_FUNCTION
867  void operator() (atomic_tag, SC& dest, const SC& src) const {
868  dest = src;
869  }
870 
871  template<class SC>
872  KOKKOS_INLINE_FUNCTION
873  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
874  dest = src;
875  }
876  };
877 
878  struct AbsMaxOp {
879  template <class Scalar>
880  struct AbsMaxHelper{
881  Scalar value;
882 
883  KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
884  auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
885  auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
886  value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
887  return *this;
888  }
889 
890  KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
891  AbsMaxHelper ret = *this;
892  ret += rhs;
893  return ret;
894  }
895  };
896 
897  template <typename SC>
898  KOKKOS_INLINE_FUNCTION
899  void operator() (atomic_tag, SC& dst, const SC& src) const {
900  Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
901  }
902 
903  template <typename SC>
904  KOKKOS_INLINE_FUNCTION
905  void operator() (nonatomic_tag, SC& dst, const SC& src) const {
906  auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
907  auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
908  dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
909  }
910  };
911 
912  template <typename ExecutionSpace,
913  typename DstView,
914  typename SrcView,
915  typename IdxView,
916  typename Op,
917  typename Enabled = void>
918  class UnpackArrayMultiColumn {
919  private:
920  static_assert (Kokkos::is_view<DstView>::value,
921  "DstView must be a Kokkos::View.");
922  static_assert (Kokkos::is_view<SrcView>::value,
923  "SrcView must be a Kokkos::View.");
924  static_assert (Kokkos::is_view<IdxView>::value,
925  "IdxView must be a Kokkos::View.");
926  static_assert (static_cast<int> (DstView::rank) == 2,
927  "DstView must be a rank-2 Kokkos::View.");
928  static_assert (static_cast<int> (SrcView::rank) == 1,
929  "SrcView must be a rank-1 Kokkos::View.");
930  static_assert (static_cast<int> (IdxView::rank) == 1,
931  "IdxView must be a rank-1 Kokkos::View.");
932 
933  public:
934  typedef typename ExecutionSpace::execution_space execution_space;
935  typedef typename execution_space::size_type size_type;
936 
937  private:
938  DstView dst;
939  SrcView src;
940  IdxView idx;
941  Op op;
942  size_t numCols;
943 
944  public:
945  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
946  const DstView& dst_,
947  const SrcView& src_,
948  const IdxView& idx_,
949  const Op& op_,
950  const size_t numCols_) :
951  dst (dst_),
952  src (src_),
953  idx (idx_),
954  op (op_),
955  numCols (numCols_)
956  {}
957 
958  template<class TagType>
959  KOKKOS_INLINE_FUNCTION void
960  operator() (TagType tag, const size_type k) const
961  {
962  static_assert
963  (std::is_same<TagType, atomic_tag>::value ||
964  std::is_same<TagType, nonatomic_tag>::value,
965  "TagType must be atomic_tag or nonatomic_tag.");
966 
967  const typename IdxView::value_type localRow = idx(k);
968  const size_t offset = k*numCols;
969  for (size_t j = 0; j < numCols; ++j) {
970  op (tag, dst(localRow, j), src(offset+j));
971  }
972  }
973 
974  static void
975  unpack (const ExecutionSpace& execSpace,
976  const DstView& dst,
977  const SrcView& src,
978  const IdxView& idx,
979  const Op& op,
980  const size_t numCols,
981  const bool use_atomic_updates)
982  {
983  if (use_atomic_updates) {
984  using range_type =
985  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
986  Kokkos::parallel_for
987  ("Tpetra::MultiVector unpack const stride atomic",
988  range_type (0, idx.size ()),
989  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
990  }
991  else {
992  using range_type =
993  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
994  Kokkos::parallel_for
995  ("Tpetra::MultiVector unpack const stride nonatomic",
996  range_type (0, idx.size ()),
997  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
998  }
999  }
1000  };
1001 
1002  template <typename ExecutionSpace,
1003  typename DstView,
1004  typename SrcView,
1005  typename IdxView,
1006  typename Op,
1007  typename SizeType = typename ExecutionSpace::execution_space::size_type,
1008  typename Enabled = void>
1009  class UnpackArrayMultiColumnWithBoundsCheck {
1010  private:
1011  static_assert (Kokkos::is_view<DstView>::value,
1012  "DstView must be a Kokkos::View.");
1013  static_assert (Kokkos::is_view<SrcView>::value,
1014  "SrcView must be a Kokkos::View.");
1015  static_assert (Kokkos::is_view<IdxView>::value,
1016  "IdxView must be a Kokkos::View.");
1017  static_assert (static_cast<int> (DstView::rank) == 2,
1018  "DstView must be a rank-2 Kokkos::View.");
1019  static_assert (static_cast<int> (SrcView::rank) == 1,
1020  "SrcView must be a rank-1 Kokkos::View.");
1021  static_assert (static_cast<int> (IdxView::rank) == 1,
1022  "IdxView must be a rank-1 Kokkos::View.");
1023  static_assert (std::is_integral<SizeType>::value,
1024  "SizeType must be a built-in integer type.");
1025 
1026  public:
1027  using execution_space = typename ExecutionSpace::execution_space;
1028  using size_type = SizeType;
1029  using value_type = size_t;
1030 
1031  private:
1032  DstView dst;
1033  SrcView src;
1034  IdxView idx;
1035  Op op;
1036  size_type numCols;
1037 
1038  public:
1039  UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
1040  const DstView& dst_,
1041  const SrcView& src_,
1042  const IdxView& idx_,
1043  const Op& op_,
1044  const size_type numCols_) :
1045  dst (dst_),
1046  src (src_),
1047  idx (idx_),
1048  op (op_),
1049  numCols (numCols_)
1050  {}
1051 
1052  template<class TagType>
1053  KOKKOS_INLINE_FUNCTION void
1054  operator() (TagType tag,
1055  const size_type k,
1056  size_t& lclErrCount) const
1057  {
1058  static_assert
1059  (std::is_same<TagType, atomic_tag>::value ||
1060  std::is_same<TagType, nonatomic_tag>::value,
1061  "TagType must be atomic_tag or nonatomic_tag.");
1062  using index_type = typename IdxView::non_const_value_type;
1063 
1064  const index_type lclRow = idx(k);
1065  if (lclRow < static_cast<index_type> (0) ||
1066  lclRow >= static_cast<index_type> (dst.extent (0))) {
1067  ++lclErrCount;
1068  }
1069  else {
1070  const size_type offset = k*numCols;
1071  for (size_type j = 0; j < numCols; ++j) {
1072  op (tag, dst(lclRow,j), src(offset+j));
1073  }
1074  }
1075  }
1076 
1077  template<class TagType>
1078  KOKKOS_INLINE_FUNCTION void
1079  init (TagType, size_t& initialErrorCount) const {
1080  initialErrorCount = 0;
1081  }
1082 
1083  template<class TagType>
1084  KOKKOS_INLINE_FUNCTION void
1085  join (TagType,
1086  size_t& dstErrorCount,
1087  const size_t& srcErrorCount) const
1088  {
1089  dstErrorCount += srcErrorCount;
1090  }
1091 
1092  static void
1093  unpack (const ExecutionSpace& execSpace,
1094  const DstView& dst,
1095  const SrcView& src,
1096  const IdxView& idx,
1097  const Op& op,
1098  const size_type numCols,
1099  const bool use_atomic_updates)
1100  {
1101  using index_type = typename IdxView::non_const_value_type;
1102 
1103  size_t errorCount = 0;
1104  if (use_atomic_updates) {
1105  using range_type =
1106  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1107  Kokkos::parallel_reduce
1108  ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1109  range_type (0, idx.size ()),
1110  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1111  idx, op, numCols),
1112  errorCount);
1113  }
1114  else {
1115  using range_type =
1116  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1117  Kokkos::parallel_reduce
1118  ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1119  range_type (0, idx.size ()),
1120  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1121  idx, op, numCols),
1122  errorCount);
1123  }
1124 
1125  if (errorCount != 0) {
1126  // Go back and find the out-of-bounds entries in the index
1127  // array. Performance doesn't matter since we are already in
1128  // an error state, so we can do this sequentially, on host.
1129  auto idx_h = Kokkos::create_mirror_view (idx);
1130 
1131  // DEEP_COPY REVIEW - NOT TESTED
1132  Kokkos::deep_copy (idx_h, idx);
1133 
1134  std::vector<index_type> badIndices;
1135  const size_type numInds = idx_h.extent (0);
1136  for (size_type k = 0; k < numInds; ++k) {
1137  if (idx_h(k) < static_cast<index_type> (0) ||
1138  idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1139  badIndices.push_back (idx_h(k));
1140  }
1141  }
1142 
1143  if (errorCount != badIndices.size ()) {
1144  std::ostringstream os;
1145  os << "MultiVector unpack kernel: errorCount = " << errorCount
1146  << " != badIndices.size() = " << badIndices.size ()
1147  << ". This should never happen. "
1148  "Please report this to the Tpetra developers.";
1149  throw std::logic_error (os.str ());
1150  }
1151 
1152  std::ostringstream os;
1153  os << "MultiVector unpack kernel had " << badIndices.size ()
1154  << " out-of bounds index/ices. Here they are: [";
1155  for (size_t k = 0; k < badIndices.size (); ++k) {
1156  os << badIndices[k];
1157  if (k + 1 < badIndices.size ()) {
1158  os << ", ";
1159  }
1160  }
1161  os << "].";
1162  throw std::runtime_error (os.str ());
1163  }
1164  }
1165  };
1166 
1167  template <typename ExecutionSpace,
1168  typename DstView,
1169  typename SrcView,
1170  typename IdxView,
1171  typename Op>
1172  void
1173  unpack_array_multi_column (const ExecutionSpace& execSpace,
1174  const DstView& dst,
1175  const SrcView& src,
1176  const IdxView& idx,
1177  const Op& op,
1178  const size_t numCols,
1179  const bool use_atomic_updates,
1180  const bool debug)
1181  {
1182  static_assert (Kokkos::is_view<DstView>::value,
1183  "DstView must be a Kokkos::View.");
1184  static_assert (Kokkos::is_view<SrcView>::value,
1185  "SrcView must be a Kokkos::View.");
1186  static_assert (Kokkos::is_view<IdxView>::value,
1187  "IdxView must be a Kokkos::View.");
1188  static_assert (static_cast<int> (DstView::rank) == 2,
1189  "DstView must be a rank-2 Kokkos::View.");
1190  static_assert (static_cast<int> (SrcView::rank) == 1,
1191  "SrcView must be a rank-1 Kokkos::View.");
1192  static_assert (static_cast<int> (IdxView::rank) == 1,
1193  "IdxView must be a rank-1 Kokkos::View.");
1194 
1195  if (debug) {
1196  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1197  DstView, SrcView, IdxView, Op> impl_type;
1198  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1199  use_atomic_updates);
1200  }
1201  else {
1202  typedef UnpackArrayMultiColumn<ExecutionSpace,
1203  DstView, SrcView, IdxView, Op> impl_type;
1204  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1205  use_atomic_updates);
1206  }
1207  }
1208 
1209  template <typename ExecutionSpace,
1210  typename DstView,
1211  typename SrcView,
1212  typename IdxView,
1213  typename ColView,
1214  typename Op,
1215  typename Enabled = void>
1216  class UnpackArrayMultiColumnVariableStride {
1217  private:
1218  static_assert (Kokkos::is_view<DstView>::value,
1219  "DstView must be a Kokkos::View.");
1220  static_assert (Kokkos::is_view<SrcView>::value,
1221  "SrcView must be a Kokkos::View.");
1222  static_assert (Kokkos::is_view<IdxView>::value,
1223  "IdxView must be a Kokkos::View.");
1224  static_assert (Kokkos::is_view<ColView>::value,
1225  "ColView must be a Kokkos::View.");
1226  static_assert (static_cast<int> (DstView::rank) == 2,
1227  "DstView must be a rank-2 Kokkos::View.");
1228  static_assert (static_cast<int> (SrcView::rank) == 1,
1229  "SrcView must be a rank-1 Kokkos::View.");
1230  static_assert (static_cast<int> (IdxView::rank) == 1,
1231  "IdxView must be a rank-1 Kokkos::View.");
1232  static_assert (static_cast<int> (ColView::rank) == 1,
1233  "ColView must be a rank-1 Kokkos::View.");
1234 
1235  public:
1236  using execution_space = typename ExecutionSpace::execution_space;
1237  using size_type = typename execution_space::size_type;
1238 
1239  private:
1240  DstView dst;
1241  SrcView src;
1242  IdxView idx;
1243  ColView col;
1244  Op op;
1245  size_t numCols;
1246 
1247  public:
1248  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1249  const DstView& dst_,
1250  const SrcView& src_,
1251  const IdxView& idx_,
1252  const ColView& col_,
1253  const Op& op_,
1254  const size_t numCols_) :
1255  dst (dst_),
1256  src (src_),
1257  idx (idx_),
1258  col (col_),
1259  op (op_),
1260  numCols (numCols_)
1261  {}
1262 
1263  template<class TagType>
1264  KOKKOS_INLINE_FUNCTION void
1265  operator() (TagType tag, const size_type k) const
1266  {
1267  static_assert
1268  (std::is_same<TagType, atomic_tag>::value ||
1269  std::is_same<TagType, nonatomic_tag>::value,
1270  "TagType must be atomic_tag or nonatomic_tag.");
1271 
1272  const typename IdxView::value_type localRow = idx(k);
1273  const size_t offset = k*numCols;
1274  for (size_t j = 0; j < numCols; ++j) {
1275  op (tag, dst(localRow, col(j)), src(offset+j));
1276  }
1277  }
1278 
1279  static void
1280  unpack (const ExecutionSpace& execSpace,
1281  const DstView& dst,
1282  const SrcView& src,
1283  const IdxView& idx,
1284  const ColView& col,
1285  const Op& op,
1286  const size_t numCols,
1287  const bool use_atomic_updates)
1288  {
1289  if (use_atomic_updates) {
1290  using range_type =
1291  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1292  Kokkos::parallel_for
1293  ("Tpetra::MultiVector unpack var stride atomic",
1294  range_type (0, idx.size ()),
1295  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1296  idx, col, op, numCols));
1297  }
1298  else {
1299  using range_type =
1300  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1301  Kokkos::parallel_for
1302  ("Tpetra::MultiVector unpack var stride nonatomic",
1303  range_type (0, idx.size ()),
1304  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1305  idx, col, op, numCols));
1306  }
1307  }
1308  };
1309 
1310  template <typename ExecutionSpace,
1311  typename DstView,
1312  typename SrcView,
1313  typename IdxView,
1314  typename ColView,
1315  typename Op,
1316  typename SizeType = typename ExecutionSpace::execution_space::size_type,
1317  typename Enabled = void>
1318  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1319  private:
1320  static_assert (Kokkos::is_view<DstView>::value,
1321  "DstView must be a Kokkos::View.");
1322  static_assert (Kokkos::is_view<SrcView>::value,
1323  "SrcView must be a Kokkos::View.");
1324  static_assert (Kokkos::is_view<IdxView>::value,
1325  "IdxView must be a Kokkos::View.");
1326  static_assert (Kokkos::is_view<ColView>::value,
1327  "ColView must be a Kokkos::View.");
1328  static_assert (static_cast<int> (DstView::rank) == 2,
1329  "DstView must be a rank-2 Kokkos::View.");
1330  static_assert (static_cast<int> (SrcView::rank) == 1,
1331  "SrcView must be a rank-1 Kokkos::View.");
1332  static_assert (static_cast<int> (IdxView::rank) == 1,
1333  "IdxView must be a rank-1 Kokkos::View.");
1334  static_assert (static_cast<int> (ColView::rank) == 1,
1335  "ColView must be a rank-1 Kokkos::View.");
1336  static_assert (std::is_integral<SizeType>::value,
1337  "SizeType must be a built-in integer type.");
1338 
1339  public:
1340  using execution_space = typename ExecutionSpace::execution_space;
1341  using size_type = SizeType;
1342  using value_type = size_t;
1343 
1344  private:
1345  DstView dst;
1346  SrcView src;
1347  IdxView idx;
1348  ColView col;
1349  Op op;
1350  size_type numCols;
1351 
1352  public:
1353  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1354  (const ExecutionSpace& /* execSpace */,
1355  const DstView& dst_,
1356  const SrcView& src_,
1357  const IdxView& idx_,
1358  const ColView& col_,
1359  const Op& op_,
1360  const size_t numCols_) :
1361  dst (dst_),
1362  src (src_),
1363  idx (idx_),
1364  col (col_),
1365  op (op_),
1366  numCols (numCols_)
1367  {}
1368 
1369  template<class TagType>
1370  KOKKOS_INLINE_FUNCTION void
1371  operator() (TagType tag,
1372  const size_type k,
1373  value_type& lclErrorCount) const
1374  {
1375  static_assert
1376  (std::is_same<TagType, atomic_tag>::value ||
1377  std::is_same<TagType, nonatomic_tag>::value,
1378  "TagType must be atomic_tag or nonatomic_tag.");
1379  using row_index_type = typename IdxView::non_const_value_type;
1380  using col_index_type = typename ColView::non_const_value_type;
1381 
1382  const row_index_type lclRow = idx(k);
1383  if (lclRow < static_cast<row_index_type> (0) ||
1384  lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1385  ++lclErrorCount;
1386  }
1387  else {
1388  const size_type offset = k * numCols;
1389  for (size_type j = 0; j < numCols; ++j) {
1390  const col_index_type lclCol = col(j);
1391  if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1392  ++lclErrorCount;
1393  }
1394  else { // all indices are valid; apply the op
1395  op (tag, dst(lclRow, col(j)), src(offset+j));
1396  }
1397  }
1398  }
1399  }
1400 
1401  KOKKOS_INLINE_FUNCTION void
1402  init (value_type& initialErrorCount) const {
1403  initialErrorCount = 0;
1404  }
1405 
1406  KOKKOS_INLINE_FUNCTION void
1407  join (value_type& dstErrorCount,
1408  const value_type& srcErrorCount) const
1409  {
1410  dstErrorCount += srcErrorCount;
1411  }
1412 
1413  static void
1414  unpack (const ExecutionSpace& execSpace,
1415  const DstView& dst,
1416  const SrcView& src,
1417  const IdxView& idx,
1418  const ColView& col,
1419  const Op& op,
1420  const size_type numCols,
1421  const bool use_atomic_updates)
1422  {
1423  using row_index_type = typename IdxView::non_const_value_type;
1424  using col_index_type = typename ColView::non_const_value_type;
1425 
1426  size_t errorCount = 0;
1427  if (use_atomic_updates) {
1428  using range_type =
1429  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1430  Kokkos::parallel_reduce
1431  ("Tpetra::MultiVector unpack var stride atomic debug only",
1432  range_type (0, idx.size ()),
1433  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1434  (execSpace, dst, src, idx, col, op, numCols),
1435  errorCount);
1436  }
1437  else {
1438  using range_type =
1439  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1440  Kokkos::parallel_reduce
1441  ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1442  range_type (0, idx.size ()),
1443  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1444  (execSpace, dst, src, idx, col, op, numCols),
1445  errorCount);
1446  }
1447 
1448  if (errorCount != 0) {
1449  constexpr size_t maxNumBadIndicesToPrint = 100;
1450 
1451  std::ostringstream os; // for error reporting
1452  os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1453  "found " << errorCount
1454  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1455 
1456  // Go back and find any out-of-bounds entries in the array of
1457  // row indices. Performance doesn't matter since we are
1458  // already in an error state, so we can do this sequentially,
1459  // on host.
1460  auto idx_h = Kokkos::create_mirror_view (idx);
1461 
1462  // DEEP_COPY REVIEW - NOT TESTED
1463  Kokkos::deep_copy (idx_h, idx);
1464 
1465  std::vector<row_index_type> badRows;
1466  const size_type numRowInds = idx_h.extent (0);
1467  for (size_type k = 0; k < numRowInds; ++k) {
1468  if (idx_h(k) < static_cast<row_index_type> (0) ||
1469  idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1470  badRows.push_back (idx_h(k));
1471  }
1472  }
1473 
1474  if (badRows.size () != 0) {
1475  os << badRows.size () << " out-of-bounds row ind"
1476  << (badRows.size () != size_t (1) ? "ices" : "ex");
1477  if (badRows.size () <= maxNumBadIndicesToPrint) {
1478  os << ": [";
1479  for (size_t k = 0; k < badRows.size (); ++k) {
1480  os << badRows[k];
1481  if (k + 1 < badRows.size ()) {
1482  os << ", ";
1483  }
1484  }
1485  os << "]. ";
1486  }
1487  else {
1488  os << ". ";
1489  }
1490  }
1491  else {
1492  os << "No out-of-bounds row indices. ";
1493  }
1494 
1495  // Go back and find any out-of-bounds entries in the array
1496  // of column indices.
1497  auto col_h = Kokkos::create_mirror_view (col);
1498 
1499  // DEEP_COPY REVIEW - NOT TESTED
1500  Kokkos::deep_copy (col_h, col);
1501 
1502  std::vector<col_index_type> badCols;
1503  const size_type numColInds = col_h.extent (0);
1504  for (size_type k = 0; k < numColInds; ++k) {
1505  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1506  badCols.push_back (col_h(k));
1507  }
1508  }
1509 
1510  if (badCols.size () != 0) {
1511  os << badCols.size () << " out-of-bounds column ind"
1512  << (badCols.size () != size_t (1) ? "ices" : "ex");
1513  if (badCols.size () <= maxNumBadIndicesToPrint) {
1514  for (size_t k = 0; k < badCols.size (); ++k) {
1515  os << ": [";
1516  os << badCols[k];
1517  if (k + 1 < badCols.size ()) {
1518  os << ", ";
1519  }
1520  }
1521  os << "]. ";
1522  }
1523  else {
1524  os << ". ";
1525  }
1526  }
1527  else {
1528  os << "No out-of-bounds column indices. ";
1529  }
1530 
1531  TEUCHOS_TEST_FOR_EXCEPTION
1532  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1533  std::logic_error, "Tpetra::MultiVector variable stride unpack "
1534  "kernel reports errorCount=" << errorCount << ", but we failed "
1535  "to find any bad rows or columns. This should never happen. "
1536  "Please report this to the Tpetra developers.");
1537 
1538  throw std::runtime_error (os.str ());
1539  } // hasErr
1540  }
1541  };
1542 
1543  template <typename ExecutionSpace,
1544  typename DstView,
1545  typename SrcView,
1546  typename IdxView,
1547  typename ColView,
1548  typename Op>
1549  void
1550  unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1551  const DstView& dst,
1552  const SrcView& src,
1553  const IdxView& idx,
1554  const ColView& col,
1555  const Op& op,
1556  const size_t numCols,
1557  const bool use_atomic_updates,
1558  const bool debug)
1559  {
1560  static_assert (Kokkos::is_view<DstView>::value,
1561  "DstView must be a Kokkos::View.");
1562  static_assert (Kokkos::is_view<SrcView>::value,
1563  "SrcView must be a Kokkos::View.");
1564  static_assert (Kokkos::is_view<IdxView>::value,
1565  "IdxView must be a Kokkos::View.");
1566  static_assert (Kokkos::is_view<ColView>::value,
1567  "ColView must be a Kokkos::View.");
1568  static_assert (static_cast<int> (DstView::rank) == 2,
1569  "DstView must be a rank-2 Kokkos::View.");
1570  static_assert (static_cast<int> (SrcView::rank) == 1,
1571  "SrcView must be a rank-1 Kokkos::View.");
1572  static_assert (static_cast<int> (IdxView::rank) == 1,
1573  "IdxView must be a rank-1 Kokkos::View.");
1574  static_assert (static_cast<int> (ColView::rank) == 1,
1575  "ColView must be a rank-1 Kokkos::View.");
1576 
1577  if (debug) {
1578  using impl_type =
1579  UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1580  DstView, SrcView, IdxView, ColView, Op>;
1581  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1582  use_atomic_updates);
1583  }
1584  else {
1585  using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1586  DstView, SrcView, IdxView, ColView, Op>;
1587  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1588  use_atomic_updates);
1589  }
1590  }
1591 
1592  template <typename DstView, typename SrcView,
1593  typename DstIdxView, typename SrcIdxView, typename Op,
1594  typename Enabled = void>
1595  struct PermuteArrayMultiColumn {
1596  using size_type = typename DstView::size_type;
1597 
1598  DstView dst;
1599  SrcView src;
1600  DstIdxView dst_idx;
1601  SrcIdxView src_idx;
1602  size_t numCols;
1603  Op op;
1604 
1605  PermuteArrayMultiColumn (const DstView& dst_,
1606  const SrcView& src_,
1607  const DstIdxView& dst_idx_,
1608  const SrcIdxView& src_idx_,
1609  const size_t numCols_,
1610  const Op& op_) :
1611  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1612  numCols(numCols_), op(op_) {}
1613 
1614  KOKKOS_INLINE_FUNCTION void
1615  operator() (const size_type k) const {
1616  const typename DstIdxView::value_type toRow = dst_idx(k);
1617  const typename SrcIdxView::value_type fromRow = src_idx(k);
1618  nonatomic_tag tag; // permute does not need atomics
1619  for (size_t j = 0; j < numCols; ++j) {
1620  op(tag, dst(toRow, j), src(fromRow, j));
1621  }
1622  }
1623 
1624  template <typename ExecutionSpace>
1625  static void
1626  permute (const ExecutionSpace &space,
1627  const DstView& dst,
1628  const SrcView& src,
1629  const DstIdxView& dst_idx,
1630  const SrcIdxView& src_idx,
1631  const size_t numCols,
1632  const Op& op)
1633  {
1634  using range_type =
1635  Kokkos::RangePolicy<ExecutionSpace, size_type>;
1636  // permute does not need atomics for Op
1637  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1638  Kokkos::parallel_for
1639  ("Tpetra::MultiVector permute multicol const stride",
1640  range_type (space, 0, n),
1641  PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1642  }
1643  };
1644 
1645 // clang-format on
1646 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1647 // SrcView::rank == 2
1648 template <typename ExecutionSpace, typename DstView, typename SrcView,
1649  typename DstIdxView, typename SrcIdxView, typename Op>
1650 void permute_array_multi_column(const ExecutionSpace &space, const DstView &dst,
1651  const SrcView &src, const DstIdxView &dst_idx,
1652  const SrcIdxView &src_idx, size_t numCols,
1653  const Op &op) {
1654  PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView,
1655  Op>::permute(space, dst, src, dst_idx, src_idx,
1656  numCols, op);
1657 }
1658 // clang-format off
1659 
1660  // To do: Add enable_if<> restrictions on DstView::rank == 1,
1661  // SrcView::rank == 2
1662  template <typename DstView, typename SrcView,
1663  typename DstIdxView, typename SrcIdxView, typename Op>
1664  void permute_array_multi_column(const DstView& dst,
1665  const SrcView& src,
1666  const DstIdxView& dst_idx,
1667  const SrcIdxView& src_idx,
1668  size_t numCols,
1669  const Op& op) {
1670  using execution_space = typename DstView::execution_space;
1671  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1672  execution_space(), dst, src, dst_idx, src_idx, numCols, op);
1673  }
1674 
1675  template <typename DstView, typename SrcView,
1676  typename DstIdxView, typename SrcIdxView,
1677  typename DstColView, typename SrcColView, typename Op,
1678  typename Enabled = void>
1679  struct PermuteArrayMultiColumnVariableStride {
1680  using size_type = typename DstView::size_type;
1681 
1682  DstView dst;
1683  SrcView src;
1684  DstIdxView dst_idx;
1685  SrcIdxView src_idx;
1686  DstColView dst_col;
1687  SrcColView src_col;
1688  size_t numCols;
1689  Op op;
1690 
1691  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1692  const SrcView& src_,
1693  const DstIdxView& dst_idx_,
1694  const SrcIdxView& src_idx_,
1695  const DstColView& dst_col_,
1696  const SrcColView& src_col_,
1697  const size_t numCols_,
1698  const Op& op_) :
1699  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1700  dst_col(dst_col_), src_col(src_col_),
1701  numCols(numCols_), op(op_) {}
1702 
1703  KOKKOS_INLINE_FUNCTION void
1704  operator() (const size_type k) const {
1705  const typename DstIdxView::value_type toRow = dst_idx(k);
1706  const typename SrcIdxView::value_type fromRow = src_idx(k);
1707  const nonatomic_tag tag; // permute does not need atomics
1708  for (size_t j = 0; j < numCols; ++j) {
1709  op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1710  }
1711  }
1712 
1713  template <typename ExecutionSpace>
1714  static void
1715  permute ( const ExecutionSpace &space,
1716  const DstView& dst,
1717  const SrcView& src,
1718  const DstIdxView& dst_idx,
1719  const SrcIdxView& src_idx,
1720  const DstColView& dst_col,
1721  const SrcColView& src_col,
1722  const size_t numCols,
1723  const Op& op)
1724  {
1725 
1726  static_assert(Kokkos::SpaceAccessibility<
1727  ExecutionSpace, typename DstView::memory_space>::accessible,
1728  "ExecutionSpace must be able to access DstView");
1729 
1730  using range_type = Kokkos::RangePolicy<ExecutionSpace, size_type>;
1731  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1732  Kokkos::parallel_for
1733  ("Tpetra::MultiVector permute multicol var stride",
1734  range_type (space, 0, n),
1735  PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1736  dst_col, src_col, numCols, op));
1737  }
1738  };
1739 
1740 // clang-format on
1741 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1742 // SrcView::rank == 2
1743 template <typename ExecutionSpace, typename DstView, typename SrcView,
1744  typename DstIdxView, typename SrcIdxView, typename DstColView,
1745  typename SrcColView, typename Op>
1746 void permute_array_multi_column_variable_stride(
1747  const ExecutionSpace &space, const DstView &dst, const SrcView &src,
1748  const DstIdxView &dst_idx, const SrcIdxView &src_idx,
1749  const DstColView &dst_col, const SrcColView &src_col, size_t numCols,
1750  const Op &op) {
1751  PermuteArrayMultiColumnVariableStride<DstView, SrcView, DstIdxView,
1752  SrcIdxView, DstColView, SrcColView,
1753  Op>::permute(space, dst, src, dst_idx,
1754  src_idx, dst_col, src_col,
1755  numCols, op);
1756 }
1757 // clang-format off
1758 
1759  // To do: Add enable_if<> restrictions on DstView::rank == 1,
1760  // SrcView::rank == 2
1761  template <typename DstView, typename SrcView,
1762  typename DstIdxView, typename SrcIdxView,
1763  typename DstColView, typename SrcColView, typename Op>
1764  void permute_array_multi_column_variable_stride(const DstView& dst,
1765  const SrcView& src,
1766  const DstIdxView& dst_idx,
1767  const SrcIdxView& src_idx,
1768  const DstColView& dst_col,
1769  const SrcColView& src_col,
1770  size_t numCols,
1771  const Op& op) {
1772  using execution_space = typename DstView::execution_space;
1773  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1774  DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1775  execution_space(), dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1776  }
1777 
1778 } // Details namespace
1779 } // KokkosRefactor namespace
1780 } // Tpetra namespace
1781 
1782 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...