Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_computeRowAndColumnOneNorms_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
12 
19 
20 #if KOKKOS_VERSION >= 40799
21 #include "KokkosKernels_ArithTraits.hpp"
22 #else
23 #include "Kokkos_ArithTraits.hpp"
24 #endif
25 #include "Kokkos_Macros.hpp"
28 #include "Tpetra_CrsMatrix.hpp"
29 #include "Tpetra_Export.hpp"
30 #include "Tpetra_Map.hpp"
31 #include "Tpetra_MultiVector.hpp"
32 #include "Tpetra_RowMatrix.hpp"
33 #include "Kokkos_Core.hpp"
34 #include "Teuchos_CommHelpers.hpp"
35 #include <memory>
36 
37 namespace Tpetra {
38 namespace Details {
39 
40 template <class SC, class LO, class GO, class NT>
41 std::size_t
42 lclMaxNumEntriesRowMatrix(const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
43  const auto& rowMap = *(A.getRowMap());
44  const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
45 
46  std::size_t maxNumEnt{0};
47  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
48  const std::size_t numEnt = A.getNumEntriesInLocalRow(lclRow);
49  maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
50  }
51  return maxNumEnt;
52 }
53 
54 template <class SC, class LO, class GO, class NT>
55 void forEachLocalRowMatrixRow(
57  const LO lclNumRows,
58  const std::size_t maxNumEnt,
59  std::function<void(
60  const LO lclRow,
61  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
62  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
63  std::size_t /*numEnt*/)>
64  doForEachRow) {
65  using lids_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
66  using vals_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
67  lids_type indBuf("indices", maxNumEnt);
68  vals_type valBuf("values", maxNumEnt);
69 
70  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
71  std::size_t numEnt = A.getNumEntriesInLocalRow(lclRow);
72  lids_type ind = Kokkos::subview(indBuf, std::make_pair((size_t)0, numEnt));
73  vals_type val = Kokkos::subview(valBuf, std::make_pair((size_t)0, numEnt));
74  A.getLocalRowCopy(lclRow, ind, val, numEnt);
75  doForEachRow(lclRow, ind, val, numEnt);
76  }
77 }
78 
79 template <class SC, class LO, class GO, class NT>
80 void forEachLocalRowMatrixRow(
82  std::function<void(
83  const LO lclRow,
84  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
85  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
86  std::size_t /*numEnt*/)>
87  doForEachRow) {
88  const auto& rowMap = *(A.getRowMap());
89  const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
90  const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix(A);
91 
92  forEachLocalRowMatrixRow<SC, LO, GO, NT>(A, lclNumRows, maxNumEnt, doForEachRow);
93 }
94 
98 template <class SC, class LO, class GO, class NT>
99 #if KOKKOS_VERSION >= 40799
100 void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
101 #else
102 void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
103 #endif
104  typename NT::device_type>& result,
106 #if KOKKOS_VERSION >= 40799
107  using KAT = KokkosKernels::ArithTraits<SC>;
108 #else
109  using KAT = Kokkos::ArithTraits<SC>;
110 #endif
111  using mag_type = typename KAT::mag_type;
112 #if KOKKOS_VERSION >= 40799
113  using KAV = KokkosKernels::ArithTraits<typename KAT::val_type>;
114 #else
115  using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
116 #endif
117 
118  auto rowNorms_h = Kokkos::create_mirror_view(result.rowNorms);
119 
120  // DEEP_COPY REVIEW - NOT TESTED
121  Kokkos::deep_copy(rowNorms_h, result.rowNorms);
122  auto rowScaledColNorms_h = Kokkos::create_mirror_view(result.rowScaledColNorms);
123 
124  forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
125  [&](const LO lclRow,
126  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
127  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
128  std::size_t numEnt) {
129  const mag_type rowNorm = rowNorms_h[lclRow];
130  for (std::size_t k = 0; k < numEnt; ++k) {
131  const mag_type matrixAbsVal = KAV::abs(val[k]);
132  const LO lclCol = ind[k];
133 
134  rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
135  }
136  });
137 
138  // DEEP_COPY REVIEW - NOT TESTED
139  Kokkos::deep_copy(result.rowScaledColNorms, rowScaledColNorms_h);
140 }
141 
144 template <class SC, class LO, class GO, class NT>
145 #if KOKKOS_VERSION >= 40799
146 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
147 #else
148 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
149 #endif
151 #if KOKKOS_VERSION >= 40799
152  using KAT = KokkosKernels::ArithTraits<SC>;
153 #else
154  using KAT = Kokkos::ArithTraits<SC>;
155 #endif
156  using val_type = typename KAT::val_type;
157 #if KOKKOS_VERSION >= 40799
158  using KAV = KokkosKernels::ArithTraits<val_type>;
159 #else
160  using KAV = Kokkos::ArithTraits<val_type>;
161 #endif
162  using mag_type = typename KAT::mag_type;
163 #if KOKKOS_VERSION >= 40799
164  using KAM = KokkosKernels::ArithTraits<mag_type>;
165 #else
166  using KAM = Kokkos::ArithTraits<mag_type>;
167 #endif
168  using device_type = typename NT::device_type;
169  using equib_info_type = EquilibrationInfo<val_type, device_type>;
170 
171  const auto& rowMap = *(A.getRowMap());
172  const auto& colMap = *(A.getColMap());
173  const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
174  const LO lclNumCols = 0; // don't allocate column-related Views
175  constexpr bool assumeSymmetric = false; // doesn't matter here
176  equib_info_type result(lclNumRows, lclNumCols, assumeSymmetric);
177  auto result_h = result.createMirrorView();
178 
179  const auto val_zero = KAV::zero();
180  const auto mag_zero = KAM::zero();
181 
182  forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
183  [&](const LO lclRow,
184  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
185  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
186  std::size_t numEnt) {
187  mag_type rowNorm = mag_zero;
188  val_type diagVal = val_zero;
189  const GO gblRow = rowMap.getGlobalElement(lclRow);
190  // OK if invalid(); then we simply won't find the diagonal entry.
191  const GO lclDiagColInd = colMap.getLocalElement(gblRow);
192 
193  for (std::size_t k = 0; k < numEnt; ++k) {
194  const val_type matrixVal = val[k];
195  if (KAV::isInf(matrixVal)) {
196  result_h.foundInf = true;
197  }
198  if (KAV::isNan(matrixVal)) {
199  result_h.foundNan = true;
200  }
201  const mag_type matrixAbsVal = KAV::abs(matrixVal);
202  rowNorm += matrixAbsVal;
203  const LO lclCol = ind[k];
204  if (lclCol == lclDiagColInd) {
205  diagVal += val[k]; // repeats count additively
206  }
207  } // for each entry in row
208 
209  // This is a local result. If the matrix has an overlapping
210  // row Map, then the global result might differ.
211  if (diagVal == KAV::zero()) {
212  result_h.foundZeroDiag = true;
213  }
214  if (rowNorm == KAM::zero()) {
215  result_h.foundZeroRowNorm = true;
216  }
217  // NOTE (mfh 24 May 2018) We could actually compute local
218  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
219  // and row Map is the same as range Map (so that the local row
220  // norms are the same as the global row norms).
221  result_h.rowDiagonalEntries[lclRow] += diagVal;
222  result_h.rowNorms[lclRow] = rowNorm;
223  });
224 
225  result.assign(result_h);
226  return result;
227 }
228 
231 template <class SC, class LO, class GO, class NT>
232 #if KOKKOS_VERSION >= 40799
233 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
234 #else
235 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
236 #endif
238  const bool assumeSymmetric) {
239 #if KOKKOS_VERSION >= 40799
240  using KAT = KokkosKernels::ArithTraits<SC>;
241 #else
242  using KAT = Kokkos::ArithTraits<SC>;
243 #endif
244  using val_type = typename KAT::val_type;
245 #if KOKKOS_VERSION >= 40799
246  using KAV = KokkosKernels::ArithTraits<val_type>;
247 #else
248  using KAV = Kokkos::ArithTraits<val_type>;
249 #endif
250  using mag_type = typename KAT::mag_type;
251 #if KOKKOS_VERSION >= 40799
252  using KAM = KokkosKernels::ArithTraits<mag_type>;
253 #else
254  using KAM = Kokkos::ArithTraits<mag_type>;
255 #endif
256  using device_type = typename NT::device_type;
257 
258  const auto& rowMap = *(A.getRowMap());
259  const auto& colMap = *(A.getColMap());
260  const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
261  const LO lclNumCols = static_cast<LO>(colMap.getLocalNumElements());
262 
263  EquilibrationInfo<val_type, device_type> result(lclNumRows, lclNumCols, assumeSymmetric);
264  auto result_h = result.createMirrorView();
265 
266  const auto val_zero = KAV::zero();
267  const auto mag_zero = KAM::zero();
268 
269  forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
270  [&](const LO lclRow,
271  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
272  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
273  std::size_t numEnt) {
274  mag_type rowNorm = mag_zero;
275  val_type diagVal = val_zero;
276  const GO gblRow = rowMap.getGlobalElement(lclRow);
277  // OK if invalid(); then we simply won't find the diagonal entry.
278  const GO lclDiagColInd = colMap.getLocalElement(gblRow);
279 
280  for (std::size_t k = 0; k < numEnt; ++k) {
281  const val_type matrixVal = val[k];
282  if (KAV::isInf(matrixVal)) {
283  result_h.foundInf = true;
284  }
285  if (KAV::isNan(matrixVal)) {
286  result_h.foundNan = true;
287  }
288  const mag_type matrixAbsVal = KAV::abs(matrixVal);
289  rowNorm += matrixAbsVal;
290  const LO lclCol = ind[k];
291  if (lclCol == lclDiagColInd) {
292  diagVal += val[k]; // repeats count additively
293  }
294  if (!assumeSymmetric) {
295  result_h.colNorms[lclCol] += matrixAbsVal;
296  }
297  } // for each entry in row
298 
299  // This is a local result. If the matrix has an overlapping
300  // row Map, then the global result might differ.
301  if (diagVal == KAV::zero()) {
302  result_h.foundZeroDiag = true;
303  }
304  if (rowNorm == KAM::zero()) {
305  result_h.foundZeroRowNorm = true;
306  }
307  // NOTE (mfh 24 May 2018) We could actually compute local
308  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
309  // and row Map is the same as range Map (so that the local row
310  // norms are the same as the global row norms).
311  result_h.rowDiagonalEntries[lclRow] += diagVal;
312  result_h.rowNorms[lclRow] = rowNorm;
313  if (!assumeSymmetric &&
314  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
315  result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
316  }
317  });
318 
319  result.assign(result_h);
320  return result;
321 }
322 
323 template <class SC, class LO, class GO, class NT>
324 class ComputeLocalRowScaledColumnNorms {
325  public:
326  using crs_matrix_type = ::Tpetra::CrsMatrix<SC, LO, GO, NT>;
327 #if KOKKOS_VERSION >= 40799
328  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
329 #else
330  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
331 #endif
332 #if KOKKOS_VERSION >= 40799
333  using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
334 #else
335  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
336 #endif
337  using device_type = typename crs_matrix_type::device_type;
338  using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
339 
340  ComputeLocalRowScaledColumnNorms(const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
341  const Kokkos::View<const mag_type*, device_type>& rowNorms,
342  const crs_matrix_type& A)
343  : rowScaledColNorms_(rowScaledColNorms)
344  , rowNorms_(rowNorms)
345  , A_lcl_(A.getLocalMatrixDevice()) {}
346 
347  KOKKOS_INLINE_FUNCTION void operator()(const typename policy_type::member_type& team) const {
348 #if KOKKOS_VERSION >= 40799
349  using KAT = KokkosKernels::ArithTraits<val_type>;
350 #else
351  using KAT = Kokkos::ArithTraits<val_type>;
352 #endif
353 
354  const LO lclRow = team.league_rank();
355  const auto curRow = A_lcl_.rowConst(lclRow);
356  const mag_type rowNorm = rowNorms_[lclRow];
357  const LO numEnt = curRow.length;
358  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k) {
359  const mag_type matrixAbsVal = KAT::abs(curRow.value(k));
360  const LO lclCol = curRow.colidx(k);
361 
362  Kokkos::atomic_add(&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
363  });
364  }
365 
366  static void
367  run(const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
368  const Kokkos::View<const mag_type*, device_type>& rowNorms,
369  const crs_matrix_type& A) {
370  using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
371 
372  functor_type functor(rowScaledColNorms, rowNorms, A);
373  const LO lclNumRows =
374  static_cast<LO>(A.getRowMap()->getLocalNumElements());
375  Kokkos::parallel_for("computeLocalRowScaledColumnNorms",
376  policy_type(lclNumRows, Kokkos::AUTO), functor);
377  }
378 
379  private:
380  Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
381  Kokkos::View<const mag_type*, device_type> rowNorms_;
382 
383  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
384  local_matrix_device_type A_lcl_;
385 };
386 
387 template <class SC, class LO, class GO, class NT>
388 #if KOKKOS_VERSION >= 40799
389 void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
390 #else
391 void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
392 #endif
393  typename NT::device_type>& result,
395  using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
396  impl_type::run(result.rowScaledColNorms, result.rowNorms, A);
397 }
398 
399 template <class SC, class LO, class GO, class NT>
400 #if KOKKOS_VERSION >= 40799
401 void computeLocalRowScaledColumnNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
402 #else
403 void computeLocalRowScaledColumnNorms(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
404 #endif
405  typename NT::device_type>& result,
407  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
408 #if KOKKOS_VERSION >= 40799
409  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
410 #else
411  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
412 #endif
413 #if KOKKOS_VERSION >= 40799
414  using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
415 #else
416  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
417 #endif
418  using device_type = typename NT::device_type;
419 
420  auto colMapPtr = A.getColMap();
421  TEUCHOS_TEST_FOR_EXCEPTION(colMapPtr.get() == nullptr, std::invalid_argument,
422  "computeLocalRowScaledColumnNorms: "
423  "Input matrix A must have a nonnull column Map.");
424  const LO lclNumCols = static_cast<LO>(colMapPtr->getLocalNumElements());
425  if (static_cast<std::size_t>(result.rowScaledColNorms.extent(0)) !=
426  static_cast<std::size_t>(lclNumCols)) {
427  result.rowScaledColNorms =
428  Kokkos::View<mag_type*, device_type>("rowScaledColNorms", lclNumCols);
429  }
430 
431  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
432  if (A_crs == nullptr) {
434  } else {
435  computeLocalRowScaledColumnNorms_CrsMatrix(result, *A_crs);
436  }
437 }
438 
439 // Kokkos::parallel_reduce functor that is part of the implementation
440 // of computeLocalRowOneNorms_CrsMatrix.
441 template <class SC, class LO, class GO, class NT>
442 class ComputeLocalRowOneNorms {
443  public:
444 #if KOKKOS_VERSION >= 40799
445  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
446 #else
447  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
448 #endif
449  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
450  using local_matrix_device_type =
451  typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
452  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
453  using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
454 
455  ComputeLocalRowOneNorms(const equib_info_type& equib, // in/out
456  const local_matrix_device_type& A_lcl, // in
457  const local_map_type& rowMap, // in
458  const local_map_type& colMap)
459  : // in
460  equib_(equib)
461  , A_lcl_(A_lcl)
462  , rowMap_(rowMap)
463  , colMap_(colMap) {}
464 
465  // (result & 1) != 0 means "found Inf."
466  // (result & 2) != 0 means "found NaN."
467  // (result & 4) != 0 means "found zero diag."
468  // (result & 8) != 0 means "found zero row norm."
469  // Pack into a single int so the reduction is cheaper,
470  // esp. on GPU.
471  using value_type = int;
472 
473  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
474  dst = 0;
475  }
476 
477  KOKKOS_INLINE_FUNCTION void
478  join(value_type& dst,
479  const value_type& src) const {
480  dst |= src;
481  }
482 
483  KOKKOS_INLINE_FUNCTION void
484  operator()(const typename policy_type::member_type& team, value_type& dst) const {
485 #if KOKKOS_VERSION >= 40799
486  using KAT = KokkosKernels::ArithTraits<val_type>;
487 #else
488  using KAT = Kokkos::ArithTraits<val_type>;
489 #endif
490  using mag_type = typename KAT::mag_type;
491 #if KOKKOS_VERSION >= 40799
492  using KAM = KokkosKernels::ArithTraits<mag_type>;
493 #else
494  using KAM = Kokkos::ArithTraits<mag_type>;
495 #endif
496 
497  const LO lclRow = team.league_rank();
498  const GO gblRow = rowMap_.getGlobalElement(lclRow);
499  // OK if invalid(); then we simply won't find the diagonal entry.
500  const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
501 
502  const auto curRow = A_lcl_.rowConst(lclRow);
503  const LO numEnt = curRow.length;
504 
505  const auto val_zero = KAT::zero();
506  const auto mag_zero = KAM::zero();
507 
508  mag_type rowNorm = mag_zero;
509  val_type diagVal = val_zero;
510  value_type dstThread{0};
511 
512  Kokkos::parallel_reduce(
513  Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
514  const val_type matrixVal = curRow.value(k);
515  if (KAT::isInf(matrixVal)) {
516  dstContrib |= 1;
517  }
518  if (KAT::isNan(matrixVal)) {
519  dstContrib |= 2;
520  }
521  const mag_type matrixAbsVal = KAT::abs(matrixVal);
522  normContrib += matrixAbsVal;
523  const LO lclCol = curRow.colidx(k);
524  if (lclCol == lclDiagColInd) {
525  diagContrib = curRow.value(k); // assume no repeats
526  }
527  },
528  Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
529 
530  // This is a local result. If the matrix has an overlapping
531  // row Map, then the global result might differ.
532  Kokkos::single(Kokkos::PerTeam(team), [&]() {
533  dst |= dstThread;
534  if (diagVal == KAT::zero()) {
535  dst |= 4;
536  }
537  if (rowNorm == KAM::zero()) {
538  dst |= 8;
539  }
540  equib_.rowDiagonalEntries[lclRow] = diagVal;
541  equib_.rowNorms[lclRow] = rowNorm;
542  });
543  }
544 
545  private:
546  equib_info_type equib_;
547  local_matrix_device_type A_lcl_;
548  local_map_type rowMap_;
549  local_map_type colMap_;
550 };
551 
552 // Kokkos::parallel_reduce functor that is part of the implementation
553 // of computeLocalRowAndColumnOneNorms_CrsMatrix.
554 template <class SC, class LO, class GO, class NT>
555 class ComputeLocalRowAndColumnOneNorms {
556  public:
557 #if KOKKOS_VERSION >= 40799
558  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
559 #else
560  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
561 #endif
562  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
563  using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
564  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
565  using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
566 
567  public:
568  ComputeLocalRowAndColumnOneNorms(const equib_info_type& equib, // in/out
569  const local_matrix_device_type& A_lcl, // in
570  const local_map_type& rowMap, // in
571  const local_map_type& colMap)
572  : // in
573  equib_(equib)
574  , A_lcl_(A_lcl)
575  , rowMap_(rowMap)
576  , colMap_(colMap) {}
577 
578  // (result & 1) != 0 means "found Inf."
579  // (result & 2) != 0 means "found NaN."
580  // (result & 4) != 0 means "found zero diag."
581  // (result & 8) != 0 means "found zero row norm."
582  // Pack into a single int so the reduction is cheaper,
583  // esp. on GPU.
584  using value_type = int;
585 
586  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
587  dst = 0;
588  }
589 
590  KOKKOS_INLINE_FUNCTION void
591  join(value_type& dst,
592  const value_type& src) const {
593  dst |= src;
594  }
595 
596  KOKKOS_INLINE_FUNCTION void
597  operator()(const typename policy_type::member_type& team, value_type& dst) const {
598 #if KOKKOS_VERSION >= 40799
599  using KAT = KokkosKernels::ArithTraits<val_type>;
600 #else
601  using KAT = Kokkos::ArithTraits<val_type>;
602 #endif
603  using mag_type = typename KAT::mag_type;
604 #if KOKKOS_VERSION >= 40799
605  using KAM = KokkosKernels::ArithTraits<mag_type>;
606 #else
607  using KAM = Kokkos::ArithTraits<mag_type>;
608 #endif
609 
610  const LO lclRow = team.league_rank();
611  const GO gblRow = rowMap_.getGlobalElement(lclRow);
612  // OK if invalid(); then we simply won't find the diagonal entry.
613  const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
614 
615  const auto curRow = A_lcl_.rowConst(lclRow);
616  const LO numEnt = curRow.length;
617 
618  const auto val_zero = KAT::zero();
619  const auto mag_zero = KAM::zero();
620 
621  mag_type rowNorm = mag_zero;
622  val_type diagVal = val_zero;
623  value_type dstThread{0};
624 
625  Kokkos::parallel_reduce(
626  Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
627  const val_type matrixVal = curRow.value(k);
628  if (KAT::isInf(matrixVal)) {
629  dstContrib |= 1;
630  }
631  if (KAT::isNan(matrixVal)) {
632  dstContrib |= 2;
633  }
634  const mag_type matrixAbsVal = KAT::abs(matrixVal);
635  normContrib += matrixAbsVal;
636  const LO lclCol = curRow.colidx(k);
637  if (lclCol == lclDiagColInd) {
638  diagContrib = curRow.value(k); // assume no repeats
639  }
640  if (!equib_.assumeSymmetric) {
641  Kokkos::atomic_add(&(equib_.colNorms[lclCol]), matrixAbsVal);
642  }
643  },
644  Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
645 
646  // This is a local result. If the matrix has an overlapping
647  // row Map, then the global result might differ.
648  Kokkos::single(Kokkos::PerTeam(team), [&]() {
649  dst |= dstThread;
650  if (diagVal == KAT::zero()) {
651  dst |= 4;
652  }
653  if (rowNorm == KAM::zero()) {
654  dst |= 8;
655  }
656  // NOTE (mfh 24 May 2018) We could actually compute local
657  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
658  // and row Map is the same as range Map (so that the local row
659  // norms are the same as the global row norms).
660  equib_.rowDiagonalEntries[lclRow] = diagVal;
661  equib_.rowNorms[lclRow] = rowNorm;
662  if (!equib_.assumeSymmetric &&
663  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
664  // Don't need an atomic update here, since this lclDiagColInd is
665  // a one-to-one function of lclRow.
666  equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
667  }
668  });
669  }
670 
671  private:
672  equib_info_type equib_;
673  local_matrix_device_type A_lcl_;
674  local_map_type rowMap_;
675  local_map_type colMap_;
676 };
677 
680 template <class SC, class LO, class GO, class NT>
681 #if KOKKOS_VERSION >= 40799
682 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
683 #else
684 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
685 #endif
687  using execution_space = typename NT::device_type::execution_space;
688  using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
689  using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
690 #if KOKKOS_VERSION >= 40799
691  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
692 #else
693  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
694 #endif
695  using device_type = typename NT::device_type;
696  using equib_info_type = EquilibrationInfo<val_type, device_type>;
697 
698  const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
699  const LO lclNumCols = 0; // don't allocate column-related Views
700  constexpr bool assumeSymmetric = false; // doesn't matter here
701  equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
702 
703  functor_type functor(equib, A.getLocalMatrixDevice(),
704  A.getRowMap()->getLocalMap(),
705  A.getColMap()->getLocalMap());
706  int result = 0;
707  Kokkos::parallel_reduce("computeLocalRowOneNorms",
708  policy_type(lclNumRows, Kokkos::AUTO), functor,
709  result);
710  equib.foundInf = (result & 1) != 0;
711  equib.foundNan = (result & 2) != 0;
712  equib.foundZeroDiag = (result & 4) != 0;
713  equib.foundZeroRowNorm = (result & 8) != 0;
714  return equib;
715 }
716 
719 template <class SC, class LO, class GO, class NT>
720 #if KOKKOS_VERSION >= 40799
721 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
722 #else
723 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
724 #endif
726  const bool assumeSymmetric) {
727  using execution_space = typename NT::device_type::execution_space;
728  using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
729  using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
730 #if KOKKOS_VERSION >= 40799
731  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
732 #else
733  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
734 #endif
735  using device_type = typename NT::device_type;
736  using equib_info_type = EquilibrationInfo<val_type, device_type>;
737 
738  const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
739  const LO lclNumCols = static_cast<LO>(A.getColMap()->getLocalNumElements());
740  equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
741 
742  functor_type functor(equib, A.getLocalMatrixDevice(),
743  A.getRowMap()->getLocalMap(),
744  A.getColMap()->getLocalMap());
745  int result = 0;
746  Kokkos::parallel_reduce("computeLocalRowAndColumnOneNorms",
747  policy_type(lclNumRows, Kokkos::AUTO), functor,
748  result);
749  equib.foundInf = (result & 1) != 0;
750  equib.foundNan = (result & 2) != 0;
751  equib.foundZeroDiag = (result & 4) != 0;
752  equib.foundZeroRowNorm = (result & 8) != 0;
753  return equib;
754 }
755 
760 template <class SC, class LO, class GO, class NT>
761 #if KOKKOS_VERSION >= 40799
762 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
763 #else
764 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
765 #endif
766  typename NT::device_type>
768  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
769  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
770 
771  if (A_crs == nullptr) {
773  } else {
774  return computeLocalRowOneNorms_CrsMatrix(*A_crs);
775  }
776 }
777 
799 template <class SC, class LO, class GO, class NT>
800 #if KOKKOS_VERSION >= 40799
801 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
802 #else
803 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
804 #endif
806  const bool assumeSymmetric) {
807  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
808  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
809 
810  if (A_crs == nullptr) {
811  return computeLocalRowAndColumnOneNorms_RowMatrix(A, assumeSymmetric);
812  } else {
813  return computeLocalRowAndColumnOneNorms_CrsMatrix(*A_crs, assumeSymmetric);
814  }
815 }
816 
817 template <class SC, class LO, class GO, class NT>
818 auto getLocalView_1d_readOnly(
820  const LO whichColumn)
821  -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
822  Kokkos::ALL(), whichColumn)) {
823  if (X.isConstantStride()) {
824  return Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
825  Kokkos::ALL(), whichColumn);
826  } else {
827  auto X_whichColumn = X.getVector(whichColumn);
828  return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadOnly),
829  Kokkos::ALL(), 0);
830  }
831 }
832 
833 template <class SC, class LO, class GO, class NT>
834 auto getLocalView_1d_writeOnly(
836  const LO whichColumn)
837  -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
838  Kokkos::ALL(), whichColumn)) {
839  if (X.isConstantStride()) {
840  return Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
841  Kokkos::ALL(), whichColumn);
842  } else {
843  auto X_whichColumn = X.getVectorNonConst(whichColumn);
844  return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
845  Kokkos::ALL(), 0);
846  }
847 }
848 
849 template <class SC, class LO, class GO, class NT, class ViewValueType>
850 void copy1DViewIntoMultiVectorColumn(
852  const LO whichColumn,
853  const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
854  auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
855  Tpetra::Details::copyConvert(X_lcl, view);
856 }
857 
858 template <class SC, class LO, class GO, class NT, class ViewValueType>
859 void copy1DViewIntoMultiVectorColumn_mag(
861  const LO whichColumn,
862  const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
863 #if KOKKOS_VERSION >= 40799
864  using KAT = KokkosKernels::ArithTraits<ViewValueType>;
865 #else
866  using KAT = Kokkos::ArithTraits<ViewValueType>;
867 #endif
868  using execution_space = typename NT::execution_space;
869  using range_type = Kokkos::RangePolicy<execution_space, LO>;
870  auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
871  Kokkos::parallel_for(
872  "",
873  range_type(0, X_lcl.extent(0)),
874  KOKKOS_LAMBDA(const LO i) {
875  X_lcl(i) = KAT::magnitude(view(i));
876  });
877 }
878 
879 template <class SC, class LO, class GO, class NT, class ViewValueType>
880 void copyMultiVectorColumnInto1DView(
881  const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
883  const LO whichColumn) {
884  auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
885  Tpetra::Details::copyConvert(view, X_lcl);
886 }
887 
888 template <class SC, class LO, class GO, class NT, class ViewValueType>
889 void copyMultiVectorColumnInto1DView_mag(
890  const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
892  const LO whichColumn) {
893 #if KOKKOS_VERSION >= 40799
894  using implScalar = typename KokkosKernels::ArithTraits<SC>::val_type;
895 #else
896  using implScalar = typename Kokkos::ArithTraits<SC>::val_type;
897 #endif
898 #if KOKKOS_VERSION >= 40799
899  using KAT = KokkosKernels::ArithTraits<implScalar>;
900 #else
901  using KAT = Kokkos::ArithTraits<implScalar>;
902 #endif
903  using range_type = Kokkos::RangePolicy<typename NT::execution_space, LO>;
904  auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
905  Kokkos::parallel_for(
906  "",
907  range_type(0, X_lcl.extent(0)),
908  KOKKOS_LAMBDA(LO i) {
909  view(i) = KAT::magnitude(X_lcl(i));
910  });
911 }
912 
913 template <class OneDViewType, class IndexType>
914 class FindZero {
915  public:
916  static_assert(OneDViewType::rank == 1,
917  "OneDViewType must be a rank-1 Kokkos::View.");
918  static_assert(std::is_integral<IndexType>::value,
919  "IndexType must be a built-in integer type.");
920  FindZero(const OneDViewType& x)
921  : x_(x) {}
922  // Kokkos historically didn't like bool reduction results on CUDA,
923  // so we use int as the reduction result type.
924  KOKKOS_INLINE_FUNCTION void
925  operator()(const IndexType i, int& result) const {
926  using val_type = typename OneDViewType::non_const_value_type;
927 #if KOKKOS_VERSION >= 40799
928  result = (x_(i) == KokkosKernels::ArithTraits<val_type>::zero()) ? 1 : result;
929 #else
930  result = (x_(i) == Kokkos::ArithTraits<val_type>::zero()) ? 1 : result;
931 #endif
932  }
933 
934  private:
935  OneDViewType x_;
936 };
937 
938 template <class OneDViewType>
939 bool findZero(const OneDViewType& x) {
940  using view_type = typename OneDViewType::const_type;
941  using execution_space = typename view_type::execution_space;
942  using size_type = typename view_type::size_type;
943  using functor_type = FindZero<view_type, size_type>;
944 
945  Kokkos::RangePolicy<execution_space, size_type> range(0, x.extent(0));
946  range.set_chunk_size(500); // adjust as needed
947 
948  int foundZero = 0;
949  Kokkos::parallel_reduce("findZero", range, functor_type(x), foundZero);
950  return foundZero == 1;
951 }
952 
953 template <class SC, class LO, class GO, class NT>
954 #if KOKKOS_VERSION >= 40799
955 void globalizeRowOneNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
956 #else
957 void globalizeRowOneNorms(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
958 #endif
959  typename NT::device_type>& equib,
961  using mv_type = Tpetra::MultiVector<SC, LO, GO, NT>;
962 
963  auto G = A.getGraph();
964  TEUCHOS_TEST_FOR_EXCEPTION(G.get() == nullptr, std::invalid_argument,
965  "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
966  "(that is, getGraph() must return nonnull).");
967  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
968  "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
969 
970  auto exp = G->getExporter();
971  if (!exp.is_null()) {
972  // If the matrix has an overlapping row Map, first Export the
973  // local row norms with ADD CombineMode to a range Map Vector to
974  // get the global row norms, then reverse them back with REPLACE
975  // CombineMode to the row Map Vector. Ditto for the local row
976  // diagonal entries. Use SC instead of mag_type, so we can
977  // communicate both row norms and row diagonal entries at once.
978 
979  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
980  // avoid the local copy here.
981  mv_type rowMapMV(G->getRowMap(), 2, false);
982 
983  copy1DViewIntoMultiVectorColumn(rowMapMV, 0, equib.rowNorms);
984  copy1DViewIntoMultiVectorColumn(rowMapMV, 1, equib.rowDiagonalEntries);
985  {
986  mv_type rangeMapMV(G->getRangeMap(), 2, true);
987  rangeMapMV.doExport(rowMapMV, *exp, Tpetra::ADD); // forward mode
988  rowMapMV.doImport(rangeMapMV, *exp, Tpetra::REPLACE); // reverse mode
989  }
990  copyMultiVectorColumnInto1DView_mag(equib.rowNorms, rowMapMV, 0);
991  copyMultiVectorColumnInto1DView(equib.rowDiagonalEntries, rowMapMV, 1);
992 
993  // It's not common for users to solve linear systems with a
994  // nontrival Export, so it's OK for this to cost an additional
995  // pass over rowDiagonalEntries.
996  equib.foundZeroDiag = findZero(equib.rowDiagonalEntries);
997  equib.foundZeroRowNorm = findZero(equib.rowNorms);
998  }
999 
1000  constexpr int allReduceCount = 4;
1001  int lclNaughtyMatrix[allReduceCount];
1002  lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
1003  lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
1004  lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
1005  lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
1006 
1007  using Teuchos::outArg;
1008  using Teuchos::REDUCE_MAX;
1009  using Teuchos::reduceAll;
1010  auto comm = G->getComm();
1011  int gblNaughtyMatrix[allReduceCount];
1012  reduceAll<int, int>(*comm, REDUCE_MAX, allReduceCount,
1013  lclNaughtyMatrix, gblNaughtyMatrix);
1014 
1015  equib.foundInf = gblNaughtyMatrix[0] == 1;
1016  equib.foundNan = gblNaughtyMatrix[1] == 1;
1017  equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
1018  equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
1019 }
1020 
1021 template <class SC, class LO, class GO, class NT>
1022 #if KOKKOS_VERSION >= 40799
1023 void globalizeColumnOneNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1024 #else
1025 void globalizeColumnOneNorms(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1026 #endif
1027  typename NT::device_type>& equib,
1029  const bool assumeSymmetric) // if so, use row norms
1030 {
1031 #if KOKKOS_VERSION >= 40799
1032  using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
1033 #else
1034  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
1035 #endif
1036 #if KOKKOS_VERSION >= 40799
1037  using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
1038 #else
1039  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
1040 #endif
1042  using device_type = typename NT::device_type;
1043 
1044  auto G = A.getGraph();
1045  TEUCHOS_TEST_FOR_EXCEPTION(G.get() == nullptr, std::invalid_argument,
1046  "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
1047  "(that is, getGraph() must return nonnull).");
1048  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
1049  "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
1050 
1051  auto imp = G->getImporter();
1052  if (assumeSymmetric) {
1053  const LO numCols = 2;
1054  // Redistribute local row info to global column info.
1055 
1056  // Get the data into a MultiVector on the domain Map.
1057  mv_type rowNorms_domMap(G->getDomainMap(), numCols, false);
1058  const bool rowMapSameAsDomainMap = G->getRowMap()->isSameAs(*(G->getDomainMap()));
1059  if (rowMapSameAsDomainMap) {
1060  copy1DViewIntoMultiVectorColumn(rowNorms_domMap, 0, equib.rowNorms);
1061  copy1DViewIntoMultiVectorColumn_mag(rowNorms_domMap, 1, equib.rowDiagonalEntries);
1062  } else {
1063  // This is not a common case; it would normally arise when the
1064  // matrix has an overlapping row Map.
1065  Tpetra::Export<LO, GO, NT> rowToDom(G->getRowMap(), G->getDomainMap());
1066  mv_type rowNorms_rowMap(G->getRowMap(), numCols, true);
1067  copy1DViewIntoMultiVectorColumn(rowNorms_rowMap, 0, equib.rowNorms);
1068  copy1DViewIntoMultiVectorColumn_mag(rowNorms_rowMap, 1, equib.rowDiagonalEntries);
1069  rowNorms_domMap.doExport(rowNorms_rowMap, rowToDom, Tpetra::REPLACE);
1070  }
1071 
1072  // Use the existing Import to redistribute the row norms from the
1073  // domain Map to the column Map.
1074  std::unique_ptr<mv_type> rowNorms_colMap;
1075  if (imp.is_null()) {
1076  // Shallow copy of rowNorms_domMap.
1077  rowNorms_colMap =
1078  std::unique_ptr<mv_type>(new mv_type(rowNorms_domMap, *(G->getColMap())));
1079  } else {
1080  rowNorms_colMap =
1081  std::unique_ptr<mv_type>(new mv_type(G->getColMap(), numCols, true));
1082  rowNorms_colMap->doImport(rowNorms_domMap, *imp, Tpetra::REPLACE);
1083  }
1084 
1085  // Make sure the result has allocations of the right size.
1086  const LO lclNumCols =
1087  static_cast<LO>(G->getColMap()->getLocalNumElements());
1088  if (static_cast<LO>(equib.colNorms.extent(0)) != lclNumCols) {
1089  equib.colNorms =
1090  Kokkos::View<mag_type*, device_type>("colNorms", lclNumCols);
1091  }
1092  if (static_cast<LO>(equib.colDiagonalEntries.extent(0)) != lclNumCols) {
1093  equib.colDiagonalEntries =
1094  Kokkos::View<val_type*, device_type>("colDiagonalEntries", lclNumCols);
1095  }
1096 
1097  // Copy row norms and diagonal entries, appropriately
1098  // redistributed, into column norms resp. diagonal entries.
1099  copyMultiVectorColumnInto1DView(equib.colNorms, *rowNorms_colMap, 0);
1100  copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, *rowNorms_colMap, 1);
1101  } else {
1102  if (!imp.is_null()) {
1103  const LO numCols = 3;
1104  // If the matrix has an overlapping column Map (this is usually
1105  // the case), first Export (reverse-mode Import) the local info
1106  // to a domain Map Vector to get the global info, then Import
1107  // them back with REPLACE CombineMode to the column Map Vector.
1108  // Ditto for the row-scaled column norms.
1109 
1110  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
1111  // avoid the local copy here.
1112  mv_type colMapMV(G->getColMap(), numCols, false);
1113 
1114  copy1DViewIntoMultiVectorColumn(colMapMV, 0, equib.colNorms);
1115  copy1DViewIntoMultiVectorColumn_mag(colMapMV, 1, equib.colDiagonalEntries);
1116  copy1DViewIntoMultiVectorColumn(colMapMV, 2, equib.rowScaledColNorms);
1117  {
1118  mv_type domainMapMV(G->getDomainMap(), numCols, true);
1119  domainMapMV.doExport(colMapMV, *imp, Tpetra::ADD); // reverse mode
1120  colMapMV.doImport(domainMapMV, *imp, Tpetra::REPLACE); // forward mode
1121  }
1122  copyMultiVectorColumnInto1DView(equib.colNorms, colMapMV, 0);
1123  copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, colMapMV, 1);
1124  copyMultiVectorColumnInto1DView(equib.rowScaledColNorms, colMapMV, 2);
1125  }
1126  }
1127 }
1128 
1129 } // namespace Details
1130 
1131 template <class SC, class LO, class GO, class NT>
1132 #if KOKKOS_VERSION >= 40799
1133 Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1134 #else
1135 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1136 #endif
1137  typename NT::device_type>
1139  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::invalid_argument,
1140  "computeRowOneNorms: Input matrix A must be fillComplete.");
1141  auto result = Details::computeLocalRowOneNorms(A);
1142 
1143  Details::globalizeRowOneNorms(result, A);
1144  return result;
1145 }
1146 
1147 template <class SC, class LO, class GO, class NT>
1148 #if KOKKOS_VERSION >= 40799
1149 Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1150 #else
1151 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1152 #endif
1153  typename NT::device_type>
1155  const bool assumeSymmetric) {
1156  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::invalid_argument,
1157  "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1158  auto result = Details::computeLocalRowAndColumnOneNorms(A, assumeSymmetric);
1159 
1160  Details::globalizeRowOneNorms(result, A);
1161  if (!assumeSymmetric) {
1162  // Row-norm-scaled column norms are trivial if the matrix is
1163  // symmetric, since the row norms and column norms are the same in
1164  // that case.
1165  Details::computeLocalRowScaledColumnNorms(result, A);
1166  }
1167  Details::globalizeColumnOneNorms(result, A, assumeSymmetric);
1168  return result;
1169 }
1170 
1171 } // namespace Tpetra
1172 
1173 //
1174 // Explicit instantiation macro
1175 //
1176 // Must be expanded from within the Tpetra namespace!
1177 //
1178 
1179 #if KOKKOS_VERSION >= 40799
1180 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1181  template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1182  computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1183  \
1184  template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1185  computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1186  const bool assumeSymmetric);
1187 
1188 #else
1189 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1190  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1191  computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1192  \
1193  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1194  computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1195  const bool assumeSymmetric);
1196 
1197 #endif
1198 
1199 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms (&quot;row sums&quot;) of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms (&quot;row sums&quot; etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms (&quot;row sums&quot; and &quot;column sums&quot;) of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms (&quot;row sums&quot; etc.) of the input sparse matrix A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sum new values.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row&#39;s entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.