Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_computeRowAndColumnOneNorms_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
12 
19 
20 #include "Kokkos_ArithTraits.hpp"
21 #include "Kokkos_Macros.hpp"
24 #include "Tpetra_CrsMatrix.hpp"
25 #include "Tpetra_Export.hpp"
26 #include "Tpetra_Map.hpp"
27 #include "Tpetra_MultiVector.hpp"
28 #include "Tpetra_RowMatrix.hpp"
29 #include "Kokkos_Core.hpp"
30 #include "Teuchos_CommHelpers.hpp"
31 #include <memory>
32 
33 namespace Tpetra {
34 namespace Details {
35 
36 template<class SC, class LO, class GO, class NT>
37 std::size_t
38 lclMaxNumEntriesRowMatrix (const Tpetra::RowMatrix<SC, LO, GO, NT>& A)
39 {
40  const auto& rowMap = * (A.getRowMap ());
41  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
42 
43  std::size_t maxNumEnt {0};
44  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
45  const std::size_t numEnt = A.getNumEntriesInLocalRow (lclRow);
46  maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
47  }
48  return maxNumEnt;
49 }
50 
51 template<class SC, class LO, class GO, class NT>
52 void
53 forEachLocalRowMatrixRow (
55  const LO lclNumRows,
56  const std::size_t maxNumEnt,
57  std::function<void (
58  const LO lclRow,
59  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
60  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
61  std::size_t /*numEnt*/ )> doForEachRow)
62 {
63  using lids_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
64  using vals_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
65  lids_type indBuf("indices",maxNumEnt);
66  vals_type valBuf("values",maxNumEnt);
67 
68  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
69  std::size_t numEnt = A.getNumEntriesInLocalRow (lclRow);
70  lids_type ind = Kokkos::subview(indBuf,std::make_pair((size_t)0, numEnt));
71  vals_type val = Kokkos::subview(valBuf,std::make_pair((size_t)0, numEnt));
72  A.getLocalRowCopy (lclRow, ind, val, numEnt);
73  doForEachRow (lclRow, ind, val, numEnt);
74  }
75 }
76 
77 template<class SC, class LO, class GO, class NT>
78 void
79 forEachLocalRowMatrixRow (
81  std::function<void (
82  const LO lclRow,
83  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
84  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
85  std::size_t /*numEnt*/ )> doForEachRow)
86 {
87  const auto& rowMap = * (A.getRowMap ());
88  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
89  const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
90 
91  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
92 }
93 
97 template<class SC, class LO, class GO, class NT>
98 void
99 computeLocalRowScaledColumnNorms_RowMatrix (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
100  typename NT::device_type>& result,
102 {
103  using KAT = Kokkos::ArithTraits<SC>;
104  using mag_type = typename KAT::mag_type;
105  using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
106 
107  auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
108 
109  // DEEP_COPY REVIEW - NOT TESTED
110  Kokkos::deep_copy (rowNorms_h, result.rowNorms);
111  auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
112 
113  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
114  [&] (const LO lclRow,
115  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
116  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
117  std::size_t numEnt) {
118  const mag_type rowNorm = rowNorms_h[lclRow];
119  for (std::size_t k = 0; k < numEnt; ++k) {
120  const mag_type matrixAbsVal = KAV::abs (val[k]);
121  const LO lclCol = ind[k];
122 
123  rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
124  }
125  });
126 
127  // DEEP_COPY REVIEW - NOT TESTED
128  Kokkos::deep_copy (result.rowScaledColNorms, rowScaledColNorms_h);
129 }
130 
133 template<class SC, class LO, class GO, class NT>
134 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
136 {
137  using KAT = Kokkos::ArithTraits<SC>;
138  using val_type = typename KAT::val_type;
139  using KAV = Kokkos::ArithTraits<val_type>;
140  using mag_type = typename KAT::mag_type;
141  using KAM = Kokkos::ArithTraits<mag_type>;
142  using device_type = typename NT::device_type;
143  using equib_info_type = EquilibrationInfo<val_type, device_type>;
144 
145  const auto& rowMap = * (A.getRowMap ());
146  const auto& colMap = * (A.getColMap ());
147  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
148  const LO lclNumCols = 0; // don't allocate column-related Views
149  constexpr bool assumeSymmetric = false; // doesn't matter here
150  equib_info_type result (lclNumRows, lclNumCols, assumeSymmetric);
151  auto result_h = result.createMirrorView ();
152 
153  const auto val_zero = KAV::zero();
154  const auto mag_zero = KAM::zero();
155 
156  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
157  [&] (const LO lclRow,
158  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
159  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
160  std::size_t numEnt) {
161  mag_type rowNorm = mag_zero;
162  val_type diagVal = val_zero;
163  const GO gblRow = rowMap.getGlobalElement (lclRow);
164  // OK if invalid(); then we simply won't find the diagonal entry.
165  const GO lclDiagColInd = colMap.getLocalElement (gblRow);
166 
167  for (std::size_t k = 0; k < numEnt; ++k) {
168  const val_type matrixVal = val[k];
169  if (KAV::isInf (matrixVal)) {
170  result_h.foundInf = true;
171  }
172  if (KAV::isNan (matrixVal)) {
173  result_h.foundNan = true;
174  }
175  const mag_type matrixAbsVal = KAV::abs (matrixVal);
176  rowNorm += matrixAbsVal;
177  const LO lclCol = ind[k];
178  if (lclCol == lclDiagColInd) {
179  diagVal += val[k]; // repeats count additively
180  }
181  } // for each entry in row
182 
183  // This is a local result. If the matrix has an overlapping
184  // row Map, then the global result might differ.
185  if (diagVal == KAV::zero ()) {
186  result_h.foundZeroDiag = true;
187  }
188  if (rowNorm == KAM::zero ()) {
189  result_h.foundZeroRowNorm = true;
190  }
191  // NOTE (mfh 24 May 2018) We could actually compute local
192  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
193  // and row Map is the same as range Map (so that the local row
194  // norms are the same as the global row norms).
195  result_h.rowDiagonalEntries[lclRow] += diagVal;
196  result_h.rowNorms[lclRow] = rowNorm;
197  });
198 
199  result.assign (result_h);
200  return result;
201 }
202 
205 template<class SC, class LO, class GO, class NT>
206 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
208  const bool assumeSymmetric)
209 {
210  using KAT = Kokkos::ArithTraits<SC>;
211  using val_type = typename KAT::val_type;
212  using KAV = Kokkos::ArithTraits<val_type>;
213  using mag_type = typename KAT::mag_type;
214  using KAM = Kokkos::ArithTraits<mag_type>;
215  using device_type = typename NT::device_type;
216 
217  const auto& rowMap = * (A.getRowMap ());
218  const auto& colMap = * (A.getColMap ());
219  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
220  const LO lclNumCols = static_cast<LO> (colMap.getLocalNumElements ());
221 
223  (lclNumRows, lclNumCols, assumeSymmetric);
224  auto result_h = result.createMirrorView ();
225 
226  const auto val_zero = KAV::zero();
227  const auto mag_zero = KAM::zero();
228 
229  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
230  [&] (const LO lclRow,
231  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
232  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
233  std::size_t numEnt) {
234  mag_type rowNorm = mag_zero;
235  val_type diagVal = val_zero;
236  const GO gblRow = rowMap.getGlobalElement (lclRow);
237  // OK if invalid(); then we simply won't find the diagonal entry.
238  const GO lclDiagColInd = colMap.getLocalElement (gblRow);
239 
240  for (std::size_t k = 0; k < numEnt; ++k) {
241  const val_type matrixVal = val[k];
242  if (KAV::isInf (matrixVal)) {
243  result_h.foundInf = true;
244  }
245  if (KAV::isNan (matrixVal)) {
246  result_h.foundNan = true;
247  }
248  const mag_type matrixAbsVal = KAV::abs (matrixVal);
249  rowNorm += matrixAbsVal;
250  const LO lclCol = ind[k];
251  if (lclCol == lclDiagColInd) {
252  diagVal += val[k]; // repeats count additively
253  }
254  if (! assumeSymmetric) {
255  result_h.colNorms[lclCol] += matrixAbsVal;
256  }
257  } // for each entry in row
258 
259  // This is a local result. If the matrix has an overlapping
260  // row Map, then the global result might differ.
261  if (diagVal == KAV::zero ()) {
262  result_h.foundZeroDiag = true;
263  }
264  if (rowNorm == KAM::zero ()) {
265  result_h.foundZeroRowNorm = true;
266  }
267  // NOTE (mfh 24 May 2018) We could actually compute local
268  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
269  // and row Map is the same as range Map (so that the local row
270  // norms are the same as the global row norms).
271  result_h.rowDiagonalEntries[lclRow] += diagVal;
272  result_h.rowNorms[lclRow] = rowNorm;
273  if (! assumeSymmetric &&
274  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
275  result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
276  }
277  });
278 
279  result.assign (result_h);
280  return result;
281 }
282 
283 template<class SC, class LO, class GO, class NT>
284 class ComputeLocalRowScaledColumnNorms {
285 public:
286  using crs_matrix_type = ::Tpetra::CrsMatrix<SC, LO, GO, NT>;
287  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
288  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
289  using device_type = typename crs_matrix_type::device_type;
290  using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
291 
292  ComputeLocalRowScaledColumnNorms (const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
293  const Kokkos::View<const mag_type*, device_type>& rowNorms,
294  const crs_matrix_type& A) :
295  rowScaledColNorms_ (rowScaledColNorms),
296  rowNorms_ (rowNorms),
297  A_lcl_ (A.getLocalMatrixDevice ())
298  {}
299 
300  KOKKOS_INLINE_FUNCTION void operator () (const typename policy_type::member_type &team) const {
301  using KAT = Kokkos::ArithTraits<val_type>;
302 
303  const LO lclRow = team.league_rank();
304  const auto curRow = A_lcl_.rowConst (lclRow);
305  const mag_type rowNorm = rowNorms_[lclRow];
306  const LO numEnt = curRow.length;
307  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k) {
308  const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
309  const LO lclCol = curRow.colidx(k);
310 
311  Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
312  });
313  }
314 
315  static void
316  run (const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
317  const Kokkos::View<const mag_type*, device_type>& rowNorms,
318  const crs_matrix_type& A)
319  {
320  using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
321 
322  functor_type functor (rowScaledColNorms, rowNorms, A);
323  const LO lclNumRows =
324  static_cast<LO> (A.getRowMap ()->getLocalNumElements ());
325  Kokkos::parallel_for ("computeLocalRowScaledColumnNorms",
326  policy_type (lclNumRows, Kokkos::AUTO), functor);
327  }
328 
329 private:
330  Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
331  Kokkos::View<const mag_type*, device_type> rowNorms_;
332 
333  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
334  local_matrix_device_type A_lcl_;
335 };
336 
337 template<class SC, class LO, class GO, class NT>
338 void
339 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
340  typename NT::device_type>& result,
342 {
343  using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
344  impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
345 }
346 
347 template<class SC, class LO, class GO, class NT>
348 void
349 computeLocalRowScaledColumnNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
350  typename NT::device_type>& result,
352 {
353  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
354  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
355  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
356  using device_type = typename NT::device_type;
357 
358  auto colMapPtr = A.getColMap ();
359  TEUCHOS_TEST_FOR_EXCEPTION
360  (colMapPtr.get () == nullptr, std::invalid_argument,
361  "computeLocalRowScaledColumnNorms: "
362  "Input matrix A must have a nonnull column Map.");
363  const LO lclNumCols = static_cast<LO> (colMapPtr->getLocalNumElements ());
364  if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
365  static_cast<std::size_t> (lclNumCols)) {
366  result.rowScaledColNorms =
367  Kokkos::View<mag_type*, device_type> ("rowScaledColNorms", lclNumCols);
368  }
369 
370  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
371  if (A_crs == nullptr) {
373  }
374  else {
375  computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
376  }
377 }
378 
379 // Kokkos::parallel_reduce functor that is part of the implementation
380 // of computeLocalRowOneNorms_CrsMatrix.
381 template<class SC, class LO, class GO, class NT>
382 class ComputeLocalRowOneNorms {
383 public:
384  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
385  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
386  using local_matrix_device_type =
387  typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
388  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
389  using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
390 
391  ComputeLocalRowOneNorms (const equib_info_type& equib, // in/out
392  const local_matrix_device_type& A_lcl, // in
393  const local_map_type& rowMap, // in
394  const local_map_type& colMap) : // in
395  equib_ (equib),
396  A_lcl_ (A_lcl),
397  rowMap_ (rowMap),
398  colMap_ (colMap)
399  {}
400 
401  // (result & 1) != 0 means "found Inf."
402  // (result & 2) != 0 means "found NaN."
403  // (result & 4) != 0 means "found zero diag."
404  // (result & 8) != 0 means "found zero row norm."
405  // Pack into a single int so the reduction is cheaper,
406  // esp. on GPU.
407  using value_type = int;
408 
409  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
410  {
411  dst = 0;
412  }
413 
414  KOKKOS_INLINE_FUNCTION void
415  join (value_type& dst,
416  const value_type& src) const
417  {
418  dst |= src;
419  }
420 
421  KOKKOS_INLINE_FUNCTION void
422  operator () (const typename policy_type::member_type& team, value_type& dst) const
423  {
424  using KAT = Kokkos::ArithTraits<val_type>;
425  using mag_type = typename KAT::mag_type;
426  using KAM = Kokkos::ArithTraits<mag_type>;
427 
428  const LO lclRow = team.league_rank();
429  const GO gblRow = rowMap_.getGlobalElement (lclRow);
430  // OK if invalid(); then we simply won't find the diagonal entry.
431  const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
432 
433  const auto curRow = A_lcl_.rowConst (lclRow);
434  const LO numEnt = curRow.length;
435 
436  const auto val_zero = KAT::zero();
437  const auto mag_zero = KAM::zero();
438 
439  mag_type rowNorm = mag_zero;
440  val_type diagVal = val_zero;
441  value_type dstThread {0};
442 
443  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
444  const val_type matrixVal = curRow.value (k);
445  if (KAT::isInf (matrixVal)) {
446  dstContrib |= 1;
447  }
448  if (KAT::isNan (matrixVal)) {
449  dstContrib |= 2;
450  }
451  const mag_type matrixAbsVal = KAT::abs (matrixVal);
452  normContrib += matrixAbsVal;
453  const LO lclCol = curRow.colidx (k);
454  if (lclCol == lclDiagColInd) {
455  diagContrib = curRow.value (k); // assume no repeats
456  }
457  }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
458 
459  // This is a local result. If the matrix has an overlapping
460  // row Map, then the global result might differ.
461  Kokkos::single(Kokkos::PerTeam(team), [&](){
462  dst |= dstThread;
463  if (diagVal == KAT::zero ()) {
464  dst |= 4;
465  }
466  if (rowNorm == KAM::zero ()) {
467  dst |= 8;
468  }
469  equib_.rowDiagonalEntries[lclRow] = diagVal;
470  equib_.rowNorms[lclRow] = rowNorm;
471  });
472  }
473 
474 private:
475  equib_info_type equib_;
476  local_matrix_device_type A_lcl_;
477  local_map_type rowMap_;
478  local_map_type colMap_;
479 };
480 
481 // Kokkos::parallel_reduce functor that is part of the implementation
482 // of computeLocalRowAndColumnOneNorms_CrsMatrix.
483 template<class SC, class LO, class GO, class NT>
484 class ComputeLocalRowAndColumnOneNorms {
485 public:
486  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
487  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
488  using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
489  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
490  using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
491 
492 public:
493  ComputeLocalRowAndColumnOneNorms (const equib_info_type& equib, // in/out
494  const local_matrix_device_type& A_lcl, // in
495  const local_map_type& rowMap, // in
496  const local_map_type& colMap) : // in
497  equib_ (equib),
498  A_lcl_ (A_lcl),
499  rowMap_ (rowMap),
500  colMap_ (colMap)
501  {}
502 
503  // (result & 1) != 0 means "found Inf."
504  // (result & 2) != 0 means "found NaN."
505  // (result & 4) != 0 means "found zero diag."
506  // (result & 8) != 0 means "found zero row norm."
507  // Pack into a single int so the reduction is cheaper,
508  // esp. on GPU.
509  using value_type = int;
510 
511  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
512  {
513  dst = 0;
514  }
515 
516  KOKKOS_INLINE_FUNCTION void
517  join (value_type& dst,
518  const value_type& src) const
519  {
520  dst |= src;
521  }
522 
523  KOKKOS_INLINE_FUNCTION void
524  operator () (const typename policy_type::member_type& team, value_type& dst) const
525  {
526  using KAT = Kokkos::ArithTraits<val_type>;
527  using mag_type = typename KAT::mag_type;
528  using KAM = Kokkos::ArithTraits<mag_type>;
529 
530  const LO lclRow = team.league_rank();
531  const GO gblRow = rowMap_.getGlobalElement (lclRow);
532  // OK if invalid(); then we simply won't find the diagonal entry.
533  const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
534 
535  const auto curRow = A_lcl_.rowConst (lclRow);
536  const LO numEnt = curRow.length;
537 
538  const auto val_zero = KAT::zero();
539  const auto mag_zero = KAM::zero();
540 
541  mag_type rowNorm = mag_zero;
542  val_type diagVal = val_zero;
543  value_type dstThread {0};
544 
545  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
546  const val_type matrixVal = curRow.value (k);
547  if (KAT::isInf (matrixVal)) {
548  dstContrib |= 1;
549  }
550  if (KAT::isNan (matrixVal)) {
551  dstContrib |= 2;
552  }
553  const mag_type matrixAbsVal = KAT::abs (matrixVal);
554  normContrib += matrixAbsVal;
555  const LO lclCol = curRow.colidx (k);
556  if (lclCol == lclDiagColInd) {
557  diagContrib = curRow.value (k); // assume no repeats
558  }
559  if (! equib_.assumeSymmetric) {
560  Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
561  }
562  }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
563 
564  // This is a local result. If the matrix has an overlapping
565  // row Map, then the global result might differ.
566  Kokkos::single(Kokkos::PerTeam(team), [&](){
567  dst |= dstThread;
568  if (diagVal == KAT::zero ()) {
569  dst |= 4;
570  }
571  if (rowNorm == KAM::zero ()) {
572  dst |= 8;
573  }
574  // NOTE (mfh 24 May 2018) We could actually compute local
575  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
576  // and row Map is the same as range Map (so that the local row
577  // norms are the same as the global row norms).
578  equib_.rowDiagonalEntries[lclRow] = diagVal;
579  equib_.rowNorms[lclRow] = rowNorm;
580  if (! equib_.assumeSymmetric &&
581  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
582  // Don't need an atomic update here, since this lclDiagColInd is
583  // a one-to-one function of lclRow.
584  equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
585  }
586  });
587  }
588 
589 private:
590  equib_info_type equib_;
591  local_matrix_device_type A_lcl_;
592  local_map_type rowMap_;
593  local_map_type colMap_;
594 };
595 
598 template<class SC, class LO, class GO, class NT>
599 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
601 {
602  using execution_space = typename NT::device_type::execution_space;
603  using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
604  using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
605  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
606  using device_type = typename NT::device_type;
607  using equib_info_type = EquilibrationInfo<val_type, device_type>;
608 
609  const LO lclNumRows = static_cast<LO> (A.getRowMap ()->getLocalNumElements ());
610  const LO lclNumCols = 0; // don't allocate column-related Views
611  constexpr bool assumeSymmetric = false; // doesn't matter here
612  equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
613 
614  functor_type functor (equib, A.getLocalMatrixDevice (),
615  A.getRowMap ()->getLocalMap (),
616  A.getColMap ()->getLocalMap ());
617  int result = 0;
618  Kokkos::parallel_reduce ("computeLocalRowOneNorms",
619  policy_type (lclNumRows, Kokkos::AUTO), functor,
620  result);
621  equib.foundInf = (result & 1) != 0;
622  equib.foundNan = (result & 2) != 0;
623  equib.foundZeroDiag = (result & 4) != 0;
624  equib.foundZeroRowNorm = (result & 8) != 0;
625  return equib;
626 }
627 
630 template<class SC, class LO, class GO, class NT>
631 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
633  const bool assumeSymmetric)
634 {
635  using execution_space = typename NT::device_type::execution_space;
636  using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
637  using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
638  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
639  using device_type = typename NT::device_type;
640  using equib_info_type = EquilibrationInfo<val_type, device_type>;
641 
642  const LO lclNumRows = static_cast<LO> (A.getRowMap ()->getLocalNumElements ());
643  const LO lclNumCols = static_cast<LO> (A.getColMap ()->getLocalNumElements ());
644  equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
645 
646  functor_type functor (equib, A.getLocalMatrixDevice (),
647  A.getRowMap ()->getLocalMap (),
648  A.getColMap ()->getLocalMap ());
649  int result = 0;
650  Kokkos::parallel_reduce ("computeLocalRowAndColumnOneNorms",
651  policy_type (lclNumRows, Kokkos::AUTO), functor,
652  result);
653  equib.foundInf = (result & 1) != 0;
654  equib.foundNan = (result & 2) != 0;
655  equib.foundZeroDiag = (result & 4) != 0;
656  equib.foundZeroRowNorm = (result & 8) != 0;
657  return equib;
658 }
659 
664 template<class SC, class LO, class GO, class NT>
665 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
666  typename NT::device_type>
668 {
669  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
670  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
671 
672  if (A_crs == nullptr) {
674  }
675  else {
676  return computeLocalRowOneNorms_CrsMatrix (*A_crs);
677  }
678 }
679 
701 template<class SC, class LO, class GO, class NT>
702 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
704  const bool assumeSymmetric)
705 {
706  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
707  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
708 
709  if (A_crs == nullptr) {
710  return computeLocalRowAndColumnOneNorms_RowMatrix (A, assumeSymmetric);
711  }
712  else {
713  return computeLocalRowAndColumnOneNorms_CrsMatrix (*A_crs, assumeSymmetric);
714  }
715 }
716 
717 template<class SC, class LO, class GO, class NT>
718 auto getLocalView_1d_readOnly (
720  const LO whichColumn)
721 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
722  Kokkos::ALL (), whichColumn))
723 {
724  if (X.isConstantStride ()) {
725  return Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
726  Kokkos::ALL (), whichColumn);
727  }
728  else {
729  auto X_whichColumn = X.getVector (whichColumn);
730  return Kokkos::subview (X_whichColumn->getLocalViewDevice(Access::ReadOnly),
731  Kokkos::ALL (), 0);
732  }
733 }
734 
735 template<class SC, class LO, class GO, class NT>
736 auto getLocalView_1d_writeOnly (
738  const LO whichColumn)
739 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
740  Kokkos::ALL (), whichColumn))
741 {
742  if (X.isConstantStride ()) {
743  return Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
744  Kokkos::ALL (), whichColumn);
745  }
746  else {
747  auto X_whichColumn = X.getVectorNonConst (whichColumn);
748  return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
749  Kokkos::ALL (), 0);
750  }
751 }
752 
753 template<class SC, class LO, class GO, class NT, class ViewValueType>
754 void
755 copy1DViewIntoMultiVectorColumn (
757  const LO whichColumn,
758  const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
759 {
760  auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
761  Tpetra::Details::copyConvert (X_lcl, view);
762 }
763 
764 template<class SC, class LO, class GO, class NT, class ViewValueType>
765 void
766 copy1DViewIntoMultiVectorColumn_mag (
768  const LO whichColumn,
769  const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
770 {
771  using KAT = Kokkos::ArithTraits<ViewValueType>;
772  using execution_space = typename NT::execution_space;
773  using range_type = Kokkos::RangePolicy<execution_space, LO>;
774  auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
775  Kokkos::parallel_for("",
776  range_type(0, X_lcl.extent(0)),
777  KOKKOS_LAMBDA(const LO i) {
778  X_lcl(i) = KAT::magnitude(view(i));
779  });
780 }
781 
782 template<class SC, class LO, class GO, class NT, class ViewValueType>
783 void
784 copyMultiVectorColumnInto1DView (
785  const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
787  const LO whichColumn)
788 {
789  auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
790  Tpetra::Details::copyConvert (view, X_lcl);
791 }
792 
793 template<class SC, class LO, class GO, class NT, class ViewValueType>
794 void
795 copyMultiVectorColumnInto1DView_mag (
796  const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
798  const LO whichColumn)
799 {
800  using implScalar = typename Kokkos::ArithTraits<SC>::val_type;
801  using KAT = Kokkos::ArithTraits<implScalar>;
802  using range_type = Kokkos::RangePolicy<typename NT::execution_space, LO>;
803  auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
804  Kokkos::parallel_for("",
805  range_type(0, X_lcl.extent(0)),
806  KOKKOS_LAMBDA(LO i) {
807  view(i) = KAT::magnitude(X_lcl(i));
808  });
809 }
810 
811 
812 template<class OneDViewType, class IndexType>
813 class FindZero {
814 public:
815  static_assert (OneDViewType::rank == 1,
816  "OneDViewType must be a rank-1 Kokkos::View.");
817  static_assert (std::is_integral<IndexType>::value,
818  "IndexType must be a built-in integer type.");
819  FindZero (const OneDViewType& x) : x_ (x) {}
820  // Kokkos historically didn't like bool reduction results on CUDA,
821  // so we use int as the reduction result type.
822  KOKKOS_INLINE_FUNCTION void
823  operator () (const IndexType i, int& result) const {
824  using val_type = typename OneDViewType::non_const_value_type;
825  result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
826  }
827 private:
828  OneDViewType x_;
829 };
830 
831 template<class OneDViewType>
832 bool findZero (const OneDViewType& x)
833 {
834  using view_type = typename OneDViewType::const_type;
835  using execution_space = typename view_type::execution_space;
836  using size_type = typename view_type::size_type;
837  using functor_type = FindZero<view_type, size_type>;
838 
839  Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
840  range.set_chunk_size (500); // adjust as needed
841 
842  int foundZero = 0;
843  Kokkos::parallel_reduce ("findZero", range, functor_type (x), foundZero);
844  return foundZero == 1;
845 }
846 
847 template<class SC, class LO, class GO, class NT>
848 void
849 globalizeRowOneNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
850  typename NT::device_type>& equib,
852 {
853  using mv_type = Tpetra::MultiVector<SC, LO, GO, NT>;
854 
855  auto G = A.getGraph ();
856  TEUCHOS_TEST_FOR_EXCEPTION
857  (G.get () == nullptr, std::invalid_argument,
858  "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
859  "(that is, getGraph() must return nonnull).");
860  TEUCHOS_TEST_FOR_EXCEPTION
861  (! G->isFillComplete (), std::invalid_argument,
862  "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
863 
864  auto exp = G->getExporter ();
865  if (! exp.is_null ()) {
866  // If the matrix has an overlapping row Map, first Export the
867  // local row norms with ADD CombineMode to a range Map Vector to
868  // get the global row norms, then reverse them back with REPLACE
869  // CombineMode to the row Map Vector. Ditto for the local row
870  // diagonal entries. Use SC instead of mag_type, so we can
871  // communicate both row norms and row diagonal entries at once.
872 
873  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
874  // avoid the local copy here.
875  mv_type rowMapMV (G->getRowMap (), 2, false);
876 
877  copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
878  copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
879  {
880  mv_type rangeMapMV (G->getRangeMap (), 2, true);
881  rangeMapMV.doExport (rowMapMV, *exp, Tpetra::ADD); // forward mode
882  rowMapMV.doImport (rangeMapMV, *exp, Tpetra::REPLACE); // reverse mode
883  }
884  copyMultiVectorColumnInto1DView_mag (equib.rowNorms, rowMapMV, 0);
885  copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
886 
887  // It's not common for users to solve linear systems with a
888  // nontrival Export, so it's OK for this to cost an additional
889  // pass over rowDiagonalEntries.
890  equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
891  equib.foundZeroRowNorm = findZero (equib.rowNorms);
892  }
893 
894  constexpr int allReduceCount = 4;
895  int lclNaughtyMatrix[allReduceCount];
896  lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
897  lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
898  lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
899  lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
900 
901  using Teuchos::outArg;
902  using Teuchos::REDUCE_MAX;
903  using Teuchos::reduceAll;
904  auto comm = G->getComm ();
905  int gblNaughtyMatrix[allReduceCount];
906  reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
907  lclNaughtyMatrix, gblNaughtyMatrix);
908 
909  equib.foundInf = gblNaughtyMatrix[0] == 1;
910  equib.foundNan = gblNaughtyMatrix[1] == 1;
911  equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
912  equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
913 }
914 
915 template<class SC, class LO, class GO, class NT>
916 void
917 globalizeColumnOneNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
918  typename NT::device_type>& equib,
920  const bool assumeSymmetric) // if so, use row norms
921 {
922  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
923  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
925  using device_type = typename NT::device_type;
926 
927  auto G = A.getGraph ();
928  TEUCHOS_TEST_FOR_EXCEPTION
929  (G.get () == nullptr, std::invalid_argument,
930  "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
931  "(that is, getGraph() must return nonnull).");
932  TEUCHOS_TEST_FOR_EXCEPTION
933  (! G->isFillComplete (), std::invalid_argument,
934  "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
935 
936  auto imp = G->getImporter ();
937  if (assumeSymmetric) {
938  const LO numCols = 2;
939  // Redistribute local row info to global column info.
940 
941  // Get the data into a MultiVector on the domain Map.
942  mv_type rowNorms_domMap (G->getDomainMap (), numCols, false);
943  const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
944  if (rowMapSameAsDomainMap) {
945  copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
946  copy1DViewIntoMultiVectorColumn_mag (rowNorms_domMap, 1, equib.rowDiagonalEntries);
947  }
948  else {
949  // This is not a common case; it would normally arise when the
950  // matrix has an overlapping row Map.
951  Tpetra::Export<LO, GO, NT> rowToDom (G->getRowMap (), G->getDomainMap ());
952  mv_type rowNorms_rowMap (G->getRowMap (), numCols, true);
953  copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
954  copy1DViewIntoMultiVectorColumn_mag (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
955  rowNorms_domMap.doExport (rowNorms_rowMap, rowToDom, Tpetra::REPLACE);
956  }
957 
958  // Use the existing Import to redistribute the row norms from the
959  // domain Map to the column Map.
960  std::unique_ptr<mv_type> rowNorms_colMap;
961  if (imp.is_null ()) {
962  // Shallow copy of rowNorms_domMap.
963  rowNorms_colMap =
964  std::unique_ptr<mv_type> (new mv_type (rowNorms_domMap, * (G->getColMap ())));
965  }
966  else {
967  rowNorms_colMap =
968  std::unique_ptr<mv_type> (new mv_type (G->getColMap (), numCols, true));
969  rowNorms_colMap->doImport (rowNorms_domMap, *imp, Tpetra::REPLACE);
970  }
971 
972  // Make sure the result has allocations of the right size.
973  const LO lclNumCols =
974  static_cast<LO> (G->getColMap ()->getLocalNumElements ());
975  if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
976  equib.colNorms =
977  Kokkos::View<mag_type*, device_type> ("colNorms", lclNumCols);
978  }
979  if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
980  equib.colDiagonalEntries =
981  Kokkos::View<val_type*, device_type> ("colDiagonalEntries", lclNumCols);
982  }
983 
984  // Copy row norms and diagonal entries, appropriately
985  // redistributed, into column norms resp. diagonal entries.
986  copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
987  copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
988  }
989  else {
990  if (! imp.is_null ()) {
991  const LO numCols = 3;
992  // If the matrix has an overlapping column Map (this is usually
993  // the case), first Export (reverse-mode Import) the local info
994  // to a domain Map Vector to get the global info, then Import
995  // them back with REPLACE CombineMode to the column Map Vector.
996  // Ditto for the row-scaled column norms.
997 
998  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
999  // avoid the local copy here.
1000  mv_type colMapMV (G->getColMap (), numCols, false);
1001 
1002  copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
1003  copy1DViewIntoMultiVectorColumn_mag (colMapMV, 1, equib.colDiagonalEntries);
1004  copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
1005  {
1006  mv_type domainMapMV (G->getDomainMap (), numCols, true);
1007  domainMapMV.doExport (colMapMV, *imp, Tpetra::ADD); // reverse mode
1008  colMapMV.doImport (domainMapMV, *imp, Tpetra::REPLACE); // forward mode
1009  }
1010  copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
1011  copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
1012  copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
1013  }
1014  }
1015 }
1016 
1017 } // namespace Details
1018 
1019 template<class SC, class LO, class GO, class NT>
1020 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1021  typename NT::device_type>
1023 {
1024  TEUCHOS_TEST_FOR_EXCEPTION
1025  (! A.isFillComplete (), std::invalid_argument,
1026  "computeRowOneNorms: Input matrix A must be fillComplete.");
1027  auto result = Details::computeLocalRowOneNorms (A);
1028 
1029  Details::globalizeRowOneNorms (result, A);
1030  return result;
1031 }
1032 
1033 template<class SC, class LO, class GO, class NT>
1034 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1035  typename NT::device_type>
1037  const bool assumeSymmetric)
1038 {
1039  TEUCHOS_TEST_FOR_EXCEPTION
1040  (! A.isFillComplete (), std::invalid_argument,
1041  "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1042  auto result = Details::computeLocalRowAndColumnOneNorms (A, assumeSymmetric);
1043 
1044  Details::globalizeRowOneNorms (result, A);
1045  if (! assumeSymmetric) {
1046  // Row-norm-scaled column norms are trivial if the matrix is
1047  // symmetric, since the row norms and column norms are the same in
1048  // that case.
1049  Details::computeLocalRowScaledColumnNorms (result, A);
1050  }
1051  Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
1052  return result;
1053 }
1054 
1055 } // namespace Tpetra
1056 
1057 //
1058 // Explicit instantiation macro
1059 //
1060 // Must be expanded from within the Tpetra namespace!
1061 //
1062 
1063 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
1064  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1065  computeRowOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1066  \
1067  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1068  computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1069  const bool assumeSymmetric);
1070 
1071 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms (&quot;row sums&quot;) of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms (&quot;row sums&quot; etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms (&quot;row sums&quot; and &quot;column sums&quot;) of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms (&quot;row sums&quot; etc.) of the input sparse matrix A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sum new values.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row&#39;s entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.