Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_computeRowAndColumnOneNorms_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
43 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
44 
51 
54 #include "Tpetra_CrsMatrix.hpp"
55 #include "Tpetra_Export.hpp"
56 #include "Tpetra_Map.hpp"
57 #include "Tpetra_MultiVector.hpp"
58 #include "Tpetra_RowMatrix.hpp"
59 #include "Kokkos_Core.hpp"
60 #include "Teuchos_CommHelpers.hpp"
61 #include <memory>
62 
63 namespace Tpetra {
64 namespace Details {
65 
66 template<class SC, class LO, class GO, class NT>
67 std::size_t
68 lclMaxNumEntriesRowMatrix (const Tpetra::RowMatrix<SC, LO, GO, NT>& A)
69 {
70  const auto& rowMap = * (A.getRowMap ());
71  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
72 
73  std::size_t maxNumEnt {0};
74  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
75  const std::size_t numEnt = A.getNumEntriesInLocalRow (lclRow);
76  maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
77  }
78  return maxNumEnt;
79 }
80 
81 template<class SC, class LO, class GO, class NT>
82 void
83 forEachLocalRowMatrixRow (
85  const LO lclNumRows,
86  const std::size_t maxNumEnt,
87  std::function<void (
88  const LO lclRow,
89  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
90  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
91  std::size_t /*numEnt*/ )> doForEachRow)
92 {
93  using lids_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
94  using vals_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
95  lids_type indBuf("indices",maxNumEnt);
96  vals_type valBuf("values",maxNumEnt);
97 
98  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
99  std::size_t numEnt = A.getNumEntriesInLocalRow (lclRow);
100  lids_type ind = Kokkos::subview(indBuf,std::make_pair((size_t)0, numEnt));
101  vals_type val = Kokkos::subview(valBuf,std::make_pair((size_t)0, numEnt));
102  A.getLocalRowCopy (lclRow, ind, val, numEnt);
103  doForEachRow (lclRow, ind, val, numEnt);
104  }
105 }
106 
107 template<class SC, class LO, class GO, class NT>
108 void
109 forEachLocalRowMatrixRow (
111  std::function<void (
112  const LO lclRow,
113  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
114  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
115  std::size_t /*numEnt*/ )> doForEachRow)
116 {
117  const auto& rowMap = * (A.getRowMap ());
118  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
119  const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
120 
121  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
122 }
123 
127 template<class SC, class LO, class GO, class NT>
128 void
129 computeLocalRowScaledColumnNorms_RowMatrix (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
130  typename NT::device_type>& result,
132 {
133  using KAT = Kokkos::ArithTraits<SC>;
134  using mag_type = typename KAT::mag_type;
135  using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
136 
137  auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
138 
139  // DEEP_COPY REVIEW - NOT TESTED
140  Kokkos::deep_copy (rowNorms_h, result.rowNorms);
141  auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
142 
143  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
144  [&] (const LO lclRow,
145  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
146  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
147  std::size_t numEnt) {
148  const mag_type rowNorm = rowNorms_h[lclRow];
149  for (std::size_t k = 0; k < numEnt; ++k) {
150  const mag_type matrixAbsVal = KAV::abs (val[k]);
151  const LO lclCol = ind[k];
152 
153  rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
154  }
155  });
156 
157  // DEEP_COPY REVIEW - NOT TESTED
158  Kokkos::deep_copy (result.rowScaledColNorms, rowScaledColNorms_h);
159 }
160 
163 template<class SC, class LO, class GO, class NT>
164 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
166 {
167  using KAT = Kokkos::ArithTraits<SC>;
168  using val_type = typename KAT::val_type;
169  using KAV = Kokkos::ArithTraits<val_type>;
170  using mag_type = typename KAT::mag_type;
171  using KAM = Kokkos::ArithTraits<mag_type>;
172  using device_type = typename NT::device_type;
173  using equib_info_type = EquilibrationInfo<val_type, device_type>;
174 
175  const auto& rowMap = * (A.getRowMap ());
176  const auto& colMap = * (A.getColMap ());
177  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
178  const LO lclNumCols = 0; // don't allocate column-related Views
179  constexpr bool assumeSymmetric = false; // doesn't matter here
180  equib_info_type result (lclNumRows, lclNumCols, assumeSymmetric);
181  auto result_h = result.createMirrorView ();
182 
183  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
184  [&] (const LO lclRow,
185  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
186  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
187  std::size_t numEnt) {
188  mag_type rowNorm {0.0};
189  val_type diagVal {0.0};
190  const GO gblRow = rowMap.getGlobalElement (lclRow);
191  // OK if invalid(); then we simply won't find the diagonal entry.
192  const GO lclDiagColInd = colMap.getLocalElement (gblRow);
193 
194  for (std::size_t k = 0; k < numEnt; ++k) {
195  const val_type matrixVal = val[k];
196  if (KAV::isInf (matrixVal)) {
197  result_h.foundInf = true;
198  }
199  if (KAV::isNan (matrixVal)) {
200  result_h.foundNan = true;
201  }
202  const mag_type matrixAbsVal = KAV::abs (matrixVal);
203  rowNorm += matrixAbsVal;
204  const LO lclCol = ind[k];
205  if (lclCol == lclDiagColInd) {
206  diagVal += val[k]; // repeats count additively
207  }
208  } // for each entry in row
209 
210  // This is a local result. If the matrix has an overlapping
211  // row Map, then the global result might differ.
212  if (diagVal == KAV::zero ()) {
213  result_h.foundZeroDiag = true;
214  }
215  if (rowNorm == KAM::zero ()) {
216  result_h.foundZeroRowNorm = true;
217  }
218  // NOTE (mfh 24 May 2018) We could actually compute local
219  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
220  // and row Map is the same as range Map (so that the local row
221  // norms are the same as the global row norms).
222  result_h.rowDiagonalEntries[lclRow] += diagVal;
223  result_h.rowNorms[lclRow] = rowNorm;
224  });
225 
226  result.assign (result_h);
227  return result;
228 }
229 
232 template<class SC, class LO, class GO, class NT>
233 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
235  const bool assumeSymmetric)
236 {
237  using KAT = Kokkos::ArithTraits<SC>;
238  using val_type = typename KAT::val_type;
239  using KAV = Kokkos::ArithTraits<val_type>;
240  using mag_type = typename KAT::mag_type;
241  using KAM = Kokkos::ArithTraits<mag_type>;
242  using device_type = typename NT::device_type;
243 
244  const auto& rowMap = * (A.getRowMap ());
245  const auto& colMap = * (A.getColMap ());
246  const LO lclNumRows = static_cast<LO> (rowMap.getLocalNumElements ());
247  const LO lclNumCols = static_cast<LO> (colMap.getLocalNumElements ());
248 
250  (lclNumRows, lclNumCols, assumeSymmetric);
251  auto result_h = result.createMirrorView ();
252 
253  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
254  [&] (const LO lclRow,
255  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
256  const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
257  std::size_t numEnt) {
258  mag_type rowNorm {0.0};
259  val_type diagVal {0.0};
260  const GO gblRow = rowMap.getGlobalElement (lclRow);
261  // OK if invalid(); then we simply won't find the diagonal entry.
262  const GO lclDiagColInd = colMap.getLocalElement (gblRow);
263 
264  for (std::size_t k = 0; k < numEnt; ++k) {
265  const val_type matrixVal = val[k];
266  if (KAV::isInf (matrixVal)) {
267  result_h.foundInf = true;
268  }
269  if (KAV::isNan (matrixVal)) {
270  result_h.foundNan = true;
271  }
272  const mag_type matrixAbsVal = KAV::abs (matrixVal);
273  rowNorm += matrixAbsVal;
274  const LO lclCol = ind[k];
275  if (lclCol == lclDiagColInd) {
276  diagVal += val[k]; // repeats count additively
277  }
278  if (! assumeSymmetric) {
279  result_h.colNorms[lclCol] += matrixAbsVal;
280  }
281  } // for each entry in row
282 
283  // This is a local result. If the matrix has an overlapping
284  // row Map, then the global result might differ.
285  if (diagVal == KAV::zero ()) {
286  result_h.foundZeroDiag = true;
287  }
288  if (rowNorm == KAM::zero ()) {
289  result_h.foundZeroRowNorm = true;
290  }
291  // NOTE (mfh 24 May 2018) We could actually compute local
292  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
293  // and row Map is the same as range Map (so that the local row
294  // norms are the same as the global row norms).
295  result_h.rowDiagonalEntries[lclRow] += diagVal;
296  result_h.rowNorms[lclRow] = rowNorm;
297  if (! assumeSymmetric &&
298  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
299  result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
300  }
301  });
302 
303  result.assign (result_h);
304  return result;
305 }
306 
307 template<class SC, class LO, class GO, class NT>
308 class ComputeLocalRowScaledColumnNorms {
309 public:
310  using crs_matrix_type = ::Tpetra::CrsMatrix<SC, LO, GO, NT>;
311  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
312  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
313  using device_type = typename crs_matrix_type::device_type;
314  using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
315 
316  ComputeLocalRowScaledColumnNorms (const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
317  const Kokkos::View<const mag_type*, device_type>& rowNorms,
318  const crs_matrix_type& A) :
319  rowScaledColNorms_ (rowScaledColNorms),
320  rowNorms_ (rowNorms),
321  A_lcl_ (A.getLocalMatrixDevice ())
322  {}
323 
324  KOKKOS_INLINE_FUNCTION void operator () (const typename policy_type::member_type &team) const {
325  using KAT = Kokkos::ArithTraits<val_type>;
326 
327  const LO lclRow = team.league_rank();
328  const auto curRow = A_lcl_.rowConst (lclRow);
329  const mag_type rowNorm = rowNorms_[lclRow];
330  const LO numEnt = curRow.length;
331  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k) {
332  const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
333  const LO lclCol = curRow.colidx(k);
334 
335  Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
336  });
337  }
338 
339  static void
340  run (const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
341  const Kokkos::View<const mag_type*, device_type>& rowNorms,
342  const crs_matrix_type& A)
343  {
344  using execution_space = typename device_type::execution_space;
345  using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
346 
347  functor_type functor (rowScaledColNorms, rowNorms, A);
348  const LO lclNumRows =
349  static_cast<LO> (A.getRowMap ()->getLocalNumElements ());
350  Kokkos::parallel_for ("computeLocalRowScaledColumnNorms",
351  policy_type (lclNumRows, Kokkos::AUTO), functor);
352  }
353 
354 private:
355  Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
356  Kokkos::View<const mag_type*, device_type> rowNorms_;
357 
358  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
359  local_matrix_device_type A_lcl_;
360 };
361 
362 template<class SC, class LO, class GO, class NT>
363 void
364 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
365  typename NT::device_type>& result,
367 {
368  using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
369  impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
370 }
371 
372 template<class SC, class LO, class GO, class NT>
373 void
374 computeLocalRowScaledColumnNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
375  typename NT::device_type>& result,
377 {
378  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
379  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
380  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
381  using device_type = typename NT::device_type;
382 
383  auto colMapPtr = A.getColMap ();
384  TEUCHOS_TEST_FOR_EXCEPTION
385  (colMapPtr.get () == nullptr, std::invalid_argument,
386  "computeLocalRowScaledColumnNorms: "
387  "Input matrix A must have a nonnull column Map.");
388  const LO lclNumCols = static_cast<LO> (colMapPtr->getLocalNumElements ());
389  if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
390  static_cast<std::size_t> (lclNumCols)) {
391  result.rowScaledColNorms =
392  Kokkos::View<mag_type*, device_type> ("rowScaledColNorms", lclNumCols);
393  }
394 
395  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
396  if (A_crs == nullptr) {
398  }
399  else {
400  computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
401  }
402 }
403 
404 // Kokkos::parallel_reduce functor that is part of the implementation
405 // of computeLocalRowOneNorms_CrsMatrix.
406 template<class SC, class LO, class GO, class NT>
407 class ComputeLocalRowOneNorms {
408 public:
409  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
410  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
411  using local_matrix_device_type =
412  typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
413  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
414  using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
415 
416  ComputeLocalRowOneNorms (const equib_info_type& equib, // in/out
417  const local_matrix_device_type& A_lcl, // in
418  const local_map_type& rowMap, // in
419  const local_map_type& colMap) : // in
420  equib_ (equib),
421  A_lcl_ (A_lcl),
422  rowMap_ (rowMap),
423  colMap_ (colMap)
424  {}
425 
426  // (result & 1) != 0 means "found Inf."
427  // (result & 2) != 0 means "found NaN."
428  // (result & 4) != 0 means "found zero diag."
429  // (result & 8) != 0 means "found zero row norm."
430  // Pack into a single int so the reduction is cheaper,
431  // esp. on GPU.
432  using value_type = int;
433 
434  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
435  {
436  dst = 0;
437  }
438 
439  KOKKOS_INLINE_FUNCTION void
440  join (value_type& dst,
441  const value_type& src) const
442  {
443  dst |= src;
444  }
445 
446  KOKKOS_INLINE_FUNCTION void
447  operator () (const typename policy_type::member_type& team, value_type& dst) const
448  {
449  using KAT = Kokkos::ArithTraits<val_type>;
450  using mag_type = typename KAT::mag_type;
451  using KAM = Kokkos::ArithTraits<mag_type>;
452 
453  const LO lclRow = team.league_rank();
454  const GO gblRow = rowMap_.getGlobalElement (lclRow);
455  // OK if invalid(); then we simply won't find the diagonal entry.
456  const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
457 
458  const auto curRow = A_lcl_.rowConst (lclRow);
459  const LO numEnt = curRow.length;
460 
461  mag_type rowNorm {0.0};
462  val_type diagVal {0.0};
463  value_type dstThread {0};
464 
465  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
466  const val_type matrixVal = curRow.value (k);
467  if (KAT::isInf (matrixVal)) {
468  dstContrib |= 1;
469  }
470  if (KAT::isNan (matrixVal)) {
471  dstContrib |= 2;
472  }
473  const mag_type matrixAbsVal = KAT::abs (matrixVal);
474  normContrib += matrixAbsVal;
475  const LO lclCol = curRow.colidx (k);
476  if (lclCol == lclDiagColInd) {
477  diagContrib = curRow.value (k); // assume no repeats
478  }
479  }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
480 
481  // This is a local result. If the matrix has an overlapping
482  // row Map, then the global result might differ.
483  Kokkos::single(Kokkos::PerTeam(team), [&](){
484  dst |= dstThread;
485  if (diagVal == KAT::zero ()) {
486  dst |= 4;
487  }
488  if (rowNorm == KAM::zero ()) {
489  dst |= 8;
490  }
491  equib_.rowDiagonalEntries[lclRow] = diagVal;
492  equib_.rowNorms[lclRow] = rowNorm;
493  });
494  }
495 
496 private:
497  equib_info_type equib_;
498  local_matrix_device_type A_lcl_;
499  local_map_type rowMap_;
500  local_map_type colMap_;
501 };
502 
503 // Kokkos::parallel_reduce functor that is part of the implementation
504 // of computeLocalRowAndColumnOneNorms_CrsMatrix.
505 template<class SC, class LO, class GO, class NT>
506 class ComputeLocalRowAndColumnOneNorms {
507 public:
508  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
509  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
510  using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
511  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
512  using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
513 
514 public:
515  ComputeLocalRowAndColumnOneNorms (const equib_info_type& equib, // in/out
516  const local_matrix_device_type& A_lcl, // in
517  const local_map_type& rowMap, // in
518  const local_map_type& colMap) : // in
519  equib_ (equib),
520  A_lcl_ (A_lcl),
521  rowMap_ (rowMap),
522  colMap_ (colMap)
523  {}
524 
525  // (result & 1) != 0 means "found Inf."
526  // (result & 2) != 0 means "found NaN."
527  // (result & 4) != 0 means "found zero diag."
528  // (result & 8) != 0 means "found zero row norm."
529  // Pack into a single int so the reduction is cheaper,
530  // esp. on GPU.
531  using value_type = int;
532 
533  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
534  {
535  dst = 0;
536  }
537 
538  KOKKOS_INLINE_FUNCTION void
539  join (value_type& dst,
540  const value_type& src) const
541  {
542  dst |= src;
543  }
544 
545  KOKKOS_INLINE_FUNCTION void
546  operator () (const typename policy_type::member_type& team, value_type& dst) const
547  {
548  using KAT = Kokkos::ArithTraits<val_type>;
549  using mag_type = typename KAT::mag_type;
550  using KAM = Kokkos::ArithTraits<mag_type>;
551 
552  const LO lclRow = team.league_rank();
553  const GO gblRow = rowMap_.getGlobalElement (lclRow);
554  // OK if invalid(); then we simply won't find the diagonal entry.
555  const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
556 
557  const auto curRow = A_lcl_.rowConst (lclRow);
558  const LO numEnt = curRow.length;
559 
560  mag_type rowNorm {0.0};
561  val_type diagVal {0.0};
562  value_type dstThread {0};
563 
564  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
565  const val_type matrixVal = curRow.value (k);
566  if (KAT::isInf (matrixVal)) {
567  dstContrib |= 1;
568  }
569  if (KAT::isNan (matrixVal)) {
570  dstContrib |= 2;
571  }
572  const mag_type matrixAbsVal = KAT::abs (matrixVal);
573  normContrib += matrixAbsVal;
574  const LO lclCol = curRow.colidx (k);
575  if (lclCol == lclDiagColInd) {
576  diagContrib = curRow.value (k); // assume no repeats
577  }
578  if (! equib_.assumeSymmetric) {
579  Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
580  }
581  }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
582 
583  // This is a local result. If the matrix has an overlapping
584  // row Map, then the global result might differ.
585  Kokkos::single(Kokkos::PerTeam(team), [&](){
586  dst |= dstThread;
587  if (diagVal == KAT::zero ()) {
588  dst |= 4;
589  }
590  if (rowNorm == KAM::zero ()) {
591  dst |= 8;
592  }
593  // NOTE (mfh 24 May 2018) We could actually compute local
594  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
595  // and row Map is the same as range Map (so that the local row
596  // norms are the same as the global row norms).
597  equib_.rowDiagonalEntries[lclRow] = diagVal;
598  equib_.rowNorms[lclRow] = rowNorm;
599  if (! equib_.assumeSymmetric &&
600  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
601  // Don't need an atomic update here, since this lclDiagColInd is
602  // a one-to-one function of lclRow.
603  equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
604  }
605  });
606  }
607 
608 private:
609  equib_info_type equib_;
610  local_matrix_device_type A_lcl_;
611  local_map_type rowMap_;
612  local_map_type colMap_;
613 };
614 
617 template<class SC, class LO, class GO, class NT>
618 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
620 {
621  using execution_space = typename NT::device_type::execution_space;
622  using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
623  using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
624  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
625  using device_type = typename NT::device_type;
626  using equib_info_type = EquilibrationInfo<val_type, device_type>;
627 
628  const LO lclNumRows = static_cast<LO> (A.getRowMap ()->getLocalNumElements ());
629  const LO lclNumCols = 0; // don't allocate column-related Views
630  constexpr bool assumeSymmetric = false; // doesn't matter here
631  equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
632 
633  functor_type functor (equib, A.getLocalMatrixDevice (),
634  A.getRowMap ()->getLocalMap (),
635  A.getColMap ()->getLocalMap ());
636  int result = 0;
637  Kokkos::parallel_reduce ("computeLocalRowOneNorms",
638  policy_type (lclNumRows, Kokkos::AUTO), functor,
639  result);
640  equib.foundInf = (result & 1) != 0;
641  equib.foundNan = (result & 2) != 0;
642  equib.foundZeroDiag = (result & 4) != 0;
643  equib.foundZeroRowNorm = (result & 8) != 0;
644  return equib;
645 }
646 
649 template<class SC, class LO, class GO, class NT>
650 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
652  const bool assumeSymmetric)
653 {
654  using execution_space = typename NT::device_type::execution_space;
655  using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
656  using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
657  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
658  using device_type = typename NT::device_type;
659  using equib_info_type = EquilibrationInfo<val_type, device_type>;
660 
661  const LO lclNumRows = static_cast<LO> (A.getRowMap ()->getLocalNumElements ());
662  const LO lclNumCols = static_cast<LO> (A.getColMap ()->getLocalNumElements ());
663  equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
664 
665  functor_type functor (equib, A.getLocalMatrixDevice (),
666  A.getRowMap ()->getLocalMap (),
667  A.getColMap ()->getLocalMap ());
668  int result = 0;
669  Kokkos::parallel_reduce ("computeLocalRowAndColumnOneNorms",
670  policy_type (lclNumRows, Kokkos::AUTO), functor,
671  result);
672  equib.foundInf = (result & 1) != 0;
673  equib.foundNan = (result & 2) != 0;
674  equib.foundZeroDiag = (result & 4) != 0;
675  equib.foundZeroRowNorm = (result & 8) != 0;
676  return equib;
677 }
678 
683 template<class SC, class LO, class GO, class NT>
684 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
685  typename NT::device_type>
687 {
688  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
689  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
690 
691  if (A_crs == nullptr) {
693  }
694  else {
695  return computeLocalRowOneNorms_CrsMatrix (*A_crs);
696  }
697 }
698 
720 template<class SC, class LO, class GO, class NT>
721 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
723  const bool assumeSymmetric)
724 {
725  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
726  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
727 
728  if (A_crs == nullptr) {
729  return computeLocalRowAndColumnOneNorms_RowMatrix (A, assumeSymmetric);
730  }
731  else {
732  return computeLocalRowAndColumnOneNorms_CrsMatrix (*A_crs, assumeSymmetric);
733  }
734 }
735 
736 template<class SC, class LO, class GO, class NT>
737 auto getLocalView_1d_readOnly (
739  const LO whichColumn)
740 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
741  Kokkos::ALL (), whichColumn))
742 {
743  if (X.isConstantStride ()) {
744  return Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
745  Kokkos::ALL (), whichColumn);
746  }
747  else {
748  auto X_whichColumn = X.getVector (whichColumn);
749  return Kokkos::subview (X_whichColumn->getLocalViewDevice(Access::ReadOnly),
750  Kokkos::ALL (), 0);
751  }
752 }
753 
754 template<class SC, class LO, class GO, class NT>
755 auto getLocalView_1d_writeOnly (
757  const LO whichColumn)
758 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
759  Kokkos::ALL (), whichColumn))
760 {
761  if (X.isConstantStride ()) {
762  return Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
763  Kokkos::ALL (), whichColumn);
764  }
765  else {
766  auto X_whichColumn = X.getVectorNonConst (whichColumn);
767  return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
768  Kokkos::ALL (), 0);
769  }
770 }
771 
772 template<class SC, class LO, class GO, class NT, class ViewValueType>
773 void
774 copy1DViewIntoMultiVectorColumn (
776  const LO whichColumn,
777  const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
778 {
779  auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
780  Tpetra::Details::copyConvert (X_lcl, view);
781 }
782 
783 template<class SC, class LO, class GO, class NT, class ViewValueType>
784 void
785 copyMultiVectorColumnInto1DView (
786  const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
788  const LO whichColumn)
789 {
790  auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
791  Tpetra::Details::copyConvert (view, X_lcl);
792 }
793 
794 template<class OneDViewType, class IndexType>
795 class FindZero {
796 public:
797  static_assert (OneDViewType::rank == 1,
798  "OneDViewType must be a rank-1 Kokkos::View.");
799  static_assert (std::is_integral<IndexType>::value,
800  "IndexType must be a built-in integer type.");
801  FindZero (const OneDViewType& x) : x_ (x) {}
802  // Kokkos historically didn't like bool reduction results on CUDA,
803  // so we use int as the reduction result type.
804  KOKKOS_INLINE_FUNCTION void
805  operator () (const IndexType i, int& result) const {
806  using val_type = typename OneDViewType::non_const_value_type;
807  result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
808  }
809 private:
810  OneDViewType x_;
811 };
812 
813 template<class OneDViewType>
814 bool findZero (const OneDViewType& x)
815 {
816  using view_type = typename OneDViewType::const_type;
817  using execution_space = typename view_type::execution_space;
818  using size_type = typename view_type::size_type;
819  using functor_type = FindZero<view_type, size_type>;
820 
821  Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
822  range.set_chunk_size (500); // adjust as needed
823 
824  int foundZero = 0;
825  Kokkos::parallel_reduce ("findZero", range, functor_type (x), foundZero);
826  return foundZero == 1;
827 }
828 
829 template<class SC, class LO, class GO, class NT>
830 void
831 globalizeRowOneNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
832  typename NT::device_type>& equib,
834 {
835  using mv_type = Tpetra::MultiVector<SC, LO, GO, NT>;
836 
837  auto G = A.getGraph ();
838  TEUCHOS_TEST_FOR_EXCEPTION
839  (G.get () == nullptr, std::invalid_argument,
840  "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
841  "(that is, getGraph() must return nonnull).");
842  TEUCHOS_TEST_FOR_EXCEPTION
843  (! G->isFillComplete (), std::invalid_argument,
844  "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
845 
846  auto exp = G->getExporter ();
847  if (! exp.is_null ()) {
848  // If the matrix has an overlapping row Map, first Export the
849  // local row norms with ADD CombineMode to a range Map Vector to
850  // get the global row norms, then reverse them back with REPLACE
851  // CombineMode to the row Map Vector. Ditto for the local row
852  // diagonal entries. Use SC instead of mag_type, so we can
853  // communicate both row norms and row diagonal entries at once.
854 
855  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
856  // avoid the local copy here.
857  mv_type rowMapMV (G->getRowMap (), 2, false);
858 
859  copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
860  copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
861  {
862  mv_type rangeMapMV (G->getRangeMap (), 2, true);
863  rangeMapMV.doExport (rowMapMV, *exp, Tpetra::ADD); // forward mode
864  rowMapMV.doImport (rangeMapMV, *exp, Tpetra::REPLACE); // reverse mode
865  }
866  copyMultiVectorColumnInto1DView (equib.rowNorms, rowMapMV, 0);
867  copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
868 
869  // It's not common for users to solve linear systems with a
870  // nontrival Export, so it's OK for this to cost an additional
871  // pass over rowDiagonalEntries.
872  equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
873  equib.foundZeroRowNorm = findZero (equib.rowNorms);
874  }
875 
876  constexpr int allReduceCount = 4;
877  int lclNaughtyMatrix[allReduceCount];
878  lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
879  lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
880  lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
881  lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
882 
883  using Teuchos::outArg;
884  using Teuchos::REDUCE_MAX;
885  using Teuchos::reduceAll;
886  auto comm = G->getComm ();
887  int gblNaughtyMatrix[allReduceCount];
888  reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
889  lclNaughtyMatrix, gblNaughtyMatrix);
890 
891  equib.foundInf = gblNaughtyMatrix[0] == 1;
892  equib.foundNan = gblNaughtyMatrix[1] == 1;
893  equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
894  equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
895 }
896 
897 template<class SC, class LO, class GO, class NT>
898 void
899 globalizeColumnOneNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
900  typename NT::device_type>& equib,
902  const bool assumeSymmetric) // if so, use row norms
903 {
904  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
905  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
907  using device_type = typename NT::device_type;
908 
909  auto G = A.getGraph ();
910  TEUCHOS_TEST_FOR_EXCEPTION
911  (G.get () == nullptr, std::invalid_argument,
912  "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
913  "(that is, getGraph() must return nonnull).");
914  TEUCHOS_TEST_FOR_EXCEPTION
915  (! G->isFillComplete (), std::invalid_argument,
916  "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
917 
918  auto imp = G->getImporter ();
919  if (assumeSymmetric) {
920  const LO numCols = 2;
921  // Redistribute local row info to global column info.
922 
923  // Get the data into a MultiVector on the domain Map.
924  mv_type rowNorms_domMap (G->getDomainMap (), numCols, false);
925  const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
926  if (rowMapSameAsDomainMap) {
927  copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
928  copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 1, equib.rowDiagonalEntries);
929  }
930  else {
931  // This is not a common case; it would normally arise when the
932  // matrix has an overlapping row Map.
933  Tpetra::Export<LO, GO, NT> rowToDom (G->getRowMap (), G->getDomainMap ());
934  mv_type rowNorms_rowMap (G->getRowMap (), numCols, true);
935  copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
936  copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
937  rowNorms_domMap.doExport (rowNorms_rowMap, rowToDom, Tpetra::REPLACE);
938  }
939 
940  // Use the existing Import to redistribute the row norms from the
941  // domain Map to the column Map.
942  std::unique_ptr<mv_type> rowNorms_colMap;
943  if (imp.is_null ()) {
944  // Shallow copy of rowNorms_domMap.
945  rowNorms_colMap =
946  std::unique_ptr<mv_type> (new mv_type (rowNorms_domMap, * (G->getColMap ())));
947  }
948  else {
949  rowNorms_colMap =
950  std::unique_ptr<mv_type> (new mv_type (G->getColMap (), numCols, true));
951  rowNorms_colMap->doImport (rowNorms_domMap, *imp, Tpetra::REPLACE);
952  }
953 
954  // Make sure the result has allocations of the right size.
955  const LO lclNumCols =
956  static_cast<LO> (G->getColMap ()->getLocalNumElements ());
957  if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
958  equib.colNorms =
959  Kokkos::View<mag_type*, device_type> ("colNorms", lclNumCols);
960  }
961  if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
962  equib.colDiagonalEntries =
963  Kokkos::View<val_type*, device_type> ("colDiagonalEntries", lclNumCols);
964  }
965 
966  // Copy row norms and diagonal entries, appropriately
967  // redistributed, into column norms resp. diagonal entries.
968  copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
969  copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
970  }
971  else {
972  if (! imp.is_null ()) {
973  const LO numCols = 3;
974  // If the matrix has an overlapping column Map (this is usually
975  // the case), first Export (reverse-mode Import) the local info
976  // to a domain Map Vector to get the global info, then Import
977  // them back with REPLACE CombineMode to the column Map Vector.
978  // Ditto for the row-scaled column norms.
979 
980  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
981  // avoid the local copy here.
982  mv_type colMapMV (G->getColMap (), numCols, false);
983 
984  copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
985  copy1DViewIntoMultiVectorColumn (colMapMV, 1, equib.colDiagonalEntries);
986  copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
987  {
988  mv_type domainMapMV (G->getDomainMap (), numCols, true);
989  domainMapMV.doExport (colMapMV, *imp, Tpetra::ADD); // reverse mode
990  colMapMV.doImport (domainMapMV, *imp, Tpetra::REPLACE); // forward mode
991  }
992  copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
993  copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
994  copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
995  }
996  }
997 }
998 
999 } // namespace Details
1000 
1001 template<class SC, class LO, class GO, class NT>
1002 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1003  typename NT::device_type>
1005 {
1006  TEUCHOS_TEST_FOR_EXCEPTION
1007  (! A.isFillComplete (), std::invalid_argument,
1008  "computeRowOneNorms: Input matrix A must be fillComplete.");
1009  auto result = Details::computeLocalRowOneNorms (A);
1010 
1011  Details::globalizeRowOneNorms (result, A);
1012  return result;
1013 }
1014 
1015 template<class SC, class LO, class GO, class NT>
1016 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1017  typename NT::device_type>
1019  const bool assumeSymmetric)
1020 {
1021  TEUCHOS_TEST_FOR_EXCEPTION
1022  (! A.isFillComplete (), std::invalid_argument,
1023  "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1024  auto result = Details::computeLocalRowAndColumnOneNorms (A, assumeSymmetric);
1025 
1026  Details::globalizeRowOneNorms (result, A);
1027  if (! assumeSymmetric) {
1028  // Row-norm-scaled column norms are trivial if the matrix is
1029  // symmetric, since the row norms and column norms are the same in
1030  // that case.
1031  Details::computeLocalRowScaledColumnNorms (result, A);
1032  }
1033  Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
1034  return result;
1035 }
1036 
1037 } // namespace Tpetra
1038 
1039 //
1040 // Explicit instantiation macro
1041 //
1042 // Must be expanded from within the Tpetra namespace!
1043 //
1044 
// Expands to explicit instantiations of the two public function
// templates, computeRowOneNorms and computeRowAndColumnOneNorms, for
// the given Scalar (SC), LocalOrdinal (LO), GlobalOrdinal (GO), and
// Node (NT) types.  The names Details::EquilibrationInfo and
// computeRow*OneNorms are written unqualified, which is why the
// expansion must appear inside namespace Tpetra.
1045 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
1046  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1047  computeRowOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1048  \
1049  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1050  computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1051  const bool assumeSymmetric);
1052 
1053 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sum new values.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.