10 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
20 #include "Kokkos_ArithTraits.hpp"
21 #include "Kokkos_Macros.hpp"
24 #include "Tpetra_CrsMatrix.hpp"
25 #include "Tpetra_Export.hpp"
26 #include "Tpetra_Map.hpp"
27 #include "Tpetra_MultiVector.hpp"
28 #include "Tpetra_RowMatrix.hpp"
29 #include "Kokkos_Core.hpp"
30 #include "Teuchos_CommHelpers.hpp"
36 template<
class SC,
class LO,
class GO,
class NT>
41 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
43 std::size_t maxNumEnt {0};
44 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
46 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
51 template<
class SC,
class LO,
class GO,
class NT>
53 forEachLocalRowMatrixRow (
56 const std::size_t maxNumEnt,
59 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
60 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
61 std::size_t )> doForEachRow)
63 using lids_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
64 using vals_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
65 lids_type indBuf(
"indices",maxNumEnt);
66 vals_type valBuf(
"values",maxNumEnt);
68 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
70 lids_type ind = Kokkos::subview(indBuf,std::make_pair((
size_t)0, numEnt));
71 vals_type val = Kokkos::subview(valBuf,std::make_pair((
size_t)0, numEnt));
73 doForEachRow (lclRow, ind, val, numEnt);
77 template<
class SC,
class LO,
class GO,
class NT>
79 forEachLocalRowMatrixRow (
83 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
84 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
85 std::size_t )> doForEachRow)
88 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
89 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
91 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
97 template<
class SC,
class LO,
class GO,
class NT>
100 typename NT::device_type>& result,
103 using KAT = Kokkos::ArithTraits<SC>;
104 using mag_type =
typename KAT::mag_type;
105 using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
107 auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
111 auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
113 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
114 [&] (
const LO lclRow,
115 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
116 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
117 std::size_t numEnt) {
118 const mag_type rowNorm = rowNorms_h[lclRow];
119 for (std::size_t k = 0; k < numEnt; ++k) {
120 const mag_type matrixAbsVal = KAV::abs (val[k]);
121 const LO lclCol = ind[k];
123 rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
133 template<
class SC,
class LO,
class GO,
class NT>
134 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
137 using KAT = Kokkos::ArithTraits<SC>;
138 using val_type =
typename KAT::val_type;
139 using KAV = Kokkos::ArithTraits<val_type>;
140 using mag_type =
typename KAT::mag_type;
141 using KAM = Kokkos::ArithTraits<mag_type>;
142 using device_type =
typename NT::device_type;
147 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
148 const LO lclNumCols = 0;
149 constexpr
bool assumeSymmetric =
false;
150 equib_info_type result (lclNumRows, lclNumCols, assumeSymmetric);
151 auto result_h = result.createMirrorView ();
153 const auto val_zero = KAV::zero();
154 const auto mag_zero = KAM::zero();
156 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
157 [&] (
const LO lclRow,
158 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
159 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
160 std::size_t numEnt) {
161 mag_type rowNorm = mag_zero;
162 val_type diagVal = val_zero;
163 const GO gblRow = rowMap.getGlobalElement (lclRow);
165 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
167 for (std::size_t k = 0; k < numEnt; ++k) {
168 const val_type matrixVal = val[k];
169 if (KAV::isInf (matrixVal)) {
170 result_h.foundInf =
true;
172 if (KAV::isNan (matrixVal)) {
173 result_h.foundNan =
true;
175 const mag_type matrixAbsVal = KAV::abs (matrixVal);
176 rowNorm += matrixAbsVal;
177 const LO lclCol = ind[k];
178 if (lclCol == lclDiagColInd) {
185 if (diagVal == KAV::zero ()) {
186 result_h.foundZeroDiag =
true;
188 if (rowNorm == KAM::zero ()) {
189 result_h.foundZeroRowNorm =
true;
195 result_h.rowDiagonalEntries[lclRow] += diagVal;
196 result_h.rowNorms[lclRow] = rowNorm;
199 result.assign (result_h);
205 template<
class SC,
class LO,
class GO,
class NT>
206 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
208 const bool assumeSymmetric)
210 using KAT = Kokkos::ArithTraits<SC>;
211 using val_type =
typename KAT::val_type;
212 using KAV = Kokkos::ArithTraits<val_type>;
213 using mag_type =
typename KAT::mag_type;
214 using KAM = Kokkos::ArithTraits<mag_type>;
215 using device_type =
typename NT::device_type;
219 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
220 const LO lclNumCols =
static_cast<LO
> (colMap.getLocalNumElements ());
223 (lclNumRows, lclNumCols, assumeSymmetric);
224 auto result_h = result.createMirrorView ();
226 const auto val_zero = KAV::zero();
227 const auto mag_zero = KAM::zero();
229 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
230 [&] (
const LO lclRow,
231 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
232 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
233 std::size_t numEnt) {
234 mag_type rowNorm = mag_zero;
235 val_type diagVal = val_zero;
236 const GO gblRow = rowMap.getGlobalElement (lclRow);
238 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
240 for (std::size_t k = 0; k < numEnt; ++k) {
241 const val_type matrixVal = val[k];
242 if (KAV::isInf (matrixVal)) {
243 result_h.foundInf =
true;
245 if (KAV::isNan (matrixVal)) {
246 result_h.foundNan =
true;
248 const mag_type matrixAbsVal = KAV::abs (matrixVal);
249 rowNorm += matrixAbsVal;
250 const LO lclCol = ind[k];
251 if (lclCol == lclDiagColInd) {
254 if (! assumeSymmetric) {
255 result_h.colNorms[lclCol] += matrixAbsVal;
261 if (diagVal == KAV::zero ()) {
262 result_h.foundZeroDiag =
true;
264 if (rowNorm == KAM::zero ()) {
265 result_h.foundZeroRowNorm =
true;
271 result_h.rowDiagonalEntries[lclRow] += diagVal;
272 result_h.rowNorms[lclRow] = rowNorm;
273 if (! assumeSymmetric &&
274 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
275 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
283 template<
class SC,
class LO,
class GO,
class NT>
284 class ComputeLocalRowScaledColumnNorms {
287 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
288 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
290 using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
292 ComputeLocalRowScaledColumnNorms (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
293 const Kokkos::View<const mag_type*, device_type>& rowNorms,
294 const crs_matrix_type& A) :
295 rowScaledColNorms_ (rowScaledColNorms),
296 rowNorms_ (rowNorms),
297 A_lcl_ (A.getLocalMatrixDevice ())
300 KOKKOS_INLINE_FUNCTION
void operator () (
const typename policy_type::member_type &team)
const {
301 using KAT = Kokkos::ArithTraits<val_type>;
303 const LO lclRow = team.league_rank();
304 const auto curRow = A_lcl_.rowConst (lclRow);
305 const mag_type rowNorm = rowNorms_[lclRow];
306 const LO numEnt = curRow.length;
307 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k) {
308 const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
309 const LO lclCol = curRow.colidx(k);
311 Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
316 run (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
317 const Kokkos::View<const mag_type*, device_type>& rowNorms,
318 const crs_matrix_type& A)
320 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
322 functor_type functor (rowScaledColNorms, rowNorms, A);
323 const LO lclNumRows =
324 static_cast<LO
> (A.getRowMap ()->getLocalNumElements ());
325 Kokkos::parallel_for (
"computeLocalRowScaledColumnNorms",
326 policy_type (lclNumRows, Kokkos::AUTO), functor);
330 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
331 Kokkos::View<const mag_type*, device_type> rowNorms_;
334 local_matrix_device_type A_lcl_;
337 template<
class SC,
class LO,
class GO,
class NT>
339 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
340 typename NT::device_type>& result,
343 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
344 impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
347 template<
class SC,
class LO,
class GO,
class NT>
349 computeLocalRowScaledColumnNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
350 typename NT::device_type>& result,
354 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
355 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
356 using device_type =
typename NT::device_type;
359 TEUCHOS_TEST_FOR_EXCEPTION
360 (colMapPtr.get () ==
nullptr, std::invalid_argument,
361 "computeLocalRowScaledColumnNorms: "
362 "Input matrix A must have a nonnull column Map.");
363 const LO lclNumCols =
static_cast<LO
> (colMapPtr->getLocalNumElements ());
364 if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
365 static_cast<std::size_t> (lclNumCols)) {
366 result.rowScaledColNorms =
367 Kokkos::View<mag_type*, device_type> (
"rowScaledColNorms", lclNumCols);
370 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
371 if (A_crs ==
nullptr) {
375 computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
381 template<
class SC,
class LO,
class GO,
class NT>
382 class ComputeLocalRowOneNorms {
384 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
385 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
386 using local_matrix_device_type =
387 typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
388 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
389 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
391 ComputeLocalRowOneNorms (
const equib_info_type& equib,
392 const local_matrix_device_type& A_lcl,
393 const local_map_type& rowMap,
394 const local_map_type& colMap) :
407 using value_type = int;
409 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
414 KOKKOS_INLINE_FUNCTION
void
415 join (value_type& dst,
416 const value_type& src)
const
421 KOKKOS_INLINE_FUNCTION
void
422 operator () (
const typename policy_type::member_type& team, value_type& dst)
const
424 using KAT = Kokkos::ArithTraits<val_type>;
425 using mag_type =
typename KAT::mag_type;
426 using KAM = Kokkos::ArithTraits<mag_type>;
428 const LO lclRow = team.league_rank();
429 const GO gblRow = rowMap_.getGlobalElement (lclRow);
431 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
433 const auto curRow = A_lcl_.rowConst (lclRow);
434 const LO numEnt = curRow.length;
436 const auto val_zero = KAT::zero();
437 const auto mag_zero = KAM::zero();
439 mag_type rowNorm = mag_zero;
440 val_type diagVal = val_zero;
441 value_type dstThread {0};
443 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
444 const val_type matrixVal = curRow.value (k);
445 if (KAT::isInf (matrixVal)) {
448 if (KAT::isNan (matrixVal)) {
451 const mag_type matrixAbsVal = KAT::abs (matrixVal);
452 normContrib += matrixAbsVal;
453 const LO lclCol = curRow.colidx (k);
454 if (lclCol == lclDiagColInd) {
455 diagContrib = curRow.value (k);
457 }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
461 Kokkos::single(Kokkos::PerTeam(team), [&](){
463 if (diagVal == KAT::zero ()) {
466 if (rowNorm == KAM::zero ()) {
469 equib_.rowDiagonalEntries[lclRow] = diagVal;
470 equib_.rowNorms[lclRow] = rowNorm;
475 equib_info_type equib_;
476 local_matrix_device_type A_lcl_;
477 local_map_type rowMap_;
478 local_map_type colMap_;
483 template<
class SC,
class LO,
class GO,
class NT>
484 class ComputeLocalRowAndColumnOneNorms {
486 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
487 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
488 using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
489 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
490 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
493 ComputeLocalRowAndColumnOneNorms (
const equib_info_type& equib,
494 const local_matrix_device_type& A_lcl,
495 const local_map_type& rowMap,
496 const local_map_type& colMap) :
509 using value_type = int;
511 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
516 KOKKOS_INLINE_FUNCTION
void
517 join (value_type& dst,
518 const value_type& src)
const
523 KOKKOS_INLINE_FUNCTION
void
524 operator () (
const typename policy_type::member_type& team, value_type& dst)
const
526 using KAT = Kokkos::ArithTraits<val_type>;
527 using mag_type =
typename KAT::mag_type;
528 using KAM = Kokkos::ArithTraits<mag_type>;
530 const LO lclRow = team.league_rank();
531 const GO gblRow = rowMap_.getGlobalElement (lclRow);
533 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
535 const auto curRow = A_lcl_.rowConst (lclRow);
536 const LO numEnt = curRow.length;
538 const auto val_zero = KAT::zero();
539 const auto mag_zero = KAM::zero();
541 mag_type rowNorm = mag_zero;
542 val_type diagVal = val_zero;
543 value_type dstThread {0};
545 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
546 const val_type matrixVal = curRow.value (k);
547 if (KAT::isInf (matrixVal)) {
550 if (KAT::isNan (matrixVal)) {
553 const mag_type matrixAbsVal = KAT::abs (matrixVal);
554 normContrib += matrixAbsVal;
555 const LO lclCol = curRow.colidx (k);
556 if (lclCol == lclDiagColInd) {
557 diagContrib = curRow.value (k);
559 if (! equib_.assumeSymmetric) {
560 Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
562 }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
566 Kokkos::single(Kokkos::PerTeam(team), [&](){
568 if (diagVal == KAT::zero ()) {
571 if (rowNorm == KAM::zero ()) {
578 equib_.rowDiagonalEntries[lclRow] = diagVal;
579 equib_.rowNorms[lclRow] = rowNorm;
580 if (! equib_.assumeSymmetric &&
581 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
584 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
590 equib_info_type equib_;
591 local_matrix_device_type A_lcl_;
592 local_map_type rowMap_;
593 local_map_type colMap_;
598 template<
class SC,
class LO,
class GO,
class NT>
599 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
602 using execution_space =
typename NT::device_type::execution_space;
603 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
604 using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
605 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
606 using device_type =
typename NT::device_type;
609 const LO lclNumRows =
static_cast<LO
> (A.
getRowMap ()->getLocalNumElements ());
610 const LO lclNumCols = 0;
611 constexpr
bool assumeSymmetric =
false;
612 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
618 Kokkos::parallel_reduce (
"computeLocalRowOneNorms",
619 policy_type (lclNumRows, Kokkos::AUTO), functor,
621 equib.foundInf = (result & 1) != 0;
622 equib.foundNan = (result & 2) != 0;
623 equib.foundZeroDiag = (result & 4) != 0;
624 equib.foundZeroRowNorm = (result & 8) != 0;
630 template<
class SC,
class LO,
class GO,
class NT>
631 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
633 const bool assumeSymmetric)
635 using execution_space =
typename NT::device_type::execution_space;
636 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
637 using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
638 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
639 using device_type =
typename NT::device_type;
642 const LO lclNumRows =
static_cast<LO
> (A.
getRowMap ()->getLocalNumElements ());
643 const LO lclNumCols =
static_cast<LO
> (A.
getColMap ()->getLocalNumElements ());
644 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
650 Kokkos::parallel_reduce (
"computeLocalRowAndColumnOneNorms",
651 policy_type (lclNumRows, Kokkos::AUTO), functor,
653 equib.foundInf = (result & 1) != 0;
654 equib.foundNan = (result & 2) != 0;
655 equib.foundZeroDiag = (result & 4) != 0;
656 equib.foundZeroRowNorm = (result & 8) != 0;
664 template<
class SC,
class LO,
class GO,
class NT>
665 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
666 typename NT::device_type>
670 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
672 if (A_crs ==
nullptr) {
701 template<
class SC,
class LO,
class GO,
class NT>
702 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
704 const bool assumeSymmetric)
707 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
709 if (A_crs ==
nullptr) {
717 template<
class SC,
class LO,
class GO,
class NT>
718 auto getLocalView_1d_readOnly (
720 const LO whichColumn)
721 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
722 Kokkos::ALL (), whichColumn))
724 if (X.isConstantStride ()) {
725 return Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
726 Kokkos::ALL (), whichColumn);
729 auto X_whichColumn = X.getVector (whichColumn);
730 return Kokkos::subview (X_whichColumn->getLocalViewDevice(Access::ReadOnly),
735 template<
class SC,
class LO,
class GO,
class NT>
736 auto getLocalView_1d_writeOnly (
738 const LO whichColumn)
739 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
740 Kokkos::ALL (), whichColumn))
742 if (X.isConstantStride ()) {
743 return Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
744 Kokkos::ALL (), whichColumn);
747 auto X_whichColumn = X.getVectorNonConst (whichColumn);
748 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
753 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
755 copy1DViewIntoMultiVectorColumn (
757 const LO whichColumn,
758 const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
760 auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
764 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
766 copy1DViewIntoMultiVectorColumn_mag (
768 const LO whichColumn,
769 const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
771 using KAT = Kokkos::ArithTraits<ViewValueType>;
772 using execution_space =
typename NT::execution_space;
773 using range_type = Kokkos::RangePolicy<execution_space, LO>;
774 auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
775 Kokkos::parallel_for(
"",
776 range_type(0, X_lcl.extent(0)),
777 KOKKOS_LAMBDA(
const LO i) {
778 X_lcl(i) = KAT::magnitude(view(i));
782 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
784 copyMultiVectorColumnInto1DView (
785 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
787 const LO whichColumn)
789 auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
793 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
795 copyMultiVectorColumnInto1DView_mag (
796 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
798 const LO whichColumn)
800 using implScalar =
typename Kokkos::ArithTraits<SC>::val_type;
801 using KAT = Kokkos::ArithTraits<implScalar>;
802 using range_type = Kokkos::RangePolicy<typename NT::execution_space, LO>;
803 auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
804 Kokkos::parallel_for(
"",
805 range_type(0, X_lcl.extent(0)),
806 KOKKOS_LAMBDA(LO i) {
807 view(i) = KAT::magnitude(X_lcl(i));
812 template<
class OneDViewType,
class IndexType>
815 static_assert (OneDViewType::rank == 1,
816 "OneDViewType must be a rank-1 Kokkos::View.");
817 static_assert (std::is_integral<IndexType>::value,
818 "IndexType must be a built-in integer type.");
819 FindZero (
const OneDViewType& x) : x_ (x) {}
822 KOKKOS_INLINE_FUNCTION
void
823 operator () (
const IndexType i,
int& result)
const {
824 using val_type =
typename OneDViewType::non_const_value_type;
825 result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
831 template<
class OneDViewType>
832 bool findZero (
const OneDViewType& x)
834 using view_type =
typename OneDViewType::const_type;
835 using execution_space =
typename view_type::execution_space;
836 using size_type =
typename view_type::size_type;
837 using functor_type = FindZero<view_type, size_type>;
839 Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
840 range.set_chunk_size (500);
843 Kokkos::parallel_reduce (
"findZero", range, functor_type (x), foundZero);
844 return foundZero == 1;
847 template<
class SC,
class LO,
class GO,
class NT>
849 globalizeRowOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
850 typename NT::device_type>& equib,
856 TEUCHOS_TEST_FOR_EXCEPTION
857 (G.get () ==
nullptr, std::invalid_argument,
858 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
859 "(that is, getGraph() must return nonnull).");
860 TEUCHOS_TEST_FOR_EXCEPTION
861 (! G->isFillComplete (), std::invalid_argument,
862 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
864 auto exp = G->getExporter ();
865 if (! exp.is_null ()) {
875 mv_type rowMapMV (G->getRowMap (), 2,
false);
877 copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
878 copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
880 mv_type rangeMapMV (G->getRangeMap (), 2,
true);
884 copyMultiVectorColumnInto1DView_mag (equib.rowNorms, rowMapMV, 0);
885 copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
890 equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
891 equib.foundZeroRowNorm = findZero (equib.rowNorms);
894 constexpr
int allReduceCount = 4;
895 int lclNaughtyMatrix[allReduceCount];
896 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
897 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
898 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
899 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
901 using Teuchos::outArg;
902 using Teuchos::REDUCE_MAX;
903 using Teuchos::reduceAll;
904 auto comm = G->getComm ();
905 int gblNaughtyMatrix[allReduceCount];
906 reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
907 lclNaughtyMatrix, gblNaughtyMatrix);
909 equib.foundInf = gblNaughtyMatrix[0] == 1;
910 equib.foundNan = gblNaughtyMatrix[1] == 1;
911 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
912 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
915 template<
class SC,
class LO,
class GO,
class NT>
917 globalizeColumnOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
918 typename NT::device_type>& equib,
920 const bool assumeSymmetric)
922 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
923 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
925 using device_type =
typename NT::device_type;
928 TEUCHOS_TEST_FOR_EXCEPTION
929 (G.get () ==
nullptr, std::invalid_argument,
930 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
931 "(that is, getGraph() must return nonnull).");
932 TEUCHOS_TEST_FOR_EXCEPTION
933 (! G->isFillComplete (), std::invalid_argument,
934 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
936 auto imp = G->getImporter ();
937 if (assumeSymmetric) {
938 const LO numCols = 2;
942 mv_type rowNorms_domMap (G->getDomainMap (), numCols,
false);
943 const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
944 if (rowMapSameAsDomainMap) {
945 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
946 copy1DViewIntoMultiVectorColumn_mag (rowNorms_domMap, 1, equib.rowDiagonalEntries);
952 mv_type rowNorms_rowMap (G->getRowMap (), numCols,
true);
953 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
954 copy1DViewIntoMultiVectorColumn_mag (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
960 std::unique_ptr<mv_type> rowNorms_colMap;
961 if (imp.is_null ()) {
964 std::unique_ptr<mv_type> (
new mv_type (rowNorms_domMap, * (G->getColMap ())));
968 std::unique_ptr<mv_type> (
new mv_type (G->getColMap (), numCols,
true));
973 const LO lclNumCols =
974 static_cast<LO
> (G->getColMap ()->getLocalNumElements ());
975 if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
977 Kokkos::View<mag_type*, device_type> (
"colNorms", lclNumCols);
979 if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
980 equib.colDiagonalEntries =
981 Kokkos::View<val_type*, device_type> (
"colDiagonalEntries", lclNumCols);
986 copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
987 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
990 if (! imp.is_null ()) {
991 const LO numCols = 3;
1000 mv_type colMapMV (G->getColMap (), numCols,
false);
1002 copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
1003 copy1DViewIntoMultiVectorColumn_mag (colMapMV, 1, equib.colDiagonalEntries);
1004 copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
1006 mv_type domainMapMV (G->getDomainMap (), numCols,
true);
1007 domainMapMV.doExport (colMapMV, *imp,
Tpetra::ADD);
1010 copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
1011 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
1012 copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
1019 template<
class SC,
class LO,
class GO,
class NT>
1020 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1021 typename NT::device_type>
1024 TEUCHOS_TEST_FOR_EXCEPTION
1026 "computeRowOneNorms: Input matrix A must be fillComplete.");
1029 Details::globalizeRowOneNorms (result, A);
1033 template<
class SC,
class LO,
class GO,
class NT>
1034 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1035 typename NT::device_type>
1037 const bool assumeSymmetric)
1039 TEUCHOS_TEST_FOR_EXCEPTION
1041 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1044 Details::globalizeRowOneNorms (result, A);
1045 if (! assumeSymmetric) {
1049 Details::computeLocalRowScaledColumnNorms (result, A);
1051 Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
1063 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
1064 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1065 computeRowOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1067 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1068 computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1069 const bool assumeSymmetric);
1071 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.