42 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
43 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
54 #include "Tpetra_CrsMatrix.hpp"
55 #include "Tpetra_Export.hpp"
56 #include "Tpetra_Map.hpp"
57 #include "Tpetra_MultiVector.hpp"
58 #include "Tpetra_RowMatrix.hpp"
59 #include "Kokkos_Core.hpp"
60 #include "Teuchos_CommHelpers.hpp"
66 template<
class SC,
class LO,
class GO,
class NT>
71 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
73 std::size_t maxNumEnt {0};
74 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
76 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
81 template<
class SC,
class LO,
class GO,
class NT>
83 forEachLocalRowMatrixRow (
86 const std::size_t maxNumEnt,
89 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
90 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
91 std::size_t )> doForEachRow)
93 using lids_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
94 using vals_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
95 lids_type indBuf(
"indices",maxNumEnt);
96 vals_type valBuf(
"values",maxNumEnt);
98 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
100 lids_type ind = Kokkos::subview(indBuf,std::make_pair((
size_t)0, numEnt));
101 vals_type val = Kokkos::subview(valBuf,std::make_pair((
size_t)0, numEnt));
103 doForEachRow (lclRow, ind, val, numEnt);
107 template<
class SC,
class LO,
class GO,
class NT>
109 forEachLocalRowMatrixRow (
113 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
114 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
115 std::size_t )> doForEachRow)
118 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
119 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
121 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
127 template<
class SC,
class LO,
class GO,
class NT>
130 typename NT::device_type>& result,
133 using KAT = Kokkos::ArithTraits<SC>;
134 using mag_type =
typename KAT::mag_type;
135 using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
137 auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
141 auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
143 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
144 [&] (
const LO lclRow,
145 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
146 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
147 std::size_t numEnt) {
148 const mag_type rowNorm = rowNorms_h[lclRow];
149 for (std::size_t k = 0; k < numEnt; ++k) {
150 const mag_type matrixAbsVal = KAV::abs (val[k]);
151 const LO lclCol = ind[k];
153 rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
163 template<
class SC,
class LO,
class GO,
class NT>
164 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
167 using KAT = Kokkos::ArithTraits<SC>;
168 using val_type =
typename KAT::val_type;
169 using KAV = Kokkos::ArithTraits<val_type>;
170 using mag_type =
typename KAT::mag_type;
171 using KAM = Kokkos::ArithTraits<mag_type>;
172 using device_type =
typename NT::device_type;
177 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
178 const LO lclNumCols = 0;
179 constexpr
bool assumeSymmetric =
false;
180 equib_info_type result (lclNumRows, lclNumCols, assumeSymmetric);
181 auto result_h = result.createMirrorView ();
183 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
184 [&] (
const LO lclRow,
185 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
186 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
187 std::size_t numEnt) {
188 mag_type rowNorm {0.0};
189 val_type diagVal {0.0};
190 const GO gblRow = rowMap.getGlobalElement (lclRow);
192 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
194 for (std::size_t k = 0; k < numEnt; ++k) {
195 const val_type matrixVal = val[k];
196 if (KAV::isInf (matrixVal)) {
197 result_h.foundInf =
true;
199 if (KAV::isNan (matrixVal)) {
200 result_h.foundNan =
true;
202 const mag_type matrixAbsVal = KAV::abs (matrixVal);
203 rowNorm += matrixAbsVal;
204 const LO lclCol = ind[k];
205 if (lclCol == lclDiagColInd) {
212 if (diagVal == KAV::zero ()) {
213 result_h.foundZeroDiag =
true;
215 if (rowNorm == KAM::zero ()) {
216 result_h.foundZeroRowNorm =
true;
222 result_h.rowDiagonalEntries[lclRow] += diagVal;
223 result_h.rowNorms[lclRow] = rowNorm;
226 result.assign (result_h);
232 template<
class SC,
class LO,
class GO,
class NT>
233 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
235 const bool assumeSymmetric)
237 using KAT = Kokkos::ArithTraits<SC>;
238 using val_type =
typename KAT::val_type;
239 using KAV = Kokkos::ArithTraits<val_type>;
240 using mag_type =
typename KAT::mag_type;
241 using KAM = Kokkos::ArithTraits<mag_type>;
242 using device_type =
typename NT::device_type;
246 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
247 const LO lclNumCols =
static_cast<LO
> (colMap.getLocalNumElements ());
250 (lclNumRows, lclNumCols, assumeSymmetric);
251 auto result_h = result.createMirrorView ();
253 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
254 [&] (
const LO lclRow,
255 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
256 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
257 std::size_t numEnt) {
258 mag_type rowNorm {0.0};
259 val_type diagVal {0.0};
260 const GO gblRow = rowMap.getGlobalElement (lclRow);
262 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
264 for (std::size_t k = 0; k < numEnt; ++k) {
265 const val_type matrixVal = val[k];
266 if (KAV::isInf (matrixVal)) {
267 result_h.foundInf =
true;
269 if (KAV::isNan (matrixVal)) {
270 result_h.foundNan =
true;
272 const mag_type matrixAbsVal = KAV::abs (matrixVal);
273 rowNorm += matrixAbsVal;
274 const LO lclCol = ind[k];
275 if (lclCol == lclDiagColInd) {
278 if (! assumeSymmetric) {
279 result_h.colNorms[lclCol] += matrixAbsVal;
285 if (diagVal == KAV::zero ()) {
286 result_h.foundZeroDiag =
true;
288 if (rowNorm == KAM::zero ()) {
289 result_h.foundZeroRowNorm =
true;
295 result_h.rowDiagonalEntries[lclRow] += diagVal;
296 result_h.rowNorms[lclRow] = rowNorm;
297 if (! assumeSymmetric &&
298 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
299 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
307 template<
class SC,
class LO,
class GO,
class NT>
308 class ComputeLocalRowScaledColumnNorms {
311 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
312 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
314 using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
316 ComputeLocalRowScaledColumnNorms (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
317 const Kokkos::View<const mag_type*, device_type>& rowNorms,
318 const crs_matrix_type& A) :
319 rowScaledColNorms_ (rowScaledColNorms),
320 rowNorms_ (rowNorms),
321 A_lcl_ (A.getLocalMatrixDevice ())
324 KOKKOS_INLINE_FUNCTION
void operator () (
const typename policy_type::member_type &team)
const {
325 using KAT = Kokkos::ArithTraits<val_type>;
327 const LO lclRow = team.league_rank();
328 const auto curRow = A_lcl_.rowConst (lclRow);
329 const mag_type rowNorm = rowNorms_[lclRow];
330 const LO numEnt = curRow.length;
331 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k) {
332 const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
333 const LO lclCol = curRow.colidx(k);
335 Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
340 run (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
341 const Kokkos::View<const mag_type*, device_type>& rowNorms,
342 const crs_matrix_type& A)
344 using execution_space =
typename device_type::execution_space;
345 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
347 functor_type functor (rowScaledColNorms, rowNorms, A);
348 const LO lclNumRows =
349 static_cast<LO
> (A.getRowMap ()->getLocalNumElements ());
350 Kokkos::parallel_for (
"computeLocalRowScaledColumnNorms",
351 policy_type (lclNumRows, Kokkos::AUTO), functor);
355 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
356 Kokkos::View<const mag_type*, device_type> rowNorms_;
359 local_matrix_device_type A_lcl_;
362 template<
class SC,
class LO,
class GO,
class NT>
364 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
365 typename NT::device_type>& result,
368 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
369 impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
372 template<
class SC,
class LO,
class GO,
class NT>
374 computeLocalRowScaledColumnNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
375 typename NT::device_type>& result,
379 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
380 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
381 using device_type =
typename NT::device_type;
384 TEUCHOS_TEST_FOR_EXCEPTION
385 (colMapPtr.get () ==
nullptr, std::invalid_argument,
386 "computeLocalRowScaledColumnNorms: "
387 "Input matrix A must have a nonnull column Map.");
388 const LO lclNumCols =
static_cast<LO
> (colMapPtr->getLocalNumElements ());
389 if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
390 static_cast<std::size_t> (lclNumCols)) {
391 result.rowScaledColNorms =
392 Kokkos::View<mag_type*, device_type> (
"rowScaledColNorms", lclNumCols);
395 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
396 if (A_crs ==
nullptr) {
400 computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
406 template<
class SC,
class LO,
class GO,
class NT>
407 class ComputeLocalRowOneNorms {
409 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
410 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
411 using local_matrix_device_type =
412 typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
413 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
414 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
416 ComputeLocalRowOneNorms (
const equib_info_type& equib,
417 const local_matrix_device_type& A_lcl,
418 const local_map_type& rowMap,
419 const local_map_type& colMap) :
432 using value_type = int;
434 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
439 KOKKOS_INLINE_FUNCTION
void
440 join (value_type& dst,
441 const value_type& src)
const
446 KOKKOS_INLINE_FUNCTION
void
447 operator () (
const typename policy_type::member_type& team, value_type& dst)
const
449 using KAT = Kokkos::ArithTraits<val_type>;
450 using mag_type =
typename KAT::mag_type;
451 using KAM = Kokkos::ArithTraits<mag_type>;
453 const LO lclRow = team.league_rank();
454 const GO gblRow = rowMap_.getGlobalElement (lclRow);
456 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
458 const auto curRow = A_lcl_.rowConst (lclRow);
459 const LO numEnt = curRow.length;
461 mag_type rowNorm {0.0};
462 val_type diagVal {0.0};
463 value_type dstThread {0};
465 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
466 const val_type matrixVal = curRow.value (k);
467 if (KAT::isInf (matrixVal)) {
470 if (KAT::isNan (matrixVal)) {
473 const mag_type matrixAbsVal = KAT::abs (matrixVal);
474 normContrib += matrixAbsVal;
475 const LO lclCol = curRow.colidx (k);
476 if (lclCol == lclDiagColInd) {
477 diagContrib = curRow.value (k);
479 }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
483 Kokkos::single(Kokkos::PerTeam(team), [&](){
485 if (diagVal == KAT::zero ()) {
488 if (rowNorm == KAM::zero ()) {
491 equib_.rowDiagonalEntries[lclRow] = diagVal;
492 equib_.rowNorms[lclRow] = rowNorm;
497 equib_info_type equib_;
498 local_matrix_device_type A_lcl_;
499 local_map_type rowMap_;
500 local_map_type colMap_;
505 template<
class SC,
class LO,
class GO,
class NT>
506 class ComputeLocalRowAndColumnOneNorms {
508 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
509 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
510 using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
511 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
512 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
515 ComputeLocalRowAndColumnOneNorms (
const equib_info_type& equib,
516 const local_matrix_device_type& A_lcl,
517 const local_map_type& rowMap,
518 const local_map_type& colMap) :
531 using value_type = int;
533 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
538 KOKKOS_INLINE_FUNCTION
void
539 join (value_type& dst,
540 const value_type& src)
const
545 KOKKOS_INLINE_FUNCTION
void
546 operator () (
const typename policy_type::member_type& team, value_type& dst)
const
548 using KAT = Kokkos::ArithTraits<val_type>;
549 using mag_type =
typename KAT::mag_type;
550 using KAM = Kokkos::ArithTraits<mag_type>;
552 const LO lclRow = team.league_rank();
553 const GO gblRow = rowMap_.getGlobalElement (lclRow);
555 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
557 const auto curRow = A_lcl_.rowConst (lclRow);
558 const LO numEnt = curRow.length;
560 mag_type rowNorm {0.0};
561 val_type diagVal {0.0};
562 value_type dstThread {0};
564 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
565 const val_type matrixVal = curRow.value (k);
566 if (KAT::isInf (matrixVal)) {
569 if (KAT::isNan (matrixVal)) {
572 const mag_type matrixAbsVal = KAT::abs (matrixVal);
573 normContrib += matrixAbsVal;
574 const LO lclCol = curRow.colidx (k);
575 if (lclCol == lclDiagColInd) {
576 diagContrib = curRow.value (k);
578 if (! equib_.assumeSymmetric) {
579 Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
581 }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
585 Kokkos::single(Kokkos::PerTeam(team), [&](){
587 if (diagVal == KAT::zero ()) {
590 if (rowNorm == KAM::zero ()) {
597 equib_.rowDiagonalEntries[lclRow] = diagVal;
598 equib_.rowNorms[lclRow] = rowNorm;
599 if (! equib_.assumeSymmetric &&
600 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
603 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
609 equib_info_type equib_;
610 local_matrix_device_type A_lcl_;
611 local_map_type rowMap_;
612 local_map_type colMap_;
617 template<
class SC,
class LO,
class GO,
class NT>
618 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
621 using execution_space =
typename NT::device_type::execution_space;
622 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
623 using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
624 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
625 using device_type =
typename NT::device_type;
628 const LO lclNumRows =
static_cast<LO
> (A.
getRowMap ()->getLocalNumElements ());
629 const LO lclNumCols = 0;
630 constexpr
bool assumeSymmetric =
false;
631 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
637 Kokkos::parallel_reduce (
"computeLocalRowOneNorms",
638 policy_type (lclNumRows, Kokkos::AUTO), functor,
640 equib.foundInf = (result & 1) != 0;
641 equib.foundNan = (result & 2) != 0;
642 equib.foundZeroDiag = (result & 4) != 0;
643 equib.foundZeroRowNorm = (result & 8) != 0;
649 template<
class SC,
class LO,
class GO,
class NT>
650 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
652 const bool assumeSymmetric)
654 using execution_space =
typename NT::device_type::execution_space;
655 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
656 using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
657 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
658 using device_type =
typename NT::device_type;
661 const LO lclNumRows =
static_cast<LO
> (A.
getRowMap ()->getLocalNumElements ());
662 const LO lclNumCols =
static_cast<LO
> (A.
getColMap ()->getLocalNumElements ());
663 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
669 Kokkos::parallel_reduce (
"computeLocalRowAndColumnOneNorms",
670 policy_type (lclNumRows, Kokkos::AUTO), functor,
672 equib.foundInf = (result & 1) != 0;
673 equib.foundNan = (result & 2) != 0;
674 equib.foundZeroDiag = (result & 4) != 0;
675 equib.foundZeroRowNorm = (result & 8) != 0;
683 template<
class SC,
class LO,
class GO,
class NT>
684 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
685 typename NT::device_type>
689 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
691 if (A_crs ==
nullptr) {
720 template<
class SC,
class LO,
class GO,
class NT>
721 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
723 const bool assumeSymmetric)
726 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
728 if (A_crs ==
nullptr) {
736 template<
class SC,
class LO,
class GO,
class NT>
737 auto getLocalView_1d_readOnly (
739 const LO whichColumn)
740 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
741 Kokkos::ALL (), whichColumn))
743 if (X.isConstantStride ()) {
744 return Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
745 Kokkos::ALL (), whichColumn);
748 auto X_whichColumn = X.getVector (whichColumn);
749 return Kokkos::subview (X_whichColumn->getLocalViewDevice(Access::ReadOnly),
754 template<
class SC,
class LO,
class GO,
class NT>
755 auto getLocalView_1d_writeOnly (
757 const LO whichColumn)
758 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
759 Kokkos::ALL (), whichColumn))
761 if (X.isConstantStride ()) {
762 return Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
763 Kokkos::ALL (), whichColumn);
766 auto X_whichColumn = X.getVectorNonConst (whichColumn);
767 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
772 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
774 copy1DViewIntoMultiVectorColumn (
776 const LO whichColumn,
777 const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
779 auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
783 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
785 copyMultiVectorColumnInto1DView (
786 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
788 const LO whichColumn)
790 auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
794 template<
class OneDViewType,
class IndexType>
797 static_assert (OneDViewType::rank == 1,
798 "OneDViewType must be a rank-1 Kokkos::View.");
799 static_assert (std::is_integral<IndexType>::value,
800 "IndexType must be a built-in integer type.");
801 FindZero (
const OneDViewType& x) : x_ (x) {}
804 KOKKOS_INLINE_FUNCTION
void
805 operator () (
const IndexType i,
int& result)
const {
806 using val_type =
typename OneDViewType::non_const_value_type;
807 result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
813 template<
class OneDViewType>
814 bool findZero (
const OneDViewType& x)
816 using view_type =
typename OneDViewType::const_type;
817 using execution_space =
typename view_type::execution_space;
818 using size_type =
typename view_type::size_type;
819 using functor_type = FindZero<view_type, size_type>;
821 Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
822 range.set_chunk_size (500);
825 Kokkos::parallel_reduce (
"findZero", range, functor_type (x), foundZero);
826 return foundZero == 1;
829 template<
class SC,
class LO,
class GO,
class NT>
831 globalizeRowOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
832 typename NT::device_type>& equib,
838 TEUCHOS_TEST_FOR_EXCEPTION
839 (G.get () ==
nullptr, std::invalid_argument,
840 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
841 "(that is, getGraph() must return nonnull).");
842 TEUCHOS_TEST_FOR_EXCEPTION
843 (! G->isFillComplete (), std::invalid_argument,
844 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
846 auto exp = G->getExporter ();
847 if (! exp.is_null ()) {
857 mv_type rowMapMV (G->getRowMap (), 2,
false);
859 copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
860 copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
862 mv_type rangeMapMV (G->getRangeMap (), 2,
true);
866 copyMultiVectorColumnInto1DView (equib.rowNorms, rowMapMV, 0);
867 copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
872 equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
873 equib.foundZeroRowNorm = findZero (equib.rowNorms);
876 constexpr
int allReduceCount = 4;
877 int lclNaughtyMatrix[allReduceCount];
878 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
879 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
880 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
881 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
883 using Teuchos::outArg;
884 using Teuchos::REDUCE_MAX;
885 using Teuchos::reduceAll;
886 auto comm = G->getComm ();
887 int gblNaughtyMatrix[allReduceCount];
888 reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
889 lclNaughtyMatrix, gblNaughtyMatrix);
891 equib.foundInf = gblNaughtyMatrix[0] == 1;
892 equib.foundNan = gblNaughtyMatrix[1] == 1;
893 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
894 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
897 template<
class SC,
class LO,
class GO,
class NT>
899 globalizeColumnOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
900 typename NT::device_type>& equib,
902 const bool assumeSymmetric)
904 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
905 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
907 using device_type =
typename NT::device_type;
910 TEUCHOS_TEST_FOR_EXCEPTION
911 (G.get () ==
nullptr, std::invalid_argument,
912 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
913 "(that is, getGraph() must return nonnull).");
914 TEUCHOS_TEST_FOR_EXCEPTION
915 (! G->isFillComplete (), std::invalid_argument,
916 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
918 auto imp = G->getImporter ();
919 if (assumeSymmetric) {
920 const LO numCols = 2;
924 mv_type rowNorms_domMap (G->getDomainMap (), numCols,
false);
925 const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
926 if (rowMapSameAsDomainMap) {
927 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
928 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 1, equib.rowDiagonalEntries);
934 mv_type rowNorms_rowMap (G->getRowMap (), numCols,
true);
935 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
936 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
942 std::unique_ptr<mv_type> rowNorms_colMap;
943 if (imp.is_null ()) {
946 std::unique_ptr<mv_type> (
new mv_type (rowNorms_domMap, * (G->getColMap ())));
950 std::unique_ptr<mv_type> (
new mv_type (G->getColMap (), numCols,
true));
955 const LO lclNumCols =
956 static_cast<LO
> (G->getColMap ()->getLocalNumElements ());
957 if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
959 Kokkos::View<mag_type*, device_type> (
"colNorms", lclNumCols);
961 if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
962 equib.colDiagonalEntries =
963 Kokkos::View<val_type*, device_type> (
"colDiagonalEntries", lclNumCols);
968 copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
969 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
972 if (! imp.is_null ()) {
973 const LO numCols = 3;
982 mv_type colMapMV (G->getColMap (), numCols,
false);
984 copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
985 copy1DViewIntoMultiVectorColumn (colMapMV, 1, equib.colDiagonalEntries);
986 copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
988 mv_type domainMapMV (G->getDomainMap (), numCols,
true);
989 domainMapMV.doExport (colMapMV, *imp,
Tpetra::ADD);
992 copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
993 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
994 copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
1001 template<
class SC,
class LO,
class GO,
class NT>
1002 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1003 typename NT::device_type>
1006 TEUCHOS_TEST_FOR_EXCEPTION
1008 "computeRowOneNorms: Input matrix A must be fillComplete.");
1011 Details::globalizeRowOneNorms (result, A);
1015 template<
class SC,
class LO,
class GO,
class NT>
1016 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1017 typename NT::device_type>
1019 const bool assumeSymmetric)
1021 TEUCHOS_TEST_FOR_EXCEPTION
1023 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1026 Details::globalizeRowOneNorms (result, A);
1027 if (! assumeSymmetric) {
1031 Details::computeLocalRowScaledColumnNorms (result, A);
1033 Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
1045 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
1046 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1047 computeRowOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1049 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1050 computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1051 const bool assumeSymmetric);
1053 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.