10 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
22 #include "Tpetra_CrsMatrix.hpp"
23 #include "Tpetra_Export.hpp"
24 #include "Tpetra_Map.hpp"
25 #include "Tpetra_MultiVector.hpp"
26 #include "Tpetra_RowMatrix.hpp"
27 #include "Kokkos_Core.hpp"
28 #include "Teuchos_CommHelpers.hpp"
34 template<
class SC,
class LO,
class GO,
class NT>
39 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
41 std::size_t maxNumEnt {0};
42 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
44 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
49 template<
class SC,
class LO,
class GO,
class NT>
51 forEachLocalRowMatrixRow (
54 const std::size_t maxNumEnt,
57 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
58 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
59 std::size_t )> doForEachRow)
61 using lids_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
62 using vals_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
63 lids_type indBuf(
"indices",maxNumEnt);
64 vals_type valBuf(
"values",maxNumEnt);
66 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
68 lids_type ind = Kokkos::subview(indBuf,std::make_pair((
size_t)0, numEnt));
69 vals_type val = Kokkos::subview(valBuf,std::make_pair((
size_t)0, numEnt));
71 doForEachRow (lclRow, ind, val, numEnt);
75 template<
class SC,
class LO,
class GO,
class NT>
77 forEachLocalRowMatrixRow (
81 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
82 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
83 std::size_t )> doForEachRow)
86 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
87 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
89 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
95 template<
class SC,
class LO,
class GO,
class NT>
98 typename NT::device_type>& result,
101 using KAT = Kokkos::ArithTraits<SC>;
102 using mag_type =
typename KAT::mag_type;
103 using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
105 auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
109 auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
111 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
112 [&] (
const LO lclRow,
113 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
114 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
115 std::size_t numEnt) {
116 const mag_type rowNorm = rowNorms_h[lclRow];
117 for (std::size_t k = 0; k < numEnt; ++k) {
118 const mag_type matrixAbsVal = KAV::abs (val[k]);
119 const LO lclCol = ind[k];
121 rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
131 template<
class SC,
class LO,
class GO,
class NT>
132 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
135 using KAT = Kokkos::ArithTraits<SC>;
136 using val_type =
typename KAT::val_type;
137 using KAV = Kokkos::ArithTraits<val_type>;
138 using mag_type =
typename KAT::mag_type;
139 using KAM = Kokkos::ArithTraits<mag_type>;
140 using device_type =
typename NT::device_type;
145 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
146 const LO lclNumCols = 0;
147 constexpr
bool assumeSymmetric =
false;
148 equib_info_type result (lclNumRows, lclNumCols, assumeSymmetric);
149 auto result_h = result.createMirrorView ();
151 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
152 [&] (
const LO lclRow,
153 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
154 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
155 std::size_t numEnt) {
156 mag_type rowNorm {0.0};
157 val_type diagVal {0.0};
158 const GO gblRow = rowMap.getGlobalElement (lclRow);
160 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
162 for (std::size_t k = 0; k < numEnt; ++k) {
163 const val_type matrixVal = val[k];
164 if (KAV::isInf (matrixVal)) {
165 result_h.foundInf =
true;
167 if (KAV::isNan (matrixVal)) {
168 result_h.foundNan =
true;
170 const mag_type matrixAbsVal = KAV::abs (matrixVal);
171 rowNorm += matrixAbsVal;
172 const LO lclCol = ind[k];
173 if (lclCol == lclDiagColInd) {
180 if (diagVal == KAV::zero ()) {
181 result_h.foundZeroDiag =
true;
183 if (rowNorm == KAM::zero ()) {
184 result_h.foundZeroRowNorm =
true;
190 result_h.rowDiagonalEntries[lclRow] += diagVal;
191 result_h.rowNorms[lclRow] = rowNorm;
194 result.assign (result_h);
200 template<
class SC,
class LO,
class GO,
class NT>
201 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
203 const bool assumeSymmetric)
205 using KAT = Kokkos::ArithTraits<SC>;
206 using val_type =
typename KAT::val_type;
207 using KAV = Kokkos::ArithTraits<val_type>;
208 using mag_type =
typename KAT::mag_type;
209 using KAM = Kokkos::ArithTraits<mag_type>;
210 using device_type =
typename NT::device_type;
214 const LO lclNumRows =
static_cast<LO
> (rowMap.getLocalNumElements ());
215 const LO lclNumCols =
static_cast<LO
> (colMap.getLocalNumElements ());
218 (lclNumRows, lclNumCols, assumeSymmetric);
219 auto result_h = result.createMirrorView ();
221 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
222 [&] (
const LO lclRow,
223 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
224 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
225 std::size_t numEnt) {
226 mag_type rowNorm {0.0};
227 val_type diagVal {0.0};
228 const GO gblRow = rowMap.getGlobalElement (lclRow);
230 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
232 for (std::size_t k = 0; k < numEnt; ++k) {
233 const val_type matrixVal = val[k];
234 if (KAV::isInf (matrixVal)) {
235 result_h.foundInf =
true;
237 if (KAV::isNan (matrixVal)) {
238 result_h.foundNan =
true;
240 const mag_type matrixAbsVal = KAV::abs (matrixVal);
241 rowNorm += matrixAbsVal;
242 const LO lclCol = ind[k];
243 if (lclCol == lclDiagColInd) {
246 if (! assumeSymmetric) {
247 result_h.colNorms[lclCol] += matrixAbsVal;
253 if (diagVal == KAV::zero ()) {
254 result_h.foundZeroDiag =
true;
256 if (rowNorm == KAM::zero ()) {
257 result_h.foundZeroRowNorm =
true;
263 result_h.rowDiagonalEntries[lclRow] += diagVal;
264 result_h.rowNorms[lclRow] = rowNorm;
265 if (! assumeSymmetric &&
266 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
267 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
275 template<
class SC,
class LO,
class GO,
class NT>
276 class ComputeLocalRowScaledColumnNorms {
279 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
280 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
282 using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
284 ComputeLocalRowScaledColumnNorms (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
285 const Kokkos::View<const mag_type*, device_type>& rowNorms,
286 const crs_matrix_type& A) :
287 rowScaledColNorms_ (rowScaledColNorms),
288 rowNorms_ (rowNorms),
289 A_lcl_ (A.getLocalMatrixDevice ())
292 KOKKOS_INLINE_FUNCTION
void operator () (
const typename policy_type::member_type &team)
const {
293 using KAT = Kokkos::ArithTraits<val_type>;
295 const LO lclRow = team.league_rank();
296 const auto curRow = A_lcl_.rowConst (lclRow);
297 const mag_type rowNorm = rowNorms_[lclRow];
298 const LO numEnt = curRow.length;
299 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k) {
300 const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
301 const LO lclCol = curRow.colidx(k);
303 Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
308 run (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
309 const Kokkos::View<const mag_type*, device_type>& rowNorms,
310 const crs_matrix_type& A)
312 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
314 functor_type functor (rowScaledColNorms, rowNorms, A);
315 const LO lclNumRows =
316 static_cast<LO
> (A.getRowMap ()->getLocalNumElements ());
317 Kokkos::parallel_for (
"computeLocalRowScaledColumnNorms",
318 policy_type (lclNumRows, Kokkos::AUTO), functor);
322 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
323 Kokkos::View<const mag_type*, device_type> rowNorms_;
326 local_matrix_device_type A_lcl_;
329 template<
class SC,
class LO,
class GO,
class NT>
331 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
332 typename NT::device_type>& result,
335 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
336 impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
339 template<
class SC,
class LO,
class GO,
class NT>
341 computeLocalRowScaledColumnNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
342 typename NT::device_type>& result,
346 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
347 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
348 using device_type =
typename NT::device_type;
351 TEUCHOS_TEST_FOR_EXCEPTION
352 (colMapPtr.get () ==
nullptr, std::invalid_argument,
353 "computeLocalRowScaledColumnNorms: "
354 "Input matrix A must have a nonnull column Map.");
355 const LO lclNumCols =
static_cast<LO
> (colMapPtr->getLocalNumElements ());
356 if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
357 static_cast<std::size_t> (lclNumCols)) {
358 result.rowScaledColNorms =
359 Kokkos::View<mag_type*, device_type> (
"rowScaledColNorms", lclNumCols);
362 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
363 if (A_crs ==
nullptr) {
367 computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
373 template<
class SC,
class LO,
class GO,
class NT>
374 class ComputeLocalRowOneNorms {
376 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
377 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
378 using local_matrix_device_type =
379 typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
380 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
381 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
383 ComputeLocalRowOneNorms (
const equib_info_type& equib,
384 const local_matrix_device_type& A_lcl,
385 const local_map_type& rowMap,
386 const local_map_type& colMap) :
399 using value_type = int;
401 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
406 KOKKOS_INLINE_FUNCTION
void
407 join (value_type& dst,
408 const value_type& src)
const
413 KOKKOS_INLINE_FUNCTION
void
414 operator () (
const typename policy_type::member_type& team, value_type& dst)
const
416 using KAT = Kokkos::ArithTraits<val_type>;
417 using mag_type =
typename KAT::mag_type;
418 using KAM = Kokkos::ArithTraits<mag_type>;
420 const LO lclRow = team.league_rank();
421 const GO gblRow = rowMap_.getGlobalElement (lclRow);
423 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
425 const auto curRow = A_lcl_.rowConst (lclRow);
426 const LO numEnt = curRow.length;
428 mag_type rowNorm {0.0};
429 val_type diagVal {0.0};
430 value_type dstThread {0};
432 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
433 const val_type matrixVal = curRow.value (k);
434 if (KAT::isInf (matrixVal)) {
437 if (KAT::isNan (matrixVal)) {
440 const mag_type matrixAbsVal = KAT::abs (matrixVal);
441 normContrib += matrixAbsVal;
442 const LO lclCol = curRow.colidx (k);
443 if (lclCol == lclDiagColInd) {
444 diagContrib = curRow.value (k);
446 }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
450 Kokkos::single(Kokkos::PerTeam(team), [&](){
452 if (diagVal == KAT::zero ()) {
455 if (rowNorm == KAM::zero ()) {
458 equib_.rowDiagonalEntries[lclRow] = diagVal;
459 equib_.rowNorms[lclRow] = rowNorm;
464 equib_info_type equib_;
465 local_matrix_device_type A_lcl_;
466 local_map_type rowMap_;
467 local_map_type colMap_;
472 template<
class SC,
class LO,
class GO,
class NT>
473 class ComputeLocalRowAndColumnOneNorms {
475 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
476 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
477 using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
478 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
479 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
482 ComputeLocalRowAndColumnOneNorms (
const equib_info_type& equib,
483 const local_matrix_device_type& A_lcl,
484 const local_map_type& rowMap,
485 const local_map_type& colMap) :
498 using value_type = int;
500 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
505 KOKKOS_INLINE_FUNCTION
void
506 join (value_type& dst,
507 const value_type& src)
const
512 KOKKOS_INLINE_FUNCTION
void
513 operator () (
const typename policy_type::member_type& team, value_type& dst)
const
515 using KAT = Kokkos::ArithTraits<val_type>;
516 using mag_type =
typename KAT::mag_type;
517 using KAM = Kokkos::ArithTraits<mag_type>;
519 const LO lclRow = team.league_rank();
520 const GO gblRow = rowMap_.getGlobalElement (lclRow);
522 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
524 const auto curRow = A_lcl_.rowConst (lclRow);
525 const LO numEnt = curRow.length;
527 mag_type rowNorm {0.0};
528 val_type diagVal {0.0};
529 value_type dstThread {0};
531 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type &normContrib, val_type& diagContrib, value_type& dstContrib) {
532 const val_type matrixVal = curRow.value (k);
533 if (KAT::isInf (matrixVal)) {
536 if (KAT::isNan (matrixVal)) {
539 const mag_type matrixAbsVal = KAT::abs (matrixVal);
540 normContrib += matrixAbsVal;
541 const LO lclCol = curRow.colidx (k);
542 if (lclCol == lclDiagColInd) {
543 diagContrib = curRow.value (k);
545 if (! equib_.assumeSymmetric) {
546 Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
548 }, Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
552 Kokkos::single(Kokkos::PerTeam(team), [&](){
554 if (diagVal == KAT::zero ()) {
557 if (rowNorm == KAM::zero ()) {
564 equib_.rowDiagonalEntries[lclRow] = diagVal;
565 equib_.rowNorms[lclRow] = rowNorm;
566 if (! equib_.assumeSymmetric &&
567 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
570 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
576 equib_info_type equib_;
577 local_matrix_device_type A_lcl_;
578 local_map_type rowMap_;
579 local_map_type colMap_;
584 template<
class SC,
class LO,
class GO,
class NT>
585 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
588 using execution_space =
typename NT::device_type::execution_space;
589 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
590 using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
591 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
592 using device_type =
typename NT::device_type;
595 const LO lclNumRows =
static_cast<LO
> (A.
getRowMap ()->getLocalNumElements ());
596 const LO lclNumCols = 0;
597 constexpr
bool assumeSymmetric =
false;
598 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
604 Kokkos::parallel_reduce (
"computeLocalRowOneNorms",
605 policy_type (lclNumRows, Kokkos::AUTO), functor,
607 equib.foundInf = (result & 1) != 0;
608 equib.foundNan = (result & 2) != 0;
609 equib.foundZeroDiag = (result & 4) != 0;
610 equib.foundZeroRowNorm = (result & 8) != 0;
616 template<
class SC,
class LO,
class GO,
class NT>
617 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
619 const bool assumeSymmetric)
621 using execution_space =
typename NT::device_type::execution_space;
622 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
623 using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
624 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
625 using device_type =
typename NT::device_type;
628 const LO lclNumRows =
static_cast<LO
> (A.
getRowMap ()->getLocalNumElements ());
629 const LO lclNumCols =
static_cast<LO
> (A.
getColMap ()->getLocalNumElements ());
630 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
636 Kokkos::parallel_reduce (
"computeLocalRowAndColumnOneNorms",
637 policy_type (lclNumRows, Kokkos::AUTO), functor,
639 equib.foundInf = (result & 1) != 0;
640 equib.foundNan = (result & 2) != 0;
641 equib.foundZeroDiag = (result & 4) != 0;
642 equib.foundZeroRowNorm = (result & 8) != 0;
650 template<
class SC,
class LO,
class GO,
class NT>
651 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
652 typename NT::device_type>
656 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
658 if (A_crs ==
nullptr) {
687 template<
class SC,
class LO,
class GO,
class NT>
688 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
690 const bool assumeSymmetric)
693 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
> (&A);
695 if (A_crs ==
nullptr) {
703 template<
class SC,
class LO,
class GO,
class NT>
704 auto getLocalView_1d_readOnly (
706 const LO whichColumn)
707 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
708 Kokkos::ALL (), whichColumn))
710 if (X.isConstantStride ()) {
711 return Kokkos::subview (X.getLocalViewDevice(Access::ReadOnly),
712 Kokkos::ALL (), whichColumn);
715 auto X_whichColumn = X.getVector (whichColumn);
716 return Kokkos::subview (X_whichColumn->getLocalViewDevice(Access::ReadOnly),
721 template<
class SC,
class LO,
class GO,
class NT>
722 auto getLocalView_1d_writeOnly (
724 const LO whichColumn)
725 -> decltype (Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
726 Kokkos::ALL (), whichColumn))
728 if (X.isConstantStride ()) {
729 return Kokkos::subview (X.getLocalViewDevice(Access::ReadWrite),
730 Kokkos::ALL (), whichColumn);
733 auto X_whichColumn = X.getVectorNonConst (whichColumn);
734 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
739 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
741 copy1DViewIntoMultiVectorColumn (
743 const LO whichColumn,
744 const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
746 auto X_lcl = getLocalView_1d_writeOnly (X, whichColumn);
750 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
752 copyMultiVectorColumnInto1DView (
753 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
755 const LO whichColumn)
757 auto X_lcl = getLocalView_1d_readOnly (X, whichColumn);
761 template<
class OneDViewType,
class IndexType>
764 static_assert (OneDViewType::rank == 1,
765 "OneDViewType must be a rank-1 Kokkos::View.");
766 static_assert (std::is_integral<IndexType>::value,
767 "IndexType must be a built-in integer type.");
768 FindZero (
const OneDViewType& x) : x_ (x) {}
771 KOKKOS_INLINE_FUNCTION
void
772 operator () (
const IndexType i,
int& result)
const {
773 using val_type =
typename OneDViewType::non_const_value_type;
774 result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
780 template<
class OneDViewType>
781 bool findZero (
const OneDViewType& x)
783 using view_type =
typename OneDViewType::const_type;
784 using execution_space =
typename view_type::execution_space;
785 using size_type =
typename view_type::size_type;
786 using functor_type = FindZero<view_type, size_type>;
788 Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
789 range.set_chunk_size (500);
792 Kokkos::parallel_reduce (
"findZero", range, functor_type (x), foundZero);
793 return foundZero == 1;
796 template<
class SC,
class LO,
class GO,
class NT>
798 globalizeRowOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
799 typename NT::device_type>& equib,
805 TEUCHOS_TEST_FOR_EXCEPTION
806 (G.get () ==
nullptr, std::invalid_argument,
807 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
808 "(that is, getGraph() must return nonnull).");
809 TEUCHOS_TEST_FOR_EXCEPTION
810 (! G->isFillComplete (), std::invalid_argument,
811 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
813 auto exp = G->getExporter ();
814 if (! exp.is_null ()) {
824 mv_type rowMapMV (G->getRowMap (), 2,
false);
826 copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
827 copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
829 mv_type rangeMapMV (G->getRangeMap (), 2,
true);
833 copyMultiVectorColumnInto1DView (equib.rowNorms, rowMapMV, 0);
834 copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
839 equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
840 equib.foundZeroRowNorm = findZero (equib.rowNorms);
843 constexpr
int allReduceCount = 4;
844 int lclNaughtyMatrix[allReduceCount];
845 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
846 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
847 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
848 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
850 using Teuchos::outArg;
851 using Teuchos::REDUCE_MAX;
852 using Teuchos::reduceAll;
853 auto comm = G->getComm ();
854 int gblNaughtyMatrix[allReduceCount];
855 reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
856 lclNaughtyMatrix, gblNaughtyMatrix);
858 equib.foundInf = gblNaughtyMatrix[0] == 1;
859 equib.foundNan = gblNaughtyMatrix[1] == 1;
860 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
861 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
864 template<
class SC,
class LO,
class GO,
class NT>
866 globalizeColumnOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
867 typename NT::device_type>& equib,
869 const bool assumeSymmetric)
871 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
872 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
874 using device_type =
typename NT::device_type;
877 TEUCHOS_TEST_FOR_EXCEPTION
878 (G.get () ==
nullptr, std::invalid_argument,
879 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
880 "(that is, getGraph() must return nonnull).");
881 TEUCHOS_TEST_FOR_EXCEPTION
882 (! G->isFillComplete (), std::invalid_argument,
883 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
885 auto imp = G->getImporter ();
886 if (assumeSymmetric) {
887 const LO numCols = 2;
891 mv_type rowNorms_domMap (G->getDomainMap (), numCols,
false);
892 const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
893 if (rowMapSameAsDomainMap) {
894 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
895 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 1, equib.rowDiagonalEntries);
901 mv_type rowNorms_rowMap (G->getRowMap (), numCols,
true);
902 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
903 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
909 std::unique_ptr<mv_type> rowNorms_colMap;
910 if (imp.is_null ()) {
913 std::unique_ptr<mv_type> (
new mv_type (rowNorms_domMap, * (G->getColMap ())));
917 std::unique_ptr<mv_type> (
new mv_type (G->getColMap (), numCols,
true));
922 const LO lclNumCols =
923 static_cast<LO
> (G->getColMap ()->getLocalNumElements ());
924 if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
926 Kokkos::View<mag_type*, device_type> (
"colNorms", lclNumCols);
928 if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
929 equib.colDiagonalEntries =
930 Kokkos::View<val_type*, device_type> (
"colDiagonalEntries", lclNumCols);
935 copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
936 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
939 if (! imp.is_null ()) {
940 const LO numCols = 3;
949 mv_type colMapMV (G->getColMap (), numCols,
false);
951 copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
952 copy1DViewIntoMultiVectorColumn (colMapMV, 1, equib.colDiagonalEntries);
953 copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
955 mv_type domainMapMV (G->getDomainMap (), numCols,
true);
956 domainMapMV.doExport (colMapMV, *imp,
Tpetra::ADD);
959 copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
960 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
961 copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
968 template<
class SC,
class LO,
class GO,
class NT>
969 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
970 typename NT::device_type>
973 TEUCHOS_TEST_FOR_EXCEPTION
975 "computeRowOneNorms: Input matrix A must be fillComplete.");
978 Details::globalizeRowOneNorms (result, A);
982 template<
class SC,
class LO,
class GO,
class NT>
983 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
984 typename NT::device_type>
986 const bool assumeSymmetric)
988 TEUCHOS_TEST_FOR_EXCEPTION
990 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
993 Details::globalizeRowOneNorms (result, A);
994 if (! assumeSymmetric) {
998 Details::computeLocalRowScaledColumnNorms (result, A);
1000 Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
1012 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
1013 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1014 computeRowOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1016 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1017 computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1018 const bool assumeSymmetric);
1020 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.