10 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
20 #if KOKKOS_VERSION >= 40799
21 #include "KokkosKernels_ArithTraits.hpp"
23 #include "Kokkos_ArithTraits.hpp"
25 #include "Kokkos_Macros.hpp"
28 #include "Tpetra_CrsMatrix.hpp"
29 #include "Tpetra_Export.hpp"
30 #include "Tpetra_Map.hpp"
31 #include "Tpetra_MultiVector.hpp"
32 #include "Tpetra_RowMatrix.hpp"
33 #include "Kokkos_Core.hpp"
34 #include "Teuchos_CommHelpers.hpp"
40 template <
class SC,
class LO,
class GO,
class NT>
44 const LO lclNumRows =
static_cast<LO
>(rowMap.getLocalNumElements());
46 std::size_t maxNumEnt{0};
47 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
49 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
54 template <
class SC,
class LO,
class GO,
class NT>
55 void forEachLocalRowMatrixRow(
58 const std::size_t maxNumEnt,
61 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
62 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
65 using lids_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
66 using vals_type =
typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
67 lids_type indBuf(
"indices", maxNumEnt);
68 vals_type valBuf(
"values", maxNumEnt);
70 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
72 lids_type ind = Kokkos::subview(indBuf, std::make_pair((
size_t)0, numEnt));
73 vals_type val = Kokkos::subview(valBuf, std::make_pair((
size_t)0, numEnt));
75 doForEachRow(lclRow, ind, val, numEnt);
79 template <
class SC,
class LO,
class GO,
class NT>
80 void forEachLocalRowMatrixRow(
84 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ,
85 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& ,
89 const LO lclNumRows =
static_cast<LO
>(rowMap.getLocalNumElements());
90 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix(A);
92 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A, lclNumRows, maxNumEnt, doForEachRow);
98 template <
class SC,
class LO,
class GO,
class NT>
99 #if KOKKOS_VERSION >= 40799
104 typename NT::device_type>& result,
106 #if KOKKOS_VERSION >= 40799
107 using KAT = KokkosKernels::ArithTraits<SC>;
109 using KAT = Kokkos::ArithTraits<SC>;
111 using mag_type =
typename KAT::mag_type;
112 #if KOKKOS_VERSION >= 40799
113 using KAV = KokkosKernels::ArithTraits<typename KAT::val_type>;
115 using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
118 auto rowNorms_h = Kokkos::create_mirror_view(result.rowNorms);
122 auto rowScaledColNorms_h = Kokkos::create_mirror_view(result.rowScaledColNorms);
124 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
126 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
127 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
128 std::size_t numEnt) {
129 const mag_type rowNorm = rowNorms_h[lclRow];
130 for (std::size_t k = 0; k < numEnt; ++k) {
131 const mag_type matrixAbsVal = KAV::abs(val[k]);
132 const LO lclCol = ind[k];
134 rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
144 template <
class SC,
class LO,
class GO,
class NT>
145 #if KOKKOS_VERSION >= 40799
146 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
typename NT::device_type>
148 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
151 #if KOKKOS_VERSION >= 40799
152 using KAT = KokkosKernels::ArithTraits<SC>;
154 using KAT = Kokkos::ArithTraits<SC>;
156 using val_type =
typename KAT::val_type;
157 #if KOKKOS_VERSION >= 40799
158 using KAV = KokkosKernels::ArithTraits<val_type>;
160 using KAV = Kokkos::ArithTraits<val_type>;
162 using mag_type =
typename KAT::mag_type;
163 #if KOKKOS_VERSION >= 40799
164 using KAM = KokkosKernels::ArithTraits<mag_type>;
166 using KAM = Kokkos::ArithTraits<mag_type>;
168 using device_type =
typename NT::device_type;
173 const LO lclNumRows =
static_cast<LO
>(rowMap.getLocalNumElements());
174 const LO lclNumCols = 0;
175 constexpr
bool assumeSymmetric =
false;
176 equib_info_type result(lclNumRows, lclNumCols, assumeSymmetric);
177 auto result_h = result.createMirrorView();
179 const auto val_zero = KAV::zero();
180 const auto mag_zero = KAM::zero();
182 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
184 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
185 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
186 std::size_t numEnt) {
187 mag_type rowNorm = mag_zero;
188 val_type diagVal = val_zero;
189 const GO gblRow = rowMap.getGlobalElement(lclRow);
191 const GO lclDiagColInd = colMap.getLocalElement(gblRow);
193 for (std::size_t k = 0; k < numEnt; ++k) {
194 const val_type matrixVal = val[k];
195 if (KAV::isInf(matrixVal)) {
196 result_h.foundInf =
true;
198 if (KAV::isNan(matrixVal)) {
199 result_h.foundNan =
true;
201 const mag_type matrixAbsVal = KAV::abs(matrixVal);
202 rowNorm += matrixAbsVal;
203 const LO lclCol = ind[k];
204 if (lclCol == lclDiagColInd) {
211 if (diagVal == KAV::zero()) {
212 result_h.foundZeroDiag =
true;
214 if (rowNorm == KAM::zero()) {
215 result_h.foundZeroRowNorm =
true;
221 result_h.rowDiagonalEntries[lclRow] += diagVal;
222 result_h.rowNorms[lclRow] = rowNorm;
225 result.assign(result_h);
231 template <
class SC,
class LO,
class GO,
class NT>
232 #if KOKKOS_VERSION >= 40799
233 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
typename NT::device_type>
235 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
238 const bool assumeSymmetric) {
239 #if KOKKOS_VERSION >= 40799
240 using KAT = KokkosKernels::ArithTraits<SC>;
242 using KAT = Kokkos::ArithTraits<SC>;
244 using val_type =
typename KAT::val_type;
245 #if KOKKOS_VERSION >= 40799
246 using KAV = KokkosKernels::ArithTraits<val_type>;
248 using KAV = Kokkos::ArithTraits<val_type>;
250 using mag_type =
typename KAT::mag_type;
251 #if KOKKOS_VERSION >= 40799
252 using KAM = KokkosKernels::ArithTraits<mag_type>;
254 using KAM = Kokkos::ArithTraits<mag_type>;
256 using device_type =
typename NT::device_type;
260 const LO lclNumRows =
static_cast<LO
>(rowMap.getLocalNumElements());
261 const LO lclNumCols =
static_cast<LO
>(colMap.getLocalNumElements());
264 auto result_h = result.createMirrorView();
266 const auto val_zero = KAV::zero();
267 const auto mag_zero = KAM::zero();
269 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
271 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
272 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
273 std::size_t numEnt) {
274 mag_type rowNorm = mag_zero;
275 val_type diagVal = val_zero;
276 const GO gblRow = rowMap.getGlobalElement(lclRow);
278 const GO lclDiagColInd = colMap.getLocalElement(gblRow);
280 for (std::size_t k = 0; k < numEnt; ++k) {
281 const val_type matrixVal = val[k];
282 if (KAV::isInf(matrixVal)) {
283 result_h.foundInf =
true;
285 if (KAV::isNan(matrixVal)) {
286 result_h.foundNan =
true;
288 const mag_type matrixAbsVal = KAV::abs(matrixVal);
289 rowNorm += matrixAbsVal;
290 const LO lclCol = ind[k];
291 if (lclCol == lclDiagColInd) {
294 if (!assumeSymmetric) {
295 result_h.colNorms[lclCol] += matrixAbsVal;
301 if (diagVal == KAV::zero()) {
302 result_h.foundZeroDiag =
true;
304 if (rowNorm == KAM::zero()) {
305 result_h.foundZeroRowNorm =
true;
311 result_h.rowDiagonalEntries[lclRow] += diagVal;
312 result_h.rowNorms[lclRow] = rowNorm;
313 if (!assumeSymmetric &&
314 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
315 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
323 template <
class SC,
class LO,
class GO,
class NT>
324 class ComputeLocalRowScaledColumnNorms {
327 #if KOKKOS_VERSION >= 40799
328 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
330 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
332 #if KOKKOS_VERSION >= 40799
333 using mag_type =
typename KokkosKernels::ArithTraits<val_type>::mag_type;
335 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
338 using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
340 ComputeLocalRowScaledColumnNorms(
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
341 const Kokkos::View<const mag_type*, device_type>& rowNorms,
342 const crs_matrix_type& A)
343 : rowScaledColNorms_(rowScaledColNorms)
344 , rowNorms_(rowNorms)
345 , A_lcl_(A.getLocalMatrixDevice()) {}
347 KOKKOS_INLINE_FUNCTION
void operator()(
const typename policy_type::member_type& team)
const {
348 #if KOKKOS_VERSION >= 40799
349 using KAT = KokkosKernels::ArithTraits<val_type>;
351 using KAT = Kokkos::ArithTraits<val_type>;
354 const LO lclRow = team.league_rank();
355 const auto curRow = A_lcl_.rowConst(lclRow);
356 const mag_type rowNorm = rowNorms_[lclRow];
357 const LO numEnt = curRow.length;
358 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k) {
359 const mag_type matrixAbsVal = KAT::abs(curRow.value(k));
360 const LO lclCol = curRow.colidx(k);
362 Kokkos::atomic_add(&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
367 run(
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
368 const Kokkos::View<const mag_type*, device_type>& rowNorms,
369 const crs_matrix_type& A) {
370 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
372 functor_type functor(rowScaledColNorms, rowNorms, A);
373 const LO lclNumRows =
374 static_cast<LO
>(A.getRowMap()->getLocalNumElements());
375 Kokkos::parallel_for(
"computeLocalRowScaledColumnNorms",
376 policy_type(lclNumRows, Kokkos::AUTO), functor);
380 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
381 Kokkos::View<const mag_type*, device_type> rowNorms_;
384 local_matrix_device_type A_lcl_;
387 template <
class SC,
class LO,
class GO,
class NT>
388 #if KOKKOS_VERSION >= 40799
389 void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<
typename KokkosKernels::ArithTraits<SC>::val_type,
391 void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
393 typename NT::device_type>& result,
395 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
396 impl_type::run(result.rowScaledColNorms, result.rowNorms, A);
399 template <
class SC,
class LO,
class GO,
class NT>
400 #if KOKKOS_VERSION >= 40799
401 void computeLocalRowScaledColumnNorms(EquilibrationInfo<
typename KokkosKernels::ArithTraits<SC>::val_type,
403 void computeLocalRowScaledColumnNorms(EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
405 typename NT::device_type>& result,
408 #if KOKKOS_VERSION >= 40799
409 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
411 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
413 #if KOKKOS_VERSION >= 40799
414 using mag_type =
typename KokkosKernels::ArithTraits<val_type>::mag_type;
416 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
418 using device_type =
typename NT::device_type;
421 TEUCHOS_TEST_FOR_EXCEPTION(colMapPtr.get() ==
nullptr, std::invalid_argument,
422 "computeLocalRowScaledColumnNorms: "
423 "Input matrix A must have a nonnull column Map.");
424 const LO lclNumCols =
static_cast<LO
>(colMapPtr->getLocalNumElements());
425 if (static_cast<std::size_t>(result.rowScaledColNorms.extent(0)) !=
426 static_cast<std::size_t>(lclNumCols)) {
427 result.rowScaledColNorms =
428 Kokkos::View<mag_type*, device_type>(
"rowScaledColNorms", lclNumCols);
431 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
>(&A);
432 if (A_crs ==
nullptr) {
435 computeLocalRowScaledColumnNorms_CrsMatrix(result, *A_crs);
441 template <
class SC,
class LO,
class GO,
class NT>
442 class ComputeLocalRowOneNorms {
444 #if KOKKOS_VERSION >= 40799
445 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
447 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
449 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
450 using local_matrix_device_type =
451 typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
452 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
453 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
455 ComputeLocalRowOneNorms(
const equib_info_type& equib,
456 const local_matrix_device_type& A_lcl,
457 const local_map_type& rowMap,
458 const local_map_type& colMap)
471 using value_type = int;
473 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const {
477 KOKKOS_INLINE_FUNCTION
void
478 join(value_type& dst,
479 const value_type& src)
const {
483 KOKKOS_INLINE_FUNCTION
void
484 operator()(
const typename policy_type::member_type& team, value_type& dst)
const {
485 #if KOKKOS_VERSION >= 40799
486 using KAT = KokkosKernels::ArithTraits<val_type>;
488 using KAT = Kokkos::ArithTraits<val_type>;
490 using mag_type =
typename KAT::mag_type;
491 #if KOKKOS_VERSION >= 40799
492 using KAM = KokkosKernels::ArithTraits<mag_type>;
494 using KAM = Kokkos::ArithTraits<mag_type>;
497 const LO lclRow = team.league_rank();
498 const GO gblRow = rowMap_.getGlobalElement(lclRow);
500 const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
502 const auto curRow = A_lcl_.rowConst(lclRow);
503 const LO numEnt = curRow.length;
505 const auto val_zero = KAT::zero();
506 const auto mag_zero = KAM::zero();
508 mag_type rowNorm = mag_zero;
509 val_type diagVal = val_zero;
510 value_type dstThread{0};
512 Kokkos::parallel_reduce(
513 Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
514 const val_type matrixVal = curRow.value(k);
515 if (KAT::isInf(matrixVal)) {
518 if (KAT::isNan(matrixVal)) {
521 const mag_type matrixAbsVal = KAT::abs(matrixVal);
522 normContrib += matrixAbsVal;
523 const LO lclCol = curRow.colidx(k);
524 if (lclCol == lclDiagColInd) {
525 diagContrib = curRow.value(k);
528 Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
532 Kokkos::single(Kokkos::PerTeam(team), [&]() {
534 if (diagVal == KAT::zero()) {
537 if (rowNorm == KAM::zero()) {
540 equib_.rowDiagonalEntries[lclRow] = diagVal;
541 equib_.rowNorms[lclRow] = rowNorm;
546 equib_info_type equib_;
547 local_matrix_device_type A_lcl_;
548 local_map_type rowMap_;
549 local_map_type colMap_;
554 template <
class SC,
class LO,
class GO,
class NT>
555 class ComputeLocalRowAndColumnOneNorms {
557 #if KOKKOS_VERSION >= 40799
558 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
560 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
562 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
563 using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
564 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
565 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
568 ComputeLocalRowAndColumnOneNorms(
const equib_info_type& equib,
569 const local_matrix_device_type& A_lcl,
570 const local_map_type& rowMap,
571 const local_map_type& colMap)
584 using value_type = int;
586 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const {
590 KOKKOS_INLINE_FUNCTION
void
591 join(value_type& dst,
592 const value_type& src)
const {
596 KOKKOS_INLINE_FUNCTION
void
597 operator()(
const typename policy_type::member_type& team, value_type& dst)
const {
598 #if KOKKOS_VERSION >= 40799
599 using KAT = KokkosKernels::ArithTraits<val_type>;
601 using KAT = Kokkos::ArithTraits<val_type>;
603 using mag_type =
typename KAT::mag_type;
604 #if KOKKOS_VERSION >= 40799
605 using KAM = KokkosKernels::ArithTraits<mag_type>;
607 using KAM = Kokkos::ArithTraits<mag_type>;
610 const LO lclRow = team.league_rank();
611 const GO gblRow = rowMap_.getGlobalElement(lclRow);
613 const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
615 const auto curRow = A_lcl_.rowConst(lclRow);
616 const LO numEnt = curRow.length;
618 const auto val_zero = KAT::zero();
619 const auto mag_zero = KAM::zero();
621 mag_type rowNorm = mag_zero;
622 val_type diagVal = val_zero;
623 value_type dstThread{0};
625 Kokkos::parallel_reduce(
626 Kokkos::TeamThreadRange(team, numEnt), [&](
const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
627 const val_type matrixVal = curRow.value(k);
628 if (KAT::isInf(matrixVal)) {
631 if (KAT::isNan(matrixVal)) {
634 const mag_type matrixAbsVal = KAT::abs(matrixVal);
635 normContrib += matrixAbsVal;
636 const LO lclCol = curRow.colidx(k);
637 if (lclCol == lclDiagColInd) {
638 diagContrib = curRow.value(k);
640 if (!equib_.assumeSymmetric) {
641 Kokkos::atomic_add(&(equib_.colNorms[lclCol]), matrixAbsVal);
644 Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread));
648 Kokkos::single(Kokkos::PerTeam(team), [&]() {
650 if (diagVal == KAT::zero()) {
653 if (rowNorm == KAM::zero()) {
660 equib_.rowDiagonalEntries[lclRow] = diagVal;
661 equib_.rowNorms[lclRow] = rowNorm;
662 if (!equib_.assumeSymmetric &&
663 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
666 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
672 equib_info_type equib_;
673 local_matrix_device_type A_lcl_;
674 local_map_type rowMap_;
675 local_map_type colMap_;
680 template <
class SC,
class LO,
class GO,
class NT>
681 #if KOKKOS_VERSION >= 40799
682 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
typename NT::device_type>
684 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
687 using execution_space =
typename NT::device_type::execution_space;
688 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
689 using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
690 #if KOKKOS_VERSION >= 40799
691 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
693 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
695 using device_type =
typename NT::device_type;
698 const LO lclNumRows =
static_cast<LO
>(A.
getRowMap()->getLocalNumElements());
699 const LO lclNumCols = 0;
700 constexpr
bool assumeSymmetric =
false;
701 equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
707 Kokkos::parallel_reduce(
"computeLocalRowOneNorms",
708 policy_type(lclNumRows, Kokkos::AUTO), functor,
710 equib.foundInf = (result & 1) != 0;
711 equib.foundNan = (result & 2) != 0;
712 equib.foundZeroDiag = (result & 4) != 0;
713 equib.foundZeroRowNorm = (result & 8) != 0;
719 template <
class SC,
class LO,
class GO,
class NT>
720 #if KOKKOS_VERSION >= 40799
721 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
typename NT::device_type>
723 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
726 const bool assumeSymmetric) {
727 using execution_space =
typename NT::device_type::execution_space;
728 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
729 using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
730 #if KOKKOS_VERSION >= 40799
731 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
733 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
735 using device_type =
typename NT::device_type;
738 const LO lclNumRows =
static_cast<LO
>(A.
getRowMap()->getLocalNumElements());
739 const LO lclNumCols =
static_cast<LO
>(A.
getColMap()->getLocalNumElements());
740 equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
746 Kokkos::parallel_reduce(
"computeLocalRowAndColumnOneNorms",
747 policy_type(lclNumRows, Kokkos::AUTO), functor,
749 equib.foundInf = (result & 1) != 0;
750 equib.foundNan = (result & 2) != 0;
751 equib.foundZeroDiag = (result & 4) != 0;
752 equib.foundZeroRowNorm = (result & 8) != 0;
760 template <
class SC,
class LO,
class GO,
class NT>
761 #if KOKKOS_VERSION >= 40799
762 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
764 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
766 typename NT::device_type>
769 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
>(&A);
771 if (A_crs ==
nullptr) {
799 template <
class SC,
class LO,
class GO,
class NT>
800 #if KOKKOS_VERSION >= 40799
801 EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
typename NT::device_type>
803 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
806 const bool assumeSymmetric) {
808 const crs_matrix_type* A_crs =
dynamic_cast<const crs_matrix_type*
>(&A);
810 if (A_crs ==
nullptr) {
817 template <
class SC,
class LO,
class GO,
class NT>
818 auto getLocalView_1d_readOnly(
820 const LO whichColumn)
821 -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
822 Kokkos::ALL(), whichColumn)) {
823 if (X.isConstantStride()) {
824 return Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
825 Kokkos::ALL(), whichColumn);
827 auto X_whichColumn = X.getVector(whichColumn);
828 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadOnly),
833 template <
class SC,
class LO,
class GO,
class NT>
834 auto getLocalView_1d_writeOnly(
836 const LO whichColumn)
837 -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
838 Kokkos::ALL(), whichColumn)) {
839 if (X.isConstantStride()) {
840 return Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
841 Kokkos::ALL(), whichColumn);
843 auto X_whichColumn = X.getVectorNonConst(whichColumn);
844 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
849 template <
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
850 void copy1DViewIntoMultiVectorColumn(
852 const LO whichColumn,
853 const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
854 auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
858 template <
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
859 void copy1DViewIntoMultiVectorColumn_mag(
861 const LO whichColumn,
862 const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
863 #if KOKKOS_VERSION >= 40799
864 using KAT = KokkosKernels::ArithTraits<ViewValueType>;
866 using KAT = Kokkos::ArithTraits<ViewValueType>;
868 using execution_space =
typename NT::execution_space;
869 using range_type = Kokkos::RangePolicy<execution_space, LO>;
870 auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
871 Kokkos::parallel_for(
873 range_type(0, X_lcl.extent(0)),
874 KOKKOS_LAMBDA(
const LO i) {
875 X_lcl(i) = KAT::magnitude(view(i));
879 template <
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
880 void copyMultiVectorColumnInto1DView(
881 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
883 const LO whichColumn) {
884 auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
888 template <
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
889 void copyMultiVectorColumnInto1DView_mag(
890 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
892 const LO whichColumn) {
893 #if KOKKOS_VERSION >= 40799
894 using implScalar =
typename KokkosKernels::ArithTraits<SC>::val_type;
896 using implScalar =
typename Kokkos::ArithTraits<SC>::val_type;
898 #if KOKKOS_VERSION >= 40799
899 using KAT = KokkosKernels::ArithTraits<implScalar>;
901 using KAT = Kokkos::ArithTraits<implScalar>;
903 using range_type = Kokkos::RangePolicy<typename NT::execution_space, LO>;
904 auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
905 Kokkos::parallel_for(
907 range_type(0, X_lcl.extent(0)),
908 KOKKOS_LAMBDA(LO i) {
909 view(i) = KAT::magnitude(X_lcl(i));
913 template <
class OneDViewType,
class IndexType>
916 static_assert(OneDViewType::rank == 1,
917 "OneDViewType must be a rank-1 Kokkos::View.");
918 static_assert(std::is_integral<IndexType>::value,
919 "IndexType must be a built-in integer type.");
920 FindZero(
const OneDViewType& x)
924 KOKKOS_INLINE_FUNCTION
void
925 operator()(
const IndexType i,
int& result)
const {
926 using val_type =
typename OneDViewType::non_const_value_type;
927 #if KOKKOS_VERSION >= 40799
928 result = (x_(i) == KokkosKernels::ArithTraits<val_type>::zero()) ? 1 : result;
930 result = (x_(i) == Kokkos::ArithTraits<val_type>::zero()) ? 1 : result;
938 template <
class OneDViewType>
939 bool findZero(
const OneDViewType& x) {
940 using view_type =
typename OneDViewType::const_type;
941 using execution_space =
typename view_type::execution_space;
942 using size_type =
typename view_type::size_type;
943 using functor_type = FindZero<view_type, size_type>;
945 Kokkos::RangePolicy<execution_space, size_type> range(0, x.extent(0));
946 range.set_chunk_size(500);
949 Kokkos::parallel_reduce(
"findZero", range, functor_type(x), foundZero);
950 return foundZero == 1;
953 template <
class SC,
class LO,
class GO,
class NT>
954 #if KOKKOS_VERSION >= 40799
955 void globalizeRowOneNorms(EquilibrationInfo<
typename KokkosKernels::ArithTraits<SC>::val_type,
957 void globalizeRowOneNorms(EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
959 typename NT::device_type>& equib,
964 TEUCHOS_TEST_FOR_EXCEPTION(G.get() ==
nullptr, std::invalid_argument,
965 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
966 "(that is, getGraph() must return nonnull).");
967 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
968 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
970 auto exp = G->getExporter();
971 if (!exp.is_null()) {
981 mv_type rowMapMV(G->getRowMap(), 2,
false);
983 copy1DViewIntoMultiVectorColumn(rowMapMV, 0, equib.rowNorms);
984 copy1DViewIntoMultiVectorColumn(rowMapMV, 1, equib.rowDiagonalEntries);
986 mv_type rangeMapMV(G->getRangeMap(), 2,
true);
990 copyMultiVectorColumnInto1DView_mag(equib.rowNorms, rowMapMV, 0);
991 copyMultiVectorColumnInto1DView(equib.rowDiagonalEntries, rowMapMV, 1);
996 equib.foundZeroDiag = findZero(equib.rowDiagonalEntries);
997 equib.foundZeroRowNorm = findZero(equib.rowNorms);
1000 constexpr
int allReduceCount = 4;
1001 int lclNaughtyMatrix[allReduceCount];
1002 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
1003 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
1004 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
1005 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
1007 using Teuchos::outArg;
1008 using Teuchos::REDUCE_MAX;
1009 using Teuchos::reduceAll;
1010 auto comm = G->getComm();
1011 int gblNaughtyMatrix[allReduceCount];
1012 reduceAll<int, int>(*comm, REDUCE_MAX, allReduceCount,
1013 lclNaughtyMatrix, gblNaughtyMatrix);
1015 equib.foundInf = gblNaughtyMatrix[0] == 1;
1016 equib.foundNan = gblNaughtyMatrix[1] == 1;
1017 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
1018 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
1021 template <
class SC,
class LO,
class GO,
class NT>
1022 #if KOKKOS_VERSION >= 40799
1023 void globalizeColumnOneNorms(EquilibrationInfo<
typename KokkosKernels::ArithTraits<SC>::val_type,
1025 void globalizeColumnOneNorms(EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
1027 typename NT::device_type>& equib,
1029 const bool assumeSymmetric)
1031 #if KOKKOS_VERSION >= 40799
1032 using val_type =
typename KokkosKernels::ArithTraits<SC>::val_type;
1034 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
1036 #if KOKKOS_VERSION >= 40799
1037 using mag_type =
typename KokkosKernels::ArithTraits<val_type>::mag_type;
1039 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
1042 using device_type =
typename NT::device_type;
1045 TEUCHOS_TEST_FOR_EXCEPTION(G.get() ==
nullptr, std::invalid_argument,
1046 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
1047 "(that is, getGraph() must return nonnull).");
1048 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
1049 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
1051 auto imp = G->getImporter();
1052 if (assumeSymmetric) {
1053 const LO numCols = 2;
1057 mv_type rowNorms_domMap(G->getDomainMap(), numCols,
false);
1058 const bool rowMapSameAsDomainMap = G->getRowMap()->isSameAs(*(G->getDomainMap()));
1059 if (rowMapSameAsDomainMap) {
1060 copy1DViewIntoMultiVectorColumn(rowNorms_domMap, 0, equib.rowNorms);
1061 copy1DViewIntoMultiVectorColumn_mag(rowNorms_domMap, 1, equib.rowDiagonalEntries);
1066 mv_type rowNorms_rowMap(G->getRowMap(), numCols,
true);
1067 copy1DViewIntoMultiVectorColumn(rowNorms_rowMap, 0, equib.rowNorms);
1068 copy1DViewIntoMultiVectorColumn_mag(rowNorms_rowMap, 1, equib.rowDiagonalEntries);
1074 std::unique_ptr<mv_type> rowNorms_colMap;
1075 if (imp.is_null()) {
1078 std::unique_ptr<mv_type>(
new mv_type(rowNorms_domMap, *(G->getColMap())));
1081 std::unique_ptr<mv_type>(
new mv_type(G->getColMap(), numCols,
true));
1086 const LO lclNumCols =
1087 static_cast<LO
>(G->getColMap()->getLocalNumElements());
1088 if (static_cast<LO>(equib.colNorms.extent(0)) != lclNumCols) {
1090 Kokkos::View<mag_type*, device_type>(
"colNorms", lclNumCols);
1092 if (static_cast<LO>(equib.colDiagonalEntries.extent(0)) != lclNumCols) {
1093 equib.colDiagonalEntries =
1094 Kokkos::View<val_type*, device_type>(
"colDiagonalEntries", lclNumCols);
1099 copyMultiVectorColumnInto1DView(equib.colNorms, *rowNorms_colMap, 0);
1100 copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, *rowNorms_colMap, 1);
1102 if (!imp.is_null()) {
1103 const LO numCols = 3;
1112 mv_type colMapMV(G->getColMap(), numCols,
false);
1114 copy1DViewIntoMultiVectorColumn(colMapMV, 0, equib.colNorms);
1115 copy1DViewIntoMultiVectorColumn_mag(colMapMV, 1, equib.colDiagonalEntries);
1116 copy1DViewIntoMultiVectorColumn(colMapMV, 2, equib.rowScaledColNorms);
1118 mv_type domainMapMV(G->getDomainMap(), numCols,
true);
1119 domainMapMV.doExport(colMapMV, *imp,
Tpetra::ADD);
1122 copyMultiVectorColumnInto1DView(equib.colNorms, colMapMV, 0);
1123 copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, colMapMV, 1);
1124 copyMultiVectorColumnInto1DView(equib.rowScaledColNorms, colMapMV, 2);
1131 template <
class SC,
class LO,
class GO,
class NT>
1132 #if KOKKOS_VERSION >= 40799
1133 Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1135 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1137 typename NT::device_type>
1139 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::invalid_argument,
1140 "computeRowOneNorms: Input matrix A must be fillComplete.");
1143 Details::globalizeRowOneNorms(result, A);
1147 template <
class SC,
class LO,
class GO,
class NT>
1148 #if KOKKOS_VERSION >= 40799
1149 Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1151 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1153 typename NT::device_type>
1155 const bool assumeSymmetric) {
1156 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::invalid_argument,
1157 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1160 Details::globalizeRowOneNorms(result, A);
1161 if (!assumeSymmetric) {
1165 Details::computeLocalRowScaledColumnNorms(result, A);
1167 Details::globalizeColumnOneNorms(result, A, assumeSymmetric);
1179 #if KOKKOS_VERSION >= 40799
1180 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1181 template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1182 computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1184 template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1185 computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1186 const bool assumeSymmetric);
1189 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1190 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1191 computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1193 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1194 computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1195 const bool assumeSymmetric);
1199 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Declaration of Tpetra::Details::EquilibrationInfo.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result.rowNorms has been computed (and globalized), and compute result.rowScaledColNorms.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
typename Node::device_type device_type
The Kokkos device type.
Replace existing values with new values.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
void assign(const EquilibrationInfo< ScalarType, SrcDeviceType > &src)
Deep-copy src into *this.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
A read-only, row-oriented interface to a sparse matrix.
virtual void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.